diff options
author | Stefan Israelsson Tampe <stefan.itampe@gmail.com> | 2019-03-12 18:30:03 +0100 |
---|---|---|
committer | Stefan Israelsson Tampe <stefan.itampe@gmail.com> | 2019-03-12 18:30:03 +0100 |
commit | 0bd6e0b49bf32eaef5419485611d48361bf482bb (patch) | |
tree | ebd504a1d3eb8fbffe3b651e22ea96d1740683db /modules | |
parent | b8a78bac9e310893bb5b931d46016a95be1899c8 (diff) |
cleanup
Diffstat (limited to 'modules')
35 files changed, 0 insertions, 10783 deletions
diff --git a/modules/language/python/module/.#_md5.scm b/modules/language/python/module/.#_md5.scm deleted file mode 120000 index c8d0ed8..0000000 --- a/modules/language/python/module/.#_md5.scm +++ /dev/null @@ -1 +0,0 @@ -stis@lapwine.2086:1535196580
\ No newline at end of file diff --git a/modules/language/python/module/.#_sha1.scm b/modules/language/python/module/.#_sha1.scm deleted file mode 120000 index c8d0ed8..0000000 --- a/modules/language/python/module/.#_sha1.scm +++ /dev/null @@ -1 +0,0 @@ -stis@lapwine.2086:1535196580
\ No newline at end of file diff --git a/modules/language/python/module/.#_sha256.scm b/modules/language/python/module/.#_sha256.scm deleted file mode 120000 index c8d0ed8..0000000 --- a/modules/language/python/module/.#_sha256.scm +++ /dev/null @@ -1 +0,0 @@ -stis@lapwine.2086:1535196580
\ No newline at end of file diff --git a/modules/language/python/module/.#bz2.py b/modules/language/python/module/.#bz2.py deleted file mode 120000 index c8d0ed8..0000000 --- a/modules/language/python/module/.#bz2.py +++ /dev/null @@ -1 +0,0 @@ -stis@lapwine.2086:1535196580
\ No newline at end of file diff --git a/modules/language/python/module/email/mime/#audio.py# b/modules/language/python/module/email/mime/#audio.py# deleted file mode 100644 index ccd08fe..0000000 --- a/modules/language/python/module/email/mime/#audio.py# +++ /dev/null @@ -1,76 +0,0 @@ -module(email,mime,audio) - -# Copyright (C) 2001-2007 Python Software Foundation -# Author: Anthony Baxter -# Contact: email-sig@python.org - -"""Class representing audio/* type MIME documents.""" - -__all__ = ['MIMEAudio'] - -import sndhdr - -from io import BytesIO -import email.encoders as encoders -from email.mime.nonmultipart import MIMENonMultipart - - - -_sndhdr_MIMEmap = {'au' : 'basic', - 'wav' :'x-wav', - 'aiff':'x-aiff', - 'aifc':'x-aiff', - } - -# There are others in sndhdr that don't have MIME types. :( -# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma?? -def _whatsnd(data): - """Try to identify a sound file type. - - sndhdr.what() has a pretty cruddy interface, unfortunately. This is why - we re-do it here. It would be easier to reverse engineer the Unix 'file' - command and use the standard 'magic' file, as shipped with a modern Unix. - """ - hdr = data[:512] - fakefile = BytesIO(hdr) - for testfn in sndhdr.tests: - res = testfn(hdr, fakefile) - if res is not None: - return _sndhdr_MIMEmap.get(res[0]) - return None - - - -class MIMEAudio(MIMENonMultipart): - """Class for generating audio/* MIME documents.""" - - def __init__(self, _audiodata, _subtype=None, - _encoder=encoders.encode_base64, *, policy=None, **_params): - """Create an audio/* type MIME document. - - _audiodata is a string containing the raw audio data. If this data - can be decoded by the standard Python `sndhdr' module, then the - subtype will be automatically included in the Content-Type header. - Otherwise, you can specify the specific audio subtype via the - _subtype parameter. If _subtype is not given, and no subtype can be - guessed, a TypeError is raised. - - _encoder is a function which will perform the actual encoding for - transport of the image data. It takes one argument, which is this - Image instance. It should use get_payload() and set_payload() to - change the payload to the encoded form. It should also add any - Content-Transfer-Encoding or other headers to the message as - necessary. The default encoding is Base64. - - Any additional keyword arguments are passed to the base class - constructor, which turns them into parameters on the Content-Type - header. - """ - if _subtype is None: - _subtype = _whatsnd(_audiodata) - if _subtype is None: - raise TypeError('Could not find audio MIME subtype') - MIMENonMultipart.__init__(self, 'audio', _subtype, policy=policy, - **_params) - self.set_payload(_audiodata) - _encoder(self) diff --git a/modules/language/python/module/email/mime/#image.py# b/modules/language/python/module/email/mime/#image.py# deleted file mode 100644 index 5346dac..0000000 --- a/modules/language/python/module/email/mime/#image.py# +++ /dev/null @@ -1,49 +0,0 @@ -module(email,mime,image) - -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Class representing image/* type MIME documents.""" - -__all__ = ['MIMEImage'] - -import imghdr - -import email.encoders as encoders -from email.mime.nonmultipart import MIMENonMultipart - - - -class MIMEImage(MIMENonMultipart): - """Class for generating image/* type MIME documents.""" - - def __init__(self, _imagedata, _subtype=None, - _encoder=encoders.encode_base64, *, policy=None, **_params): - """Create an image/* type MIME document. - - _imagedata is a string containing the raw image data. If this data - can be decoded by the standard Python `imghdr' module, then the - subtype will be automatically included in the Content-Type header. - Otherwise, you can specify the specific image subtype via the _subtype - parameter. - - _encoder is a function which will perform the actual encoding for - transport of the image data. It takes one argument, which is this - Image instance. It should use get_payload() and set_payload() to - change the payload to the encoded form. It should also add any - Content-Transfer-Encoding or other headers to the message as - necessary. The default encoding is Base64. - - Any additional keyword arguments are passed to the base class - constructor, which turns them into parameters on the Content-Type - header. - """ - if _subtype is None: - _subtype = imghdr.what(None, _imagedata) - if _subtype is None: - raise TypeError('Could not guess image MIME subtype') - MIMENonMultipart.__init__(self, 'image', _subtype, policy=policy, - **_params) - self.set_payload(_imagedata) - _encoder(self) diff --git a/modules/language/python/module/email/mime/.#audio.py b/modules/language/python/module/email/mime/.#audio.py deleted file mode 120000 index 6876c49..0000000 --- a/modules/language/python/module/email/mime/.#audio.py +++ /dev/null @@ -1 +0,0 @@ -stis@lapwine.3569:1536593089
\ No newline at end of file diff --git a/modules/language/python/module/email/mime/application.py~ b/modules/language/python/module/email/mime/application.py~ deleted file mode 100644 index 6877e55..0000000 --- a/modules/language/python/module/email/mime/application.py~ +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Keith Dart -# Contact: email-sig@python.org - -"""Class representing application/* type MIME documents.""" - -__all__ = ["MIMEApplication"] - -from email import encoders -from email.mime.nonmultipart import MIMENonMultipart - - -class MIMEApplication(MIMENonMultipart): - """Class for generating application/* MIME documents.""" - - def __init__(self, _data, _subtype='octet-stream', - _encoder=encoders.encode_base64, *, policy=None, **_params): - """Create an application/* type MIME document. - - _data is a string containing the raw application data. - - _subtype is the MIME content type subtype, defaulting to - 'octet-stream'. - - _encoder is a function which will perform the actual encoding for - transport of the application data, defaulting to base64 encoding. - - Any additional keyword arguments are passed to the base class - constructor, which turns them into parameters on the Content-Type - header. - """ - if _subtype is None: - raise TypeError('Invalid application MIME subtype') - MIMENonMultipart.__init__(self, 'application', _subtype, policy=policy, - **_params) - self.set_payload(_data) - _encoder(self) diff --git a/modules/language/python/module/email/mime/multipart.py~ b/modules/language/python/module/email/mime/multipart.py~ deleted file mode 100644 index 2d3f288..0000000 --- a/modules/language/python/module/email/mime/multipart.py~ +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2002-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Base class for MIME multipart/* type messages.""" - -__all__ = ['MIMEMultipart'] - -from email.mime.base import MIMEBase - - - -class MIMEMultipart(MIMEBase): - """Base class for MIME multipart/* type messages.""" - - def __init__(self, _subtype='mixed', boundary=None, _subparts=None, - *, policy=None, - **_params): - """Creates a multipart/* type message. - - By default, creates a multipart/mixed message, with proper - Content-Type and MIME-Version headers. - - _subtype is the subtype of the multipart content type, defaulting to - `mixed'. - - boundary is the multipart boundary string. By default it is - calculated as needed. - - _subparts is a sequence of initial subparts for the payload. It - must be an iterable object, such as a list. You can always - attach new subparts to the message by using the attach() method. - - Additional parameters for the Content-Type header are taken from the - keyword arguments (or passed into the _params argument). - """ - MIMEBase.__init__(self, 'multipart', _subtype, policy=policy, **_params) - - # Initialise _payload to an empty list as the Message superclass's - # implementation of is_multipart assumes that _payload is a list for - # multipart messages. - self._payload = [] - - if _subparts: - for p in _subparts: - self.attach(p) - if boundary: - self.set_boundary(boundary) diff --git a/modules/language/python/module/http/cookiejar.py~ b/modules/language/python/module/http/cookiejar.py~ deleted file mode 100644 index adf956d..0000000 --- a/modules/language/python/module/http/cookiejar.py~ +++ /dev/null @@ -1,2098 +0,0 @@ -r"""HTTP cookie handling for web clients. - -This module has (now fairly distant) origins in Gisle Aas' Perl module -HTTP::Cookies, from the libwww-perl library. - -Docstrings, comments and debug strings in this code refer to the -attributes of the HTTP cookie system as cookie-attributes, to distinguish -them clearly from Python attributes. - -Class diagram (note that BSDDBCookieJar and the MSIE* classes are not -distributed with the Python standard library, but are available from -http://wwwsearch.sf.net/): - - CookieJar____ - / \ \ - FileCookieJar \ \ - / | \ \ \ - MozillaCookieJar | LWPCookieJar \ \ - | | \ - | ---MSIEBase | \ - | / | | \ - | / MSIEDBCookieJar BSDDBCookieJar - |/ - MSIECookieJar - -""" - -__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy', - 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar'] - -import copy -import datetime -import re -import time -import urllib.parse, urllib.request -try: - import threading as _threading -except ImportError: - import dummy_threading as _threading -import http.client # only for the default HTTP port -from calendar import timegm - -debug = False # set to True to enable debugging via the logging module -logger = None - -def _debug(*args): - if not debug: - return - global logger - if not logger: - import logging - logger = logging.getLogger("http.cookiejar") - return logger.debug(*args) - - -DEFAULT_HTTP_PORT = str(http.client.HTTP_PORT) -MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " - "instance initialised with one)") - -def _warn_unhandled_exception(): - # There are a few catch-all except: statements in this module, for - # catching input that's bad in unexpected ways. Warn if any - # exceptions are caught there. - import io, warnings, traceback - f = io.StringIO() - traceback.print_exc(None, f) - msg = f.getvalue() - warnings.warn("http.cookiejar bug!\n%s" % msg, stacklevel=2) - - -# Date/time conversion -# ----------------------------------------------------------------------------- - -EPOCH_YEAR = 1970 -def _timegm(tt): - year, month, mday, hour, min, sec = tt[:6] - if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and - (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)): - return timegm(tt) - else: - return None - -DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] -MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] -MONTHS_LOWER = [] -for month in MONTHS: MONTHS_LOWER.append(month.lower()) - -def time2isoz(t=None): - """Return a string representing time in seconds since epoch, t. - - If the function is called without an argument, it will use the current - time. - - The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ", - representing Universal Time (UTC, aka GMT). An example of this format is: - - 1994-11-24 08:49:37Z - - """ - if t is None: - dt = datetime.datetime.utcnow() - else: - dt = datetime.datetime.utcfromtimestamp(t) - return "%04d-%02d-%02d %02d:%02d:%02dZ" % ( - dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second) - -def time2netscape(t=None): - """Return a string representing time in seconds since epoch, t. - - If the function is called without an argument, it will use the current - time. - - The format of the returned string is like this: - - Wed, DD-Mon-YYYY HH:MM:SS GMT - - """ - if t is None: - dt = datetime.datetime.utcnow() - else: - dt = datetime.datetime.utcfromtimestamp(t) - return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % ( - DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1], - dt.year, dt.hour, dt.minute, dt.second) - - -UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None} - -TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII) -def offset_from_tz_string(tz): - offset = None - if tz in UTC_ZONES: - offset = 0 - else: - m = TIMEZONE_RE.search(tz) - if m: - offset = 3600 * int(m.group(2)) - if m.group(3): - offset = offset + 60 * int(m.group(3)) - if m.group(1) == '-': - offset = -offset - return offset - -def _str2time(day, mon, yr, hr, min, sec, tz): - yr = int(yr) - if yr > datetime.MAXYEAR: - return None - - # translate month name to number - # month numbers start with 1 (January) - try: - mon = MONTHS_LOWER.index(mon.lower())+1 - except ValueError: - # maybe it's already a number - try: - imon = int(mon) - except ValueError: - return None - if 1 <= imon <= 12: - mon = imon - else: - return None - - # make sure clock elements are defined - if hr is None: hr = 0 - if min is None: min = 0 - if sec is None: sec = 0 - - day = int(day) - hr = int(hr) - min = int(min) - sec = int(sec) - - if yr < 1000: - # find "obvious" year - cur_yr = time.localtime(time.time())[0] - m = cur_yr % 100 - tmp = yr - yr = yr + cur_yr - m - m = m - tmp - if abs(m) > 50: - if m > 0: yr = yr + 100 - else: yr = yr - 100 - - # convert UTC time tuple to seconds since epoch (not timezone-adjusted) - t = _timegm((yr, mon, day, hr, min, sec, tz)) - - if t is not None: - # adjust time using timezone string, to get absolute time since epoch - if tz is None: - tz = "UTC" - tz = tz.upper() - offset = offset_from_tz_string(tz) - if offset is None: - return None - t = t - offset - - return t - -STRICT_DATE_RE = re.compile( - r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " - r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII) -WEEKDAY_RE = re.compile( - r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII) -LOOSE_HTTP_DATE_RE = re.compile( - r"""^ - (\d\d?) # day - (?:\s+|[-\/]) - (\w+) # month - (?:\s+|[-\/]) - (\d+) # year - (?: - (?:\s+|:) # separator before clock - (\d\d?):(\d\d) # hour:min - (?::(\d\d))? # optional seconds - )? # optional clock - \s* - ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone - \s* - (?:\(\w+\))? # ASCII representation of timezone in parens. - \s*$""", re.X | re.ASCII) -def http2time(text): - """Returns time in seconds since epoch of time represented by a string. - - Return value is an integer. - - None is returned if the format of str is unrecognized, the time is outside - the representable range, or the timezone string is not recognized. If the - string contains no timezone, UTC is assumed. - - The timezone in the string may be numerical (like "-0800" or "+0100") or a - string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the - timezone strings equivalent to UTC (zero offset) are known to the function. - - The function loosely parses the following formats: - - Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format - Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format - Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format - 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday) - 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday) - 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday) - - The parser ignores leading and trailing whitespace. The time may be - absent. - - If the year is given with only 2 digits, the function will select the - century that makes the year closest to the current date. - - """ - # fast exit for strictly conforming string - m = STRICT_DATE_RE.search(text) - if m: - g = m.groups() - mon = MONTHS_LOWER.index(g[1].lower()) + 1 - tt = (int(g[2]), mon, int(g[0]), - int(g[3]), int(g[4]), float(g[5])) - return _timegm(tt) - - # No, we need some messy parsing... - - # clean up - text = text.lstrip() - text = WEEKDAY_RE.sub("", text, 1) # Useless weekday - - # tz is time zone specifier string - day, mon, yr, hr, min, sec, tz = [None]*7 - - # loose regexp parse - m = LOOSE_HTTP_DATE_RE.search(text) - if m is not None: - day, mon, yr, hr, min, sec, tz = m.groups() - else: - return None # bad format - - return _str2time(day, mon, yr, hr, min, sec, tz) - -ISO_DATE_RE = re.compile( - r"""^ - (\d{4}) # year - [-\/]? - (\d\d?) # numerical month - [-\/]? - (\d\d?) # day - (?: - (?:\s+|[-:Tt]) # separator before clock - (\d\d?):?(\d\d) # hour:min - (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional) - )? # optional clock - \s* - ([-+]?\d\d?:?(:?\d\d)? - |Z|z)? # timezone (Z is "zero meridian", i.e. GMT) - \s*$""", re.X | re. ASCII) -def iso2time(text): - """ - As for http2time, but parses the ISO 8601 formats: - - 1994-02-03 14:15:29 -0100 -- ISO 8601 format - 1994-02-03 14:15:29 -- zone is optional - 1994-02-03 -- only date - 1994-02-03T14:15:29 -- Use T as separator - 19940203T141529Z -- ISO 8601 compact format - 19940203 -- only date - - """ - # clean up - text = text.lstrip() - - # tz is time zone specifier string - day, mon, yr, hr, min, sec, tz = [None]*7 - - # loose regexp parse - m = ISO_DATE_RE.search(text) - if m is not None: - # XXX there's an extra bit of the timezone I'm ignoring here: is - # this the right thing to do? - yr, mon, day, hr, min, sec, tz, _ = m.groups() - else: - return None # bad format - - return _str2time(day, mon, yr, hr, min, sec, tz) - - -# Header parsing -# ----------------------------------------------------------------------------- - -def unmatched(match): - """Return unmatched part of re.Match object.""" - start, end = match.span(0) - return match.string[:start]+match.string[end:] - -HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)") -HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"") -HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)") -HEADER_ESCAPE_RE = re.compile(r"\\(.)") -def split_header_words(header_values): - r"""Parse header values into a list of lists containing key,value pairs. - - The function knows how to deal with ",", ";" and "=" as well as quoted - values after "=". A list of space separated tokens are parsed as if they - were separated by ";". - - If the header_values passed as argument contains multiple values, then they - are treated as if they were a single value separated by comma ",". - - This means that this function is useful for parsing header fields that - follow this syntax (BNF as from the HTTP/1.1 specification, but we relax - the requirement for tokens). - - headers = #header - header = (token | parameter) *( [";"] (token | parameter)) - - token = 1*<any CHAR except CTLs or separators> - separators = "(" | ")" | "<" | ">" | "@" - | "," | ";" | ":" | "\" | <"> - | "/" | "[" | "]" | "?" | "=" - | "{" | "}" | SP | HT - - quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) - qdtext = <any TEXT except <">> - quoted-pair = "\" CHAR - - parameter = attribute "=" value - attribute = token - value = token | quoted-string - - Each header is represented by a list of key/value pairs. The value for a - simple token (not part of a parameter) is None. Syntactically incorrect - headers will not necessarily be parsed as you would want. - - This is easier to describe with some examples: - - >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz']) - [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]] - >>> split_header_words(['text/html; charset="iso-8859-1"']) - [[('text/html', None), ('charset', 'iso-8859-1')]] - >>> split_header_words([r'Basic realm="\"foo\bar\""']) - [[('Basic', None), ('realm', '"foobar"')]] - - """ - assert not isinstance(header_values, str) - result = [] - for text in header_values: - orig_text = text - pairs = [] - while text: - m = HEADER_TOKEN_RE.search(text) - if m: - text = unmatched(m) - name = m.group(1) - m = HEADER_QUOTED_VALUE_RE.search(text) - if m: # quoted value - text = unmatched(m) - value = m.group(1) - value = HEADER_ESCAPE_RE.sub(r"\1", value) - else: - m = HEADER_VALUE_RE.search(text) - if m: # unquoted value - text = unmatched(m) - value = m.group(1) - value = value.rstrip() - else: - # no value, a lone token - value = None - pairs.append((name, value)) - elif text.lstrip().startswith(","): - # concatenated headers, as per RFC 2616 section 4.2 - text = text.lstrip()[1:] - if pairs: result.append(pairs) - pairs = [] - else: - # skip junk - non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text) - assert nr_junk_chars > 0, ( - "split_header_words bug: '%s', '%s', %s" % - (orig_text, text, pairs)) - text = non_junk - if pairs: result.append(pairs) - return result - -HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])") -def join_header_words(lists): - """Do the inverse (almost) of the conversion done by split_header_words. - - Takes a list of lists of (key, value) pairs and produces a single header - value. Attribute values are quoted if needed. - - >>> join_header_words([[("text/plain", None), ("charset", "iso-8859-1")]]) - 'text/plain; charset="iso-8859-1"' - >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859-1")]]) - 'text/plain, charset="iso-8859-1"' - - """ - headers = [] - for pairs in lists: - attr = [] - for k, v in pairs: - if v is not None: - if not re.search(r"^\w+$", v): - v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \ - v = '"%s"' % v - k = "%s=%s" % (k, v) - attr.append(k) - if attr: headers.append("; ".join(attr)) - return ", ".join(headers) - -def strip_quotes(text): - if text.startswith('"'): - text = text[1:] - if text.endswith('"'): - text = text[:-1] - return text - -def parse_ns_headers(ns_headers): - """Ad-hoc parser for Netscape protocol cookie-attributes. - - The old Netscape cookie format for Set-Cookie can for instance contain - an unquoted "," in the expires field, so we have to use this ad-hoc - parser instead of split_header_words. - - XXX This may not make the best possible effort to parse all the crap - that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient - parser is probably better, so could do worse than following that if - this ever gives any trouble. - - Currently, this is also used for parsing RFC 2109 cookies. - - """ - known_attrs = ("expires", "domain", "path", "secure", - # RFC 2109 attrs (may turn up in Netscape cookies, too) - "version", "port", "max-age") - - result = [] - for ns_header in ns_headers: - pairs = [] - version_set = False - - # XXX: The following does not strictly adhere to RFCs in that empty - # names and values are legal (the former will only appear once and will - # be overwritten if multiple occurrences are present). This is - # mostly to deal with backwards compatibility. - for ii, param in enumerate(ns_header.split(';')): - param = param.strip() - - key, sep, val = param.partition('=') - key = key.strip() - - if not key: - if ii == 0: - break - else: - continue - - # allow for a distinction between present and empty and missing - # altogether - val = val.strip() if sep else None - - if ii != 0: - lc = key.lower() - if lc in known_attrs: - key = lc - - if key == "version": - # This is an RFC 2109 cookie. - if val is not None: - val = strip_quotes(val) - version_set = True - elif key == "expires": - # convert expires date to seconds since epoch - if val is not None: - val = http2time(strip_quotes(val)) # None if invalid - pairs.append((key, val)) - - if pairs: - if not version_set: - pairs.append(("version", "0")) - result.append(pairs) - - return result - - -IPV4_RE = re.compile(r"\.\d+$", re.ASCII) -def is_HDN(text): - """Return True if text is a host domain name.""" - # XXX - # This may well be wrong. Which RFC is HDN defined in, if any (for - # the purposes of RFC 2965)? - # For the current implementation, what about IPv6? Remember to look - # at other uses of IPV4_RE also, if change this. - if IPV4_RE.search(text): - return False - if text == "": - return False - if text[0] == "." or text[-1] == ".": - return False - return True - -def domain_match(A, B): - """Return True if domain A domain-matches domain B, according to RFC 2965. - - A and B may be host domain names or IP addresses. - - RFC 2965, section 1: - - Host names can be specified either as an IP address or a HDN string. - Sometimes we compare one host name with another. (Such comparisons SHALL - be case-insensitive.) Host A's name domain-matches host B's if - - * their host name strings string-compare equal; or - - * A is a HDN string and has the form NB, where N is a non-empty - name string, B has the form .B', and B' is a HDN string. (So, - x.y.com domain-matches .Y.com but not Y.com.) - - Note that domain-match is not a commutative operation: a.b.c.com - domain-matches .c.com, but not the reverse. - - """ - # Note that, if A or B are IP addresses, the only relevant part of the - # definition of the domain-match algorithm is the direct string-compare. - A = A.lower() - B = B.lower() - if A == B: - return True - if not is_HDN(A): - return False - i = A.rfind(B) - if i == -1 or i == 0: - # A does not have form NB, or N is the empty string - return False - if not B.startswith("."): - return False - if not is_HDN(B[1:]): - return False - return True - -def liberal_is_HDN(text): - """Return True if text is a sort-of-like a host domain name. - - For accepting/blocking domains. - - """ - if IPV4_RE.search(text): - return False - return True - -def user_domain_match(A, B): - """For blocking/accepting domains. - - A and B may be host domain names or IP addresses. - - """ - A = A.lower() - B = B.lower() - if not (liberal_is_HDN(A) and liberal_is_HDN(B)): - if A == B: - # equal IP addresses - return True - return False - initial_dot = B.startswith(".") - if initial_dot and A.endswith(B): - return True - if not initial_dot and A == B: - return True - return False - -cut_port_re = re.compile(r":\d+$", re.ASCII) -def request_host(request): - """Return request-host, as defined by RFC 2965. - - Variation from RFC: returned value is lowercased, for convenient - comparison. - - """ - url = request.get_full_url() - host = urllib.parse.urlparse(url)[1] - if host == "": - host = request.get_header("Host", "") - - # remove port, if present - host = cut_port_re.sub("", host, 1) - return host.lower() - -def eff_request_host(request): - """Return a tuple (request-host, effective request-host name). - - As defined by RFC 2965, except both are lowercased. - - """ - erhn = req_host = request_host(request) - if req_host.find(".") == -1 and not IPV4_RE.search(req_host): - erhn = req_host + ".local" - return req_host, erhn - -def request_path(request): - """Path component of request-URI, as defined by RFC 2965.""" - url = request.get_full_url() - parts = urllib.parse.urlsplit(url) - path = escape_path(parts.path) - if not path.startswith("/"): - # fix bad RFC 2396 absoluteURI - path = "/" + path - return path - -def request_port(request): - host = request.host - i = host.find(':') - if i >= 0: - port = host[i+1:] - try: - int(port) - except ValueError: - _debug("nonnumeric port: '%s'", port) - return None - else: - port = DEFAULT_HTTP_PORT - return port - -# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't -# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). -HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()" -ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])") -def uppercase_escaped_char(match): - return "%%%s" % match.group(1).upper() -def escape_path(path): - """Escape any invalid characters in HTTP URL, and uppercase all escapes.""" - # There's no knowing what character encoding was used to create URLs - # containing %-escapes, but since we have to pick one to escape invalid - # path characters, we pick UTF-8, as recommended in the HTML 4.0 - # specification: - # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1 - # And here, kind of: draft-fielding-uri-rfc2396bis-03 - # (And in draft IRI specification: draft-duerst-iri-05) - # (And here, for new URI schemes: RFC 2718) - path = urllib.parse.quote(path, HTTP_PATH_SAFE) - path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) - return path - -def reach(h): - """Return reach of host h, as defined by RFC 2965, section 1. - - The reach R of a host name H is defined as follows: - - * If - - - H is the host domain name of a host; and, - - - H has the form A.B; and - - - A has no embedded (that is, interior) dots; and - - - B has at least one embedded dot, or B is the string "local". - then the reach of H is .B. - - * Otherwise, the reach of H is H. - - >>> reach("www.acme.com") - '.acme.com' - >>> reach("acme.com") - 'acme.com' - >>> reach("acme.local") - '.local' - - """ - i = h.find(".") - if i >= 0: - #a = h[:i] # this line is only here to show what a is - b = h[i+1:] - i = b.find(".") - if is_HDN(h) and (i >= 0 or b == "local"): - return "."+b - return h - -def is_third_party(request): - """ - - RFC 2965, section 3.3.6: - - An unverifiable transaction is to a third-party host if its request- - host U does not domain-match the reach R of the request-host O in the - origin transaction. - - """ - req_host = request_host(request) - if not domain_match(req_host, reach(request.origin_req_host)): - return True - else: - return False - - -class Cookie: - """HTTP Cookie. - - This class represents both Netscape and RFC 2965 cookies. - - This is deliberately a very simple class. It just holds attributes. It's - possible to construct Cookie instances that don't comply with the cookie - standards. CookieJar.make_cookies is the factory function for Cookie - objects -- it deals with cookie parsing, supplying defaults, and - normalising to the representation used in this class. CookiePolicy is - responsible for checking them to see whether they should be accepted from - and returned to the server. - - Note that the port may be present in the headers, but unspecified ("Port" - rather than"Port=80", for example); if this is the case, port is None. - - """ - - def __init__(self, version, name, value, - port, port_specified, - domain, domain_specified, domain_initial_dot, - path, path_specified, - secure, - expires, - discard, - comment, - comment_url, - rest, - rfc2109=False, - ): - - if version is not None: version = int(version) - if expires is not None: expires = int(float(expires)) - if port is None and port_specified is True: - raise ValueError("if port is None, port_specified must be false") - - self.version = version - self.name = name - self.value = value - self.port = port - self.port_specified = port_specified - # normalise case, as per RFC 2965 section 3.3.3 - self.domain = domain.lower() - self.domain_specified = domain_specified - # Sigh. We need to know whether the domain given in the - # cookie-attribute had an initial dot, in order to follow RFC 2965 - # (as clarified in draft errata). Needed for the returned $Domain - # value. - self.domain_initial_dot = domain_initial_dot - self.path = path - self.path_specified = path_specified - self.secure = secure - self.expires = expires - self.discard = discard - self.comment = comment - self.comment_url = comment_url - self.rfc2109 = rfc2109 - - self._rest = copy.copy(rest) - - def has_nonstandard_attr(self, name): - return name in self._rest - def get_nonstandard_attr(self, name, default=None): - return self._rest.get(name, default) - def set_nonstandard_attr(self, name, value): - self._rest[name] = value - - def is_expired(self, now=None): - if now is None: now = time.time() - if (self.expires is not None) and (self.expires <= now): - return True - return False - - def __str__(self): - if self.port is None: p = "" - else: p = ":"+self.port - limit = self.domain + p + self.path - if self.value is not None: - namevalue = "%s=%s" % (self.name, self.value) - else: - namevalue = self.name - return "<Cookie %s for %s>" % (namevalue, limit) - - def __repr__(self): - args = [] - for name in ("version", "name", "value", - "port", "port_specified", - "domain", "domain_specified", "domain_initial_dot", - "path", "path_specified", - "secure", "expires", "discard", "comment", "comment_url", - ): - attr = getattr(self, name) - args.append("%s=%s" % (name, repr(attr))) - args.append("rest=%s" % repr(self._rest)) - args.append("rfc2109=%s" % repr(self.rfc2109)) - return "%s(%s)" % (self.__class__.__name__, ", ".join(args)) - - -class CookiePolicy: - """Defines which cookies get accepted from and returned to server. - - May also modify cookies, though this is probably a bad idea. - - The subclass DefaultCookiePolicy defines the standard rules for Netscape - and RFC 2965 cookies -- override that if you want a customized policy. - - """ - def set_ok(self, cookie, request): - """Return true if (and only if) cookie should be accepted from server. - - Currently, pre-expired cookies never get this far -- the CookieJar - class deletes such cookies itself. - - """ - raise NotImplementedError() - - def return_ok(self, cookie, request): - """Return true if (and only if) cookie should be returned to server.""" - raise NotImplementedError() - - def domain_return_ok(self, domain, request): - """Return false if cookies should not be returned, given cookie domain. - """ - return True - - def path_return_ok(self, path, request): - """Return false if cookies should not be returned, given cookie path. - """ - return True - - -class DefaultCookiePolicy(CookiePolicy): - """Implements the standard rules for accepting and returning cookies.""" - - DomainStrictNoDots = 1 - DomainStrictNonDomain = 2 - DomainRFC2965Match = 4 - - DomainLiberal = 0 - DomainStrict = DomainStrictNoDots|DomainStrictNonDomain - - def __init__(self, - blocked_domains=None, allowed_domains=None, - netscape=True, rfc2965=False, - rfc2109_as_netscape=None, - hide_cookie2=False, - strict_domain=False, - strict_rfc2965_unverifiable=True, - strict_ns_unverifiable=False, - strict_ns_domain=DomainLiberal, - strict_ns_set_initial_dollar=False, - strict_ns_set_path=False, - ): - """Constructor arguments should be passed as keyword arguments only.""" - self.netscape = netscape - self.rfc2965 = rfc2965 - self.rfc2109_as_netscape = rfc2109_as_netscape - self.hide_cookie2 = hide_cookie2 - self.strict_domain = strict_domain - self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable - self.strict_ns_unverifiable = strict_ns_unverifiable - self.strict_ns_domain = strict_ns_domain - self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar - self.strict_ns_set_path = strict_ns_set_path - - if blocked_domains is not None: - self._blocked_domains = tuple(blocked_domains) - else: - self._blocked_domains = () - - if allowed_domains is not None: - allowed_domains = tuple(allowed_domains) - self._allowed_domains = allowed_domains - - def blocked_domains(self): - """Return the sequence of blocked domains (as a tuple).""" - return self._blocked_domains - def set_blocked_domains(self, blocked_domains): - """Set the sequence of blocked domains.""" - self._blocked_domains = tuple(blocked_domains) - - def is_blocked(self, domain): - for blocked_domain in self._blocked_domains: - if user_domain_match(domain, blocked_domain): - return True - return False - - def allowed_domains(self): - """Return None, or the sequence of allowed domains (as a tuple).""" - return self._allowed_domains - def set_allowed_domains(self, allowed_domains): - """Set the sequence of allowed domains, or None.""" - if allowed_domains is not None: - allowed_domains = tuple(allowed_domains) - self._allowed_domains = allowed_domains - - def is_not_allowed(self, domain): - if self._allowed_domains is None: - return False - for allowed_domain in self._allowed_domains: - if user_domain_match(domain, allowed_domain): - return False - return True - - def set_ok(self, cookie, request): - """ - If you override .set_ok(), be sure to call this method. If it returns - false, so should your subclass (assuming your subclass wants to be more - strict about which cookies to accept). - - """ - _debug(" - checking cookie %s=%s", cookie.name, cookie.value) - - assert cookie.name is not None - - for n in "version", "verifiability", "name", "path", "domain", "port": - fn_name = "set_ok_"+n - fn = getattr(self, fn_name) - if not fn(cookie, request): - return False - - return True - - def set_ok_version(self, cookie, request): - if cookie.version is None: - # Version is always set to 0 by parse_ns_headers if it's a Netscape - # cookie, so this must be an invalid RFC 2965 cookie. - _debug(" Set-Cookie2 without version attribute (%s=%s)", - cookie.name, cookie.value) - return False - if cookie.version > 0 and not self.rfc2965: - _debug(" RFC 2965 cookies are switched off") - return False - elif cookie.version == 0 and not self.netscape: - _debug(" Netscape cookies are switched off") - return False - return True - - def set_ok_verifiability(self, cookie, request): - if request.unverifiable and is_third_party(request): - if cookie.version > 0 and self.strict_rfc2965_unverifiable: - _debug(" third-party RFC 2965 cookie during " - "unverifiable transaction") - return False - elif cookie.version == 0 and self.strict_ns_unverifiable: - _debug(" third-party Netscape cookie during " - "unverifiable transaction") - return False - return True - - def set_ok_name(self, cookie, request): - # Try and stop servers setting V0 cookies designed to hack other - # servers that know both V0 and V1 protocols. - if (cookie.version == 0 and self.strict_ns_set_initial_dollar and - cookie.name.startswith("$")): - _debug(" illegal name (starts with '$'): '%s'", cookie.name) - return False - return True - - def set_ok_path(self, cookie, request): - if cookie.path_specified: - req_path = request_path(request) - if ((cookie.version > 0 or - (cookie.version == 0 and self.strict_ns_set_path)) and - not req_path.startswith(cookie.path)): - _debug(" path attribute %s is not a prefix of request " - "path %s", cookie.path, req_path) - return False - return True - - def set_ok_domain(self, cookie, request): - if self.is_blocked(cookie.domain): - _debug(" domain %s is in user block-list", cookie.domain) - return False - if self.is_not_allowed(cookie.domain): - _debug(" domain %s is not in user allow-list", cookie.domain) - return False - if cookie.domain_specified: - req_host, erhn = eff_request_host(request) - domain = cookie.domain - if self.strict_domain and (domain.count(".") >= 2): - # XXX This should probably be compared with the Konqueror - # (kcookiejar.cpp) and Mozilla implementations, but it's a - # losing battle. - i = domain.rfind(".") - j = domain.rfind(".", 0, i) - if j == 0: # domain like .foo.bar - tld = domain[i+1:] - sld = domain[j+1:i] - if sld.lower() in ("co", "ac", "com", "edu", "org", "net", - "gov", "mil", "int", "aero", "biz", "cat", "coop", - "info", "jobs", "mobi", "museum", "name", "pro", - "travel", "eu") and len(tld) == 2: - # domain like .co.uk - _debug(" country-code second level domain %s", domain) - return False - if domain.startswith("."): - undotted_domain = domain[1:] - else: - undotted_domain = domain - embedded_dots = (undotted_domain.find(".") >= 0) - if not embedded_dots and domain != ".local": - _debug(" non-local domain %s contains no embedded dot", - domain) - return False - if cookie.version == 0: - if (not erhn.endswith(domain) and - (not erhn.startswith(".") and - not ("."+erhn).endswith(domain))): - _debug(" effective request-host %s (even with added " - "initial dot) does not end with %s", - erhn, domain) - return False - if (cookie.version > 0 or - (self.strict_ns_domain & self.DomainRFC2965Match)): - if not domain_match(erhn, domain): - _debug(" effective request-host %s does not domain-match " - "%s", erhn, domain) - return False - if (cookie.version > 0 or - (self.strict_ns_domain & self.DomainStrictNoDots)): - host_prefix = req_host[:-len(domain)] - if (host_prefix.find(".") >= 0 and - not IPV4_RE.search(req_host)): - _debug(" host prefix %s for domain %s contains a dot", - host_prefix, domain) - return False - return True - - def set_ok_port(self, cookie, request): - if cookie.port_specified: - req_port = request_port(request) - if req_port is None: - req_port = "80" - else: - req_port = str(req_port) - for p in cookie.port.split(","): - try: - int(p) - except ValueError: - _debug(" bad port %s (not numeric)", p) - return False - if p == req_port: - break - else: - _debug(" request port (%s) not found in %s", - req_port, cookie.port) - return False - return True - - def return_ok(self, cookie, request): - """ - If you override .return_ok(), be sure to call this method. If it - returns false, so should your subclass (assuming your subclass wants to - be more strict about which cookies to return). - - """ - # Path has already been checked by .path_return_ok(), and domain - # blocking done by .domain_return_ok(). - _debug(" - checking cookie %s=%s", cookie.name, cookie.value) - - for n in "version", "verifiability", "secure", "expires", "port", "domain": - fn_name = "return_ok_"+n - fn = getattr(self, fn_name) - if not fn(cookie, request): - return False - return True - - def return_ok_version(self, cookie, request): - if cookie.version > 0 and not self.rfc2965: - _debug(" RFC 2965 cookies are switched off") - return False - elif cookie.version == 0 and not self.netscape: - _debug(" Netscape cookies are switched off") - return False - return True - - def return_ok_verifiability(self, cookie, request): - if request.unverifiable and is_third_party(request): - if cookie.version > 0 and self.strict_rfc2965_unverifiable: - _debug(" third-party RFC 2965 cookie during unverifiable " - "transaction") - return False - elif cookie.version == 0 and self.strict_ns_unverifiable: - _debug(" third-party Netscape cookie during unverifiable " - "transaction") - return False - return True - - def return_ok_secure(self, cookie, request): - if cookie.secure and request.type != "https": - _debug(" secure cookie with non-secure request") - return False - return True - - def return_ok_expires(self, cookie, request): - if cookie.is_expired(self._now): - _debug(" cookie expired") - return False - return True - - def return_ok_port(self, cookie, request): - if cookie.port: - req_port = request_port(request) - if req_port is None: - req_port = "80" - for p in cookie.port.split(","): - if p == req_port: - break - else: - _debug(" request port %s does not match cookie port %s", - req_port, cookie.port) - return False - return True - - def return_ok_domain(self, cookie, request): - req_host, erhn = eff_request_host(request) - domain = cookie.domain - - # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't - if (cookie.version == 0 and - (self.strict_ns_domain & self.DomainStrictNonDomain) and - not cookie.domain_specified and domain != erhn): - _debug(" cookie with unspecified domain does not string-compare " - "equal to request domain") - return False - - if cookie.version > 0 and not domain_match(erhn, domain): - _debug(" effective request-host name %s does not domain-match " - "RFC 2965 cookie domain %s", erhn, domain) - return False - if cookie.version == 0 and not ("."+erhn).endswith(domain): - _debug(" request-host %s does not match Netscape cookie domain " - "%s", req_host, domain) - return False - return True - - def domain_return_ok(self, domain, request): - # Liberal check of. This is here as an optimization to avoid - # having to load lots of MSIE cookie files unless necessary. - req_host, erhn = eff_request_host(request) - if not req_host.startswith("."): - req_host = "."+req_host - if not erhn.startswith("."): - erhn = "."+erhn - if not (req_host.endswith(domain) or erhn.endswith(domain)): - #_debug(" request domain %s does not match cookie domain %s", - # req_host, domain) - return False - - if self.is_blocked(domain): - _debug(" domain %s is in user block-list", domain) - return False - if self.is_not_allowed(domain): - _debug(" domain %s is not in user allow-list", domain) - return False - - return True - - def path_return_ok(self, path, request): - _debug("- checking cookie path=%s", path) - req_path = request_path(request) - if not req_path.startswith(path): - _debug(" %s does not path-match %s", req_path, path) - return False - return True - - -def vals_sorted_by_key(adict): - keys = sorted(adict.keys()) - return map(adict.get, keys) - -def deepvalues(mapping): - """Iterates over nested mapping, depth-first, in sorted order by key.""" - values = vals_sorted_by_key(mapping) - for obj in values: - mapping = False - try: - obj.items - except AttributeError: - pass - else: - mapping = True - yield from deepvalues(obj) - if not mapping: - yield obj - - -# Used as second parameter to dict.get() method, to distinguish absent -# dict key from one with a None value. -class Absent: pass - -class CookieJar: - """Collection of HTTP cookies. - - You may not need to know about this class: try - urllib.request.build_opener(HTTPCookieProcessor).open(url). - """ - - non_word_re = re.compile(r"\W") - quote_re = re.compile(r"([\"\\])") - strict_domain_re = re.compile(r"\.?[^.]*") - domain_re = re.compile(r"[^.]*") - dots_re = re.compile(r"^\.+") - - magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII) - - def __init__(self, policy=None): - if policy is None: - policy = DefaultCookiePolicy() - self._policy = policy - - self._cookies_lock = _threading.RLock() - self._cookies = {} - - def set_policy(self, policy): - self._policy = policy - - def _cookies_for_domain(self, domain, request): - cookies = [] - if not self._policy.domain_return_ok(domain, request): - return [] - _debug("Checking %s for cookies to return", domain) - cookies_by_path = self._cookies[domain] - for path in cookies_by_path.keys(): - if not self._policy.path_return_ok(path, request): - continue - cookies_by_name = cookies_by_path[path] - for cookie in cookies_by_name.values(): - if not self._policy.return_ok(cookie, request): - _debug(" not returning cookie") - continue - _debug(" it's a match") - cookies.append(cookie) - return cookies - - def _cookies_for_request(self, request): - """Return a list of cookies to be returned to server.""" - cookies = [] - for domain in self._cookies.keys(): - cookies.extend(self._cookies_for_domain(domain, request)) - return cookies - - def _cookie_attrs(self, cookies): - """Return a list of cookie-attributes to be returned to server. - - like ['foo="bar"; $Path="/"', ...] - - The $Version attribute is also added when appropriate (currently only - once per request). - - """ - # add cookies in order of most specific (ie. longest) path first - cookies.sort(key=lambda a: len(a.path), reverse=True) - - version_set = False - - attrs = [] - for cookie in cookies: - # set version of Cookie header - # XXX - # What should it be if multiple matching Set-Cookie headers have - # different versions themselves? - # Answer: there is no answer; was supposed to be settled by - # RFC 2965 errata, but that may never appear... - version = cookie.version - if not version_set: - version_set = True - if version > 0: - attrs.append("$Version=%s" % version) - - # quote cookie value if necessary - # (not for Netscape protocol, which already has any quotes - # intact, due to the poorly-specified Netscape Cookie: syntax) - if ((cookie.value is not None) and - self.non_word_re.search(cookie.value) and version > 0): - value = self.quote_re.sub(r"\\\1", cookie.value) - else: - value = cookie.value - - # add cookie-attributes to be returned in Cookie header - if cookie.value is None: - attrs.append(cookie.name) - else: - attrs.append("%s=%s" % (cookie.name, value)) - if version > 0: - if cookie.path_specified: - attrs.append('$Path="%s"' % cookie.path) - if cookie.domain.startswith("."): - domain = cookie.domain - if (not cookie.domain_initial_dot and - domain.startswith(".")): - domain = domain[1:] - attrs.append('$Domain="%s"' % domain) - if cookie.port is not None: - p = "$Port" - if cookie.port_specified: - p = p + ('="%s"' % cookie.port) - attrs.append(p) - - return attrs - - def add_cookie_header(self, request): - """Add correct Cookie: header to request (urllib.request.Request object). - - The Cookie2 header is also added unless policy.hide_cookie2 is true. - - """ - _debug("add_cookie_header") - self._cookies_lock.acquire() - try: - - self._policy._now = self._now = int(time.time()) - - cookies = self._cookies_for_request(request) - - attrs = self._cookie_attrs(cookies) - if attrs: - if not request.has_header("Cookie"): - request.add_unredirected_header( - "Cookie", "; ".join(attrs)) - - # if necessary, advertise that we know RFC 2965 - if (self._policy.rfc2965 and not self._policy.hide_cookie2 and - not request.has_header("Cookie2")): - for cookie in cookies: - if cookie.version != 1: - request.add_unredirected_header("Cookie2", '$Version="1"') - break - - finally: - self._cookies_lock.release() - - self.clear_expired_cookies() - - def _normalized_cookie_tuples(self, attrs_set): - """Return list of tuples containing normalised cookie information. - - attrs_set is the list of lists of key,value pairs extracted from - the Set-Cookie or Set-Cookie2 headers. - - Tuples are name, value, standard, rest, where name and value are the - cookie name and value, standard is a dictionary containing the standard - cookie-attributes (discard, secure, version, expires or max-age, - domain, path and port) and rest is a dictionary containing the rest of - the cookie-attributes. - - """ - cookie_tuples = [] - - boolean_attrs = "discard", "secure" - value_attrs = ("version", - "expires", "max-age", - "domain", "path", "port", - "comment", "commenturl") - - for cookie_attrs in attrs_set: - name, value = cookie_attrs[0] - - # Build dictionary of standard cookie-attributes (standard) and - # dictionary of other cookie-attributes (rest). - - # Note: expiry time is normalised to seconds since epoch. V0 - # cookies should have the Expires cookie-attribute, and V1 cookies - # should have Max-Age, but since V1 includes RFC 2109 cookies (and - # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we - # accept either (but prefer Max-Age). - max_age_set = False - - bad_cookie = False - - standard = {} - rest = {} - for k, v in cookie_attrs[1:]: - lc = k.lower() - # don't lose case distinction for unknown fields - if lc in value_attrs or lc in boolean_attrs: - k = lc - if k in boolean_attrs and v is None: - # boolean cookie-attribute is present, but has no value - # (like "discard", rather than "port=80") - v = True - if k in standard: - # only first value is significant - continue - if k == "domain": - if v is None: - _debug(" missing value for domain attribute") - bad_cookie = True - break - # RFC 2965 section 3.3.3 - v = v.lower() - if k == "expires": - if max_age_set: - # Prefer max-age to expires (like Mozilla) - continue - if v is None: - _debug(" missing or invalid value for expires " - "attribute: treating as session cookie") - continue - if k == "max-age": - max_age_set = True - try: - v = int(v) - except ValueError: - _debug(" missing or invalid (non-numeric) value for " - "max-age attribute") - bad_cookie = True - break - # convert RFC 2965 Max-Age to seconds since epoch - # XXX Strictly you're supposed to follow RFC 2616 - # age-calculation rules. Remember that zero Max-Age - # is a request to discard (old and new) cookie, though. - k = "expires" - v = self._now + v - if (k in value_attrs) or (k in boolean_attrs): - if (v is None and - k not in ("port", "comment", "commenturl")): - _debug(" missing value for %s attribute" % k) - bad_cookie = True - break - standard[k] = v - else: - rest[k] = v - - if bad_cookie: - continue - - cookie_tuples.append((name, value, standard, rest)) - - return cookie_tuples - - def _cookie_from_cookie_tuple(self, tup, request): - # standard is dict of standard cookie-attributes, rest is dict of the - # rest of them - name, value, standard, rest = tup - - domain = standard.get("domain", Absent) - path = standard.get("path", Absent) - port = standard.get("port", Absent) - expires = standard.get("expires", Absent) - - # set the easy defaults - version = standard.get("version", None) - if version is not None: - try: - version = int(version) - except ValueError: - return None # invalid version, ignore cookie - secure = standard.get("secure", False) - # (discard is also set if expires is Absent) - discard = standard.get("discard", False) - comment = standard.get("comment", None) - comment_url = standard.get("commenturl", None) - - # set default path - if path is not Absent and path != "": - path_specified = True - path = escape_path(path) - else: - path_specified = False - path = request_path(request) - i = path.rfind("/") - if i != -1: - if version == 0: - # Netscape spec parts company from reality here - path = path[:i] - else: - path = path[:i+1] - if len(path) == 0: path = "/" - - # set default domain - domain_specified = domain is not Absent - # but first we have to remember whether it starts with a dot - domain_initial_dot = False - if domain_specified: - domain_initial_dot = bool(domain.startswith(".")) - if domain is Absent: - req_host, erhn = eff_request_host(request) - domain = erhn - elif not domain.startswith("."): - domain = "."+domain - - # set default port - port_specified = False - if port is not Absent: - if port is None: - # Port attr present, but has no value: default to request port. - # Cookie should then only be sent back on that port. - port = request_port(request) - else: - port_specified = True - port = re.sub(r"\s+", "", port) - else: - # No port attr present. Cookie can be sent back on any port. - port = None - - # set default expires and discard - if expires is Absent: - expires = None - discard = True - elif expires <= self._now: - # Expiry date in past is request to delete cookie. This can't be - # in DefaultCookiePolicy, because can't delete cookies there. - try: - self.clear(domain, path, name) - except KeyError: - pass - _debug("Expiring cookie, domain='%s', path='%s', name='%s'", - domain, path, name) - return None - - return Cookie(version, - name, value, - port, port_specified, - domain, domain_specified, domain_initial_dot, - path, path_specified, - secure, - expires, - discard, - comment, - comment_url, - rest) - - def _cookies_from_attrs_set(self, attrs_set, request): - cookie_tuples = self._normalized_cookie_tuples(attrs_set) - - cookies = [] - for tup in cookie_tuples: - cookie = self._cookie_from_cookie_tuple(tup, request) - if cookie: cookies.append(cookie) - return cookies - - def _process_rfc2109_cookies(self, cookies): - rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None) - if rfc2109_as_ns is None: - rfc2109_as_ns = not self._policy.rfc2965 - for cookie in cookies: - if cookie.version == 1: - cookie.rfc2109 = True - if rfc2109_as_ns: - # treat 2109 cookies as Netscape cookies rather than - # as RFC2965 cookies - cookie.version = 0 - - def make_cookies(self, response, request): - """Return sequence of Cookie objects extracted from response object.""" - # get cookie-attributes for RFC 2965 and Netscape protocols - headers = response.info() - rfc2965_hdrs = headers.get_all("Set-Cookie2", []) - ns_hdrs = headers.get_all("Set-Cookie", []) - - rfc2965 = self._policy.rfc2965 - netscape = self._policy.netscape - - if ((not rfc2965_hdrs and not ns_hdrs) or - (not ns_hdrs and not rfc2965) or - (not rfc2965_hdrs and not netscape) or - (not netscape and not rfc2965)): - return [] # no relevant cookie headers: quick exit - - try: - cookies = self._cookies_from_attrs_set( - split_header_words(rfc2965_hdrs), request) - except Exception: - _warn_unhandled_exception() - cookies = [] - - if ns_hdrs and netscape: - try: - # RFC 2109 and Netscape cookies - ns_cookies = self._cookies_from_attrs_set( - parse_ns_headers(ns_hdrs), request) - except Exception: - _warn_unhandled_exception() - ns_cookies = [] - self._process_rfc2109_cookies(ns_cookies) - - # Look for Netscape cookies (from Set-Cookie headers) that match - # corresponding RFC 2965 cookies (from Set-Cookie2 headers). - # For each match, keep the RFC 2965 cookie and ignore the Netscape - # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are - # bundled in with the Netscape cookies for this purpose, which is - # reasonable behaviour. - if rfc2965: - lookup = {} - for cookie in cookies: - lookup[(cookie.domain, cookie.path, cookie.name)] = None - - def no_matching_rfc2965(ns_cookie, lookup=lookup): - key = ns_cookie.domain, ns_cookie.path, ns_cookie.name - return key not in lookup - ns_cookies = filter(no_matching_rfc2965, ns_cookies) - - if ns_cookies: - cookies.extend(ns_cookies) - - return cookies - - def set_cookie_if_ok(self, cookie, request): - """Set a cookie if policy says it's OK to do so.""" - self._cookies_lock.acquire() - try: - self._policy._now = self._now = int(time.time()) - - if self._policy.set_ok(cookie, request): - self.set_cookie(cookie) - - - finally: - self._cookies_lock.release() - - def set_cookie(self, cookie): - """Set a cookie, without checking whether or not it should be set.""" - c = self._cookies - self._cookies_lock.acquire() - try: - if cookie.domain not in c: c[cookie.domain] = {} - c2 = c[cookie.domain] - if cookie.path not in c2: c2[cookie.path] = {} - c3 = c2[cookie.path] - c3[cookie.name] = cookie - finally: - self._cookies_lock.release() - - def extract_cookies(self, response, request): - """Extract cookies from response, where allowable given the request.""" - _debug("extract_cookies: %s", response.info()) - self._cookies_lock.acquire() - try: - self._policy._now = self._now = int(time.time()) - - for cookie in self.make_cookies(response, request): - if self._policy.set_ok(cookie, request): - _debug(" setting cookie: %s", cookie) - self.set_cookie(cookie) - finally: - self._cookies_lock.release() - - def clear(self, domain=None, path=None, name=None): - """Clear some cookies. - - Invoking this method without arguments will clear all cookies. If - given a single argument, only cookies belonging to that domain will be - removed. If given two arguments, cookies belonging to the specified - path within that domain are removed. If given three arguments, then - the cookie with the specified name, path and domain is removed. - - Raises KeyError if no matching cookie exists. - - """ - if name is not None: - if (domain is None) or (path is None): - raise ValueError( - "domain and path must be given to remove a cookie by name") - del self._cookies[domain][path][name] - elif path is not None: - if domain is None: - raise ValueError( - "domain must be given to remove cookies by path") - del self._cookies[domain][path] - elif domain is not None: - del self._cookies[domain] - else: - self._cookies = {} - - def clear_session_cookies(self): - """Discard all session cookies. - - Note that the .save() method won't save session cookies anyway, unless - you ask otherwise by passing a true ignore_discard argument. - - """ - self._cookies_lock.acquire() - try: - for cookie in self: - if cookie.discard: - self.clear(cookie.domain, cookie.path, cookie.name) - finally: - self._cookies_lock.release() - - def clear_expired_cookies(self): - """Discard all expired cookies. - - You probably don't need to call this method: expired cookies are never - sent back to the server (provided you're using DefaultCookiePolicy), - this method is called by CookieJar itself every so often, and the - .save() method won't save expired cookies anyway (unless you ask - otherwise by passing a true ignore_expires argument). - - """ - self._cookies_lock.acquire() - try: - now = time.time() - for cookie in self: - if cookie.is_expired(now): - self.clear(cookie.domain, cookie.path, cookie.name) - finally: - self._cookies_lock.release() - - def __iter__(self): - return deepvalues(self._cookies) - - def __len__(self): - """Return number of contained cookies.""" - i = 0 - for cookie in self: i = i + 1 - return i - - def __repr__(self): - r = [] - for cookie in self: r.append(repr(cookie)) - return "<%s[%s]>" % (self.__class__.__name__, ", ".join(r)) - - def __str__(self): - r = [] - for cookie in self: r.append(str(cookie)) - return "<%s[%s]>" % (self.__class__.__name__, ", ".join(r)) - - -# derives from OSError for backwards-compatibility with Python 2.4.0 -class LoadError(OSError): pass - -class FileCookieJar(CookieJar): - """CookieJar that can be loaded from and saved to a file.""" - - def __init__(self, filename=None, delayload=False, policy=None): - """ - Cookies are NOT loaded from the named file until either the .load() or - .revert() method is called. - - """ - CookieJar.__init__(self, policy) - if filename is not None: - try: - filename+"" - except: - raise ValueError("filename must be string-like") - self.filename = filename - self.delayload = bool(delayload) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - """Save cookies to a file.""" - raise NotImplementedError() - - def load(self, filename=None, ignore_discard=False, ignore_expires=False): - """Load cookies from a file.""" - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - with open(filename) as f: - self._really_load(f, filename, ignore_discard, ignore_expires) - - def revert(self, filename=None, - ignore_discard=False, ignore_expires=False): - """Clear all cookies and reload cookies from a saved file. - - Raises LoadError (or OSError) if reversion is not successful; the - object's state will not be altered if this happens. - - """ - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - self._cookies_lock.acquire() - try: - - old_state = copy.deepcopy(self._cookies) - self._cookies = {} - try: - self.load(filename, ignore_discard, ignore_expires) - except OSError: - self._cookies = old_state - raise - - finally: - self._cookies_lock.release() - - -def lwp_cookie_str(cookie): - """Return string representation of Cookie in the LWP cookie file format. - - Actually, the format is extended a bit -- see module docstring. - - """ - h = [(cookie.name, cookie.value), - ("path", cookie.path), - ("domain", cookie.domain)] - if cookie.port is not None: h.append(("port", cookie.port)) - if cookie.path_specified: h.append(("path_spec", None)) - if cookie.port_specified: h.append(("port_spec", None)) - if cookie.domain_initial_dot: h.append(("domain_dot", None)) - if cookie.secure: h.append(("secure", None)) - if cookie.expires: h.append(("expires", - time2isoz(float(cookie.expires)))) - if cookie.discard: h.append(("discard", None)) - if cookie.comment: h.append(("comment", cookie.comment)) - if cookie.comment_url: h.append(("commenturl", cookie.comment_url)) - - keys = sorted(cookie._rest.keys()) - for k in keys: - h.append((k, str(cookie._rest[k]))) - - h.append(("version", str(cookie.version))) - - return join_header_words([h]) - -class LWPCookieJar(FileCookieJar): - """ - The LWPCookieJar saves a sequence of "Set-Cookie3" lines. - "Set-Cookie3" is the format used by the libwww-perl library, not known - to be compatible with any browser, but which is easy to read and - doesn't lose information about RFC 2965 cookies. - - Additional methods - - as_lwp_str(ignore_discard=True, ignore_expired=True) - - """ - - def as_lwp_str(self, ignore_discard=True, ignore_expires=True): - """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers. - - ignore_discard and ignore_expires: see docstring for FileCookieJar.save - - """ - now = time.time() - r = [] - for cookie in self: - if not ignore_discard and cookie.discard: - continue - if not ignore_expires and cookie.is_expired(now): - continue - r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie)) - return "\n".join(r+[""]) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - with open(filename, "w") as f: - # There really isn't an LWP Cookies 2.0 format, but this indicates - # that there is extra information in here (domain_dot and - # port_spec) while still being compatible with libwww-perl, I hope. - f.write("#LWP-Cookies-2.0\n") - f.write(self.as_lwp_str(ignore_discard, ignore_expires)) - - def _really_load(self, f, filename, ignore_discard, ignore_expires): - magic = f.readline() - if not self.magic_re.search(magic): - msg = ("%r does not look like a Set-Cookie3 (LWP) format " - "file" % filename) - raise LoadError(msg) - - now = time.time() - - header = "Set-Cookie3:" - boolean_attrs = ("port_spec", "path_spec", "domain_dot", - "secure", "discard") - value_attrs = ("version", - "port", "path", "domain", - "expires", - "comment", "commenturl") - - try: - while 1: - line = f.readline() - if line == "": break - if not line.startswith(header): - continue - line = line[len(header):].strip() - - for data in split_header_words([line]): - name, value = data[0] - standard = {} - rest = {} - for k in boolean_attrs: - standard[k] = False - for k, v in data[1:]: - if k is not None: - lc = k.lower() - else: - lc = None - # don't lose case distinction for unknown fields - if (lc in value_attrs) or (lc in boolean_attrs): - k = lc - if k in boolean_attrs: - if v is None: v = True - standard[k] = v - elif k in value_attrs: - standard[k] = v - else: - rest[k] = v - - h = standard.get - expires = h("expires") - discard = h("discard") - if expires is not None: - expires = iso2time(expires) - if expires is None: - discard = True - domain = h("domain") - domain_specified = domain.startswith(".") - c = Cookie(h("version"), name, value, - h("port"), h("port_spec"), - domain, domain_specified, h("domain_dot"), - h("path"), h("path_spec"), - h("secure"), - expires, - discard, - h("comment"), - h("commenturl"), - rest) - if not ignore_discard and c.discard: - continue - if not ignore_expires and c.is_expired(now): - continue - self.set_cookie(c) - except OSError: - raise - except Exception: - _warn_unhandled_exception() - raise LoadError("invalid Set-Cookie3 format file %r: %r" % - (filename, line)) - - -class MozillaCookieJar(FileCookieJar): - """ - - WARNING: you may want to backup your browser's cookies file if you use - this class to save cookies. I *think* it works, but there have been - bugs in the past! - - This class differs from CookieJar only in the format it uses to save and - load cookies to and from a file. This class uses the Mozilla/Netscape - `cookies.txt' format. lynx uses this file format, too. - - Don't expect cookies saved while the browser is running to be noticed by - the browser (in fact, Mozilla on unix will overwrite your saved cookies if - you change them on disk while it's running; on Windows, you probably can't - save at all while the browser is running). - - Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to - Netscape cookies on saving. - - In particular, the cookie version and port number information is lost, - together with information about whether or not Path, Port and Discard were - specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the - domain as set in the HTTP header started with a dot (yes, I'm aware some - domains in Netscape files start with a dot and some don't -- trust me, you - really don't want to know any more about this). - - Note that though Mozilla and Netscape use the same format, they use - slightly different headers. The class saves cookies using the Netscape - header by default (Mozilla can cope with that). - - """ - magic_re = re.compile("#( Netscape)? HTTP Cookie File") - header = """\ -# Netscape HTTP Cookie File -# http://curl.haxx.se/rfc/cookie_spec.html -# This is a generated file! Do not edit. - -""" - - def _really_load(self, f, filename, ignore_discard, ignore_expires): - now = time.time() - - magic = f.readline() - if not self.magic_re.search(magic): - raise LoadError( - "%r does not look like a Netscape format cookies file" % - filename) - - try: - while 1: - line = f.readline() - if line == "": break - - # last field may be absent, so keep any trailing tab - if line.endswith("\n"): line = line[:-1] - - # skip comments and blank lines XXX what is $ for? - if (line.strip().startswith(("#", "$")) or - line.strip() == ""): - continue - - domain, domain_specified, path, secure, expires, name, value = \ - line.split("\t") - secure = (secure == "TRUE") - domain_specified = (domain_specified == "TRUE") - if name == "": - # cookies.txt regards 'Set-Cookie: foo' as a cookie - # with no name, whereas http.cookiejar regards it as a - # cookie with no value. - name = value - value = None - - initial_dot = domain.startswith(".") - assert domain_specified == initial_dot - - discard = False - if expires == "": - expires = None - discard = True - - # assume path_specified is false - c = Cookie(0, name, value, - None, False, - domain, domain_specified, initial_dot, - path, False, - secure, - expires, - discard, - None, - None, - {}) - if not ignore_discard and c.discard: - continue - if not ignore_expires and c.is_expired(now): - continue - self.set_cookie(c) - - except OSError: - raise - except Exception: - _warn_unhandled_exception() - raise LoadError("invalid Netscape format cookies file %r: %r" % - (filename, line)) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - with open(filename, "w") as f: - f.write(self.header) - now = time.time() - for cookie in self: - if not ignore_discard and cookie.discard: - continue - if not ignore_expires and cookie.is_expired(now): - continue - if cookie.secure: secure = "TRUE" - else: secure = "FALSE" - if cookie.domain.startswith("."): initial_dot = "TRUE" - else: initial_dot = "FALSE" - if cookie.expires is not None: - expires = str(cookie.expires) - else: - expires = "" - if cookie.value is None: - # cookies.txt regards 'Set-Cookie: foo' as a cookie - # with no name, whereas http.cookiejar regards it as a - # cookie with no value. - name = "" - value = cookie.name - else: - name = cookie.name - value = cookie.value - f.write( - "\t".join([cookie.domain, initial_dot, cookie.path, - secure, expires, name, value])+ - "\n") diff --git a/modules/language/python/module/http/cookies.py~ b/modules/language/python/module/http/cookies.py~ deleted file mode 100644 index be3b080..0000000 --- a/modules/language/python/module/http/cookies.py~ +++ /dev/null @@ -1,635 +0,0 @@ -#### -# Copyright 2000 by Timothy O'Malley <timo@alum.mit.edu> -# -# All Rights Reserved -# -# Permission to use, copy, modify, and distribute this software -# and its documentation for any purpose and without fee is hereby -# granted, provided that the above copyright notice appear in all -# copies and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Timothy O'Malley not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS -# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY -# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR -# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -# PERFORMANCE OF THIS SOFTWARE. -# -#### -# -# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp -# by Timothy O'Malley <timo@alum.mit.edu> -# -# Cookie.py is a Python module for the handling of HTTP -# cookies as a Python dictionary. See RFC 2109 for more -# information on cookies. -# -# The original idea to treat Cookies as a dictionary came from -# Dave Mitchell (davem@magnet.com) in 1995, when he released the -# first version of nscookie.py. -# -#### - -r""" -Here's a sample session to show how to use this module. -At the moment, this is the only documentation. - -The Basics ----------- - -Importing is easy... - - >>> from http import cookies - -Most of the time you start by creating a cookie. - - >>> C = cookies.SimpleCookie() - -Once you've created your Cookie, you can add values just as if it were -a dictionary. - - >>> C = cookies.SimpleCookie() - >>> C["fig"] = "newton" - >>> C["sugar"] = "wafer" - >>> C.output() - 'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer' - -Notice that the printable representation of a Cookie is the -appropriate format for a Set-Cookie: header. This is the -default behavior. You can change the header and printed -attributes by using the .output() function - - >>> C = cookies.SimpleCookie() - >>> C["rocky"] = "road" - >>> C["rocky"]["path"] = "/cookie" - >>> print(C.output(header="Cookie:")) - Cookie: rocky=road; Path=/cookie - >>> print(C.output(attrs=[], header="Cookie:")) - Cookie: rocky=road - -The load() method of a Cookie extracts cookies from a string. In a -CGI script, you would use this method to extract the cookies from the -HTTP_COOKIE environment variable. - - >>> C = cookies.SimpleCookie() - >>> C.load("chips=ahoy; vienna=finger") - >>> C.output() - 'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger' - -The load() method is darn-tootin smart about identifying cookies -within a string. Escaped quotation marks, nested semicolons, and other -such trickeries do not confuse it. - - >>> C = cookies.SimpleCookie() - >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') - >>> print(C) - Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" - -Each element of the Cookie also supports all of the RFC 2109 -Cookie attributes. Here's an example which sets the Path -attribute. - - >>> C = cookies.SimpleCookie() - >>> C["oreo"] = "doublestuff" - >>> C["oreo"]["path"] = "/" - >>> print(C) - Set-Cookie: oreo=doublestuff; Path=/ - -Each dictionary element has a 'value' attribute, which gives you -back the value associated with the key. - - >>> C = cookies.SimpleCookie() - >>> C["twix"] = "none for you" - >>> C["twix"].value - 'none for you' - -The SimpleCookie expects that all values should be standard strings. -Just to be sure, SimpleCookie invokes the str() builtin to convert -the value to a string, when the values are set dictionary-style. - - >>> C = cookies.SimpleCookie() - >>> C["number"] = 7 - >>> C["string"] = "seven" - >>> C["number"].value - '7' - >>> C["string"].value - 'seven' - >>> C.output() - 'Set-Cookie: number=7\r\nSet-Cookie: string=seven' - -Finis. -""" - -# -# Import our required modules -# -import re -import string - -__all__ = ["CookieError", "BaseCookie", "SimpleCookie"] - -_nulljoin = ''.join -_semispacejoin = '; '.join -_spacejoin = ' '.join - -def _warn_deprecated_setter(setter): - import warnings - msg = ('The .%s setter is deprecated. The attribute will be read-only in ' - 'future releases. Please use the set() method instead.' % setter) - warnings.warn(msg, DeprecationWarning, stacklevel=3) - -# -# Define an exception visible to External modules -# -class CookieError(Exception): - pass - - -# These quoting routines conform to the RFC2109 specification, which in -# turn references the character definitions from RFC2068. They provide -# a two-way quoting algorithm. Any non-text character is translated -# into a 4 character sequence: a forward-slash followed by the -# three-digit octal equivalent of the character. Any '\' or '"' is -# quoted with a preceding '\' slash. -# Because of the way browsers really handle cookies (as opposed to what -# the RFC says) we also encode "," and ";". -# -# These are taken from RFC2068 and RFC2109. -# _LegalChars is the list of chars which don't require "'s -# _Translator hash-table for fast quoting -# -_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~:" -_UnescapedChars = _LegalChars + ' ()/<=>?@[]{}' - -_Translator = {n: '\\%03o' % n - for n in set(range(256)) - set(map(ord, _UnescapedChars))} -_Translator.update({ - ord('"'): '\\"', - ord('\\'): '\\\\', -}) - -_is_legal_key = re.compile('[%s]+' % re.escape(_LegalChars)).fullmatch - -def _quote(str): - r"""Quote a string for use in a cookie header. - - If the string does not need to be double-quoted, then just return the - string. Otherwise, surround the string in doublequotes and quote - (with a \) special characters. - """ - if str is None or _is_legal_key(str): - return str - else: - return '"' + str.translate(_Translator) + '"' - - -_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") -_QuotePatt = re.compile(r"[\\].") - -def _unquote(str): - # If there aren't any doublequotes, - # then there can't be any special characters. See RFC 2109. - if str is None or len(str) < 2: - return str - if str[0] != '"' or str[-1] != '"': - return str - - # We have to assume that we must decode this string. - # Down to work. - - # Remove the "s - str = str[1:-1] - - # Check for special sequences. Examples: - # \012 --> \n - # \" --> " - # - i = 0 - n = len(str) - res = [] - while 0 <= i < n: - o_match = _OctalPatt.search(str, i) - q_match = _QuotePatt.search(str, i) - if not o_match and not q_match: # Neither matched - res.append(str[i:]) - break - # else: - j = k = -1 - if o_match: - j = o_match.start(0) - if q_match: - k = q_match.start(0) - if q_match and (not o_match or k < j): # QuotePatt matched - res.append(str[i:k]) - res.append(str[k+1]) - i = k + 2 - else: # OctalPatt matched - res.append(str[i:j]) - res.append(chr(int(str[j+1:j+4], 8))) - i = j + 4 - return _nulljoin(res) - -# The _getdate() routine is used to set the expiration time in the cookie's HTTP -# header. By default, _getdate() returns the current time in the appropriate -# "expires" format for a Set-Cookie header. The one optional argument is an -# offset from now, in seconds. For example, an offset of -3600 means "one hour -# ago". The offset may be a floating point number. -# - -_weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] - -_monthname = [None, - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - -def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname): - from time import gmtime, time - now = time() - year, month, day, hh, mm, ss, wd, y, z = gmtime(now + future) - return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % \ - (weekdayname[wd], day, monthname[month], year, hh, mm, ss) - - -class Morsel(dict): - """A class to hold ONE (key, value) pair. - - In a cookie, each such pair may have several attributes, so this class is - used to keep the attributes associated with the appropriate key,value pair. - This class also includes a coded_value attribute, which is used to hold - the network representation of the value. This is most useful when Python - objects are pickled for network transit. - """ - # RFC 2109 lists these attributes as reserved: - # path comment domain - # max-age secure version - # - # For historical reasons, these attributes are also reserved: - # expires - # - # This is an extension from Microsoft: - # httponly - # - # This dictionary provides a mapping from the lowercase - # variant on the left to the appropriate traditional - # formatting on the right. - _reserved = { - "expires" : "expires", - "path" : "Path", - "comment" : "Comment", - "domain" : "Domain", - "max-age" : "Max-Age", - "secure" : "Secure", - "httponly" : "HttpOnly", - "version" : "Version", - } - - _flags = {'secure', 'httponly'} - - def __init__(self): - # Set defaults - self._key = self._value = self._coded_value = None - - # Set default attributes - for key in self._reserved: - dict.__setitem__(self, key, "") - - @property - def key(self): - return self._key - - @key.setter - def key(self, key): - _warn_deprecated_setter('key') - self._key = key - - @property - def value(self): - return self._value - - @value.setter - def value(self, value): - _warn_deprecated_setter('value') - self._value = value - - @property - def coded_value(self): - return self._coded_value - - @coded_value.setter - def coded_value(self, coded_value): - _warn_deprecated_setter('coded_value') - self._coded_value = coded_value - - def __setitem__(self, K, V): - K = K.lower() - if not K in self._reserved: - raise CookieError("Invalid attribute %r" % (K,)) - dict.__setitem__(self, K, V) - - def setdefault(self, key, val=None): - key = key.lower() - if key not in self._reserved: - raise CookieError("Invalid attribute %r" % (key,)) - return dict.setdefault(self, key, val) - - def __eq__(self, morsel): - if not isinstance(morsel, Morsel): - return NotImplemented - return (dict.__eq__(self, morsel) and - self._value == morsel._value and - self._key == morsel._key and - self._coded_value == morsel._coded_value) - - __ne__ = object.__ne__ - - def copy(self): - morsel = Morsel() - dict.update(morsel, self) - morsel.__dict__.update(self.__dict__) - return morsel - - def update(self, values): - data = {} - for key, val in dict(values).items(): - key = key.lower() - if key not in self._reserved: - raise CookieError("Invalid attribute %r" % (key,)) - data[key] = val - dict.update(self, data) - - def isReservedKey(self, K): - return K.lower() in self._reserved - - def set(self, key, val, coded_val, LegalChars=_LegalChars): - if LegalChars != _LegalChars: - import warnings - warnings.warn( - 'LegalChars parameter is deprecated, ignored and will ' - 'be removed in future versions.', DeprecationWarning, - stacklevel=2) - - if key.lower() in self._reserved: - raise CookieError('Attempt to set a reserved key %r' % (key,)) - if not _is_legal_key(key): - raise CookieError('Illegal key %r' % (key,)) - - # It's a good key, so save it. - self._key = key - self._value = val - self._coded_value = coded_val - - def __getstate__(self): - return { - 'key': self._key, - 'value': self._value, - 'coded_value': self._coded_value, - } - - def __setstate__(self, state): - self._key = state['key'] - self._value = state['value'] - self._coded_value = state['coded_value'] - - def output(self, attrs=None, header="Set-Cookie:"): - return "%s %s" % (header, self.OutputString(attrs)) - - __str__ = output - - def __repr__(self): - return '<%s: %s>' % (self.__class__.__name__, self.OutputString()) - - def js_output(self, attrs=None): - # Print javascript - return """ - <script type="text/javascript"> - <!-- begin hiding - document.cookie = \"%s\"; - // end hiding --> - </script> - """ % (self.OutputString(attrs).replace('"', r'\"')) - - def OutputString(self, attrs=None): - # Build up our result - # - result = [] - append = result.append - - # First, the key=value pair - append("%s=%s" % (self.key, self.coded_value)) - - # Now add any defined attributes - if attrs is None: - attrs = self._reserved - items = sorted(self.items()) - for key, value in items: - if value == "": - continue - if key not in attrs: - continue - if key == "expires" and isinstance(value, int): - append("%s=%s" % (self._reserved[key], _getdate(value))) - elif key == "max-age" and isinstance(value, int): - append("%s=%d" % (self._reserved[key], value)) - elif key in self._flags: - if value: - append(str(self._reserved[key])) - else: - append("%s=%s" % (self._reserved[key], value)) - - # Return the result - return _semispacejoin(result) - - -# -# Pattern for finding cookie -# -# This used to be strict parsing based on the RFC2109 and RFC2068 -# specifications. I have since discovered that MSIE 3.0x doesn't -# follow the character rules outlined in those specs. As a -# result, the parsing rules here are less strict. -# - -_LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=" -_LegalValueChars = _LegalKeyChars + r'\[\]' -_CookiePattern = re.compile(r""" - \s* # Optional whitespace at start of cookie - (?P<key> # Start of group 'key' - [""" + _LegalKeyChars + r"""]+? # Any word of at least one letter - ) # End of group 'key' - ( # Optional group: there may not be a value. - \s*=\s* # Equal Sign - (?P<val> # Start of group 'val' - "(?:[^\\"]|\\.)*" # Any doublequoted string - | # or - \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr - | # or - [""" + _LegalValueChars + r"""]* # Any word or empty string - ) # End of group 'val' - )? # End of optional value group - \s* # Any number of spaces. - (\s+|;|$) # Ending either at space, semicolon, or EOS. - """, re.ASCII | re.VERBOSE) # re.ASCII may be removed if safe. - - -# At long last, here is the cookie class. Using this class is almost just like -# using a dictionary. See this module's docstring for example usage. -# -class BaseCookie(dict): - """A container class for a set of Morsels.""" - - def value_decode(self, val): - """real_value, coded_value = value_decode(STRING) - Called prior to setting a cookie's value from the network - representation. The VALUE is the value read from HTTP - header. - Override this function to modify the behavior of cookies. - """ - return val, val - - def value_encode(self, val): - """real_value, coded_value = value_encode(VALUE) - Called prior to setting a cookie's value from the dictionary - representation. The VALUE is the value being assigned. - Override this function to modify the behavior of cookies. - """ - strval = str(val) - return strval, strval - - def __init__(self, input=None): - if input: - self.load(input) - - def __set(self, key, real_value, coded_value): - """Private method for setting a cookie's value""" - M = self.get(key, Morsel()) - M.set(key, real_value, coded_value) - dict.__setitem__(self, key, M) - - def __setitem__(self, key, value): - """Dictionary style assignment.""" - if isinstance(value, Morsel): - # allow assignment of constructed Morsels (e.g. for pickling) - dict.__setitem__(self, key, value) - else: - rval, cval = self.value_encode(value) - self.__set(key, rval, cval) - - def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"): - """Return a string suitable for HTTP.""" - result = [] - items = sorted(self.items()) - for key, value in items: - result.append(value.output(attrs, header)) - return sep.join(result) - - __str__ = output - - def __repr__(self): - l = [] - items = sorted(self.items()) - for key, value in items: - l.append('%s=%s' % (key, repr(value.value))) - return '<%s: %s>' % (self.__class__.__name__, _spacejoin(l)) - - def js_output(self, attrs=None): - """Return a string suitable for JavaScript.""" - result = [] - items = sorted(self.items()) - for key, value in items: - result.append(value.js_output(attrs)) - return _nulljoin(result) - - def load(self, rawdata): - """Load cookies from a string (presumably HTTP_COOKIE) or - from a dictionary. Loading cookies from a dictionary 'd' - is equivalent to calling: - map(Cookie.__setitem__, d.keys(), d.values()) - """ - if isinstance(rawdata, str): - self.__parse_string(rawdata) - else: - # self.update() wouldn't call our custom __setitem__ - for key, value in rawdata.items(): - self[key] = value - return - - def __parse_string(self, str, patt=_CookiePattern): - i = 0 # Our starting point - n = len(str) # Length of string - parsed_items = [] # Parsed (type, key, value) triples - morsel_seen = False # A key=value pair was previously encountered - - TYPE_ATTRIBUTE = 1 - TYPE_KEYVALUE = 2 - - # We first parse the whole cookie string and reject it if it's - # syntactically invalid (this helps avoid some classes of injection - # attacks). - while 0 <= i < n: - # Start looking for a cookie - match = patt.match(str, i) - if not match: - # No more cookies - break - - key, value = match.group("key"), match.group("val") - i = match.end(0) - - if key[0] == "$": - if not morsel_seen: - # We ignore attributes which pertain to the cookie - # mechanism as a whole, such as "$Version". - # See RFC 2965. (Does anyone care?) - continue - parsed_items.append((TYPE_ATTRIBUTE, key[1:], value)) - elif key.lower() in Morsel._reserved: - if not morsel_seen: - # Invalid cookie string - return - if value is None: - if key.lower() in Morsel._flags: - parsed_items.append((TYPE_ATTRIBUTE, key, True)) - else: - # Invalid cookie string - return - else: - parsed_items.append((TYPE_ATTRIBUTE, key, _unquote(value))) - elif value is not None: - parsed_items.append((TYPE_KEYVALUE, key, self.value_decode(value))) - morsel_seen = True - else: - # Invalid cookie string - return - - # The cookie string is valid, apply it. - M = None # current morsel - for tp, key, value in parsed_items: - if tp == TYPE_ATTRIBUTE: - assert M is not None - M[key] = value - else: - assert tp == TYPE_KEYVALUE - rval, cval = value - self.__set(key, rval, cval) - M = self[key] - - -class SimpleCookie(BaseCookie): - """ - SimpleCookie supports strings as cookie values. When setting - the value using the dictionary assignment notation, SimpleCookie - calls the builtin str() to convert the value to a string. Values - received from HTTP are kept as strings. - """ - def value_decode(self, val): - return _unquote(val), val - - def value_encode(self, val): - strval = str(val) - return strval, _quote(strval) diff --git a/modules/language/python/module/http/server.py~ b/modules/language/python/module/http/server.py~ deleted file mode 100644 index e12e45b..0000000 --- a/modules/language/python/module/http/server.py~ +++ /dev/null @@ -1,1211 +0,0 @@ -"""HTTP server classes. - -Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see -SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST, -and CGIHTTPRequestHandler for CGI scripts. - -It does, however, optionally implement HTTP/1.1 persistent connections, -as of version 0.3. - -Notes on CGIHTTPRequestHandler ------------------------------- - -This class implements GET and POST requests to cgi-bin scripts. - -If the os.fork() function is not present (e.g. on Windows), -subprocess.Popen() is used as a fallback, with slightly altered semantics. - -In all cases, the implementation is intentionally naive -- all -requests are executed synchronously. - -SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL --- it may execute arbitrary Python code or external programs. - -Note that status code 200 is sent prior to execution of a CGI script, so -scripts cannot send other status codes such as 302 (redirect). - -XXX To do: - -- log requests even later (to capture byte count) -- log user-agent header and other interesting goodies -- send error log to separate file -""" - - -# See also: -# -# HTTP Working Group T. Berners-Lee -# INTERNET-DRAFT R. T. Fielding -# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen -# Expires September 8, 1995 March 8, 1995 -# -# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt -# -# and -# -# Network Working Group R. Fielding -# Request for Comments: 2616 et al -# Obsoletes: 2068 June 1999 -# Category: Standards Track -# -# URL: http://www.faqs.org/rfcs/rfc2616.html - -# Log files -# --------- -# -# Here's a quote from the NCSA httpd docs about log file format. -# -# | The logfile format is as follows. Each line consists of: -# | -# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb -# | -# | host: Either the DNS name or the IP number of the remote client -# | rfc931: Any information returned by identd for this person, -# | - otherwise. -# | authuser: If user sent a userid for authentication, the user name, -# | - otherwise. -# | DD: Day -# | Mon: Month (calendar name) -# | YYYY: Year -# | hh: hour (24-hour format, the machine's timezone) -# | mm: minutes -# | ss: seconds -# | request: The first line of the HTTP request as sent by the client. -# | ddd: the status code returned by the server, - if not available. -# | bbbb: the total number of bytes sent, -# | *not including the HTTP/1.0 header*, - if not available -# | -# | You can determine the name of the file accessed through request. -# -# (Actually, the latter is only true if you know the server configuration -# at the time the request was made!) - -__version__ = "0.6" - -__all__ = [ - "HTTPServer", "BaseHTTPRequestHandler", - "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler", -] - -import email.utils -import html -import http.client -import io -import mimetypes -import os -import posixpath -import select -import shutil -import socket # For gethostbyaddr() -import socketserver -import sys -import time -import urllib.parse -import copy -import argparse - -from http import HTTPStatus - - -# Default error message template -DEFAULT_ERROR_MESSAGE = """\ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> - <head> - <meta http-equiv="Content-Type" content="text/html;charset=utf-8"> - <title>Error response</title> - </head> - <body> - <h1>Error response</h1> - <p>Error code: %(code)d</p> - <p>Message: %(message)s.</p> - <p>Error code explanation: %(code)s - %(explain)s.</p> - </body> -</html> -""" - -DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" - -class HTTPServer(socketserver.TCPServer): - - allow_reuse_address = 1 # Seems to make sense in testing environment - - def server_bind(self): - """Override server_bind to store the server name.""" - socketserver.TCPServer.server_bind(self) - host, port = self.server_address[:2] - self.server_name = socket.getfqdn(host) - self.server_port = port - - -class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): - - """HTTP request handler base class. - - The following explanation of HTTP serves to guide you through the - code as well as to expose any misunderstandings I may have about - HTTP (so you don't need to read the code to figure out I'm wrong - :-). - - HTTP (HyperText Transfer Protocol) is an extensible protocol on - top of a reliable stream transport (e.g. TCP/IP). The protocol - recognizes three parts to a request: - - 1. One line identifying the request type and path - 2. An optional set of RFC-822-style headers - 3. An optional data part - - The headers and data are separated by a blank line. - - The first line of the request has the form - - <command> <path> <version> - - where <command> is a (case-sensitive) keyword such as GET or POST, - <path> is a string containing path information for the request, - and <version> should be the string "HTTP/1.0" or "HTTP/1.1". - <path> is encoded using the URL encoding scheme (using %xx to signify - the ASCII character with hex code xx). - - The specification specifies that lines are separated by CRLF but - for compatibility with the widest range of clients recommends - servers also handle LF. Similarly, whitespace in the request line - is treated sensibly (allowing multiple spaces between components - and allowing trailing whitespace). - - Similarly, for output, lines ought to be separated by CRLF pairs - but most clients grok LF characters just fine. - - If the first line of the request has the form - - <command> <path> - - (i.e. <version> is left out) then this is assumed to be an HTTP - 0.9 request; this form has no optional headers and data part and - the reply consists of just the data. - - The reply form of the HTTP 1.x protocol again has three parts: - - 1. One line giving the response code - 2. An optional set of RFC-822-style headers - 3. The data - - Again, the headers and data are separated by a blank line. - - The response code line has the form - - <version> <responsecode> <responsestring> - - where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"), - <responsecode> is a 3-digit response code indicating success or - failure of the request, and <responsestring> is an optional - human-readable string explaining what the response code means. - - This server parses the request and the headers, and then calls a - function specific to the request type (<command>). Specifically, - a request SPAM will be handled by a method do_SPAM(). If no - such method exists the server sends an error response to the - client. If it exists, it is called with no arguments: - - do_SPAM() - - Note that the request name is case sensitive (i.e. SPAM and spam - are different requests). - - The various request details are stored in instance variables: - - - client_address is the client IP address in the form (host, - port); - - - command, path and version are the broken-down request line; - - - headers is an instance of email.message.Message (or a derived - class) containing the header information; - - - rfile is a file object open for reading positioned at the - start of the optional input data part; - - - wfile is a file object open for writing. - - IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! - - The first thing to be written must be the response line. Then - follow 0 or more header lines, then a blank line, and then the - actual data (if any). The meaning of the header lines depends on - the command executed by the server; in most cases, when data is - returned, there should be at least one header line of the form - - Content-type: <type>/<subtype> - - where <type> and <subtype> should be registered MIME types, - e.g. "text/html" or "text/plain". - - """ - - # The Python system version, truncated to its first component. - sys_version = "Python/" + sys.version.split()[0] - - # The server software version. You may want to override this. - # The format is multiple whitespace-separated strings, - # where each string is of the form name[/version]. - server_version = "BaseHTTP/" + __version__ - - error_message_format = DEFAULT_ERROR_MESSAGE - error_content_type = DEFAULT_ERROR_CONTENT_TYPE - - # The default request version. This only affects responses up until - # the point where the request line is parsed, so it mainly decides what - # the client gets back when sending a malformed request line. - # Most web servers default to HTTP 0.9, i.e. don't send a status line. - default_request_version = "HTTP/0.9" - - def parse_request(self): - """Parse a request (internal). - - The request should be stored in self.raw_requestline; the results - are in self.command, self.path, self.request_version and - self.headers. - - Return True for success, False for failure; on failure, an - error is sent back. - - """ - self.command = None # set in case of error on the first line - self.request_version = version = self.default_request_version - self.close_connection = True - requestline = str(self.raw_requestline, 'iso-8859-1') - requestline = requestline.rstrip('\r\n') - self.requestline = requestline - words = requestline.split() - if len(words) == 3: - command, path, version = words - try: - if version[:5] != 'HTTP/': - raise ValueError - base_version_number = version.split('/', 1)[1] - version_number = base_version_number.split(".") - # RFC 2145 section 3.1 says there can be only one "." and - # - major and minor numbers MUST be treated as - # separate integers; - # - HTTP/2.4 is a lower version than HTTP/2.13, which in - # turn is lower than HTTP/12.3; - # - Leading zeros MUST be ignored by recipients. - if len(version_number) != 2: - raise ValueError - version_number = int(version_number[0]), int(version_number[1]) - except (ValueError, IndexError): - self.send_error( - HTTPStatus.BAD_REQUEST, - "Bad request version (%r)" % version) - return False - if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1": - self.close_connection = False - if version_number >= (2, 0): - self.send_error( - HTTPStatus.HTTP_VERSION_NOT_SUPPORTED, - "Invalid HTTP version (%s)" % base_version_number) - return False - elif len(words) == 2: - command, path = words - self.close_connection = True - if command != 'GET': - self.send_error( - HTTPStatus.BAD_REQUEST, - "Bad HTTP/0.9 request type (%r)" % command) - return False - elif not words: - return False - else: - self.send_error( - HTTPStatus.BAD_REQUEST, - "Bad request syntax (%r)" % requestline) - return False - self.command, self.path, self.request_version = command, path, version - - # Examine the headers and look for a Connection directive. - try: - self.headers = http.client.parse_headers(self.rfile, - _class=self.MessageClass) - except http.client.LineTooLong as err: - self.send_error( - HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, - "Line too long", - str(err)) - return False - except http.client.HTTPException as err: - self.send_error( - HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, - "Too many headers", - str(err) - ) - return False - - conntype = self.headers.get('Connection', "") - if conntype.lower() == 'close': - self.close_connection = True - elif (conntype.lower() == 'keep-alive' and - self.protocol_version >= "HTTP/1.1"): - self.close_connection = False - # Examine the headers and look for an Expect directive - expect = self.headers.get('Expect', "") - if (expect.lower() == "100-continue" and - self.protocol_version >= "HTTP/1.1" and - self.request_version >= "HTTP/1.1"): - if not self.handle_expect_100(): - return False - return True - - def handle_expect_100(self): - """Decide what to do with an "Expect: 100-continue" header. - - If the client is expecting a 100 Continue response, we must - respond with either a 100 Continue or a final response before - waiting for the request body. The default is to always respond - with a 100 Continue. You can behave differently (for example, - reject unauthorized requests) by overriding this method. - - This method should either return True (possibly after sending - a 100 Continue response) or send an error response and return - False. - - """ - self.send_response_only(HTTPStatus.CONTINUE) - self.end_headers() - return True - - def handle_one_request(self): - """Handle a single HTTP request. - - You normally don't need to override this method; see the class - __doc__ string for information on how to handle specific HTTP - commands such as GET and POST. - - """ - try: - self.raw_requestline = self.rfile.readline(65537) - if len(self.raw_requestline) > 65536: - self.requestline = '' - self.request_version = '' - self.command = '' - self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG) - return - if not self.raw_requestline: - self.close_connection = True - return - if not self.parse_request(): - # An error code has been sent, just exit - return - mname = 'do_' + self.command - if not hasattr(self, mname): - self.send_error( - HTTPStatus.NOT_IMPLEMENTED, - "Unsupported method (%r)" % self.command) - return - method = getattr(self, mname) - method() - self.wfile.flush() #actually send the response if not already done. - except socket.timeout as e: - #a read or a write timed out. Discard this connection - self.log_error("Request timed out: %r", e) - self.close_connection = True - return - - def handle(self): - """Handle multiple requests if necessary.""" - self.close_connection = True - - self.handle_one_request() - while not self.close_connection: - self.handle_one_request() - - def send_error(self, code, message=None, explain=None): - """Send and log an error reply. - - Arguments are - * code: an HTTP error code - 3 digits - * message: a simple optional 1 line reason phrase. - *( HTAB / SP / VCHAR / %x80-FF ) - defaults to short entry matching the response code - * explain: a detailed message defaults to the long entry - matching the response code. - - This sends an error response (so it must be called before any - output has been generated), logs the error, and finally sends - a piece of HTML explaining the error to the user. - - """ - - try: - shortmsg, longmsg = self.responses[code] - except KeyError: - shortmsg, longmsg = '???', '???' - if message is None: - message = shortmsg - if explain is None: - explain = longmsg - self.log_error("code %d, message %s", code, message) - self.send_response(code, message) - self.send_header('Connection', 'close') - - # Message body is omitted for cases described in: - # - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified) - # - RFC7231: 6.3.6. 205(Reset Content) - body = None - if (code >= 200 and - code not in (HTTPStatus.NO_CONTENT, - HTTPStatus.RESET_CONTENT, - HTTPStatus.NOT_MODIFIED)): - # HTML encode to prevent Cross Site Scripting attacks - # (see bug #1100201) - content = (self.error_message_format % { - 'code': code, - 'message': html.escape(message, quote=False), - 'explain': html.escape(explain, quote=False) - }) - body = content.encode('UTF-8', 'replace') - self.send_header("Content-Type", self.error_content_type) - self.send_header('Content-Length', int(len(body))) - self.end_headers() - - if self.command != 'HEAD' and body: - self.wfile.write(body) - - def send_response(self, code, message=None): - """Add the response header to the headers buffer and log the - response code. - - Also send two standard headers with the server software - version and the current date. - - """ - self.log_request(code) - self.send_response_only(code, message) - self.send_header('Server', self.version_string()) - self.send_header('Date', self.date_time_string()) - - def send_response_only(self, code, message=None): - """Send the response header only.""" - if self.request_version != 'HTTP/0.9': - if message is None: - if code in self.responses: - message = self.responses[code][0] - else: - message = '' - if not hasattr(self, '_headers_buffer'): - self._headers_buffer = [] - self._headers_buffer.append(("%s %d %s\r\n" % - (self.protocol_version, code, message)).encode( - 'latin-1', 'strict')) - - def send_header(self, keyword, value): - """Send a MIME header to the headers buffer.""" - if self.request_version != 'HTTP/0.9': - if not hasattr(self, '_headers_buffer'): - self._headers_buffer = [] - self._headers_buffer.append( - ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict')) - - if keyword.lower() == 'connection': - if value.lower() == 'close': - self.close_connection = True - elif value.lower() == 'keep-alive': - self.close_connection = False - - def end_headers(self): - """Send the blank line ending the MIME headers.""" - if self.request_version != 'HTTP/0.9': - self._headers_buffer.append(b"\r\n") - self.flush_headers() - - def flush_headers(self): - if hasattr(self, '_headers_buffer'): - self.wfile.write(b"".join(self._headers_buffer)) - self._headers_buffer = [] - - def log_request(self, code='-', size='-'): - """Log an accepted request. - - This is called by send_response(). - - """ - if isinstance(code, HTTPStatus): - code = code.value - self.log_message('"%s" %s %s', - self.requestline, str(code), str(size)) - - def log_error(self, format, *args): - """Log an error. - - This is called when a request cannot be fulfilled. By - default it passes the message on to log_message(). - - Arguments are the same as for log_message(). - - XXX This should go to the separate error log. - - """ - - self.log_message(format, *args) - - def log_message(self, format, *args): - """Log an arbitrary message. - - This is used by all other logging functions. Override - it if you have specific logging wishes. - - The first argument, FORMAT, is a format string for the - message to be logged. If the format string contains - any % escapes requiring parameters, they should be - specified as subsequent arguments (it's just like - printf!). - - The client ip and current date/time are prefixed to - every message. - - """ - - sys.stderr.write("%s - - [%s] %s\n" % - (self.address_string(), - self.log_date_time_string(), - format%args)) - - def version_string(self): - """Return the server software version string.""" - return self.server_version + ' ' + self.sys_version - - def date_time_string(self, timestamp=None): - """Return the current date and time formatted for a message header.""" - if timestamp is None: - timestamp = time.time() - return email.utils.formatdate(timestamp, usegmt=True) - - def log_date_time_string(self): - """Return the current time formatted for logging.""" - now = time.time() - year, month, day, hh, mm, ss, x, y, z = time.localtime(now) - s = "%02d/%3s/%04d %02d:%02d:%02d" % ( - day, self.monthname[month], year, hh, mm, ss) - return s - - weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] - - monthname = [None, - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - - def address_string(self): - """Return the client address.""" - - return self.client_address[0] - - # Essentially static class variables - - # The version of the HTTP protocol we support. - # Set this to HTTP/1.1 to enable automatic keepalive - protocol_version = "HTTP/1.0" - - # MessageClass used to parse headers - MessageClass = http.client.HTTPMessage - - # hack to maintain backwards compatibility - responses = { - v: (v.phrase, v.description) - for v in HTTPStatus.__members__.values() - } - - -class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): - - """Simple HTTP request handler with GET and HEAD commands. - - This serves files from the current directory and any of its - subdirectories. The MIME type for files is determined by - calling the .guess_type() method. - - The GET and HEAD requests are identical except that the HEAD - request omits the actual contents of the file. - - """ - - server_version = "SimpleHTTP/" + __version__ - - def do_GET(self): - """Serve a GET request.""" - f = self.send_head() - if f: - try: - self.copyfile(f, self.wfile) - finally: - f.close() - - def do_HEAD(self): - """Serve a HEAD request.""" - f = self.send_head() - if f: - f.close() - - def send_head(self): - """Common code for GET and HEAD commands. - - This sends the response code and MIME headers. - - Return value is either a file object (which has to be copied - to the outputfile by the caller unless the command was HEAD, - and must be closed by the caller under all circumstances), or - None, in which case the caller has nothing further to do. - - """ - path = self.translate_path(self.path) - f = None - if os.path.isdir(path): - parts = urllib.parse.urlsplit(self.path) - if not parts.path.endswith('/'): - # redirect browser - doing basically what apache does - self.send_response(HTTPStatus.MOVED_PERMANENTLY) - new_parts = (parts[0], parts[1], parts[2] + '/', - parts[3], parts[4]) - new_url = urllib.parse.urlunsplit(new_parts) - self.send_header("Location", new_url) - self.end_headers() - return None - for index in "index.html", "index.htm": - index = os.path.join(path, index) - if os.path.exists(index): - path = index - break - else: - return self.list_directory(path) - ctype = self.guess_type(path) - try: - f = open(path, 'rb') - except OSError: - self.send_error(HTTPStatus.NOT_FOUND, "File not found") - return None - try: - self.send_response(HTTPStatus.OK) - self.send_header("Content-type", ctype) - fs = os.fstat(f.fileno()) - self.send_header("Content-Length", str(fs[6])) - self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) - self.end_headers() - return f - except: - f.close() - raise - - def list_directory(self, path): - """Helper to produce a directory listing (absent index.html). - - Return value is either a file object, or None (indicating an - error). In either case, the headers are sent, making the - interface the same as for send_head(). - - """ - try: - list = os.listdir(path) - except OSError: - self.send_error( - HTTPStatus.NOT_FOUND, - "No permission to list directory") - return None - list.sort(key=lambda a: a.lower()) - r = [] - try: - displaypath = urllib.parse.unquote(self.path, - errors='surrogatepass') - except UnicodeDecodeError: - displaypath = urllib.parse.unquote(path) - displaypath = html.escape(displaypath, quote=False) - enc = sys.getfilesystemencoding() - title = 'Directory listing for %s' % displaypath - r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" ' - '"http://www.w3.org/TR/html4/strict.dtd">') - r.append('<html>\n<head>') - r.append('<meta http-equiv="Content-Type" ' - 'content="text/html; charset=%s">' % enc) - r.append('<title>%s</title>\n</head>' % title) - r.append('<body>\n<h1>%s</h1>' % title) - r.append('<hr>\n<ul>') - for name in list: - fullname = os.path.join(path, name) - displayname = linkname = name - # Append / for directories or @ for symbolic links - if os.path.isdir(fullname): - displayname = name + "/" - linkname = name + "/" - if os.path.islink(fullname): - displayname = name + "@" - # Note: a link to a directory displays with @ and links with / - r.append('<li><a href="%s">%s</a></li>' - % (urllib.parse.quote(linkname, - errors='surrogatepass'), - html.escape(displayname, quote=False))) - r.append('</ul>\n<hr>\n</body>\n</html>\n') - encoded = '\n'.join(r).encode(enc, 'surrogateescape') - f = io.BytesIO() - f.write(encoded) - f.seek(0) - self.send_response(HTTPStatus.OK) - self.send_header("Content-type", "text/html; charset=%s" % enc) - self.send_header("Content-Length", str(len(encoded))) - self.end_headers() - return f - - def translate_path(self, path): - """Translate a /-separated PATH to the local filename syntax. - - Components that mean special things to the local file system - (e.g. drive or directory names) are ignored. (XXX They should - probably be diagnosed.) - - """ - # abandon query parameters - path = path.split('?',1)[0] - path = path.split('#',1)[0] - # Don't forget explicit trailing slash when normalizing. Issue17324 - trailing_slash = path.rstrip().endswith('/') - try: - path = urllib.parse.unquote(path, errors='surrogatepass') - except UnicodeDecodeError: - path = urllib.parse.unquote(path) - path = posixpath.normpath(path) - words = path.split('/') - words = filter(None, words) - path = os.getcwd() - for word in words: - if os.path.dirname(word) or word in (os.curdir, os.pardir): - # Ignore components that are not a simple file/directory name - continue - path = os.path.join(path, word) - if trailing_slash: - path += '/' - return path - - def copyfile(self, source, outputfile): - """Copy all data between two file objects. - - The SOURCE argument is a file object open for reading - (or anything with a read() method) and the DESTINATION - argument is a file object open for writing (or - anything with a write() method). - - The only reason for overriding this would be to change - the block size or perhaps to replace newlines by CRLF - -- note however that this the default server uses this - to copy binary data as well. - - """ - shutil.copyfileobj(source, outputfile) - - def guess_type(self, path): - """Guess the type of a file. - - Argument is a PATH (a filename). - - Return value is a string of the form type/subtype, - usable for a MIME Content-type header. - - The default implementation looks the file's extension - up in the table self.extensions_map, using application/octet-stream - as a default; however it would be permissible (if - slow) to look inside the data to make a better guess. - - """ - - base, ext = posixpath.splitext(path) - if ext in self.extensions_map: - return self.extensions_map[ext] - ext = ext.lower() - if ext in self.extensions_map: - return self.extensions_map[ext] - else: - return self.extensions_map[''] - - if not mimetypes.inited: - mimetypes.init() # try to read system mime.types - extensions_map = mimetypes.types_map.copy() - extensions_map.update({ - '': 'application/octet-stream', # Default - '.py': 'text/plain', - '.c': 'text/plain', - '.h': 'text/plain', - }) - - -# Utilities for CGIHTTPRequestHandler - -def _url_collapse_path(path): - """ - Given a URL path, remove extra '/'s and '.' path elements and collapse - any '..' references and returns a collapsed path. - - Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. - The utility of this function is limited to is_cgi method and helps - preventing some security attacks. - - Returns: The reconstituted URL, which will always start with a '/'. - - Raises: IndexError if too many '..' occur within the path. - - """ - # Query component should not be involved. - path, _, query = path.partition('?') - path = urllib.parse.unquote(path) - - # Similar to os.path.split(os.path.normpath(path)) but specific to URL - # path semantics rather than local operating system semantics. - path_parts = path.split('/') - head_parts = [] - for part in path_parts[:-1]: - if part == '..': - head_parts.pop() # IndexError if more '..' than prior parts - elif part and part != '.': - head_parts.append( part ) - if path_parts: - tail_part = path_parts.pop() - if tail_part: - if tail_part == '..': - head_parts.pop() - tail_part = '' - elif tail_part == '.': - tail_part = '' - else: - tail_part = '' - - if query: - tail_part = '?'.join((tail_part, query)) - - splitpath = ('/' + '/'.join(head_parts), tail_part) - collapsed_path = "/".join(splitpath) - - return collapsed_path - - - -nobody = None - -def nobody_uid(): - """Internal routine to get nobody's uid""" - global nobody - if nobody: - return nobody - try: - import pwd - except ImportError: - return -1 - try: - nobody = pwd.getpwnam('nobody')[2] - except KeyError: - nobody = 1 + max(x[2] for x in pwd.getpwall()) - return nobody - - -def executable(path): - """Test for executable file.""" - return os.access(path, os.X_OK) - - -class CGIHTTPRequestHandler(SimpleHTTPRequestHandler): - - """Complete HTTP server with GET, HEAD and POST commands. - - GET and HEAD also support running CGI scripts. - - The POST command is *only* implemented for CGI scripts. - - """ - - # Determine platform specifics - have_fork = hasattr(os, 'fork') - - # Make rfile unbuffered -- we need to read one line and then pass - # the rest to a subprocess, so we can't use buffered input. - rbufsize = 0 - - def do_POST(self): - """Serve a POST request. - - This is only implemented for CGI scripts. - - """ - - if self.is_cgi(): - self.run_cgi() - else: - self.send_error( - HTTPStatus.NOT_IMPLEMENTED, - "Can only POST to CGI scripts") - - def send_head(self): - """Version of send_head that support CGI scripts""" - if self.is_cgi(): - return self.run_cgi() - else: - return SimpleHTTPRequestHandler.send_head(self) - - def is_cgi(self): - """Test whether self.path corresponds to a CGI script. - - Returns True and updates the cgi_info attribute to the tuple - (dir, rest) if self.path requires running a CGI script. - Returns False otherwise. - - If any exception is raised, the caller should assume that - self.path was rejected as invalid and act accordingly. - - The default implementation tests whether the normalized url - path begins with one of the strings in self.cgi_directories - (and the next character is a '/' or the end of the string). - - """ - collapsed_path = _url_collapse_path(self.path) - dir_sep = collapsed_path.find('/', 1) - head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:] - if head in self.cgi_directories: - self.cgi_info = head, tail - return True - return False - - - cgi_directories = ['/cgi-bin', '/htbin'] - - def is_executable(self, path): - """Test whether argument path is an executable file.""" - return executable(path) - - def is_python(self, path): - """Test whether argument path is a Python script.""" - head, tail = os.path.splitext(path) - return tail.lower() in (".py", ".pyw") - - def run_cgi(self): - """Execute a CGI script.""" - dir, rest = self.cgi_info - path = dir + '/' + rest - i = path.find('/', len(dir)+1) - while i >= 0: - nextdir = path[:i] - nextrest = path[i+1:] - - scriptdir = self.translate_path(nextdir) - if os.path.isdir(scriptdir): - dir, rest = nextdir, nextrest - i = path.find('/', len(dir)+1) - else: - break - - # find an explicit query string, if present. - rest, _, query = rest.partition('?') - - # dissect the part after the directory name into a script name & - # a possible additional path, to be stored in PATH_INFO. - i = rest.find('/') - if i >= 0: - script, rest = rest[:i], rest[i:] - else: - script, rest = rest, '' - - scriptname = dir + '/' + script - scriptfile = self.translate_path(scriptname) - if not os.path.exists(scriptfile): - self.send_error( - HTTPStatus.NOT_FOUND, - "No such CGI script (%r)" % scriptname) - return - if not os.path.isfile(scriptfile): - self.send_error( - HTTPStatus.FORBIDDEN, - "CGI script is not a plain file (%r)" % scriptname) - return - ispy = self.is_python(scriptname) - if self.have_fork or not ispy: - if not self.is_executable(scriptfile): - self.send_error( - HTTPStatus.FORBIDDEN, - "CGI script is not executable (%r)" % scriptname) - return - - # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html - # XXX Much of the following could be prepared ahead of time! - env = copy.deepcopy(os.environ) - env['SERVER_SOFTWARE'] = self.version_string() - env['SERVER_NAME'] = self.server.server_name - env['GATEWAY_INTERFACE'] = 'CGI/1.1' - env['SERVER_PROTOCOL'] = self.protocol_version - env['SERVER_PORT'] = str(self.server.server_port) - env['REQUEST_METHOD'] = self.command - uqrest = urllib.parse.unquote(rest) - env['PATH_INFO'] = uqrest - env['PATH_TRANSLATED'] = self.translate_path(uqrest) - env['SCRIPT_NAME'] = scriptname - if query: - env['QUERY_STRING'] = query - env['REMOTE_ADDR'] = self.client_address[0] - authorization = self.headers.get("authorization") - if authorization: - authorization = authorization.split() - if len(authorization) == 2: - import base64, binascii - env['AUTH_TYPE'] = authorization[0] - if authorization[0].lower() == "basic": - try: - authorization = authorization[1].encode('ascii') - authorization = base64.decodebytes(authorization).\ - decode('ascii') - except (binascii.Error, UnicodeError): - pass - else: - authorization = authorization.split(':') - if len(authorization) == 2: - env['REMOTE_USER'] = authorization[0] - # XXX REMOTE_IDENT - if self.headers.get('content-type') is None: - env['CONTENT_TYPE'] = self.headers.get_content_type() - else: - env['CONTENT_TYPE'] = self.headers['content-type'] - length = self.headers.get('content-length') - if length: - env['CONTENT_LENGTH'] = length - referer = self.headers.get('referer') - if referer: - env['HTTP_REFERER'] = referer - accept = [] - for line in self.headers.getallmatchingheaders('accept'): - if line[:1] in "\t\n\r ": - accept.append(line.strip()) - else: - accept = accept + line[7:].split(',') - env['HTTP_ACCEPT'] = ','.join(accept) - ua = self.headers.get('user-agent') - if ua: - env['HTTP_USER_AGENT'] = ua - co = filter(None, self.headers.get_all('cookie', [])) - cookie_str = ', '.join(co) - if cookie_str: - env['HTTP_COOKIE'] = cookie_str - # XXX Other HTTP_* headers - # Since we're setting the env in the parent, provide empty - # values to override previously set values - for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', - 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): - env.setdefault(k, "") - - self.send_response(HTTPStatus.OK, "Script output follows") - self.flush_headers() - - decoded_query = query.replace('+', ' ') - - if self.have_fork: - # Unix -- fork as we should - args = [script] - if '=' not in decoded_query: - args.append(decoded_query) - nobody = nobody_uid() - self.wfile.flush() # Always flush before forking - pid = os.fork() - if pid != 0: - # Parent - pid, sts = os.waitpid(pid, 0) - # throw away additional data [see bug #427345] - while select.select([self.rfile], [], [], 0)[0]: - if not self.rfile.read(1): - break - if sts: - self.log_error("CGI script exit status %#x", sts) - return - # Child - try: - try: - os.setuid(nobody) - except OSError: - pass - os.dup2(self.rfile.fileno(), 0) - os.dup2(self.wfile.fileno(), 1) - os.execve(scriptfile, args, env) - except: - self.server.handle_error(self.request, self.client_address) - os._exit(127) - - else: - # Non-Unix -- use subprocess - import subprocess - cmdline = [scriptfile] - if self.is_python(scriptfile): - interp = sys.executable - if interp.lower().endswith("w.exe"): - # On Windows, use python.exe, not pythonw.exe - interp = interp[:-5] + interp[-4:] - cmdline = [interp, '-u'] + cmdline - if '=' not in query: - cmdline.append(query) - self.log_message("command: %s", subprocess.list2cmdline(cmdline)) - try: - nbytes = int(length) - except (TypeError, ValueError): - nbytes = 0 - p = subprocess.Popen(cmdline, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env = env - ) - if self.command.lower() == "post" and nbytes > 0: - data = self.rfile.read(nbytes) - else: - data = None - # throw away additional data [see bug #427345] - while select.select([self.rfile._sock], [], [], 0)[0]: - if not self.rfile._sock.recv(1): - break - stdout, stderr = p.communicate(data) - self.wfile.write(stdout) - if stderr: - self.log_error('%s', stderr) - p.stderr.close() - p.stdout.close() - status = p.returncode - if status: - self.log_error("CGI script exit status %#x", status) - else: - self.log_message("CGI script exited OK") - - -def test(HandlerClass=BaseHTTPRequestHandler, - ServerClass=HTTPServer, protocol="HTTP/1.0", port=8000, bind=""): - """Test the HTTP request handler class. - - This runs an HTTP server on port 8000 (or the port argument). - - """ - server_address = (bind, port) - - HandlerClass.protocol_version = protocol - with ServerClass(server_address, HandlerClass) as httpd: - sa = httpd.socket.getsockname() - serve_message = "Serving HTTP on {host} port {port} (http://{host}:{port}/) ..." - print(serve_message.format(host=sa[0], port=sa[1])) - try: - httpd.serve_forever() - except KeyboardInterrupt: - print("\nKeyboard interrupt received, exiting.") - sys.exit(0) - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--cgi', action='store_true', - help='Run as CGI Server') - parser.add_argument('--bind', '-b', default='', metavar='ADDRESS', - help='Specify alternate bind address ' - '[default: all interfaces]') - parser.add_argument('port', action='store', - default=8000, type=int, - nargs='?', - help='Specify alternate port [default: 8000]') - args = parser.parse_args() - if args.cgi: - handler_class = CGIHTTPRequestHandler - else: - handler_class = SimpleHTTPRequestHandler - test(HandlerClass=handler_class, port=args.port, bind=args.bind) diff --git a/modules/language/python/module/urllib/#error.py# b/modules/language/python/module/urllib/#error.py# deleted file mode 100644 index 7df0238..0000000 --- a/modules/language/python/module/urllib/#error.py# +++ /dev/null @@ -1,77 +0,0 @@ -module(urllib,error) - -"""Exception classes raised by urllib. - -The base exception class is URLError, which inherits from OSError. It -doesn't define any behavior of its own, but is the base class for all -exceptions defined in this package. - -HTTPError is an exception class that is also a valid HTTP response -instance. It behaves this way because HTTP protocol errors are valid -responses, with a status code, headers, and a body. In some contexts, -an application may want to handle an exception like a regular -response. -""" - -import urllib.response - -__all__ = ['URLError', 'HTTPError', 'ContentTooShortError'] - -class URLError(OSError): - # URLError is a sub-type of OSError, but it doesn't share any of - # the implementation. need to override __init__ and __str__. - # It sets self.args for compatibility with other EnvironmentError - # subclasses, but args doesn't have the typical format with errno in - # slot 0 and strerror in slot 1. This may be better than nothing. - def __init__(self, reason, filename=None): - self.args = reason, - self.reason = reason - if filename is not None: - self.filename = filename - - def __str__(self): - return '<urlopen error %s>' % self.reason - - -class HTTPError(URLError, urllib.response.addinfourl): - """Raised when HTTP error occurs, but also acts like non-error return""" - __super_init = urllib.response.addinfourl.__init__ - - def __init__(self, url, code, msg, hdrs, fp): - self.code = code - self.msg = msg - self.hdrs = hdrs - self.fp = fp - self.filename = url - # The addinfourl classes depend on fp being a valid file - # object. In some cases, the HTTPError may not have a valid - # file object. If this happens, the simplest workaround is to - # not initialize the base classes. - if fp is not None: - self.__super_init(fp, hdrs, url, code) - - def __str__(self): - return 'HTTP Error %s: %s' % (self.code, self.msg) - - def __repr__(self): - return '<HTTPError %s: %r>' % (self.code, self.msg) - - # since URLError specifies a .reason attribute, HTTPError should also - # provide this attribute. See issue13211 for discussion. - @property - def reason(self): - return self.msg - - @property - def headers(self): - return self.hdrs - - @headers.setter - def headers(self, headers): - self.hdrs = headers - -class ContentTooShortError(URLError): - """Exception raised when downloaded size does not match content-length.""" - def __init__(self, message, content): - URLError.__init__(self, message) - self.content = content diff --git a/modules/language/python/module/urllib/.#error.py b/modules/language/python/module/urllib/.#error.py deleted file mode 120000 index c8d0ed8..0000000 --- a/modules/language/python/module/urllib/.#error.py +++ /dev/null @@ -1 +0,0 @@ -stis@lapwine.2086:1535196580
\ No newline at end of file diff --git a/modules/language/python/module/xml/#sax.py# b/modules/language/python/module/xml/#sax.py# deleted file mode 100644 index eccc3fe..0000000 --- a/modules/language/python/module/xml/#sax.py# +++ /dev/null @@ -1,108 +0,0 @@ -module(xml.sax) -"""Simple API for XML (SAX) implementation for Python. - -This module provides an implementation of the SAX 2 interface; -information about the Java version of the interface can be found at -http://www.megginson.com/SAX/. The Python version of the interface is -documented at <...>. - -This package contains the following modules: - -handler -- Base classes and constants which define the SAX 2 API for - the 'client-side' of SAX for Python. - -saxutils -- Implementation of the convenience classes commonly used to - work with SAX. - -xmlreader -- Base classes and constants which define the SAX 2 API for - the parsers used with SAX for Python. - -expatreader -- Driver that allows use of the Expat parser with SAX. -""" - -from .xmlreader import InputSource -from .handler import ContentHandler, ErrorHandler -from ._exceptions import SAXException, SAXNotRecognizedException, \ - SAXParseException, SAXNotSupportedException, \ - SAXReaderNotAvailable - - -def parse(source, handler, errorHandler=ErrorHandler()): - parser = make_parser() - parser.setContentHandler(handler) - parser.setErrorHandler(errorHandler) - parser.parse(source) - -def parseString(string, handler, errorHandler=ErrorHandler()): - import io - if errorHandler is None: - errorHandler = ErrorHandler() - parser = make_parser() - parser.setContentHandler(handler) - parser.setErrorHandler(errorHandler) - - inpsrc = InputSource() - if isinstance(string, str): - inpsrc.setCharacterStream(io.StringIO(string)) - else: - inpsrc.setByteStream(io.BytesIO(string)) - parser.parse(inpsrc) - -# this is the parser list used by the make_parser function if no -# alternatives are given as parameters to the function - -default_parser_list = ["xml.sax.expatreader"] - -# tell modulefinder that importing sax potentially imports expatreader -_false = 0 -if _false: - import xml.sax.expatreader - -import os, sys -if "PY_SAX_PARSER" in os.environ: - default_parser_list = os.environ["PY_SAX_PARSER"].split(",") -del os - -_key = "python.xml.sax.parser" -if sys.platform[:4] == "java" and sys.registry.containsKey(_key): - default_parser_list = sys.registry.getProperty(_key).split(",") - - -def make_parser(parser_list = []): - """Creates and returns a SAX parser. - - Creates the first parser it is able to instantiate of the ones - given in the list created by doing parser_list + - default_parser_list. The lists must contain the names of Python - modules containing both a SAX parser and a create_parser function.""" - - for parser_name in parser_list + default_parser_list: - try: - return _create_parser(parser_name) - except ImportError as e: - import sys - if parser_name in sys.modules: - # The parser module was found, but importing it - # failed unexpectedly, pass this exception through - raise - except SAXReaderNotAvailable: - # The parser module detected that it won't work properly, - # so try the next one - pass - - raise SAXReaderNotAvailable("No parsers found", None) - -# --- Internal utility methods used by make_parser - -if sys.platform[ : 4] == "java": - def _create_parser(parser_name): - from org.python.core import imp - drv_module = imp.importName(parser_name, 0, globals()) - return drv_module.create_parser() - -else: - def _create_parser(parser_name): - drv_module = __import__(parser_name,{},{},['create_parser']) - return drv_module.create_parser() - -del sys diff --git a/modules/language/python/module/xml/dom.py~ b/modules/language/python/module/xml/dom.py~ deleted file mode 100644 index 97cf9a6..0000000 --- a/modules/language/python/module/xml/dom.py~ +++ /dev/null @@ -1,140 +0,0 @@ -"""W3C Document Object Model implementation for Python. - -The Python mapping of the Document Object Model is documented in the -Python Library Reference in the section on the xml.dom package. - -This package contains the following modules: - -minidom -- A simple implementation of the Level 1 DOM with namespace - support added (based on the Level 2 specification) and other - minor Level 2 functionality. - -pulldom -- DOM builder supporting on-demand tree-building for selected - subtrees of the document. - -""" - - -class Node: - """Class giving the NodeType constants.""" - __slots__ = () - - # DOM implementations may use this as a base class for their own - # Node implementations. If they don't, the constants defined here - # should still be used as the canonical definitions as they match - # the values given in the W3C recommendation. Client code can - # safely refer to these values in all tests of Node.nodeType - # values. - - ELEMENT_NODE = 1 - ATTRIBUTE_NODE = 2 - TEXT_NODE = 3 - CDATA_SECTION_NODE = 4 - ENTITY_REFERENCE_NODE = 5 - ENTITY_NODE = 6 - PROCESSING_INSTRUCTION_NODE = 7 - COMMENT_NODE = 8 - DOCUMENT_NODE = 9 - DOCUMENT_TYPE_NODE = 10 - DOCUMENT_FRAGMENT_NODE = 11 - NOTATION_NODE = 12 - - -#ExceptionCode -INDEX_SIZE_ERR = 1 -DOMSTRING_SIZE_ERR = 2 -HIERARCHY_REQUEST_ERR = 3 -WRONG_DOCUMENT_ERR = 4 -INVALID_CHARACTER_ERR = 5 -NO_DATA_ALLOWED_ERR = 6 -NO_MODIFICATION_ALLOWED_ERR = 7 -NOT_FOUND_ERR = 8 -NOT_SUPPORTED_ERR = 9 -INUSE_ATTRIBUTE_ERR = 10 -INVALID_STATE_ERR = 11 -SYNTAX_ERR = 12 -INVALID_MODIFICATION_ERR = 13 -NAMESPACE_ERR = 14 -INVALID_ACCESS_ERR = 15 -VALIDATION_ERR = 16 - - -class DOMException(Exception): - """Abstract base class for DOM exceptions. - Exceptions with specific codes are specializations of this class.""" - - def __init__(self, *args, **kw): - if self.__class__ is DOMException: - raise RuntimeError( - "DOMException should not be instantiated directly") - Exception.__init__(self, *args, **kw) - - def _get_code(self): - return self.code - - -class IndexSizeErr(DOMException): - code = INDEX_SIZE_ERR - -class DomstringSizeErr(DOMException): - code = DOMSTRING_SIZE_ERR - -class HierarchyRequestErr(DOMException): - code = HIERARCHY_REQUEST_ERR - -class WrongDocumentErr(DOMException): - code = WRONG_DOCUMENT_ERR - -class InvalidCharacterErr(DOMException): - code = INVALID_CHARACTER_ERR - -class NoDataAllowedErr(DOMException): - code = NO_DATA_ALLOWED_ERR - -class NoModificationAllowedErr(DOMException): - code = NO_MODIFICATION_ALLOWED_ERR - -class NotFoundErr(DOMException): - code = NOT_FOUND_ERR - -class NotSupportedErr(DOMException): - code = NOT_SUPPORTED_ERR - -class InuseAttributeErr(DOMException): - code = INUSE_ATTRIBUTE_ERR - -class InvalidStateErr(DOMException): - code = INVALID_STATE_ERR - -class SyntaxErr(DOMException): - code = SYNTAX_ERR - -class InvalidModificationErr(DOMException): - code = INVALID_MODIFICATION_ERR - -class NamespaceErr(DOMException): - code = NAMESPACE_ERR - -class InvalidAccessErr(DOMException): - code = INVALID_ACCESS_ERR - -class ValidationErr(DOMException): - code = VALIDATION_ERR - -class UserDataHandler: - """Class giving the operation constants for UserDataHandler.handle().""" - - # Based on DOM Level 3 (WD 9 April 2002) - - NODE_CLONED = 1 - NODE_IMPORTED = 2 - NODE_DELETED = 3 - NODE_RENAMED = 4 - -XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace" -XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/" -XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml" -EMPTY_NAMESPACE = None -EMPTY_PREFIX = None - -from .domreg import getDOMImplementation, registerDOMImplementation diff --git a/modules/language/python/module/xml/dom/#xmlbuilder.py# b/modules/language/python/module/xml/dom/#xmlbuilder.py# deleted file mode 100644 index a2a4d1b..0000000 --- a/modules/language/python/module/xml/dom/#xmlbuilder.py# +++ /dev/null @@ -1,412 +0,0 @@ -module(xml,dom,xmlbuilder) - -"""Implementation of the DOM Level 3 'LS-Load' feature.""" - -import copy -import warnings -import xml.dom - -from xml.dom.NodeFilter import NodeFilter - - -__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"] - - -class Options: - """Features object that has variables set for each DOMBuilder feature. - - The DOMBuilder class uses an instance of this class to pass settings to - the ExpatBuilder class. - """ - - # Note that the DOMBuilder class in LoadSave constrains which of these - # values can be set using the DOM Level 3 LoadSave feature. - - namespaces = 1 - namespace_declarations = True - validation = False - external_parameter_entities = True - external_general_entities = True - external_dtd_subset = True - validate_if_schema = False - validate = False - datatype_normalization = False - create_entity_ref_nodes = True - entities = True - whitespace_in_element_content = True - cdata_sections = True - comments = True - charset_overrides_xml_encoding = True - infoset = False - supported_mediatypes_only = False - - errorHandler = None - filter = None - - -class DOMBuilder: - entityResolver = None - errorHandler = None - filter = None - - ACTION_REPLACE = 1 - ACTION_APPEND_AS_CHILDREN = 2 - ACTION_INSERT_AFTER = 3 - ACTION_INSERT_BEFORE = 4 - - _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN, - ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE) - - def __init__(self): - self._options = Options() - - def _get_entityResolver(self): - return self.entityResolver - def _set_entityResolver(self, entityResolver): - self.entityResolver = entityResolver - - def _get_errorHandler(self): - return self.errorHandler - def _set_errorHandler(self, errorHandler): - self.errorHandler = errorHandler - - def _get_filter(self): - return self.filter - def _set_filter(self, filter): - self.filter = filter - - def setFeature(self, name, state): - if self.supportsFeature(name): - state = state and 1 or 0 - try: - settings = self._settings[(_name_xform(name), state)] - except KeyError: - raise xml.dom.NotSupportedErr( - "unsupported feature: %r" % (name,)) - else: - for name, value in settings: - setattr(self._options, name, value) - else: - raise xml.dom.NotFoundErr("unknown feature: " + repr(name)) - - def supportsFeature(self, name): - return hasattr(self._options, _name_xform(name)) - - def canSetFeature(self, name, state): - key = (_name_xform(name), state and 1 or 0) - return key in self._settings - - # This dictionary maps from (feature,value) to a list of - # (option,value) pairs that should be set on the Options object. - # If a (feature,value) setting is not in this dictionary, it is - # not supported by the DOMBuilder. - # - _settings = { - ("namespace_declarations", 0): [ - ("namespace_declarations", 0)], - ("namespace_declarations", 1): [ - ("namespace_declarations", 1)], - ("validation", 0): [ - ("validation", 0)], - ("external_general_entities", 0): [ - ("external_general_entities", 0)], - ("external_general_entities", 1): [ - ("external_general_entities", 1)], - ("external_parameter_entities", 0): [ - ("external_parameter_entities", 0)], - ("external_parameter_entities", 1): [ - ("external_parameter_entities", 1)], - ("validate_if_schema", 0): [ - ("validate_if_schema", 0)], - ("create_entity_ref_nodes", 0): [ - ("create_entity_ref_nodes", 0)], - ("create_entity_ref_nodes", 1): [ - ("create_entity_ref_nodes", 1)], - ("entities", 0): [ - ("create_entity_ref_nodes", 0), - ("entities", 0)], - ("entities", 1): [ - ("entities", 1)], - ("whitespace_in_element_content", 0): [ - ("whitespace_in_element_content", 0)], - ("whitespace_in_element_content", 1): [ - ("whitespace_in_element_content", 1)], - ("cdata_sections", 0): [ - ("cdata_sections", 0)], - ("cdata_sections", 1): [ - ("cdata_sections", 1)], - ("comments", 0): [ - ("comments", 0)], - ("comments", 1): [ - ("comments", 1)], - ("charset_overrides_xml_encoding", 0): [ - ("charset_overrides_xml_encoding", 0)], - ("charset_overrides_xml_encoding", 1): [ - ("charset_overrides_xml_encoding", 1)], - ("infoset", 0): [], - ("infoset", 1): [ - ("namespace_declarations", 0), - ("validate_if_schema", 0), - ("create_entity_ref_nodes", 0), - ("entities", 0), - ("cdata_sections", 0), - ("datatype_normalization", 1), - ("whitespace_in_element_content", 1), - ("comments", 1), - ("charset_overrides_xml_encoding", 1)], - ("supported_mediatypes_only", 0): [ - ("supported_mediatypes_only", 0)], - ("namespaces", 0): [ - ("namespaces", 0)], - ("namespaces", 1): [ - ("namespaces", 1)], - } - - def getFeature(self, name): - xname = _name_xform(name) - try: - return getattr(self._options, xname) - except AttributeError: - if name == "infoset": - options = self._options - return (options.datatype_normalization - and options.whitespace_in_element_content - and options.comments - and options.charset_overrides_xml_encoding - and not (options.namespace_declarations - or options.validate_if_schema - or options.create_entity_ref_nodes - or options.entities - or options.cdata_sections)) - raise xml.dom.NotFoundErr("feature %s not known" % repr(name)) - - def parseURI(self, uri): - if self.entityResolver: - input = self.entityResolver.resolveEntity(None, uri) - else: - input = DOMEntityResolver().resolveEntity(None, uri) - return self.parse(input) - - def parse(self, input): - options = copy.copy(self._options) - options.filter = self.filter - options.errorHandler = self.errorHandler - fp = input.byteStream - if fp is None and options.systemId: - import urllib.request - fp = urllib.request.urlopen(input.systemId) - return self._parse_bytestream(fp, options) - - def parseWithContext(self, input, cnode, action): - if action not in self._legal_actions: - raise ValueError("not a legal action") - raise NotImplementedError("Haven't written this yet...") - - def _parse_bytestream(self, stream, options): - import xml.dom.expatbuilder - builder = xml.dom.expatbuilder.makeBuilder(options) - return builder.parseFile(stream) - - -def _name_xform(name): - return name.lower().replace('-', '_') - - -class DOMEntityResolver(object): - __slots__ = '_opener', - - def resolveEntity(self, publicId, systemId): - assert systemId is not None - source = DOMInputSource() - source.publicId = publicId - source.systemId = systemId - source.byteStream = self._get_opener().open(systemId) - - # determine the encoding if the transport provided it - source.encoding = self._guess_media_encoding(source) - - # determine the base URI is we can - import posixpath, urllib.parse - parts = urllib.parse.urlparse(systemId) - scheme, netloc, path, params, query, fragment = parts - # XXX should we check the scheme here as well? - if path and not path.endswith("/"): - path = posixpath.dirname(path) + "/" - parts = scheme, netloc, path, params, query, fragment - source.baseURI = urllib.parse.urlunparse(parts) - - return source - - def _get_opener(self): - try: - return self._opener - except AttributeError: - self._opener = self._create_opener() - return self._opener - - def _create_opener(self): - import urllib.request - return urllib.request.build_opener() - - def _guess_media_encoding(self, source): - info = source.byteStream.info() - if "Content-Type" in info: - for param in info.getplist(): - if param.startswith("charset="): - return param.split("=", 1)[1].lower() - - -class DOMInputSource(object): - __slots__ = ('byteStream', 'characterStream', 'stringData', - 'encoding', 'publicId', 'systemId', 'baseURI') - - def __init__(self): - self.byteStream = None - self.characterStream = None - self.stringData = None - self.encoding = None - self.publicId = None - self.systemId = None - self.baseURI = None - - def _get_byteStream(self): - return self.byteStream - def _set_byteStream(self, byteStream): - self.byteStream = byteStream - - def _get_characterStream(self): - return self.characterStream - def _set_characterStream(self, characterStream): - self.characterStream = characterStream - - def _get_stringData(self): - return self.stringData - def _set_stringData(self, data): - self.stringData = data - - def _get_encoding(self): - return self.encoding - def _set_encoding(self, encoding): - self.encoding = encoding - - def _get_publicId(self): - return self.publicId - def _set_publicId(self, publicId): - self.publicId = publicId - - def _get_systemId(self): - return self.systemId - def _set_systemId(self, systemId): - self.systemId = systemId - - def _get_baseURI(self): - return self.baseURI - def _set_baseURI(self, uri): - self.baseURI = uri - - -class DOMBuilderFilter: - """Element filter which can be used to tailor construction of - a DOM instance. - """ - - # There's really no need for this class; concrete implementations - # should just implement the endElement() and startElement() - # methods as appropriate. Using this makes it easy to only - # implement one of them. - - FILTER_ACCEPT = 1 - FILTER_REJECT = 2 - FILTER_SKIP = 3 - FILTER_INTERRUPT = 4 - - whatToShow = NodeFilter.SHOW_ALL - - def _get_whatToShow(self): - return self.whatToShow - - def acceptNode(self, element): - return self.FILTER_ACCEPT - - def startContainer(self, element): - return self.FILTER_ACCEPT - -del NodeFilter - - -class _AsyncDeprecatedProperty: - def warn(self, cls): - clsname = cls.__name__ - warnings.warn( - "{cls}.async is deprecated; use {cls}.async_".format(cls=clsname), - DeprecationWarning) - - def __get__(self, instance, cls): - self.warn(cls) - if instance is not None: - return instance.async_ - return False - - def __set__(self, instance, value): - self.warn(type(instance)) - setattr(instance, 'async_', value) - - -class DocumentLS: - """Mixin to create documents that conform to the load/save spec.""" - - async_ = False - locals()['async'] = _AsyncDeprecatedProperty() # Avoid DeprecationWarning - - def _get_async(self): - return False - - def _set_async(self, flag): - if flag: - raise xml.dom.NotSupportedErr( - "asynchronous document loading is not supported") - - def abort(self): - # What does it mean to "clear" a document? Does the - # documentElement disappear? - raise NotImplementedError( - "haven't figured out what this means yet") - - def load(self, uri): - raise NotImplementedError("haven't written this yet") - - def loadXML(self, source): - raise NotImplementedError("haven't written this yet") - - def saveXML(self, snode): - if snode is None: - snode = self - elif snode.ownerDocument is not self: - raise xml.dom.WrongDocumentErr() - return snode.toxml() - - -del _AsyncDeprecatedProperty - - -class DOMImplementationLS: - MODE_SYNCHRONOUS = 1 - MODE_ASYNCHRONOUS = 2 - - def createDOMBuilder(self, mode, schemaType): - if schemaType is not None: - raise xml.dom.NotSupportedErr( - "schemaType not yet supported") - if mode == self.MODE_SYNCHRONOUS: - return DOMBuilder() - if mode == self.MODE_ASYNCHRONOUS: - raise xml.dom.NotSupportedErr( - "asynchronous builders are not supported") - raise ValueError("unknown value for mode") - - def createDOMWriter(self): - raise NotImplementedError( - "the writer interface hasn't been written yet!") - - def createDOMInputSource(self): - return DOMInputSource() diff --git a/modules/language/python/module/xml/dom/NodeFilter.py~ b/modules/language/python/module/xml/dom/NodeFilter.py~ deleted file mode 100644 index 640e0bf..0000000 --- a/modules/language/python/module/xml/dom/NodeFilter.py~ +++ /dev/null @@ -1,27 +0,0 @@ -# This is the Python mapping for interface NodeFilter from -# DOM2-Traversal-Range. It contains only constants. - -class NodeFilter: - """ - This is the DOM2 NodeFilter interface. It contains only constants. - """ - FILTER_ACCEPT = 1 - FILTER_REJECT = 2 - FILTER_SKIP = 3 - - SHOW_ALL = 0xFFFFFFFF - SHOW_ELEMENT = 0x00000001 - SHOW_ATTRIBUTE = 0x00000002 - SHOW_TEXT = 0x00000004 - SHOW_CDATA_SECTION = 0x00000008 - SHOW_ENTITY_REFERENCE = 0x00000010 - SHOW_ENTITY = 0x00000020 - SHOW_PROCESSING_INSTRUCTION = 0x00000040 - SHOW_COMMENT = 0x00000080 - SHOW_DOCUMENT = 0x00000100 - SHOW_DOCUMENT_TYPE = 0x00000200 - SHOW_DOCUMENT_FRAGMENT = 0x00000400 - SHOW_NOTATION = 0x00000800 - - def acceptNode(self, node): - raise NotImplementedError diff --git a/modules/language/python/module/xml/dom/domreg.py~ b/modules/language/python/module/xml/dom/domreg.py~ deleted file mode 100644 index 8c3d901..0000000 --- a/modules/language/python/module/xml/dom/domreg.py~ +++ /dev/null @@ -1,97 +0,0 @@ -"""Registration facilities for DOM. This module should not be used -directly. Instead, the functions getDOMImplementation and -registerDOMImplementation should be imported from xml.dom.""" - -# This is a list of well-known implementations. Well-known names -# should be published by posting to xml-sig@python.org, and are -# subsequently recorded in this file. - -well_known_implementations = { - 'minidom':'xml.dom.minidom', - '4DOM': 'xml.dom.DOMImplementation', - } - -# DOM implementations not officially registered should register -# themselves with their - -registered = {} - -def registerDOMImplementation(name, factory): - """registerDOMImplementation(name, factory) - - Register the factory function with the name. The factory function - should return an object which implements the DOMImplementation - interface. The factory function can either return the same object, - or a new one (e.g. if that implementation supports some - customization).""" - - registered[name] = factory - -def _good_enough(dom, features): - "_good_enough(dom, features) -> Return 1 if the dom offers the features" - for f,v in features: - if not dom.hasFeature(f,v): - return 0 - return 1 - -def getDOMImplementation(name=None, features=()): - """getDOMImplementation(name = None, features = ()) -> DOM implementation. - - Return a suitable DOM implementation. The name is either - well-known, the module name of a DOM implementation, or None. If - it is not None, imports the corresponding module and returns - DOMImplementation object if the import succeeds. - - If name is not given, consider the available implementations to - find one with the required feature set. If no implementation can - be found, raise an ImportError. The features list must be a sequence - of (feature, version) pairs which are passed to hasFeature.""" - - import os - creator = None - mod = well_known_implementations.get(name) - if mod: - mod = __import__(mod, {}, {}, ['getDOMImplementation']) - return mod.getDOMImplementation() - elif name: - return registered[name]() - elif "PYTHON_DOM" in os.environ: - return getDOMImplementation(name = os.environ["PYTHON_DOM"]) - - # User did not specify a name, try implementations in arbitrary - # order, returning the one that has the required features - if isinstance(features, str): - features = _parse_feature_string(features) - for creator in registered.values(): - dom = creator() - if _good_enough(dom, features): - return dom - - for creator in well_known_implementations.keys(): - try: - dom = getDOMImplementation(name = creator) - except Exception: # typically ImportError, or AttributeError - continue - if _good_enough(dom, features): - return dom - - raise ImportError("no suitable DOM implementation found") - -def _parse_feature_string(s): - features = [] - parts = s.split() - i = 0 - length = len(parts) - while i < length: - feature = parts[i] - if feature[0] in "0123456789": - raise ValueError("bad feature name: %r" % (feature,)) - i = i + 1 - version = None - if i < length: - v = parts[i] - if v[0] in "0123456789": - i = i + 1 - version = v - features.append((feature, version)) - return tuple(features) diff --git a/modules/language/python/module/xml/dom/expatbuilder.py~ b/modules/language/python/module/xml/dom/expatbuilder.py~ deleted file mode 100644 index 2bd835b..0000000 --- a/modules/language/python/module/xml/dom/expatbuilder.py~ +++ /dev/null @@ -1,965 +0,0 @@ -"""Facility to use the Expat parser to load a minidom instance -from a string or file. - -This avoids all the overhead of SAX and pulldom to gain performance. -""" - -# Warning! -# -# This module is tightly bound to the implementation details of the -# minidom DOM and can't be used with other DOM implementations. This -# is due, in part, to a lack of appropriate methods in the DOM (there is -# no way to create Entity and Notation nodes via the DOM Level 2 -# interface), and for performance. The latter is the cause of some fairly -# cryptic code. -# -# Performance hacks: -# -# - .character_data_handler() has an extra case in which continuing -# data is appended to an existing Text node; this can be a -# speedup since pyexpat can break up character data into multiple -# callbacks even though we set the buffer_text attribute on the -# parser. This also gives us the advantage that we don't need a -# separate normalization pass. -# -# - Determining that a node exists is done using an identity comparison -# with None rather than a truth test; this avoids searching for and -# calling any methods on the node object if it exists. (A rather -# nice speedup is achieved this way as well!) - -from xml.dom import xmlbuilder, minidom, Node -from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE -from xml.parsers import expat -from xml.dom.minidom import _append_child, _set_attribute_node -from xml.dom.NodeFilter import NodeFilter - -TEXT_NODE = Node.TEXT_NODE -CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE -DOCUMENT_NODE = Node.DOCUMENT_NODE - -FILTER_ACCEPT = xmlbuilder.DOMBuilderFilter.FILTER_ACCEPT -FILTER_REJECT = xmlbuilder.DOMBuilderFilter.FILTER_REJECT -FILTER_SKIP = xmlbuilder.DOMBuilderFilter.FILTER_SKIP -FILTER_INTERRUPT = xmlbuilder.DOMBuilderFilter.FILTER_INTERRUPT - -theDOMImplementation = minidom.getDOMImplementation() - -# Expat typename -> TypeInfo -_typeinfo_map = { - "CDATA": minidom.TypeInfo(None, "cdata"), - "ENUM": minidom.TypeInfo(None, "enumeration"), - "ENTITY": minidom.TypeInfo(None, "entity"), - "ENTITIES": minidom.TypeInfo(None, "entities"), - "ID": minidom.TypeInfo(None, "id"), - "IDREF": minidom.TypeInfo(None, "idref"), - "IDREFS": minidom.TypeInfo(None, "idrefs"), - "NMTOKEN": minidom.TypeInfo(None, "nmtoken"), - "NMTOKENS": minidom.TypeInfo(None, "nmtokens"), - } - -class ElementInfo(object): - __slots__ = '_attr_info', '_model', 'tagName' - - def __init__(self, tagName, model=None): - self.tagName = tagName - self._attr_info = [] - self._model = model - - def __getstate__(self): - return self._attr_info, self._model, self.tagName - - def __setstate__(self, state): - self._attr_info, self._model, self.tagName = state - - def getAttributeType(self, aname): - for info in self._attr_info: - if info[1] == aname: - t = info[-2] - if t[0] == "(": - return _typeinfo_map["ENUM"] - else: - return _typeinfo_map[info[-2]] - return minidom._no_type - - def getAttributeTypeNS(self, namespaceURI, localName): - return minidom._no_type - - def isElementContent(self): - if self._model: - type = self._model[0] - return type not in (expat.model.XML_CTYPE_ANY, - expat.model.XML_CTYPE_MIXED) - else: - return False - - def isEmpty(self): - if self._model: - return self._model[0] == expat.model.XML_CTYPE_EMPTY - else: - return False - - def isId(self, aname): - for info in self._attr_info: - if info[1] == aname: - return info[-2] == "ID" - return False - - def isIdNS(self, euri, ename, auri, aname): - # not sure this is meaningful - return self.isId((auri, aname)) - -def _intern(builder, s): - return builder._intern_setdefault(s, s) - -def _parse_ns_name(builder, name): - assert ' ' in name - parts = name.split(' ') - intern = builder._intern_setdefault - if len(parts) == 3: - uri, localname, prefix = parts - prefix = intern(prefix, prefix) - qname = "%s:%s" % (prefix, localname) - qname = intern(qname, qname) - localname = intern(localname, localname) - elif len(parts) == 2: - uri, localname = parts - prefix = EMPTY_PREFIX - qname = localname = intern(localname, localname) - else: - raise ValueError("Unsupported syntax: spaces in URIs not supported: %r" % name) - return intern(uri, uri), localname, prefix, qname - - -class ExpatBuilder: - """Document builder that uses Expat to build a ParsedXML.DOM document - instance.""" - - def __init__(self, options=None): - if options is None: - options = xmlbuilder.Options() - self._options = options - if self._options.filter is not None: - self._filter = FilterVisibilityController(self._options.filter) - else: - self._filter = None - # This *really* doesn't do anything in this case, so - # override it with something fast & minimal. - self._finish_start_element = id - self._parser = None - self.reset() - - def createParser(self): - """Create a new parser object.""" - return expat.ParserCreate() - - def getParser(self): - """Return the parser object, creating a new one if needed.""" - if not self._parser: - self._parser = self.createParser() - self._intern_setdefault = self._parser.intern.setdefault - self._parser.buffer_text = True - self._parser.ordered_attributes = True - self._parser.specified_attributes = True - self.install(self._parser) - return self._parser - - def reset(self): - """Free all data structures used during DOM construction.""" - self.document = theDOMImplementation.createDocument( - EMPTY_NAMESPACE, None, None) - self.curNode = self.document - self._elem_info = self.document._elem_info - self._cdata = False - - def install(self, parser): - """Install the callbacks needed to build the DOM into the parser.""" - # This creates circular references! - parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler - parser.StartElementHandler = self.first_element_handler - parser.EndElementHandler = self.end_element_handler - parser.ProcessingInstructionHandler = self.pi_handler - if self._options.entities: - parser.EntityDeclHandler = self.entity_decl_handler - parser.NotationDeclHandler = self.notation_decl_handler - if self._options.comments: - parser.CommentHandler = self.comment_handler - if self._options.cdata_sections: - parser.StartCdataSectionHandler = self.start_cdata_section_handler - parser.EndCdataSectionHandler = self.end_cdata_section_handler - parser.CharacterDataHandler = self.character_data_handler_cdata - else: - parser.CharacterDataHandler = self.character_data_handler - parser.ExternalEntityRefHandler = self.external_entity_ref_handler - parser.XmlDeclHandler = self.xml_decl_handler - parser.ElementDeclHandler = self.element_decl_handler - parser.AttlistDeclHandler = self.attlist_decl_handler - - def parseFile(self, file): - """Parse a document from a file object, returning the document - node.""" - parser = self.getParser() - first_buffer = True - try: - while 1: - buffer = file.read(16*1024) - if not buffer: - break - parser.Parse(buffer, 0) - if first_buffer and self.document.documentElement: - self._setup_subset(buffer) - first_buffer = False - parser.Parse("", True) - except ParseEscape: - pass - doc = self.document - self.reset() - self._parser = None - return doc - - def parseString(self, string): - """Parse a document from a string, returning the document node.""" - parser = self.getParser() - try: - parser.Parse(string, True) - self._setup_subset(string) - except ParseEscape: - pass - doc = self.document - self.reset() - self._parser = None - return doc - - def _setup_subset(self, buffer): - """Load the internal subset if there might be one.""" - if self.document.doctype: - extractor = InternalSubsetExtractor() - extractor.parseString(buffer) - subset = extractor.getSubset() - self.document.doctype.internalSubset = subset - - def start_doctype_decl_handler(self, doctypeName, systemId, publicId, - has_internal_subset): - doctype = self.document.implementation.createDocumentType( - doctypeName, publicId, systemId) - doctype.ownerDocument = self.document - _append_child(self.document, doctype) - self.document.doctype = doctype - if self._filter and self._filter.acceptNode(doctype) == FILTER_REJECT: - self.document.doctype = None - del self.document.childNodes[-1] - doctype = None - self._parser.EntityDeclHandler = None - self._parser.NotationDeclHandler = None - if has_internal_subset: - if doctype is not None: - doctype.entities._seq = [] - doctype.notations._seq = [] - self._parser.CommentHandler = None - self._parser.ProcessingInstructionHandler = None - self._parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler - - def end_doctype_decl_handler(self): - if self._options.comments: - self._parser.CommentHandler = self.comment_handler - self._parser.ProcessingInstructionHandler = self.pi_handler - if not (self._elem_info or self._filter): - self._finish_end_element = id - - def pi_handler(self, target, data): - node = self.document.createProcessingInstruction(target, data) - _append_child(self.curNode, node) - if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: - self.curNode.removeChild(node) - - def character_data_handler_cdata(self, data): - childNodes = self.curNode.childNodes - if self._cdata: - if ( self._cdata_continue - and childNodes[-1].nodeType == CDATA_SECTION_NODE): - childNodes[-1].appendData(data) - return - node = self.document.createCDATASection(data) - self._cdata_continue = True - elif childNodes and childNodes[-1].nodeType == TEXT_NODE: - node = childNodes[-1] - value = node.data + data - node.data = value - return - else: - node = minidom.Text() - node.data = data - node.ownerDocument = self.document - _append_child(self.curNode, node) - - def character_data_handler(self, data): - childNodes = self.curNode.childNodes - if childNodes and childNodes[-1].nodeType == TEXT_NODE: - node = childNodes[-1] - node.data = node.data + data - return - node = minidom.Text() - node.data = node.data + data - node.ownerDocument = self.document - _append_child(self.curNode, node) - - def entity_decl_handler(self, entityName, is_parameter_entity, value, - base, systemId, publicId, notationName): - if is_parameter_entity: - # we don't care about parameter entities for the DOM - return - if not self._options.entities: - return - node = self.document._create_entity(entityName, publicId, - systemId, notationName) - if value is not None: - # internal entity - # node *should* be readonly, but we'll cheat - child = self.document.createTextNode(value) - node.childNodes.append(child) - self.document.doctype.entities._seq.append(node) - if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: - del self.document.doctype.entities._seq[-1] - - def notation_decl_handler(self, notationName, base, systemId, publicId): - node = self.document._create_notation(notationName, publicId, systemId) - self.document.doctype.notations._seq.append(node) - if self._filter and self._filter.acceptNode(node) == FILTER_ACCEPT: - del self.document.doctype.notations._seq[-1] - - def comment_handler(self, data): - node = self.document.createComment(data) - _append_child(self.curNode, node) - if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: - self.curNode.removeChild(node) - - def start_cdata_section_handler(self): - self._cdata = True - self._cdata_continue = False - - def end_cdata_section_handler(self): - self._cdata = False - self._cdata_continue = False - - def external_entity_ref_handler(self, context, base, systemId, publicId): - return 1 - - def first_element_handler(self, name, attributes): - if self._filter is None and not self._elem_info: - self._finish_end_element = id - self.getParser().StartElementHandler = self.start_element_handler - self.start_element_handler(name, attributes) - - def start_element_handler(self, name, attributes): - node = self.document.createElement(name) - _append_child(self.curNode, node) - self.curNode = node - - if attributes: - for i in range(0, len(attributes), 2): - a = minidom.Attr(attributes[i], EMPTY_NAMESPACE, - None, EMPTY_PREFIX) - value = attributes[i+1] - a.value = value - a.ownerDocument = self.document - _set_attribute_node(node, a) - - if node is not self.document.documentElement: - self._finish_start_element(node) - - def _finish_start_element(self, node): - if self._filter: - # To be general, we'd have to call isSameNode(), but this - # is sufficient for minidom: - if node is self.document.documentElement: - return - filt = self._filter.startContainer(node) - if filt == FILTER_REJECT: - # ignore this node & all descendents - Rejecter(self) - elif filt == FILTER_SKIP: - # ignore this node, but make it's children become - # children of the parent node - Skipper(self) - else: - return - self.curNode = node.parentNode - node.parentNode.removeChild(node) - node.unlink() - - # If this ever changes, Namespaces.end_element_handler() needs to - # be changed to match. - # - def end_element_handler(self, name): - curNode = self.curNode - self.curNode = curNode.parentNode - self._finish_end_element(curNode) - - def _finish_end_element(self, curNode): - info = self._elem_info.get(curNode.tagName) - if info: - self._handle_white_text_nodes(curNode, info) - if self._filter: - if curNode is self.document.documentElement: - return - if self._filter.acceptNode(curNode) == FILTER_REJECT: - self.curNode.removeChild(curNode) - curNode.unlink() - - def _handle_white_text_nodes(self, node, info): - if (self._options.whitespace_in_element_content - or not info.isElementContent()): - return - - # We have element type information and should remove ignorable - # whitespace; identify for text nodes which contain only - # whitespace. - L = [] - for child in node.childNodes: - if child.nodeType == TEXT_NODE and not child.data.strip(): - L.append(child) - - # Remove ignorable whitespace from the tree. - for child in L: - node.removeChild(child) - - def element_decl_handler(self, name, model): - info = self._elem_info.get(name) - if info is None: - self._elem_info[name] = ElementInfo(name, model) - else: - assert info._model is None - info._model = model - - def attlist_decl_handler(self, elem, name, type, default, required): - info = self._elem_info.get(elem) - if info is None: - info = ElementInfo(elem) - self._elem_info[elem] = info - info._attr_info.append( - [None, name, None, None, default, 0, type, required]) - - def xml_decl_handler(self, version, encoding, standalone): - self.document.version = version - self.document.encoding = encoding - # This is still a little ugly, thanks to the pyexpat API. ;-( - if standalone >= 0: - if standalone: - self.document.standalone = True - else: - self.document.standalone = False - - -# Don't include FILTER_INTERRUPT, since that's checked separately -# where allowed. -_ALLOWED_FILTER_RETURNS = (FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP) - -class FilterVisibilityController(object): - """Wrapper around a DOMBuilderFilter which implements the checks - to make the whatToShow filter attribute work.""" - - __slots__ = 'filter', - - def __init__(self, filter): - self.filter = filter - - def startContainer(self, node): - mask = self._nodetype_mask[node.nodeType] - if self.filter.whatToShow & mask: - val = self.filter.startContainer(node) - if val == FILTER_INTERRUPT: - raise ParseEscape - if val not in _ALLOWED_FILTER_RETURNS: - raise ValueError( - "startContainer() returned illegal value: " + repr(val)) - return val - else: - return FILTER_ACCEPT - - def acceptNode(self, node): - mask = self._nodetype_mask[node.nodeType] - if self.filter.whatToShow & mask: - val = self.filter.acceptNode(node) - if val == FILTER_INTERRUPT: - raise ParseEscape - if val == FILTER_SKIP: - # move all child nodes to the parent, and remove this node - parent = node.parentNode - for child in node.childNodes[:]: - parent.appendChild(child) - # node is handled by the caller - return FILTER_REJECT - if val not in _ALLOWED_FILTER_RETURNS: - raise ValueError( - "acceptNode() returned illegal value: " + repr(val)) - return val - else: - return FILTER_ACCEPT - - _nodetype_mask = { - Node.ELEMENT_NODE: NodeFilter.SHOW_ELEMENT, - Node.ATTRIBUTE_NODE: NodeFilter.SHOW_ATTRIBUTE, - Node.TEXT_NODE: NodeFilter.SHOW_TEXT, - Node.CDATA_SECTION_NODE: NodeFilter.SHOW_CDATA_SECTION, - Node.ENTITY_REFERENCE_NODE: NodeFilter.SHOW_ENTITY_REFERENCE, - Node.ENTITY_NODE: NodeFilter.SHOW_ENTITY, - Node.PROCESSING_INSTRUCTION_NODE: NodeFilter.SHOW_PROCESSING_INSTRUCTION, - Node.COMMENT_NODE: NodeFilter.SHOW_COMMENT, - Node.DOCUMENT_NODE: NodeFilter.SHOW_DOCUMENT, - Node.DOCUMENT_TYPE_NODE: NodeFilter.SHOW_DOCUMENT_TYPE, - Node.DOCUMENT_FRAGMENT_NODE: NodeFilter.SHOW_DOCUMENT_FRAGMENT, - Node.NOTATION_NODE: NodeFilter.SHOW_NOTATION, - } - - -class FilterCrutch(object): - __slots__ = '_builder', '_level', '_old_start', '_old_end' - - def __init__(self, builder): - self._level = 0 - self._builder = builder - parser = builder._parser - self._old_start = parser.StartElementHandler - self._old_end = parser.EndElementHandler - parser.StartElementHandler = self.start_element_handler - parser.EndElementHandler = self.end_element_handler - -class Rejecter(FilterCrutch): - __slots__ = () - - def __init__(self, builder): - FilterCrutch.__init__(self, builder) - parser = builder._parser - for name in ("ProcessingInstructionHandler", - "CommentHandler", - "CharacterDataHandler", - "StartCdataSectionHandler", - "EndCdataSectionHandler", - "ExternalEntityRefHandler", - ): - setattr(parser, name, None) - - def start_element_handler(self, *args): - self._level = self._level + 1 - - def end_element_handler(self, *args): - if self._level == 0: - # restore the old handlers - parser = self._builder._parser - self._builder.install(parser) - parser.StartElementHandler = self._old_start - parser.EndElementHandler = self._old_end - else: - self._level = self._level - 1 - -class Skipper(FilterCrutch): - __slots__ = () - - def start_element_handler(self, *args): - node = self._builder.curNode - self._old_start(*args) - if self._builder.curNode is not node: - self._level = self._level + 1 - - def end_element_handler(self, *args): - if self._level == 0: - # We're popping back out of the node we're skipping, so we - # shouldn't need to do anything but reset the handlers. - self._builder._parser.StartElementHandler = self._old_start - self._builder._parser.EndElementHandler = self._old_end - self._builder = None - else: - self._level = self._level - 1 - self._old_end(*args) - - -# framework document used by the fragment builder. -# Takes a string for the doctype, subset string, and namespace attrs string. - -_FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID = \ - "http://xml.python.org/entities/fragment-builder/internal" - -_FRAGMENT_BUILDER_TEMPLATE = ( - '''\ -<!DOCTYPE wrapper - %%s [ - <!ENTITY fragment-builder-internal - SYSTEM "%s"> -%%s -]> -<wrapper %%s ->&fragment-builder-internal;</wrapper>''' - % _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID) - - -class FragmentBuilder(ExpatBuilder): - """Builder which constructs document fragments given XML source - text and a context node. - - The context node is expected to provide information about the - namespace declarations which are in scope at the start of the - fragment. - """ - - def __init__(self, context, options=None): - if context.nodeType == DOCUMENT_NODE: - self.originalDocument = context - self.context = context - else: - self.originalDocument = context.ownerDocument - self.context = context - ExpatBuilder.__init__(self, options) - - def reset(self): - ExpatBuilder.reset(self) - self.fragment = None - - def parseFile(self, file): - """Parse a document fragment from a file object, returning the - fragment node.""" - return self.parseString(file.read()) - - def parseString(self, string): - """Parse a document fragment from a string, returning the - fragment node.""" - self._source = string - parser = self.getParser() - doctype = self.originalDocument.doctype - ident = "" - if doctype: - subset = doctype.internalSubset or self._getDeclarations() - if doctype.publicId: - ident = ('PUBLIC "%s" "%s"' - % (doctype.publicId, doctype.systemId)) - elif doctype.systemId: - ident = 'SYSTEM "%s"' % doctype.systemId - else: - subset = "" - nsattrs = self._getNSattrs() # get ns decls from node's ancestors - document = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs) - try: - parser.Parse(document, 1) - except: - self.reset() - raise - fragment = self.fragment - self.reset() -## self._parser = None - return fragment - - def _getDeclarations(self): - """Re-create the internal subset from the DocumentType node. - - This is only needed if we don't already have the - internalSubset as a string. - """ - doctype = self.context.ownerDocument.doctype - s = "" - if doctype: - for i in range(doctype.notations.length): - notation = doctype.notations.item(i) - if s: - s = s + "\n " - s = "%s<!NOTATION %s" % (s, notation.nodeName) - if notation.publicId: - s = '%s PUBLIC "%s"\n "%s">' \ - % (s, notation.publicId, notation.systemId) - else: - s = '%s SYSTEM "%s">' % (s, notation.systemId) - for i in range(doctype.entities.length): - entity = doctype.entities.item(i) - if s: - s = s + "\n " - s = "%s<!ENTITY %s" % (s, entity.nodeName) - if entity.publicId: - s = '%s PUBLIC "%s"\n "%s"' \ - % (s, entity.publicId, entity.systemId) - elif entity.systemId: - s = '%s SYSTEM "%s"' % (s, entity.systemId) - else: - s = '%s "%s"' % (s, entity.firstChild.data) - if entity.notationName: - s = "%s NOTATION %s" % (s, entity.notationName) - s = s + ">" - return s - - def _getNSattrs(self): - return "" - - def external_entity_ref_handler(self, context, base, systemId, publicId): - if systemId == _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID: - # this entref is the one that we made to put the subtree - # in; all of our given input is parsed in here. - old_document = self.document - old_cur_node = self.curNode - parser = self._parser.ExternalEntityParserCreate(context) - # put the real document back, parse into the fragment to return - self.document = self.originalDocument - self.fragment = self.document.createDocumentFragment() - self.curNode = self.fragment - try: - parser.Parse(self._source, 1) - finally: - self.curNode = old_cur_node - self.document = old_document - self._source = None - return -1 - else: - return ExpatBuilder.external_entity_ref_handler( - self, context, base, systemId, publicId) - - -class Namespaces: - """Mix-in class for builders; adds support for namespaces.""" - - def _initNamespaces(self): - # list of (prefix, uri) ns declarations. Namespace attrs are - # constructed from this and added to the element's attrs. - self._ns_ordered_prefixes = [] - - def createParser(self): - """Create a new namespace-handling parser.""" - parser = expat.ParserCreate(namespace_separator=" ") - parser.namespace_prefixes = True - return parser - - def install(self, parser): - """Insert the namespace-handlers onto the parser.""" - ExpatBuilder.install(self, parser) - if self._options.namespace_declarations: - parser.StartNamespaceDeclHandler = ( - self.start_namespace_decl_handler) - - def start_namespace_decl_handler(self, prefix, uri): - """Push this namespace declaration on our storage.""" - self._ns_ordered_prefixes.append((prefix, uri)) - - def start_element_handler(self, name, attributes): - if ' ' in name: - uri, localname, prefix, qname = _parse_ns_name(self, name) - else: - uri = EMPTY_NAMESPACE - qname = name - localname = None - prefix = EMPTY_PREFIX - node = minidom.Element(qname, uri, prefix, localname) - node.ownerDocument = self.document - _append_child(self.curNode, node) - self.curNode = node - - if self._ns_ordered_prefixes: - for prefix, uri in self._ns_ordered_prefixes: - if prefix: - a = minidom.Attr(_intern(self, 'xmlns:' + prefix), - XMLNS_NAMESPACE, prefix, "xmlns") - else: - a = minidom.Attr("xmlns", XMLNS_NAMESPACE, - "xmlns", EMPTY_PREFIX) - a.value = uri - a.ownerDocument = self.document - _set_attribute_node(node, a) - del self._ns_ordered_prefixes[:] - - if attributes: - node._ensure_attributes() - _attrs = node._attrs - _attrsNS = node._attrsNS - for i in range(0, len(attributes), 2): - aname = attributes[i] - value = attributes[i+1] - if ' ' in aname: - uri, localname, prefix, qname = _parse_ns_name(self, aname) - a = minidom.Attr(qname, uri, localname, prefix) - _attrs[qname] = a - _attrsNS[(uri, localname)] = a - else: - a = minidom.Attr(aname, EMPTY_NAMESPACE, - aname, EMPTY_PREFIX) - _attrs[aname] = a - _attrsNS[(EMPTY_NAMESPACE, aname)] = a - a.ownerDocument = self.document - a.value = value - a.ownerElement = node - - if __debug__: - # This only adds some asserts to the original - # end_element_handler(), so we only define this when -O is not - # used. If changing one, be sure to check the other to see if - # it needs to be changed as well. - # - def end_element_handler(self, name): - curNode = self.curNode - if ' ' in name: - uri, localname, prefix, qname = _parse_ns_name(self, name) - assert (curNode.namespaceURI == uri - and curNode.localName == localname - and curNode.prefix == prefix), \ - "element stack messed up! (namespace)" - else: - assert curNode.nodeName == name, \ - "element stack messed up - bad nodeName" - assert curNode.namespaceURI == EMPTY_NAMESPACE, \ - "element stack messed up - bad namespaceURI" - self.curNode = curNode.parentNode - self._finish_end_element(curNode) - - -class ExpatBuilderNS(Namespaces, ExpatBuilder): - """Document builder that supports namespaces.""" - - def reset(self): - ExpatBuilder.reset(self) - self._initNamespaces() - - -class FragmentBuilderNS(Namespaces, FragmentBuilder): - """Fragment builder that supports namespaces.""" - - def reset(self): - FragmentBuilder.reset(self) - self._initNamespaces() - - def _getNSattrs(self): - """Return string of namespace attributes from this element and - ancestors.""" - # XXX This needs to be re-written to walk the ancestors of the - # context to build up the namespace information from - # declarations, elements, and attributes found in context. - # Otherwise we have to store a bunch more data on the DOM - # (though that *might* be more reliable -- not clear). - attrs = "" - context = self.context - L = [] - while context: - if hasattr(context, '_ns_prefix_uri'): - for prefix, uri in context._ns_prefix_uri.items(): - # add every new NS decl from context to L and attrs string - if prefix in L: - continue - L.append(prefix) - if prefix: - declname = "xmlns:" + prefix - else: - declname = "xmlns" - if attrs: - attrs = "%s\n %s='%s'" % (attrs, declname, uri) - else: - attrs = " %s='%s'" % (declname, uri) - context = context.parentNode - return attrs - - -class ParseEscape(Exception): - """Exception raised to short-circuit parsing in InternalSubsetExtractor.""" - pass - -class InternalSubsetExtractor(ExpatBuilder): - """XML processor which can rip out the internal document type subset.""" - - subset = None - - def getSubset(self): - """Return the internal subset as a string.""" - return self.subset - - def parseFile(self, file): - try: - ExpatBuilder.parseFile(self, file) - except ParseEscape: - pass - - def parseString(self, string): - try: - ExpatBuilder.parseString(self, string) - except ParseEscape: - pass - - def install(self, parser): - parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler - parser.StartElementHandler = self.start_element_handler - - def start_doctype_decl_handler(self, name, publicId, systemId, - has_internal_subset): - if has_internal_subset: - parser = self.getParser() - self.subset = [] - parser.DefaultHandler = self.subset.append - parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler - else: - raise ParseEscape() - - def end_doctype_decl_handler(self): - s = ''.join(self.subset).replace('\r\n', '\n').replace('\r', '\n') - self.subset = s - raise ParseEscape() - - def start_element_handler(self, name, attrs): - raise ParseEscape() - - -def parse(file, namespaces=True): - """Parse a document, returning the resulting Document node. - - 'file' may be either a file name or an open file object. - """ - if namespaces: - builder = ExpatBuilderNS() - else: - builder = ExpatBuilder() - - if isinstance(file, str): - with open(file, 'rb') as fp: - result = builder.parseFile(fp) - else: - result = builder.parseFile(file) - return result - - -def parseString(string, namespaces=True): - """Parse a document from a string, returning the resulting - Document node. - """ - if namespaces: - builder = ExpatBuilderNS() - else: - builder = ExpatBuilder() - return builder.parseString(string) - - -def parseFragment(file, context, namespaces=True): - """Parse a fragment of a document, given the context from which it - was originally extracted. context should be the parent of the - node(s) which are in the fragment. - - 'file' may be either a file name or an open file object. - """ - if namespaces: - builder = FragmentBuilderNS(context) - else: - builder = FragmentBuilder(context) - - if isinstance(file, str): - with open(file, 'rb') as fp: - result = builder.parseFile(fp) - else: - result = builder.parseFile(file) - return result - - -def parseFragmentString(string, context, namespaces=True): - """Parse a fragment of a document from a string, given the context - from which it was originally extracted. context should be the - parent of the node(s) which are in the fragment. - """ - if namespaces: - builder = FragmentBuilderNS(context) - else: - builder = FragmentBuilder(context) - return builder.parseString(string) - - -def makeBuilder(options): - """Create a builder based on an Options object.""" - if options.namespaces: - return ExpatBuilderNS(options) - else: - return ExpatBuilder(options) diff --git a/modules/language/python/module/xml/dom/minicompat.py~ b/modules/language/python/module/xml/dom/minicompat.py~ deleted file mode 100644 index 5d6fae9..0000000 --- a/modules/language/python/module/xml/dom/minicompat.py~ +++ /dev/null @@ -1,109 +0,0 @@ -"""Python version compatibility support for minidom. - -This module contains internal implementation details and -should not be imported; use xml.dom.minidom instead. -""" - -# This module should only be imported using "import *". -# -# The following names are defined: -# -# NodeList -- lightest possible NodeList implementation -# -# EmptyNodeList -- lightest possible NodeList that is guaranteed to -# remain empty (immutable) -# -# StringTypes -- tuple of defined string types -# -# defproperty -- function used in conjunction with GetattrMagic; -# using these together is needed to make them work -# as efficiently as possible in both Python 2.2+ -# and older versions. For example: -# -# class MyClass(GetattrMagic): -# def _get_myattr(self): -# return something -# -# defproperty(MyClass, "myattr", -# "return some value") -# -# For Python 2.2 and newer, this will construct a -# property object on the class, which avoids -# needing to override __getattr__(). It will only -# work for read-only attributes. -# -# For older versions of Python, inheriting from -# GetattrMagic will use the traditional -# __getattr__() hackery to achieve the same effect, -# but less efficiently. -# -# defproperty() should be used for each version of -# the relevant _get_<property>() function. - -__all__ = ["NodeList", "EmptyNodeList", "StringTypes", "defproperty"] - -import xml.dom - -StringTypes = (str,) - - -class NodeList(list): - __slots__ = () - - def item(self, index): - if 0 <= index < len(self): - return self[index] - - def _get_length(self): - return len(self) - - def _set_length(self, value): - raise xml.dom.NoModificationAllowedErr( - "attempt to modify read-only attribute 'length'") - - length = property(_get_length, _set_length, - doc="The number of nodes in the NodeList.") - - # For backward compatibility - def __setstate__(self, state): - if state is None: - state = [] - self[:] = state - - -class EmptyNodeList(tuple): - __slots__ = () - - def __add__(self, other): - NL = NodeList() - NL.extend(other) - return NL - - def __radd__(self, other): - NL = NodeList() - NL.extend(other) - return NL - - def item(self, index): - return None - - def _get_length(self): - return 0 - - def _set_length(self, value): - raise xml.dom.NoModificationAllowedErr( - "attempt to modify read-only attribute 'length'") - - length = property(_get_length, _set_length, - doc="The number of nodes in the NodeList.") - - -def defproperty(klass, name, doc): - get = getattr(klass, ("_get_" + name)) - def set(self, value, name=name): - raise xml.dom.NoModificationAllowedErr( - "attempt to modify read-only attribute " + repr(name)) - assert not hasattr(klass, "_set_" + name), \ - "expected not to find _set_" + name - prop = property(get, set, doc=doc) - setattr(klass, name, prop) diff --git a/modules/language/python/module/xml/dom/pulldom.py~ b/modules/language/python/module/xml/dom/pulldom.py~ deleted file mode 100644 index 43504f7..0000000 --- a/modules/language/python/module/xml/dom/pulldom.py~ +++ /dev/null @@ -1,342 +0,0 @@ -import xml.sax -import xml.sax.handler - -START_ELEMENT = "START_ELEMENT" -END_ELEMENT = "END_ELEMENT" -COMMENT = "COMMENT" -START_DOCUMENT = "START_DOCUMENT" -END_DOCUMENT = "END_DOCUMENT" -PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION" -IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE" -CHARACTERS = "CHARACTERS" - -class PullDOM(xml.sax.ContentHandler): - _locator = None - document = None - - def __init__(self, documentFactory=None): - from xml.dom import XML_NAMESPACE - self.documentFactory = documentFactory - self.firstEvent = [None, None] - self.lastEvent = self.firstEvent - self.elementStack = [] - self.push = self.elementStack.append - try: - self.pop = self.elementStack.pop - except AttributeError: - # use class' pop instead - pass - self._ns_contexts = [{XML_NAMESPACE:'xml'}] # contains uri -> prefix dicts - self._current_context = self._ns_contexts[-1] - self.pending_events = [] - - def pop(self): - result = self.elementStack[-1] - del self.elementStack[-1] - return result - - def setDocumentLocator(self, locator): - self._locator = locator - - def startPrefixMapping(self, prefix, uri): - if not hasattr(self, '_xmlns_attrs'): - self._xmlns_attrs = [] - self._xmlns_attrs.append((prefix or 'xmlns', uri)) - self._ns_contexts.append(self._current_context.copy()) - self._current_context[uri] = prefix or None - - def endPrefixMapping(self, prefix): - self._current_context = self._ns_contexts.pop() - - def startElementNS(self, name, tagName , attrs): - # Retrieve xml namespace declaration attributes. - xmlns_uri = 'http://www.w3.org/2000/xmlns/' - xmlns_attrs = getattr(self, '_xmlns_attrs', None) - if xmlns_attrs is not None: - for aname, value in xmlns_attrs: - attrs._attrs[(xmlns_uri, aname)] = value - self._xmlns_attrs = [] - uri, localname = name - if uri: - # When using namespaces, the reader may or may not - # provide us with the original name. If not, create - # *a* valid tagName from the current context. - if tagName is None: - prefix = self._current_context[uri] - if prefix: - tagName = prefix + ":" + localname - else: - tagName = localname - if self.document: - node = self.document.createElementNS(uri, tagName) - else: - node = self.buildDocument(uri, tagName) - else: - # When the tagname is not prefixed, it just appears as - # localname - if self.document: - node = self.document.createElement(localname) - else: - node = self.buildDocument(None, localname) - - for aname,value in attrs.items(): - a_uri, a_localname = aname - if a_uri == xmlns_uri: - if a_localname == 'xmlns': - qname = a_localname - else: - qname = 'xmlns:' + a_localname - attr = self.document.createAttributeNS(a_uri, qname) - node.setAttributeNodeNS(attr) - elif a_uri: - prefix = self._current_context[a_uri] - if prefix: - qname = prefix + ":" + a_localname - else: - qname = a_localname - attr = self.document.createAttributeNS(a_uri, qname) - node.setAttributeNodeNS(attr) - else: - attr = self.document.createAttribute(a_localname) - node.setAttributeNode(attr) - attr.value = value - - self.lastEvent[1] = [(START_ELEMENT, node), None] - self.lastEvent = self.lastEvent[1] - self.push(node) - - def endElementNS(self, name, tagName): - self.lastEvent[1] = [(END_ELEMENT, self.pop()), None] - self.lastEvent = self.lastEvent[1] - - def startElement(self, name, attrs): - if self.document: - node = self.document.createElement(name) - else: - node = self.buildDocument(None, name) - - for aname,value in attrs.items(): - attr = self.document.createAttribute(aname) - attr.value = value - node.setAttributeNode(attr) - - self.lastEvent[1] = [(START_ELEMENT, node), None] - self.lastEvent = self.lastEvent[1] - self.push(node) - - def endElement(self, name): - self.lastEvent[1] = [(END_ELEMENT, self.pop()), None] - self.lastEvent = self.lastEvent[1] - - def comment(self, s): - if self.document: - node = self.document.createComment(s) - self.lastEvent[1] = [(COMMENT, node), None] - self.lastEvent = self.lastEvent[1] - else: - event = [(COMMENT, s), None] - self.pending_events.append(event) - - def processingInstruction(self, target, data): - if self.document: - node = self.document.createProcessingInstruction(target, data) - self.lastEvent[1] = [(PROCESSING_INSTRUCTION, node), None] - self.lastEvent = self.lastEvent[1] - else: - event = [(PROCESSING_INSTRUCTION, target, data), None] - self.pending_events.append(event) - - def ignorableWhitespace(self, chars): - node = self.document.createTextNode(chars) - self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None] - self.lastEvent = self.lastEvent[1] - - def characters(self, chars): - node = self.document.createTextNode(chars) - self.lastEvent[1] = [(CHARACTERS, node), None] - self.lastEvent = self.lastEvent[1] - - def startDocument(self): - if self.documentFactory is None: - import xml.dom.minidom - self.documentFactory = xml.dom.minidom.Document.implementation - - def buildDocument(self, uri, tagname): - # Can't do that in startDocument, since we need the tagname - # XXX: obtain DocumentType - node = self.documentFactory.createDocument(uri, tagname, None) - self.document = node - self.lastEvent[1] = [(START_DOCUMENT, node), None] - self.lastEvent = self.lastEvent[1] - self.push(node) - # Put everything we have seen so far into the document - for e in self.pending_events: - if e[0][0] == PROCESSING_INSTRUCTION: - _,target,data = e[0] - n = self.document.createProcessingInstruction(target, data) - e[0] = (PROCESSING_INSTRUCTION, n) - elif e[0][0] == COMMENT: - n = self.document.createComment(e[0][1]) - e[0] = (COMMENT, n) - else: - raise AssertionError("Unknown pending event ",e[0][0]) - self.lastEvent[1] = e - self.lastEvent = e - self.pending_events = None - return node.firstChild - - def endDocument(self): - self.lastEvent[1] = [(END_DOCUMENT, self.document), None] - self.pop() - - def clear(self): - "clear(): Explicitly release parsing structures" - self.document = None - -class ErrorHandler: - def warning(self, exception): - print(exception) - def error(self, exception): - raise exception - def fatalError(self, exception): - raise exception - -class DOMEventStream: - def __init__(self, stream, parser, bufsize): - self.stream = stream - self.parser = parser - self.bufsize = bufsize - if not hasattr(self.parser, 'feed'): - self.getEvent = self._slurp - self.reset() - - def reset(self): - self.pulldom = PullDOM() - # This content handler relies on namespace support - self.parser.setFeature(xml.sax.handler.feature_namespaces, 1) - self.parser.setContentHandler(self.pulldom) - - def __getitem__(self, pos): - rc = self.getEvent() - if rc: - return rc - raise IndexError - - def __next__(self): - rc = self.getEvent() - if rc: - return rc - raise StopIteration - - def __iter__(self): - return self - - def expandNode(self, node): - event = self.getEvent() - parents = [node] - while event: - token, cur_node = event - if cur_node is node: - return - if token != END_ELEMENT: - parents[-1].appendChild(cur_node) - if token == START_ELEMENT: - parents.append(cur_node) - elif token == END_ELEMENT: - del parents[-1] - event = self.getEvent() - - def getEvent(self): - # use IncrementalParser interface, so we get the desired - # pull effect - if not self.pulldom.firstEvent[1]: - self.pulldom.lastEvent = self.pulldom.firstEvent - while not self.pulldom.firstEvent[1]: - buf = self.stream.read(self.bufsize) - if not buf: - self.parser.close() - return None - self.parser.feed(buf) - rc = self.pulldom.firstEvent[1][0] - self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1] - return rc - - def _slurp(self): - """ Fallback replacement for getEvent() using the - standard SAX2 interface, which means we slurp the - SAX events into memory (no performance gain, but - we are compatible to all SAX parsers). - """ - self.parser.parse(self.stream) - self.getEvent = self._emit - return self._emit() - - def _emit(self): - """ Fallback replacement for getEvent() that emits - the events that _slurp() read previously. - """ - rc = self.pulldom.firstEvent[1][0] - self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1] - return rc - - def clear(self): - """clear(): Explicitly release parsing objects""" - self.pulldom.clear() - del self.pulldom - self.parser = None - self.stream = None - -class SAX2DOM(PullDOM): - - def startElementNS(self, name, tagName , attrs): - PullDOM.startElementNS(self, name, tagName, attrs) - curNode = self.elementStack[-1] - parentNode = self.elementStack[-2] - parentNode.appendChild(curNode) - - def startElement(self, name, attrs): - PullDOM.startElement(self, name, attrs) - curNode = self.elementStack[-1] - parentNode = self.elementStack[-2] - parentNode.appendChild(curNode) - - def processingInstruction(self, target, data): - PullDOM.processingInstruction(self, target, data) - node = self.lastEvent[0][1] - parentNode = self.elementStack[-1] - parentNode.appendChild(node) - - def ignorableWhitespace(self, chars): - PullDOM.ignorableWhitespace(self, chars) - node = self.lastEvent[0][1] - parentNode = self.elementStack[-1] - parentNode.appendChild(node) - - def characters(self, chars): - PullDOM.characters(self, chars) - node = self.lastEvent[0][1] - parentNode = self.elementStack[-1] - parentNode.appendChild(node) - - -default_bufsize = (2 ** 14) - 20 - -def parse(stream_or_string, parser=None, bufsize=None): - if bufsize is None: - bufsize = default_bufsize - if isinstance(stream_or_string, str): - stream = open(stream_or_string, 'rb') - else: - stream = stream_or_string - if not parser: - parser = xml.sax.make_parser() - return DOMEventStream(stream, parser, bufsize) - -def parseString(string, parser=None): - from io import StringIO - - bufsize = len(string) - buf = StringIO(string) - if not parser: - parser = xml.sax.make_parser() - return DOMEventStream(buf, parser, bufsize) diff --git a/modules/language/python/module/xml/dom/xmlbuilder.py~ b/modules/language/python/module/xml/dom/xmlbuilder.py~ deleted file mode 100644 index e9a1536..0000000 --- a/modules/language/python/module/xml/dom/xmlbuilder.py~ +++ /dev/null @@ -1,410 +0,0 @@ -"""Implementation of the DOM Level 3 'LS-Load' feature.""" - -import copy -import warnings -import xml.dom - -from xml.dom.NodeFilter import NodeFilter - - -__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"] - - -class Options: - """Features object that has variables set for each DOMBuilder feature. - - The DOMBuilder class uses an instance of this class to pass settings to - the ExpatBuilder class. - """ - - # Note that the DOMBuilder class in LoadSave constrains which of these - # values can be set using the DOM Level 3 LoadSave feature. - - namespaces = 1 - namespace_declarations = True - validation = False - external_parameter_entities = True - external_general_entities = True - external_dtd_subset = True - validate_if_schema = False - validate = False - datatype_normalization = False - create_entity_ref_nodes = True - entities = True - whitespace_in_element_content = True - cdata_sections = True - comments = True - charset_overrides_xml_encoding = True - infoset = False - supported_mediatypes_only = False - - errorHandler = None - filter = None - - -class DOMBuilder: - entityResolver = None - errorHandler = None - filter = None - - ACTION_REPLACE = 1 - ACTION_APPEND_AS_CHILDREN = 2 - ACTION_INSERT_AFTER = 3 - ACTION_INSERT_BEFORE = 4 - - _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN, - ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE) - - def __init__(self): - self._options = Options() - - def _get_entityResolver(self): - return self.entityResolver - def _set_entityResolver(self, entityResolver): - self.entityResolver = entityResolver - - def _get_errorHandler(self): - return self.errorHandler - def _set_errorHandler(self, errorHandler): - self.errorHandler = errorHandler - - def _get_filter(self): - return self.filter - def _set_filter(self, filter): - self.filter = filter - - def setFeature(self, name, state): - if self.supportsFeature(name): - state = state and 1 or 0 - try: - settings = self._settings[(_name_xform(name), state)] - except KeyError: - raise xml.dom.NotSupportedErr( - "unsupported feature: %r" % (name,)) - else: - for name, value in settings: - setattr(self._options, name, value) - else: - raise xml.dom.NotFoundErr("unknown feature: " + repr(name)) - - def supportsFeature(self, name): - return hasattr(self._options, _name_xform(name)) - - def canSetFeature(self, name, state): - key = (_name_xform(name), state and 1 or 0) - return key in self._settings - - # This dictionary maps from (feature,value) to a list of - # (option,value) pairs that should be set on the Options object. - # If a (feature,value) setting is not in this dictionary, it is - # not supported by the DOMBuilder. - # - _settings = { - ("namespace_declarations", 0): [ - ("namespace_declarations", 0)], - ("namespace_declarations", 1): [ - ("namespace_declarations", 1)], - ("validation", 0): [ - ("validation", 0)], - ("external_general_entities", 0): [ - ("external_general_entities", 0)], - ("external_general_entities", 1): [ - ("external_general_entities", 1)], - ("external_parameter_entities", 0): [ - ("external_parameter_entities", 0)], - ("external_parameter_entities", 1): [ - ("external_parameter_entities", 1)], - ("validate_if_schema", 0): [ - ("validate_if_schema", 0)], - ("create_entity_ref_nodes", 0): [ - ("create_entity_ref_nodes", 0)], - ("create_entity_ref_nodes", 1): [ - ("create_entity_ref_nodes", 1)], - ("entities", 0): [ - ("create_entity_ref_nodes", 0), - ("entities", 0)], - ("entities", 1): [ - ("entities", 1)], - ("whitespace_in_element_content", 0): [ - ("whitespace_in_element_content", 0)], - ("whitespace_in_element_content", 1): [ - ("whitespace_in_element_content", 1)], - ("cdata_sections", 0): [ - ("cdata_sections", 0)], - ("cdata_sections", 1): [ - ("cdata_sections", 1)], - ("comments", 0): [ - ("comments", 0)], - ("comments", 1): [ - ("comments", 1)], - ("charset_overrides_xml_encoding", 0): [ - ("charset_overrides_xml_encoding", 0)], - ("charset_overrides_xml_encoding", 1): [ - ("charset_overrides_xml_encoding", 1)], - ("infoset", 0): [], - ("infoset", 1): [ - ("namespace_declarations", 0), - ("validate_if_schema", 0), - ("create_entity_ref_nodes", 0), - ("entities", 0), - ("cdata_sections", 0), - ("datatype_normalization", 1), - ("whitespace_in_element_content", 1), - ("comments", 1), - ("charset_overrides_xml_encoding", 1)], - ("supported_mediatypes_only", 0): [ - ("supported_mediatypes_only", 0)], - ("namespaces", 0): [ - ("namespaces", 0)], - ("namespaces", 1): [ - ("namespaces", 1)], - } - - def getFeature(self, name): - xname = _name_xform(name) - try: - return getattr(self._options, xname) - except AttributeError: - if name == "infoset": - options = self._options - return (options.datatype_normalization - and options.whitespace_in_element_content - and options.comments - and options.charset_overrides_xml_encoding - and not (options.namespace_declarations - or options.validate_if_schema - or options.create_entity_ref_nodes - or options.entities - or options.cdata_sections)) - raise xml.dom.NotFoundErr("feature %s not known" % repr(name)) - - def parseURI(self, uri): - if self.entityResolver: - input = self.entityResolver.resolveEntity(None, uri) - else: - input = DOMEntityResolver().resolveEntity(None, uri) - return self.parse(input) - - def parse(self, input): - options = copy.copy(self._options) - options.filter = self.filter - options.errorHandler = self.errorHandler - fp = input.byteStream - if fp is None and options.systemId: - import urllib.request - fp = urllib.request.urlopen(input.systemId) - return self._parse_bytestream(fp, options) - - def parseWithContext(self, input, cnode, action): - if action not in self._legal_actions: - raise ValueError("not a legal action") - raise NotImplementedError("Haven't written this yet...") - - def _parse_bytestream(self, stream, options): - import xml.dom.expatbuilder - builder = xml.dom.expatbuilder.makeBuilder(options) - return builder.parseFile(stream) - - -def _name_xform(name): - return name.lower().replace('-', '_') - - -class DOMEntityResolver(object): - __slots__ = '_opener', - - def resolveEntity(self, publicId, systemId): - assert systemId is not None - source = DOMInputSource() - source.publicId = publicId - source.systemId = systemId - source.byteStream = self._get_opener().open(systemId) - - # determine the encoding if the transport provided it - source.encoding = self._guess_media_encoding(source) - - # determine the base URI is we can - import posixpath, urllib.parse - parts = urllib.parse.urlparse(systemId) - scheme, netloc, path, params, query, fragment = parts - # XXX should we check the scheme here as well? - if path and not path.endswith("/"): - path = posixpath.dirname(path) + "/" - parts = scheme, netloc, path, params, query, fragment - source.baseURI = urllib.parse.urlunparse(parts) - - return source - - def _get_opener(self): - try: - return self._opener - except AttributeError: - self._opener = self._create_opener() - return self._opener - - def _create_opener(self): - import urllib.request - return urllib.request.build_opener() - - def _guess_media_encoding(self, source): - info = source.byteStream.info() - if "Content-Type" in info: - for param in info.getplist(): - if param.startswith("charset="): - return param.split("=", 1)[1].lower() - - -class DOMInputSource(object): - __slots__ = ('byteStream', 'characterStream', 'stringData', - 'encoding', 'publicId', 'systemId', 'baseURI') - - def __init__(self): - self.byteStream = None - self.characterStream = None - self.stringData = None - self.encoding = None - self.publicId = None - self.systemId = None - self.baseURI = None - - def _get_byteStream(self): - return self.byteStream - def _set_byteStream(self, byteStream): - self.byteStream = byteStream - - def _get_characterStream(self): - return self.characterStream - def _set_characterStream(self, characterStream): - self.characterStream = characterStream - - def _get_stringData(self): - return self.stringData - def _set_stringData(self, data): - self.stringData = data - - def _get_encoding(self): - return self.encoding - def _set_encoding(self, encoding): - self.encoding = encoding - - def _get_publicId(self): - return self.publicId - def _set_publicId(self, publicId): - self.publicId = publicId - - def _get_systemId(self): - return self.systemId - def _set_systemId(self, systemId): - self.systemId = systemId - - def _get_baseURI(self): - return self.baseURI - def _set_baseURI(self, uri): - self.baseURI = uri - - -class DOMBuilderFilter: - """Element filter which can be used to tailor construction of - a DOM instance. - """ - - # There's really no need for this class; concrete implementations - # should just implement the endElement() and startElement() - # methods as appropriate. Using this makes it easy to only - # implement one of them. - - FILTER_ACCEPT = 1 - FILTER_REJECT = 2 - FILTER_SKIP = 3 - FILTER_INTERRUPT = 4 - - whatToShow = NodeFilter.SHOW_ALL - - def _get_whatToShow(self): - return self.whatToShow - - def acceptNode(self, element): - return self.FILTER_ACCEPT - - def startContainer(self, element): - return self.FILTER_ACCEPT - -del NodeFilter - - -class _AsyncDeprecatedProperty: - def warn(self, cls): - clsname = cls.__name__ - warnings.warn( - "{cls}.async is deprecated; use {cls}.async_".format(cls=clsname), - DeprecationWarning) - - def __get__(self, instance, cls): - self.warn(cls) - if instance is not None: - return instance.async_ - return False - - def __set__(self, instance, value): - self.warn(type(instance)) - setattr(instance, 'async_', value) - - -class DocumentLS: - """Mixin to create documents that conform to the load/save spec.""" - - async_ = False - locals()['async'] = _AsyncDeprecatedProperty() # Avoid DeprecationWarning - - def _get_async(self): - return False - - def _set_async(self, flag): - if flag: - raise xml.dom.NotSupportedErr( - "asynchronous document loading is not supported") - - def abort(self): - # What does it mean to "clear" a document? Does the - # documentElement disappear? - raise NotImplementedError( - "haven't figured out what this means yet") - - def load(self, uri): - raise NotImplementedError("haven't written this yet") - - def loadXML(self, source): - raise NotImplementedError("haven't written this yet") - - def saveXML(self, snode): - if snode is None: - snode = self - elif snode.ownerDocument is not self: - raise xml.dom.WrongDocumentErr() - return snode.toxml() - - -del _AsyncDeprecatedProperty - - -class DOMImplementationLS: - MODE_SYNCHRONOUS = 1 - MODE_ASYNCHRONOUS = 2 - - def createDOMBuilder(self, mode, schemaType): - if schemaType is not None: - raise xml.dom.NotSupportedErr( - "schemaType not yet supported") - if mode == self.MODE_SYNCHRONOUS: - return DOMBuilder() - if mode == self.MODE_ASYNCHRONOUS: - raise xml.dom.NotSupportedErr( - "asynchronous builders are not supported") - raise ValueError("unknown value for mode") - - def createDOMWriter(self): - raise NotImplementedError( - "the writer interface hasn't been written yet!") - - def createDOMInputSource(self): - return DOMInputSource() diff --git a/modules/language/python/module/xml/etree.py~ b/modules/language/python/module/xml/etree.py~ deleted file mode 100644 index 27fd8f6..0000000 --- a/modules/language/python/module/xml/etree.py~ +++ /dev/null @@ -1,33 +0,0 @@ -# $Id: __init__.py 3375 2008-02-13 08:05:08Z fredrik $ -# elementtree package - -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2008 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- - -# Licensed to PSF under a Contributor Agreement. -# See http://www.python.org/psf/license for licensing details. diff --git a/modules/language/python/module/xml/etree/ElementInclude.py~ b/modules/language/python/module/xml/etree/ElementInclude.py~ deleted file mode 100644 index 963470e..0000000 --- a/modules/language/python/module/xml/etree/ElementInclude.py~ +++ /dev/null @@ -1,143 +0,0 @@ -# -# ElementTree -# $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $ -# -# limited xinclude support for element trees -# -# history: -# 2003-08-15 fl created -# 2003-11-14 fl fixed default loader -# -# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. -# -# fredrik@pythonware.com -# http://www.pythonware.com -# -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2008 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- - -# Licensed to PSF under a Contributor Agreement. -# See http://www.python.org/psf/license for licensing details. - -## -# Limited XInclude support for the ElementTree package. -## - -import copy -from . import ElementTree - -XINCLUDE = "{http://www.w3.org/2001/XInclude}" - -XINCLUDE_INCLUDE = XINCLUDE + "include" -XINCLUDE_FALLBACK = XINCLUDE + "fallback" - -## -# Fatal include error. - -class FatalIncludeError(SyntaxError): - pass - -## -# Default loader. This loader reads an included resource from disk. -# -# @param href Resource reference. -# @param parse Parse mode. Either "xml" or "text". -# @param encoding Optional text encoding (UTF-8 by default for "text"). -# @return The expanded resource. If the parse mode is "xml", this -# is an ElementTree instance. If the parse mode is "text", this -# is a Unicode string. If the loader fails, it can return None -# or raise an OSError exception. -# @throws OSError If the loader fails to load the resource. - -def default_loader(href, parse, encoding=None): - if parse == "xml": - with open(href, 'rb') as file: - data = ElementTree.parse(file).getroot() - else: - if not encoding: - encoding = 'UTF-8' - with open(href, 'r', encoding=encoding) as file: - data = file.read() - return data - -## -# Expand XInclude directives. -# -# @param elem Root element. -# @param loader Optional resource loader. If omitted, it defaults -# to {@link default_loader}. If given, it should be a callable -# that implements the same interface as <b>default_loader</b>. -# @throws FatalIncludeError If the function fails to include a given -# resource, or if the tree contains malformed XInclude elements. -# @throws OSError If the function fails to load a given resource. - -def include(elem, loader=None): - if loader is None: - loader = default_loader - # look for xinclude elements - i = 0 - while i < len(elem): - e = elem[i] - if e.tag == XINCLUDE_INCLUDE: - # process xinclude directive - href = e.get("href") - parse = e.get("parse", "xml") - if parse == "xml": - node = loader(href, parse) - if node is None: - raise FatalIncludeError( - "cannot load %r as %r" % (href, parse) - ) - node = copy.copy(node) - if e.tail: - node.tail = (node.tail or "") + e.tail - elem[i] = node - elif parse == "text": - text = loader(href, parse, e.get("encoding")) - if text is None: - raise FatalIncludeError( - "cannot load %r as %r" % (href, parse) - ) - if i: - node = elem[i-1] - node.tail = (node.tail or "") + text + (e.tail or "") - else: - elem.text = (elem.text or "") + text + (e.tail or "") - del elem[i] - continue - else: - raise FatalIncludeError( - "unknown parse type in xi:include tag (%r)" % parse - ) - elif e.tag == XINCLUDE_FALLBACK: - raise FatalIncludeError( - "xi:fallback tag must be child of xi:include (%r)" % e.tag - ) - else: - include(e, loader) - i = i + 1 diff --git a/modules/language/python/module/xml/etree/ElementPath.py~ b/modules/language/python/module/xml/etree/ElementPath.py~ deleted file mode 100644 index ab6b79a..0000000 --- a/modules/language/python/module/xml/etree/ElementPath.py~ +++ /dev/null @@ -1,314 +0,0 @@ -# -# ElementTree -# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $ -# -# limited xpath support for element trees -# -# history: -# 2003-05-23 fl created -# 2003-05-28 fl added support for // etc -# 2003-08-27 fl fixed parsing of periods in element names -# 2007-09-10 fl new selection engine -# 2007-09-12 fl fixed parent selector -# 2007-09-13 fl added iterfind; changed findall to return a list -# 2007-11-30 fl added namespaces support -# 2009-10-30 fl added child element value filter -# -# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved. -# -# fredrik@pythonware.com -# http://www.pythonware.com -# -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2009 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- - -# Licensed to PSF under a Contributor Agreement. -# See http://www.python.org/psf/license for licensing details. - -## -# Implementation module for XPath support. There's usually no reason -# to import this module directly; the <b>ElementTree</b> does this for -# you, if needed. -## - -import re - -xpath_tokenizer_re = re.compile( - r"(" - r"'[^']*'|\"[^\"]*\"|" - r"::|" - r"//?|" - r"\.\.|" - r"\(\)|" - r"[/.*:\[\]\(\)@=])|" - r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|" - r"\s+" - ) - -def xpath_tokenizer(pattern, namespaces=None): - for token in xpath_tokenizer_re.findall(pattern): - tag = token[1] - if tag and tag[0] != "{" and ":" in tag: - try: - prefix, uri = tag.split(":", 1) - if not namespaces: - raise KeyError - yield token[0], "{%s}%s" % (namespaces[prefix], uri) - except KeyError: - raise SyntaxError("prefix %r not found in prefix map" % prefix) - else: - yield token - -def get_parent_map(context): - parent_map = context.parent_map - if parent_map is None: - context.parent_map = parent_map = {} - for p in context.root.iter(): - for e in p: - parent_map[e] = p - return parent_map - -def prepare_child(next, token): - tag = token[1] - def select(context, result): - for elem in result: - for e in elem: - if e.tag == tag: - yield e - return select - -def prepare_star(next, token): - def select(context, result): - for elem in result: - yield from elem - return select - -def prepare_self(next, token): - def select(context, result): - yield from result - return select - -def prepare_descendant(next, token): - try: - token = next() - except StopIteration: - return - if token[0] == "*": - tag = "*" - elif not token[0]: - tag = token[1] - else: - raise SyntaxError("invalid descendant") - def select(context, result): - for elem in result: - for e in elem.iter(tag): - if e is not elem: - yield e - return select - -def prepare_parent(next, token): - def select(context, result): - # FIXME: raise error if .. is applied at toplevel? - parent_map = get_parent_map(context) - result_map = {} - for elem in result: - if elem in parent_map: - parent = parent_map[elem] - if parent not in result_map: - result_map[parent] = None - yield parent - return select - -def prepare_predicate(next, token): - # FIXME: replace with real parser!!! refs: - # http://effbot.org/zone/simple-iterator-parser.htm - # http://javascript.crockford.com/tdop/tdop.html - signature = [] - predicate = [] - while 1: - try: - token = next() - except StopIteration: - return - if token[0] == "]": - break - if token[0] and token[0][:1] in "'\"": - token = "'", token[0][1:-1] - signature.append(token[0] or "-") - predicate.append(token[1]) - signature = "".join(signature) - # use signature to determine predicate type - if signature == "@-": - # [@attribute] predicate - key = predicate[1] - def select(context, result): - for elem in result: - if elem.get(key) is not None: - yield elem - return select - if signature == "@-='": - # [@attribute='value'] - key = predicate[1] - value = predicate[-1] - def select(context, result): - for elem in result: - if elem.get(key) == value: - yield elem - return select - if signature == "-" and not re.match(r"\-?\d+$", predicate[0]): - # [tag] - tag = predicate[0] - def select(context, result): - for elem in result: - if elem.find(tag) is not None: - yield elem - return select - if signature == "-='" and not re.match(r"\-?\d+$", predicate[0]): - # [tag='value'] - tag = predicate[0] - value = predicate[-1] - def select(context, result): - for elem in result: - for e in elem.findall(tag): - if "".join(e.itertext()) == value: - yield elem - break - return select - if signature == "-" or signature == "-()" or signature == "-()-": - # [index] or [last()] or [last()-index] - if signature == "-": - # [index] - index = int(predicate[0]) - 1 - if index < 0: - raise SyntaxError("XPath position >= 1 expected") - else: - if predicate[0] != "last": - raise SyntaxError("unsupported function") - if signature == "-()-": - try: - index = int(predicate[2]) - 1 - except ValueError: - raise SyntaxError("unsupported expression") - if index > -2: - raise SyntaxError("XPath offset from last() must be negative") - else: - index = -1 - def select(context, result): - parent_map = get_parent_map(context) - for elem in result: - try: - parent = parent_map[elem] - # FIXME: what if the selector is "*" ? - elems = list(parent.findall(elem.tag)) - if elems[index] is elem: - yield elem - except (IndexError, KeyError): - pass - return select - raise SyntaxError("invalid predicate") - -ops = { - "": prepare_child, - "*": prepare_star, - ".": prepare_self, - "..": prepare_parent, - "//": prepare_descendant, - "[": prepare_predicate, - } - -_cache = {} - -class _SelectorContext: - parent_map = None - def __init__(self, root): - self.root = root - -# -------------------------------------------------------------------- - -## -# Generate all matching objects. - -def iterfind(elem, path, namespaces=None): - # compile selector pattern - cache_key = (path, None if namespaces is None - else tuple(sorted(namespaces.items()))) - if path[-1:] == "/": - path = path + "*" # implicit all (FIXME: keep this?) - try: - selector = _cache[cache_key] - except KeyError: - if len(_cache) > 100: - _cache.clear() - if path[:1] == "/": - raise SyntaxError("cannot use absolute path on element") - next = iter(xpath_tokenizer(path, namespaces)).__next__ - try: - token = next() - except StopIteration: - return - selector = [] - while 1: - try: - selector.append(ops[token[0]](next, token)) - except StopIteration: - raise SyntaxError("invalid path") - try: - token = next() - if token[0] == "/": - token = next() - except StopIteration: - break - _cache[cache_key] = selector - # execute selector pattern - result = [elem] - context = _SelectorContext(elem) - for select in selector: - result = select(context, result) - return result - -## -# Find first matching object. - -def find(elem, path, namespaces=None): - return next(iterfind(elem, path, namespaces), None) - -## -# Find all matching objects. - -def findall(elem, path, namespaces=None): - return list(iterfind(elem, path, namespaces)) - -## -# Find text for first matching object. - -def findtext(elem, path, default=None, namespaces=None): - try: - elem = next(iterfind(elem, path, namespaces)) - return elem.text or "" - except StopIteration: - return default diff --git a/modules/language/python/module/xml/etree/ElementTree.py~ b/modules/language/python/module/xml/etree/ElementTree.py~ deleted file mode 100644 index cf4e1da..0000000 --- a/modules/language/python/module/xml/etree/ElementTree.py~ +++ /dev/null @@ -1,1656 +0,0 @@ -"""Lightweight XML support for Python. - - XML is an inherently hierarchical data format, and the most natural way to - represent it is with a tree. This module has two classes for this purpose: - - 1. ElementTree represents the whole XML document as a tree and - - 2. Element represents a single node in this tree. - - Interactions with the whole document (reading and writing to/from files) are - usually done on the ElementTree level. Interactions with a single XML element - and its sub-elements are done on the Element level. - - Element is a flexible container object designed to store hierarchical data - structures in memory. It can be described as a cross between a list and a - dictionary. Each Element has a number of properties associated with it: - - 'tag' - a string containing the element's name. - - 'attributes' - a Python dictionary storing the element's attributes. - - 'text' - a string containing the element's text content. - - 'tail' - an optional string containing text after the element's end tag. - - And a number of child elements stored in a Python sequence. - - To create an element instance, use the Element constructor, - or the SubElement factory function. - - You can also use the ElementTree class to wrap an element structure - and convert it to and from XML. - -""" - -#--------------------------------------------------------------------- -# Licensed to PSF under a Contributor Agreement. -# See http://www.python.org/psf/license for licensing details. -# -# ElementTree -# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved. -# -# fredrik@pythonware.com -# http://www.pythonware.com -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2008 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- - -__all__ = [ - # public symbols - "Comment", - "dump", - "Element", "ElementTree", - "fromstring", "fromstringlist", - "iselement", "iterparse", - "parse", "ParseError", - "PI", "ProcessingInstruction", - "QName", - "SubElement", - "tostring", "tostringlist", - "TreeBuilder", - "VERSION", - "XML", "XMLID", - "XMLParser", "XMLPullParser", - "register_namespace", - ] - -VERSION = "1.3.0" - -import sys -import re -import warnings -import io -import collections -import contextlib - -from . import ElementPath - - -class ParseError(SyntaxError): - """An error when parsing an XML document. - - In addition to its exception value, a ParseError contains - two extra attributes: - 'code' - the specific exception code - 'position' - the line and column of the error - - """ - pass - -# -------------------------------------------------------------------- - - -def iselement(element): - """Return True if *element* appears to be an Element.""" - return hasattr(element, 'tag') - - -class Element: - """An XML element. - - This class is the reference implementation of the Element interface. - - An element's length is its number of subelements. That means if you - want to check if an element is truly empty, you should check BOTH - its length AND its text attribute. - - The element tag, attribute names, and attribute values can be either - bytes or strings. - - *tag* is the element name. *attrib* is an optional dictionary containing - element attributes. *extra* are additional element attributes given as - keyword arguments. - - Example form: - <tag attrib>text<child/>...</tag>tail - - """ - - tag = None - """The element's name.""" - - attrib = None - """Dictionary of the element's attributes.""" - - text = None - """ - Text before first subelement. This is either a string or the value None. - Note that if there is no text, this attribute may be either - None or the empty string, depending on the parser. - - """ - - tail = None - """ - Text after this element's end tag, but before the next sibling element's - start tag. This is either a string or the value None. Note that if there - was no text, this attribute may be either None or an empty string, - depending on the parser. - - """ - - def __init__(self, tag, attrib={}, **extra): - if not isinstance(attrib, dict): - raise TypeError("attrib must be dict, not %s" % ( - attrib.__class__.__name__,)) - attrib = attrib.copy() - attrib.update(extra) - self.tag = tag - self.attrib = attrib - self._children = [] - - def __repr__(self): - return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self)) - - def makeelement(self, tag, attrib): - """Create a new element with the same type. - - *tag* is a string containing the element name. - *attrib* is a dictionary containing the element attributes. - - Do not call this method, use the SubElement factory function instead. - - """ - return self.__class__(tag, attrib) - - def copy(self): - """Return copy of current element. - - This creates a shallow copy. Subelements will be shared with the - original tree. - - """ - elem = self.makeelement(self.tag, self.attrib) - elem.text = self.text - elem.tail = self.tail - elem[:] = self - return elem - - def __len__(self): - return len(self._children) - - def __bool__(self): - warnings.warn( - "The behavior of this method will change in future versions. " - "Use specific 'len(elem)' or 'elem is not None' test instead.", - FutureWarning, stacklevel=2 - ) - return len(self._children) != 0 # emulate old behaviour, for now - - def __getitem__(self, index): - return self._children[index] - - def __setitem__(self, index, element): - # if isinstance(index, slice): - # for elt in element: - # assert iselement(elt) - # else: - # assert iselement(element) - self._children[index] = element - - def __delitem__(self, index): - del self._children[index] - - def append(self, subelement): - """Add *subelement* to the end of this element. - - The new element will appear in document order after the last existing - subelement (or directly after the text, if it's the first subelement), - but before the end tag for this element. - - """ - self._assert_is_element(subelement) - self._children.append(subelement) - - def extend(self, elements): - """Append subelements from a sequence. - - *elements* is a sequence with zero or more elements. - - """ - for element in elements: - self._assert_is_element(element) - self._children.extend(elements) - - def insert(self, index, subelement): - """Insert *subelement* at position *index*.""" - self._assert_is_element(subelement) - self._children.insert(index, subelement) - - def _assert_is_element(self, e): - # Need to refer to the actual Python implementation, not the - # shadowing C implementation. - if not isinstance(e, _Element_Py): - raise TypeError('expected an Element, not %s' % type(e).__name__) - - def remove(self, subelement): - """Remove matching subelement. - - Unlike the find methods, this method compares elements based on - identity, NOT ON tag value or contents. To remove subelements by - other means, the easiest way is to use a list comprehension to - select what elements to keep, and then use slice assignment to update - the parent element. - - ValueError is raised if a matching element could not be found. - - """ - # assert iselement(element) - self._children.remove(subelement) - - def getchildren(self): - """(Deprecated) Return all subelements. - - Elements are returned in document order. - - """ - warnings.warn( - "This method will be removed in future versions. " - "Use 'list(elem)' or iteration over elem instead.", - DeprecationWarning, stacklevel=2 - ) - return self._children - - def find(self, path, namespaces=None): - """Find first matching element by tag name or path. - - *path* is a string having either an element tag or an XPath, - *namespaces* is an optional mapping from namespace prefix to full name. - - Return the first matching element, or None if no element was found. - - """ - return ElementPath.find(self, path, namespaces) - - def findtext(self, path, default=None, namespaces=None): - """Find text for first matching element by tag name or path. - - *path* is a string having either an element tag or an XPath, - *default* is the value to return if the element was not found, - *namespaces* is an optional mapping from namespace prefix to full name. - - Return text content of first matching element, or default value if - none was found. Note that if an element is found having no text - content, the empty string is returned. - - """ - return ElementPath.findtext(self, path, default, namespaces) - - def findall(self, path, namespaces=None): - """Find all matching subelements by tag name or path. - - *path* is a string having either an element tag or an XPath, - *namespaces* is an optional mapping from namespace prefix to full name. - - Returns list containing all matching elements in document order. - - """ - return ElementPath.findall(self, path, namespaces) - - def iterfind(self, path, namespaces=None): - """Find all matching subelements by tag name or path. - - *path* is a string having either an element tag or an XPath, - *namespaces* is an optional mapping from namespace prefix to full name. - - Return an iterable yielding all matching elements in document order. - - """ - return ElementPath.iterfind(self, path, namespaces) - - def clear(self): - """Reset element. - - This function removes all subelements, clears all attributes, and sets - the text and tail attributes to None. - - """ - self.attrib.clear() - self._children = [] - self.text = self.tail = None - - def get(self, key, default=None): - """Get element attribute. - - Equivalent to attrib.get, but some implementations may handle this a - bit more efficiently. *key* is what attribute to look for, and - *default* is what to return if the attribute was not found. - - Returns a string containing the attribute value, or the default if - attribute was not found. - - """ - return self.attrib.get(key, default) - - def set(self, key, value): - """Set element attribute. - - Equivalent to attrib[key] = value, but some implementations may handle - this a bit more efficiently. *key* is what attribute to set, and - *value* is the attribute value to set it to. - - """ - self.attrib[key] = value - - def keys(self): - """Get list of attribute names. - - Names are returned in an arbitrary order, just like an ordinary - Python dict. Equivalent to attrib.keys() - - """ - return self.attrib.keys() - - def items(self): - """Get element attributes as a sequence. - - The attributes are returned in arbitrary order. Equivalent to - attrib.items(). - - Return a list of (name, value) tuples. - - """ - return self.attrib.items() - - def iter(self, tag=None): - """Create tree iterator. - - The iterator loops over the element and all subelements in document - order, returning all elements with a matching tag. - - If the tree structure is modified during iteration, new or removed - elements may or may not be included. To get a stable set, use the - list() function on the iterator, and loop over the resulting list. - - *tag* is what tags to look for (default is to return all elements) - - Return an iterator containing all the matching elements. - - """ - if tag == "*": - tag = None - if tag is None or self.tag == tag: - yield self - for e in self._children: - yield from e.iter(tag) - - # compatibility - def getiterator(self, tag=None): - # Change for a DeprecationWarning in 1.4 - warnings.warn( - "This method will be removed in future versions. " - "Use 'elem.iter()' or 'list(elem.iter())' instead.", - PendingDeprecationWarning, stacklevel=2 - ) - return list(self.iter(tag)) - - def itertext(self): - """Create text iterator. - - The iterator loops over the element and all subelements in document - order, returning all inner text. - - """ - tag = self.tag - if not isinstance(tag, str) and tag is not None: - return - t = self.text - if t: - yield t - for e in self: - yield from e.itertext() - t = e.tail - if t: - yield t - - -def SubElement(parent, tag, attrib={}, **extra): - """Subelement factory which creates an element instance, and appends it - to an existing parent. - - The element tag, attribute names, and attribute values can be either - bytes or Unicode strings. - - *parent* is the parent element, *tag* is the subelements name, *attrib* is - an optional directory containing element attributes, *extra* are - additional attributes given as keyword arguments. - - """ - attrib = attrib.copy() - attrib.update(extra) - element = parent.makeelement(tag, attrib) - parent.append(element) - return element - - -def Comment(text=None): - """Comment element factory. - - This function creates a special element which the standard serializer - serializes as an XML comment. - - *text* is a string containing the comment string. - - """ - element = Element(Comment) - element.text = text - return element - - -def ProcessingInstruction(target, text=None): - """Processing Instruction element factory. - - This function creates a special element which the standard serializer - serializes as an XML comment. - - *target* is a string containing the processing instruction, *text* is a - string containing the processing instruction contents, if any. - - """ - element = Element(ProcessingInstruction) - element.text = target - if text: - element.text = element.text + " " + text - return element - -PI = ProcessingInstruction - - -class QName: - """Qualified name wrapper. - - This class can be used to wrap a QName attribute value in order to get - proper namespace handing on output. - - *text_or_uri* is a string containing the QName value either in the form - {uri}local, or if the tag argument is given, the URI part of a QName. - - *tag* is an optional argument which if given, will make the first - argument (text_or_uri) be interpreted as a URI, and this argument (tag) - be interpreted as a local name. - - """ - def __init__(self, text_or_uri, tag=None): - if tag: - text_or_uri = "{%s}%s" % (text_or_uri, tag) - self.text = text_or_uri - def __str__(self): - return self.text - def __repr__(self): - return '<%s %r>' % (self.__class__.__name__, self.text) - def __hash__(self): - return hash(self.text) - def __le__(self, other): - if isinstance(other, QName): - return self.text <= other.text - return self.text <= other - def __lt__(self, other): - if isinstance(other, QName): - return self.text < other.text - return self.text < other - def __ge__(self, other): - if isinstance(other, QName): - return self.text >= other.text - return self.text >= other - def __gt__(self, other): - if isinstance(other, QName): - return self.text > other.text - return self.text > other - def __eq__(self, other): - if isinstance(other, QName): - return self.text == other.text - return self.text == other - -# -------------------------------------------------------------------- - - -class ElementTree: - """An XML element hierarchy. - - This class also provides support for serialization to and from - standard XML. - - *element* is an optional root element node, - *file* is an optional file handle or file name of an XML file whose - contents will be used to initialize the tree with. - - """ - def __init__(self, element=None, file=None): - # assert element is None or iselement(element) - self._root = element # first node - if file: - self.parse(file) - - def getroot(self): - """Return root element of this tree.""" - return self._root - - def _setroot(self, element): - """Replace root element of this tree. - - This will discard the current contents of the tree and replace it - with the given element. Use with care! - - """ - # assert iselement(element) - self._root = element - - def parse(self, source, parser=None): - """Load external XML document into element tree. - - *source* is a file name or file object, *parser* is an optional parser - instance that defaults to XMLParser. - - ParseError is raised if the parser fails to parse the document. - - Returns the root element of the given source document. - - """ - close_source = False - if not hasattr(source, "read"): - source = open(source, "rb") - close_source = True - try: - if parser is None: - # If no parser was specified, create a default XMLParser - parser = XMLParser() - if hasattr(parser, '_parse_whole'): - # The default XMLParser, when it comes from an accelerator, - # can define an internal _parse_whole API for efficiency. - # It can be used to parse the whole source without feeding - # it with chunks. - self._root = parser._parse_whole(source) - return self._root - while True: - data = source.read(65536) - if not data: - break - parser.feed(data) - self._root = parser.close() - return self._root - finally: - if close_source: - source.close() - - def iter(self, tag=None): - """Create and return tree iterator for the root element. - - The iterator loops over all elements in this tree, in document order. - - *tag* is a string with the tag name to iterate over - (default is to return all elements). - - """ - # assert self._root is not None - return self._root.iter(tag) - - # compatibility - def getiterator(self, tag=None): - # Change for a DeprecationWarning in 1.4 - warnings.warn( - "This method will be removed in future versions. " - "Use 'tree.iter()' or 'list(tree.iter())' instead.", - PendingDeprecationWarning, stacklevel=2 - ) - return list(self.iter(tag)) - - def find(self, path, namespaces=None): - """Find first matching element by tag name or path. - - Same as getroot().find(path), which is Element.find() - - *path* is a string having either an element tag or an XPath, - *namespaces* is an optional mapping from namespace prefix to full name. - - Return the first matching element, or None if no element was found. - - """ - # assert self._root is not None - if path[:1] == "/": - path = "." + path - warnings.warn( - "This search is broken in 1.3 and earlier, and will be " - "fixed in a future version. If you rely on the current " - "behaviour, change it to %r" % path, - FutureWarning, stacklevel=2 - ) - return self._root.find(path, namespaces) - - def findtext(self, path, default=None, namespaces=None): - """Find first matching element by tag name or path. - - Same as getroot().findtext(path), which is Element.findtext() - - *path* is a string having either an element tag or an XPath, - *namespaces* is an optional mapping from namespace prefix to full name. - - Return the first matching element, or None if no element was found. - - """ - # assert self._root is not None - if path[:1] == "/": - path = "." + path - warnings.warn( - "This search is broken in 1.3 and earlier, and will be " - "fixed in a future version. If you rely on the current " - "behaviour, change it to %r" % path, - FutureWarning, stacklevel=2 - ) - return self._root.findtext(path, default, namespaces) - - def findall(self, path, namespaces=None): - """Find all matching subelements by tag name or path. - - Same as getroot().findall(path), which is Element.findall(). - - *path* is a string having either an element tag or an XPath, - *namespaces* is an optional mapping from namespace prefix to full name. - - Return list containing all matching elements in document order. - - """ - # assert self._root is not None - if path[:1] == "/": - path = "." + path - warnings.warn( - "This search is broken in 1.3 and earlier, and will be " - "fixed in a future version. If you rely on the current " - "behaviour, change it to %r" % path, - FutureWarning, stacklevel=2 - ) - return self._root.findall(path, namespaces) - - def iterfind(self, path, namespaces=None): - """Find all matching subelements by tag name or path. - - Same as getroot().iterfind(path), which is element.iterfind() - - *path* is a string having either an element tag or an XPath, - *namespaces* is an optional mapping from namespace prefix to full name. - - Return an iterable yielding all matching elements in document order. - - """ - # assert self._root is not None - if path[:1] == "/": - path = "." + path - warnings.warn( - "This search is broken in 1.3 and earlier, and will be " - "fixed in a future version. If you rely on the current " - "behaviour, change it to %r" % path, - FutureWarning, stacklevel=2 - ) - return self._root.iterfind(path, namespaces) - - def write(self, file_or_filename, - encoding=None, - xml_declaration=None, - default_namespace=None, - method=None, *, - short_empty_elements=True): - """Write element tree to a file as XML. - - Arguments: - *file_or_filename* -- file name or a file object opened for writing - - *encoding* -- the output encoding (default: US-ASCII) - - *xml_declaration* -- bool indicating if an XML declaration should be - added to the output. If None, an XML declaration - is added if encoding IS NOT either of: - US-ASCII, UTF-8, or Unicode - - *default_namespace* -- sets the default XML namespace (for "xmlns") - - *method* -- either "xml" (default), "html, "text", or "c14n" - - *short_empty_elements* -- controls the formatting of elements - that contain no content. If True (default) - they are emitted as a single self-closed - tag, otherwise they are emitted as a pair - of start/end tags - - """ - if not method: - method = "xml" - elif method not in _serialize: - raise ValueError("unknown method %r" % method) - if not encoding: - if method == "c14n": - encoding = "utf-8" - else: - encoding = "us-ascii" - enc_lower = encoding.lower() - with _get_writer(file_or_filename, enc_lower) as write: - if method == "xml" and (xml_declaration or - (xml_declaration is None and - enc_lower not in ("utf-8", "us-ascii", "unicode"))): - declared_encoding = encoding - if enc_lower == "unicode": - # Retrieve the default encoding for the xml declaration - import locale - declared_encoding = locale.getpreferredencoding() - write("<?xml version='1.0' encoding='%s'?>\n" % ( - declared_encoding,)) - if method == "text": - _serialize_text(write, self._root) - else: - qnames, namespaces = _namespaces(self._root, default_namespace) - serialize = _serialize[method] - serialize(write, self._root, qnames, namespaces, - short_empty_elements=short_empty_elements) - - def write_c14n(self, file): - # lxml.etree compatibility. use output method instead - return self.write(file, method="c14n") - -# -------------------------------------------------------------------- -# serialization support - -@contextlib.contextmanager -def _get_writer(file_or_filename, encoding): - # returns text write method and release all resources after using - try: - write = file_or_filename.write - except AttributeError: - # file_or_filename is a file name - if encoding == "unicode": - file = open(file_or_filename, "w") - else: - file = open(file_or_filename, "w", encoding=encoding, - errors="xmlcharrefreplace") - with file: - yield file.write - else: - # file_or_filename is a file-like object - # encoding determines if it is a text or binary writer - if encoding == "unicode": - # use a text writer as is - yield write - else: - # wrap a binary writer with TextIOWrapper - with contextlib.ExitStack() as stack: - if isinstance(file_or_filename, io.BufferedIOBase): - file = file_or_filename - elif isinstance(file_or_filename, io.RawIOBase): - file = io.BufferedWriter(file_or_filename) - # Keep the original file open when the BufferedWriter is - # destroyed - stack.callback(file.detach) - else: - # This is to handle passed objects that aren't in the - # IOBase hierarchy, but just have a write method - file = io.BufferedIOBase() - file.writable = lambda: True - file.write = write - try: - # TextIOWrapper uses this methods to determine - # if BOM (for UTF-16, etc) should be added - file.seekable = file_or_filename.seekable - file.tell = file_or_filename.tell - except AttributeError: - pass - file = io.TextIOWrapper(file, - encoding=encoding, - errors="xmlcharrefreplace", - newline="\n") - # Keep the original file open when the TextIOWrapper is - # destroyed - stack.callback(file.detach) - yield file.write - -def _namespaces(elem, default_namespace=None): - # identify namespaces used in this tree - - # maps qnames to *encoded* prefix:local names - qnames = {None: None} - - # maps uri:s to prefixes - namespaces = {} - if default_namespace: - namespaces[default_namespace] = "" - - def add_qname(qname): - # calculate serialized qname representation - try: - if qname[:1] == "{": - uri, tag = qname[1:].rsplit("}", 1) - prefix = namespaces.get(uri) - if prefix is None: - prefix = _namespace_map.get(uri) - if prefix is None: - prefix = "ns%d" % len(namespaces) - if prefix != "xml": - namespaces[uri] = prefix - if prefix: - qnames[qname] = "%s:%s" % (prefix, tag) - else: - qnames[qname] = tag # default element - else: - if default_namespace: - # FIXME: can this be handled in XML 1.0? - raise ValueError( - "cannot use non-qualified names with " - "default_namespace option" - ) - qnames[qname] = qname - except TypeError: - _raise_serialization_error(qname) - - # populate qname and namespaces table - for elem in elem.iter(): - tag = elem.tag - if isinstance(tag, QName): - if tag.text not in qnames: - add_qname(tag.text) - elif isinstance(tag, str): - if tag not in qnames: - add_qname(tag) - elif tag is not None and tag is not Comment and tag is not PI: - _raise_serialization_error(tag) - for key, value in elem.items(): - if isinstance(key, QName): - key = key.text - if key not in qnames: - add_qname(key) - if isinstance(value, QName) and value.text not in qnames: - add_qname(value.text) - text = elem.text - if isinstance(text, QName) and text.text not in qnames: - add_qname(text.text) - return qnames, namespaces - -def _serialize_xml(write, elem, qnames, namespaces, - short_empty_elements, **kwargs): - tag = elem.tag - text = elem.text - if tag is Comment: - write("<!--%s-->" % text) - elif tag is ProcessingInstruction: - write("<?%s?>" % text) - else: - tag = qnames[tag] - if tag is None: - if text: - write(_escape_cdata(text)) - for e in elem: - _serialize_xml(write, e, qnames, None, - short_empty_elements=short_empty_elements) - else: - write("<" + tag) - items = list(elem.items()) - if items or namespaces: - if namespaces: - for v, k in sorted(namespaces.items(), - key=lambda x: x[1]): # sort on prefix - if k: - k = ":" + k - write(" xmlns%s=\"%s\"" % ( - k, - _escape_attrib(v) - )) - for k, v in sorted(items): # lexical order - if isinstance(k, QName): - k = k.text - if isinstance(v, QName): - v = qnames[v.text] - else: - v = _escape_attrib(v) - write(" %s=\"%s\"" % (qnames[k], v)) - if text or len(elem) or not short_empty_elements: - write(">") - if text: - write(_escape_cdata(text)) - for e in elem: - _serialize_xml(write, e, qnames, None, - short_empty_elements=short_empty_elements) - write("</" + tag + ">") - else: - write(" />") - if elem.tail: - write(_escape_cdata(elem.tail)) - -HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", - "img", "input", "isindex", "link", "meta", "param") - -try: - HTML_EMPTY = set(HTML_EMPTY) -except NameError: - pass - -def _serialize_html(write, elem, qnames, namespaces, **kwargs): - tag = elem.tag - text = elem.text - if tag is Comment: - write("<!--%s-->" % _escape_cdata(text)) - elif tag is ProcessingInstruction: - write("<?%s?>" % _escape_cdata(text)) - else: - tag = qnames[tag] - if tag is None: - if text: - write(_escape_cdata(text)) - for e in elem: - _serialize_html(write, e, qnames, None) - else: - write("<" + tag) - items = list(elem.items()) - if items or namespaces: - if namespaces: - for v, k in sorted(namespaces.items(), - key=lambda x: x[1]): # sort on prefix - if k: - k = ":" + k - write(" xmlns%s=\"%s\"" % ( - k, - _escape_attrib(v) - )) - for k, v in sorted(items): # lexical order - if isinstance(k, QName): - k = k.text - if isinstance(v, QName): - v = qnames[v.text] - else: - v = _escape_attrib_html(v) - # FIXME: handle boolean attributes - write(" %s=\"%s\"" % (qnames[k], v)) - write(">") - ltag = tag.lower() - if text: - if ltag == "script" or ltag == "style": - write(text) - else: - write(_escape_cdata(text)) - for e in elem: - _serialize_html(write, e, qnames, None) - if ltag not in HTML_EMPTY: - write("</" + tag + ">") - if elem.tail: - write(_escape_cdata(elem.tail)) - -def _serialize_text(write, elem): - for part in elem.itertext(): - write(part) - if elem.tail: - write(elem.tail) - -_serialize = { - "xml": _serialize_xml, - "html": _serialize_html, - "text": _serialize_text, -# this optional method is imported at the end of the module -# "c14n": _serialize_c14n, -} - - -def register_namespace(prefix, uri): - """Register a namespace prefix. - - The registry is global, and any existing mapping for either the - given prefix or the namespace URI will be removed. - - *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and - attributes in this namespace will be serialized with prefix if possible. - - ValueError is raised if prefix is reserved or is invalid. - - """ - if re.match(r"ns\d+$", prefix): - raise ValueError("Prefix format reserved for internal use") - for k, v in list(_namespace_map.items()): - if k == uri or v == prefix: - del _namespace_map[k] - _namespace_map[uri] = prefix - -_namespace_map = { - # "well-known" namespace prefixes - "http://www.w3.org/XML/1998/namespace": "xml", - "http://www.w3.org/1999/xhtml": "html", - "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", - "http://schemas.xmlsoap.org/wsdl/": "wsdl", - # xml schema - "http://www.w3.org/2001/XMLSchema": "xs", - "http://www.w3.org/2001/XMLSchema-instance": "xsi", - # dublin core - "http://purl.org/dc/elements/1.1/": "dc", -} -# For tests and troubleshooting -register_namespace._namespace_map = _namespace_map - -def _raise_serialization_error(text): - raise TypeError( - "cannot serialize %r (type %s)" % (text, type(text).__name__) - ) - -def _escape_cdata(text): - # escape character data - try: - # it's worth avoiding do-nothing calls for strings that are - # shorter than 500 characters, or so. assume that's, by far, - # the most common case in most applications. - if "&" in text: - text = text.replace("&", "&") - if "<" in text: - text = text.replace("<", "<") - if ">" in text: - text = text.replace(">", ">") - return text - except (TypeError, AttributeError): - _raise_serialization_error(text) - -def _escape_attrib(text): - # escape attribute value - try: - if "&" in text: - text = text.replace("&", "&") - if "<" in text: - text = text.replace("<", "<") - if ">" in text: - text = text.replace(">", ">") - if "\"" in text: - text = text.replace("\"", """) - # The following business with carriage returns is to satisfy - # Section 2.11 of the XML specification, stating that - # CR or CR LN should be replaced with just LN - # http://www.w3.org/TR/REC-xml/#sec-line-ends - if "\r\n" in text: - text = text.replace("\r\n", "\n") - if "\r" in text: - text = text.replace("\r", "\n") - #The following four lines are issue 17582 - if "\n" in text: - text = text.replace("\n", " ") - if "\t" in text: - text = text.replace("\t", "	") - return text - except (TypeError, AttributeError): - _raise_serialization_error(text) - -def _escape_attrib_html(text): - # escape attribute value - try: - if "&" in text: - text = text.replace("&", "&") - if ">" in text: - text = text.replace(">", ">") - if "\"" in text: - text = text.replace("\"", """) - return text - except (TypeError, AttributeError): - _raise_serialization_error(text) - -# -------------------------------------------------------------------- - -def tostring(element, encoding=None, method=None, *, - short_empty_elements=True): - """Generate string representation of XML element. - - All subelements are included. If encoding is "unicode", a string - is returned. Otherwise a bytestring is returned. - - *element* is an Element instance, *encoding* is an optional output - encoding defaulting to US-ASCII, *method* is an optional output which can - be one of "xml" (default), "html", "text" or "c14n". - - Returns an (optionally) encoded string containing the XML data. - - """ - stream = io.StringIO() if encoding == 'unicode' else io.BytesIO() - ElementTree(element).write(stream, encoding, method=method, - short_empty_elements=short_empty_elements) - return stream.getvalue() - -class _ListDataStream(io.BufferedIOBase): - """An auxiliary stream accumulating into a list reference.""" - def __init__(self, lst): - self.lst = lst - - def writable(self): - return True - - def seekable(self): - return True - - def write(self, b): - self.lst.append(b) - - def tell(self): - return len(self.lst) - -def tostringlist(element, encoding=None, method=None, *, - short_empty_elements=True): - lst = [] - stream = _ListDataStream(lst) - ElementTree(element).write(stream, encoding, method=method, - short_empty_elements=short_empty_elements) - return lst - - -def dump(elem): - """Write element tree or element structure to sys.stdout. - - This function should be used for debugging only. - - *elem* is either an ElementTree, or a single Element. The exact output - format is implementation dependent. In this version, it's written as an - ordinary XML file. - - """ - # debugging - if not isinstance(elem, ElementTree): - elem = ElementTree(elem) - elem.write(sys.stdout, encoding="unicode") - tail = elem.getroot().tail - if not tail or tail[-1] != "\n": - sys.stdout.write("\n") - -# -------------------------------------------------------------------- -# parsing - - -def parse(source, parser=None): - """Parse XML document into element tree. - - *source* is a filename or file object containing XML data, - *parser* is an optional parser instance defaulting to XMLParser. - - Return an ElementTree instance. - - """ - tree = ElementTree() - tree.parse(source, parser) - return tree - - -def iterparse(source, events=None, parser=None): - """Incrementally parse XML document into ElementTree. - - This class also reports what's going on to the user based on the - *events* it is initialized with. The supported events are the strings - "start", "end", "start-ns" and "end-ns" (the "ns" events are used to get - detailed namespace information). If *events* is omitted, only - "end" events are reported. - - *source* is a filename or file object containing XML data, *events* is - a list of events to report back, *parser* is an optional parser instance. - - Returns an iterator providing (event, elem) pairs. - - """ - # Use the internal, undocumented _parser argument for now; When the - # parser argument of iterparse is removed, this can be killed. - pullparser = XMLPullParser(events=events, _parser=parser) - def iterator(): - try: - while True: - yield from pullparser.read_events() - # load event buffer - data = source.read(16 * 1024) - if not data: - break - pullparser.feed(data) - root = pullparser._close_and_return_root() - yield from pullparser.read_events() - it.root = root - finally: - if close_source: - source.close() - - class IterParseIterator(collections.Iterator): - __next__ = iterator().__next__ - it = IterParseIterator() - it.root = None - del iterator, IterParseIterator - - close_source = False - if not hasattr(source, "read"): - source = open(source, "rb") - close_source = True - - return it - - -class XMLPullParser: - - def __init__(self, events=None, *, _parser=None): - # The _parser argument is for internal use only and must not be relied - # upon in user code. It will be removed in a future release. - # See http://bugs.python.org/issue17741 for more details. - - self._events_queue = collections.deque() - self._parser = _parser or XMLParser(target=TreeBuilder()) - # wire up the parser for event reporting - if events is None: - events = ("end",) - self._parser._setevents(self._events_queue, events) - - def feed(self, data): - """Feed encoded data to parser.""" - if self._parser is None: - raise ValueError("feed() called after end of stream") - if data: - try: - self._parser.feed(data) - except SyntaxError as exc: - self._events_queue.append(exc) - - def _close_and_return_root(self): - # iterparse needs this to set its root attribute properly :( - root = self._parser.close() - self._parser = None - return root - - def close(self): - """Finish feeding data to parser. - - Unlike XMLParser, does not return the root element. Use - read_events() to consume elements from XMLPullParser. - """ - self._close_and_return_root() - - def read_events(self): - """Return an iterator over currently available (event, elem) pairs. - - Events are consumed from the internal event queue as they are - retrieved from the iterator. - """ - events = self._events_queue - while events: - event = events.popleft() - if isinstance(event, Exception): - raise event - else: - yield event - - -def XML(text, parser=None): - """Parse XML document from string constant. - - This function can be used to embed "XML Literals" in Python code. - - *text* is a string containing XML data, *parser* is an - optional parser instance, defaulting to the standard XMLParser. - - Returns an Element instance. - - """ - if not parser: - parser = XMLParser(target=TreeBuilder()) - parser.feed(text) - return parser.close() - - -def XMLID(text, parser=None): - """Parse XML document from string constant for its IDs. - - *text* is a string containing XML data, *parser* is an - optional parser instance, defaulting to the standard XMLParser. - - Returns an (Element, dict) tuple, in which the - dict maps element id:s to elements. - - """ - if not parser: - parser = XMLParser(target=TreeBuilder()) - parser.feed(text) - tree = parser.close() - ids = {} - for elem in tree.iter(): - id = elem.get("id") - if id: - ids[id] = elem - return tree, ids - -# Parse XML document from string constant. Alias for XML(). -fromstring = XML - -def fromstringlist(sequence, parser=None): - """Parse XML document from sequence of string fragments. - - *sequence* is a list of other sequence, *parser* is an optional parser - instance, defaulting to the standard XMLParser. - - Returns an Element instance. - - """ - if not parser: - parser = XMLParser(target=TreeBuilder()) - for text in sequence: - parser.feed(text) - return parser.close() - -# -------------------------------------------------------------------- - - -class TreeBuilder: - """Generic element structure builder. - - This builder converts a sequence of start, data, and end method - calls to a well-formed element structure. - - You can use this class to build an element structure using a custom XML - parser, or a parser for some other XML-like format. - - *element_factory* is an optional element factory which is called - to create new Element instances, as necessary. - - """ - def __init__(self, element_factory=None): - self._data = [] # data collector - self._elem = [] # element stack - self._last = None # last element - self._tail = None # true if we're after an end tag - if element_factory is None: - element_factory = Element - self._factory = element_factory - - def close(self): - """Flush builder buffers and return toplevel document Element.""" - assert len(self._elem) == 0, "missing end tags" - assert self._last is not None, "missing toplevel element" - return self._last - - def _flush(self): - if self._data: - if self._last is not None: - text = "".join(self._data) - if self._tail: - assert self._last.tail is None, "internal error (tail)" - self._last.tail = text - else: - assert self._last.text is None, "internal error (text)" - self._last.text = text - self._data = [] - - def data(self, data): - """Add text to current element.""" - self._data.append(data) - - def start(self, tag, attrs): - """Open new element and return it. - - *tag* is the element name, *attrs* is a dict containing element - attributes. - - """ - self._flush() - self._last = elem = self._factory(tag, attrs) - if self._elem: - self._elem[-1].append(elem) - self._elem.append(elem) - self._tail = 0 - return elem - - def end(self, tag): - """Close and return current Element. - - *tag* is the element name. - - """ - self._flush() - self._last = self._elem.pop() - assert self._last.tag == tag,\ - "end tag mismatch (expected %s, got %s)" % ( - self._last.tag, tag) - self._tail = 1 - return self._last - - -# also see ElementTree and TreeBuilder -class XMLParser: - """Element structure builder for XML source data based on the expat parser. - - *html* are predefined HTML entities (deprecated and not supported), - *target* is an optional target object which defaults to an instance of the - standard TreeBuilder class, *encoding* is an optional encoding string - which if given, overrides the encoding specified in the XML file: - http://www.iana.org/assignments/character-sets - - """ - - def __init__(self, html=0, target=None, encoding=None): - try: - from xml.parsers import expat - except ImportError: - try: - import pyexpat as expat - except ImportError: - raise ImportError( - "No module named expat; use SimpleXMLTreeBuilder instead" - ) - parser = expat.ParserCreate(encoding, "}") - if target is None: - target = TreeBuilder() - # underscored names are provided for compatibility only - self.parser = self._parser = parser - self.target = self._target = target - self._error = expat.error - self._names = {} # name memo cache - # main callbacks - parser.DefaultHandlerExpand = self._default - if hasattr(target, 'start'): - parser.StartElementHandler = self._start - if hasattr(target, 'end'): - parser.EndElementHandler = self._end - if hasattr(target, 'data'): - parser.CharacterDataHandler = target.data - # miscellaneous callbacks - if hasattr(target, 'comment'): - parser.CommentHandler = target.comment - if hasattr(target, 'pi'): - parser.ProcessingInstructionHandler = target.pi - # Configure pyexpat: buffering, new-style attribute handling. - parser.buffer_text = 1 - parser.ordered_attributes = 1 - parser.specified_attributes = 1 - self._doctype = None - self.entity = {} - try: - self.version = "Expat %d.%d.%d" % expat.version_info - except AttributeError: - pass # unknown - - def _setevents(self, events_queue, events_to_report): - # Internal API for XMLPullParser - # events_to_report: a list of events to report during parsing (same as - # the *events* of XMLPullParser's constructor. - # events_queue: a list of actual parsing events that will be populated - # by the underlying parser. - # - parser = self._parser - append = events_queue.append - for event_name in events_to_report: - if event_name == "start": - parser.ordered_attributes = 1 - parser.specified_attributes = 1 - def handler(tag, attrib_in, event=event_name, append=append, - start=self._start): - append((event, start(tag, attrib_in))) - parser.StartElementHandler = handler - elif event_name == "end": - def handler(tag, event=event_name, append=append, - end=self._end): - append((event, end(tag))) - parser.EndElementHandler = handler - elif event_name == "start-ns": - def handler(prefix, uri, event=event_name, append=append): - append((event, (prefix or "", uri or ""))) - parser.StartNamespaceDeclHandler = handler - elif event_name == "end-ns": - def handler(prefix, event=event_name, append=append): - append((event, None)) - parser.EndNamespaceDeclHandler = handler - else: - raise ValueError("unknown event %r" % event_name) - - def _raiseerror(self, value): - err = ParseError(value) - err.code = value.code - err.position = value.lineno, value.offset - raise err - - def _fixname(self, key): - # expand qname, and convert name string to ascii, if possible - try: - name = self._names[key] - except KeyError: - name = key - if "}" in name: - name = "{" + name - self._names[key] = name - return name - - def _start(self, tag, attr_list): - # Handler for expat's StartElementHandler. Since ordered_attributes - # is set, the attributes are reported as a list of alternating - # attribute name,value. - fixname = self._fixname - tag = fixname(tag) - attrib = {} - if attr_list: - for i in range(0, len(attr_list), 2): - attrib[fixname(attr_list[i])] = attr_list[i+1] - return self.target.start(tag, attrib) - - def _end(self, tag): - return self.target.end(self._fixname(tag)) - - def _default(self, text): - prefix = text[:1] - if prefix == "&": - # deal with undefined entities - try: - data_handler = self.target.data - except AttributeError: - return - try: - data_handler(self.entity[text[1:-1]]) - except KeyError: - from xml.parsers import expat - err = expat.error( - "undefined entity %s: line %d, column %d" % - (text, self.parser.ErrorLineNumber, - self.parser.ErrorColumnNumber) - ) - err.code = 11 # XML_ERROR_UNDEFINED_ENTITY - err.lineno = self.parser.ErrorLineNumber - err.offset = self.parser.ErrorColumnNumber - raise err - elif prefix == "<" and text[:9] == "<!DOCTYPE": - self._doctype = [] # inside a doctype declaration - elif self._doctype is not None: - # parse doctype contents - if prefix == ">": - self._doctype = None - return - text = text.strip() - if not text: - return - self._doctype.append(text) - n = len(self._doctype) - if n > 2: - type = self._doctype[1] - if type == "PUBLIC" and n == 4: - name, type, pubid, system = self._doctype - if pubid: - pubid = pubid[1:-1] - elif type == "SYSTEM" and n == 3: - name, type, system = self._doctype - pubid = None - else: - return - if hasattr(self.target, "doctype"): - self.target.doctype(name, pubid, system[1:-1]) - elif self.doctype != self._XMLParser__doctype: - # warn about deprecated call - self._XMLParser__doctype(name, pubid, system[1:-1]) - self.doctype(name, pubid, system[1:-1]) - self._doctype = None - - def doctype(self, name, pubid, system): - """(Deprecated) Handle doctype declaration - - *name* is the Doctype name, *pubid* is the public identifier, - and *system* is the system identifier. - - """ - warnings.warn( - "This method of XMLParser is deprecated. Define doctype() " - "method on the TreeBuilder target.", - DeprecationWarning, - ) - - # sentinel, if doctype is redefined in a subclass - __doctype = doctype - - def feed(self, data): - """Feed encoded data to parser.""" - try: - self.parser.Parse(data, 0) - except self._error as v: - self._raiseerror(v) - - def close(self): - """Finish feeding data to parser and return element structure.""" - try: - self.parser.Parse("", 1) # end of data - except self._error as v: - self._raiseerror(v) - try: - close_handler = self.target.close - except AttributeError: - pass - else: - return close_handler() - finally: - # get rid of circular references - del self.parser, self._parser - del self.target, self._target - - -# Import the C accelerators -try: - # Element is going to be shadowed by the C implementation. We need to keep - # the Python version of it accessible for some "creative" by external code - # (see tests) - _Element_Py = Element - - # Element, SubElement, ParseError, TreeBuilder, XMLParser - from _elementtree import * -except ImportError: - pass diff --git a/modules/language/python/module/xml/parsers.py~ b/modules/language/python/module/xml/parsers.py~ deleted file mode 100644 index eb314a3..0000000 --- a/modules/language/python/module/xml/parsers.py~ +++ /dev/null @@ -1,8 +0,0 @@ -"""Python interfaces to XML parsers. - -This package contains one module: - -expat -- Python wrapper for James Clark's Expat parser, with namespace - support. - -""" diff --git a/modules/language/python/module/xml/parsers/expat.py~ b/modules/language/python/module/xml/parsers/expat.py~ deleted file mode 100644 index bcbe9fb..0000000 --- a/modules/language/python/module/xml/parsers/expat.py~ +++ /dev/null @@ -1,8 +0,0 @@ -"""Interface to the Expat non-validating XML parser.""" -import sys - -from pyexpat import * - -# provide pyexpat submodules as xml.parsers.expat submodules -sys.modules['xml.parsers.expat.model'] = model -sys.modules['xml.parsers.expat.errors'] = errors diff --git a/modules/language/python/module/xml/sax.py~ b/modules/language/python/module/xml/sax.py~ deleted file mode 100644 index ef67ae6..0000000 --- a/modules/language/python/module/xml/sax.py~ +++ /dev/null @@ -1,107 +0,0 @@ -"""Simple API for XML (SAX) implementation for Python. - -This module provides an implementation of the SAX 2 interface; -information about the Java version of the interface can be found at -http://www.megginson.com/SAX/. The Python version of the interface is -documented at <...>. - -This package contains the following modules: - -handler -- Base classes and constants which define the SAX 2 API for - the 'client-side' of SAX for Python. - -saxutils -- Implementation of the convenience classes commonly used to - work with SAX. - -xmlreader -- Base classes and constants which define the SAX 2 API for - the parsers used with SAX for Python. - -expatreader -- Driver that allows use of the Expat parser with SAX. -""" - -from .xmlreader import InputSource -from .handler import ContentHandler, ErrorHandler -from ._exceptions import SAXException, SAXNotRecognizedException, \ - SAXParseException, SAXNotSupportedException, \ - SAXReaderNotAvailable - - -def parse(source, handler, errorHandler=ErrorHandler()): - parser = make_parser() - parser.setContentHandler(handler) - parser.setErrorHandler(errorHandler) - parser.parse(source) - -def parseString(string, handler, errorHandler=ErrorHandler()): - import io - if errorHandler is None: - errorHandler = ErrorHandler() - parser = make_parser() - parser.setContentHandler(handler) - parser.setErrorHandler(errorHandler) - - inpsrc = InputSource() - if isinstance(string, str): - inpsrc.setCharacterStream(io.StringIO(string)) - else: - inpsrc.setByteStream(io.BytesIO(string)) - parser.parse(inpsrc) - -# this is the parser list used by the make_parser function if no -# alternatives are given as parameters to the function - -default_parser_list = ["xml.sax.expatreader"] - -# tell modulefinder that importing sax potentially imports expatreader -_false = 0 -if _false: - import xml.sax.expatreader - -import os, sys -if "PY_SAX_PARSER" in os.environ: - default_parser_list = os.environ["PY_SAX_PARSER"].split(",") -del os - -_key = "python.xml.sax.parser" -if sys.platform[:4] == "java" and sys.registry.containsKey(_key): - default_parser_list = sys.registry.getProperty(_key).split(",") - - -def make_parser(parser_list = []): - """Creates and returns a SAX parser. - - Creates the first parser it is able to instantiate of the ones - given in the list created by doing parser_list + - default_parser_list. The lists must contain the names of Python - modules containing both a SAX parser and a create_parser function.""" - - for parser_name in parser_list + default_parser_list: - try: - return _create_parser(parser_name) - except ImportError as e: - import sys - if parser_name in sys.modules: - # The parser module was found, but importing it - # failed unexpectedly, pass this exception through - raise - except SAXReaderNotAvailable: - # The parser module detected that it won't work properly, - # so try the next one - pass - - raise SAXReaderNotAvailable("No parsers found", None) - -# --- Internal utility methods used by make_parser - -if sys.platform[ : 4] == "java": - def _create_parser(parser_name): - from org.python.core import imp - drv_module = imp.importName(parser_name, 0, globals()) - return drv_module.create_parser() - -else: - def _create_parser(parser_name): - drv_module = __import__(parser_name,{},{},['create_parser']) - return drv_module.create_parser() - -del sys diff --git a/modules/language/python/module/xml/sax/_exceptions.py~ b/modules/language/python/module/xml/sax/_exceptions.py~ deleted file mode 100644 index a9b2ba3..0000000 --- a/modules/language/python/module/xml/sax/_exceptions.py~ +++ /dev/null @@ -1,131 +0,0 @@ -"""Different kinds of SAX Exceptions""" -import sys -if sys.platform[:4] == "java": - from java.lang import Exception -del sys - -# ===== SAXEXCEPTION ===== - -class SAXException(Exception): - """Encapsulate an XML error or warning. This class can contain - basic error or warning information from either the XML parser or - the application: you can subclass it to provide additional - functionality, or to add localization. Note that although you will - receive a SAXException as the argument to the handlers in the - ErrorHandler interface, you are not actually required to raise - the exception; instead, you can simply read the information in - it.""" - - def __init__(self, msg, exception=None): - """Creates an exception. The message is required, but the exception - is optional.""" - self._msg = msg - self._exception = exception - Exception.__init__(self, msg) - - def getMessage(self): - "Return a message for this exception." - return self._msg - - def getException(self): - "Return the embedded exception, or None if there was none." - return self._exception - - def __str__(self): - "Create a string representation of the exception." - return self._msg - - def __getitem__(self, ix): - """Avoids weird error messages if someone does exception[ix] by - mistake, since Exception has __getitem__ defined.""" - raise AttributeError("__getitem__") - - -# ===== SAXPARSEEXCEPTION ===== - -class SAXParseException(SAXException): - """Encapsulate an XML parse error or warning. - - This exception will include information for locating the error in - the original XML document. Note that although the application will - receive a SAXParseException as the argument to the handlers in the - ErrorHandler interface, the application is not actually required - to raise the exception; instead, it can simply read the - information in it and take a different action. - - Since this exception is a subclass of SAXException, it inherits - the ability to wrap another exception.""" - - def __init__(self, msg, exception, locator): - "Creates the exception. The exception parameter is allowed to be None." - SAXException.__init__(self, msg, exception) - self._locator = locator - - # We need to cache this stuff at construction time. - # If this exception is raised, the objects through which we must - # traverse to get this information may be deleted by the time - # it gets caught. - self._systemId = self._locator.getSystemId() - self._colnum = self._locator.getColumnNumber() - self._linenum = self._locator.getLineNumber() - - def getColumnNumber(self): - """The column number of the end of the text where the exception - occurred.""" - return self._colnum - - def getLineNumber(self): - "The line number of the end of the text where the exception occurred." - return self._linenum - - def getPublicId(self): - "Get the public identifier of the entity where the exception occurred." - return self._locator.getPublicId() - - def getSystemId(self): - "Get the system identifier of the entity where the exception occurred." - return self._systemId - - def __str__(self): - "Create a string representation of the exception." - sysid = self.getSystemId() - if sysid is None: - sysid = "<unknown>" - linenum = self.getLineNumber() - if linenum is None: - linenum = "?" - colnum = self.getColumnNumber() - if colnum is None: - colnum = "?" - return "%s:%s:%s: %s" % (sysid, linenum, colnum, self._msg) - - -# ===== SAXNOTRECOGNIZEDEXCEPTION ===== - -class SAXNotRecognizedException(SAXException): - """Exception class for an unrecognized identifier. - - An XMLReader will raise this exception when it is confronted with an - unrecognized feature or property. SAX applications and extensions may - use this class for similar purposes.""" - - -# ===== SAXNOTSUPPORTEDEXCEPTION ===== - -class SAXNotSupportedException(SAXException): - """Exception class for an unsupported operation. - - An XMLReader will raise this exception when a service it cannot - perform is requested (specifically setting a state or value). SAX - applications and extensions may use this class for similar - purposes.""" - -# ===== SAXNOTSUPPORTEDEXCEPTION ===== - -class SAXReaderNotAvailable(SAXNotSupportedException): - """Exception class for a missing driver. - - An XMLReader module (driver) should raise this exception when it - is first imported, e.g. when a support module cannot be imported. - It also may be raised during parsing, e.g. if executing an external - program is not permitted.""" diff --git a/modules/language/python/module/xml/sax/expatreader.py~ b/modules/language/python/module/xml/sax/expatreader.py~ deleted file mode 100644 index 421358f..0000000 --- a/modules/language/python/module/xml/sax/expatreader.py~ +++ /dev/null @@ -1,446 +0,0 @@ -""" -SAX driver for the pyexpat C module. This driver works with -pyexpat.__version__ == '2.22'. -""" - -version = "0.20" - -from xml.sax._exceptions import * -from xml.sax.handler import feature_validation, feature_namespaces -from xml.sax.handler import feature_namespace_prefixes -from xml.sax.handler import feature_external_ges, feature_external_pes -from xml.sax.handler import feature_string_interning -from xml.sax.handler import property_xml_string, property_interning_dict - -# xml.parsers.expat does not raise ImportError in Jython -import sys -if sys.platform[:4] == "java": - raise SAXReaderNotAvailable("expat not available in Java", None) -del sys - -try: - from xml.parsers import expat -except ImportError: - raise SAXReaderNotAvailable("expat not supported", None) -else: - if not hasattr(expat, "ParserCreate"): - raise SAXReaderNotAvailable("expat not supported", None) -from xml.sax import xmlreader, saxutils, handler - -AttributesImpl = xmlreader.AttributesImpl -AttributesNSImpl = xmlreader.AttributesNSImpl - -# If we're using a sufficiently recent version of Python, we can use -# weak references to avoid cycles between the parser and content -# handler, otherwise we'll just have to pretend. -try: - import _weakref -except ImportError: - def _mkproxy(o): - return o -else: - import weakref - _mkproxy = weakref.proxy - del weakref, _weakref - -class _ClosedParser: - pass - -# --- ExpatLocator - -class ExpatLocator(xmlreader.Locator): - """Locator for use with the ExpatParser class. - - This uses a weak reference to the parser object to avoid creating - a circular reference between the parser and the content handler. - """ - def __init__(self, parser): - self._ref = _mkproxy(parser) - - def getColumnNumber(self): - parser = self._ref - if parser._parser is None: - return None - return parser._parser.ErrorColumnNumber - - def getLineNumber(self): - parser = self._ref - if parser._parser is None: - return 1 - return parser._parser.ErrorLineNumber - - def getPublicId(self): - parser = self._ref - if parser is None: - return None - return parser._source.getPublicId() - - def getSystemId(self): - parser = self._ref - if parser is None: - return None - return parser._source.getSystemId() - - -# --- ExpatParser - -class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): - """SAX driver for the pyexpat C module.""" - - def __init__(self, namespaceHandling=0, bufsize=2**16-20): - xmlreader.IncrementalParser.__init__(self, bufsize) - self._source = xmlreader.InputSource() - self._parser = None - self._namespaces = namespaceHandling - self._lex_handler_prop = None - self._parsing = 0 - self._entity_stack = [] - self._external_ges = 1 - self._interning = None - - # XMLReader methods - - def parse(self, source): - "Parse an XML document from a URL or an InputSource." - source = saxutils.prepare_input_source(source) - - self._source = source - try: - self.reset() - self._cont_handler.setDocumentLocator(ExpatLocator(self)) - xmlreader.IncrementalParser.parse(self, source) - except: - # bpo-30264: Close the source on error to not leak resources: - # xml.sax.parse() doesn't give access to the underlying parser - # to the caller - self._close_source() - raise - - def prepareParser(self, source): - if source.getSystemId() is not None: - self._parser.SetBase(source.getSystemId()) - - # Redefined setContentHandler to allow changing handlers during parsing - - def setContentHandler(self, handler): - xmlreader.IncrementalParser.setContentHandler(self, handler) - if self._parsing: - self._reset_cont_handler() - - def getFeature(self, name): - if name == feature_namespaces: - return self._namespaces - elif name == feature_string_interning: - return self._interning is not None - elif name in (feature_validation, feature_external_pes, - feature_namespace_prefixes): - return 0 - elif name == feature_external_ges: - return self._external_ges - raise SAXNotRecognizedException("Feature '%s' not recognized" % name) - - def setFeature(self, name, state): - if self._parsing: - raise SAXNotSupportedException("Cannot set features while parsing") - - if name == feature_namespaces: - self._namespaces = state - elif name == feature_external_ges: - self._external_ges = state - elif name == feature_string_interning: - if state: - if self._interning is None: - self._interning = {} - else: - self._interning = None - elif name == feature_validation: - if state: - raise SAXNotSupportedException( - "expat does not support validation") - elif name == feature_external_pes: - if state: - raise SAXNotSupportedException( - "expat does not read external parameter entities") - elif name == feature_namespace_prefixes: - if state: - raise SAXNotSupportedException( - "expat does not report namespace prefixes") - else: - raise SAXNotRecognizedException( - "Feature '%s' not recognized" % name) - - def getProperty(self, name): - if name == handler.property_lexical_handler: - return self._lex_handler_prop - elif name == property_interning_dict: - return self._interning - elif name == property_xml_string: - if self._parser: - if hasattr(self._parser, "GetInputContext"): - return self._parser.GetInputContext() - else: - raise SAXNotRecognizedException( - "This version of expat does not support getting" - " the XML string") - else: - raise SAXNotSupportedException( - "XML string cannot be returned when not parsing") - raise SAXNotRecognizedException("Property '%s' not recognized" % name) - - def setProperty(self, name, value): - if name == handler.property_lexical_handler: - self._lex_handler_prop = value - if self._parsing: - self._reset_lex_handler_prop() - elif name == property_interning_dict: - self._interning = value - elif name == property_xml_string: - raise SAXNotSupportedException("Property '%s' cannot be set" % - name) - else: - raise SAXNotRecognizedException("Property '%s' not recognized" % - name) - - # IncrementalParser methods - - def feed(self, data, isFinal = 0): - if not self._parsing: - self.reset() - self._parsing = 1 - self._cont_handler.startDocument() - - try: - # The isFinal parameter is internal to the expat reader. - # If it is set to true, expat will check validity of the entire - # document. When feeding chunks, they are not normally final - - # except when invoked from close. - self._parser.Parse(data, isFinal) - except expat.error as e: - exc = SAXParseException(expat.ErrorString(e.code), e, self) - # FIXME: when to invoke error()? - self._err_handler.fatalError(exc) - - def _close_source(self): - source = self._source - try: - file = source.getCharacterStream() - if file is not None: - file.close() - finally: - file = source.getByteStream() - if file is not None: - file.close() - - def close(self): - if (self._entity_stack or self._parser is None or - isinstance(self._parser, _ClosedParser)): - # If we are completing an external entity, do nothing here - return - try: - self.feed("", isFinal = 1) - self._cont_handler.endDocument() - self._parsing = 0 - # break cycle created by expat handlers pointing to our methods - self._parser = None - finally: - self._parsing = 0 - if self._parser is not None: - # Keep ErrorColumnNumber and ErrorLineNumber after closing. - parser = _ClosedParser() - parser.ErrorColumnNumber = self._parser.ErrorColumnNumber - parser.ErrorLineNumber = self._parser.ErrorLineNumber - self._parser = parser - self._close_source() - - def _reset_cont_handler(self): - self._parser.ProcessingInstructionHandler = \ - self._cont_handler.processingInstruction - self._parser.CharacterDataHandler = self._cont_handler.characters - - def _reset_lex_handler_prop(self): - lex = self._lex_handler_prop - parser = self._parser - if lex is None: - parser.CommentHandler = None - parser.StartCdataSectionHandler = None - parser.EndCdataSectionHandler = None - parser.StartDoctypeDeclHandler = None - parser.EndDoctypeDeclHandler = None - else: - parser.CommentHandler = lex.comment - parser.StartCdataSectionHandler = lex.startCDATA - parser.EndCdataSectionHandler = lex.endCDATA - parser.StartDoctypeDeclHandler = self.start_doctype_decl - parser.EndDoctypeDeclHandler = lex.endDTD - - def reset(self): - if self._namespaces: - self._parser = expat.ParserCreate(self._source.getEncoding(), " ", - intern=self._interning) - self._parser.namespace_prefixes = 1 - self._parser.StartElementHandler = self.start_element_ns - self._parser.EndElementHandler = self.end_element_ns - else: - self._parser = expat.ParserCreate(self._source.getEncoding(), - intern = self._interning) - self._parser.StartElementHandler = self.start_element - self._parser.EndElementHandler = self.end_element - - self._reset_cont_handler() - self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl - self._parser.NotationDeclHandler = self.notation_decl - self._parser.StartNamespaceDeclHandler = self.start_namespace_decl - self._parser.EndNamespaceDeclHandler = self.end_namespace_decl - - self._decl_handler_prop = None - if self._lex_handler_prop: - self._reset_lex_handler_prop() -# self._parser.DefaultHandler = -# self._parser.DefaultHandlerExpand = -# self._parser.NotStandaloneHandler = - self._parser.ExternalEntityRefHandler = self.external_entity_ref - try: - self._parser.SkippedEntityHandler = self.skipped_entity_handler - except AttributeError: - # This pyexpat does not support SkippedEntity - pass - self._parser.SetParamEntityParsing( - expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) - - self._parsing = 0 - self._entity_stack = [] - - # Locator methods - - def getColumnNumber(self): - if self._parser is None: - return None - return self._parser.ErrorColumnNumber - - def getLineNumber(self): - if self._parser is None: - return 1 - return self._parser.ErrorLineNumber - - def getPublicId(self): - return self._source.getPublicId() - - def getSystemId(self): - return self._source.getSystemId() - - # event handlers - def start_element(self, name, attrs): - self._cont_handler.startElement(name, AttributesImpl(attrs)) - - def end_element(self, name): - self._cont_handler.endElement(name) - - def start_element_ns(self, name, attrs): - pair = name.split() - if len(pair) == 1: - # no namespace - pair = (None, name) - elif len(pair) == 3: - pair = pair[0], pair[1] - else: - # default namespace - pair = tuple(pair) - - newattrs = {} - qnames = {} - for (aname, value) in attrs.items(): - parts = aname.split() - length = len(parts) - if length == 1: - # no namespace - qname = aname - apair = (None, aname) - elif length == 3: - qname = "%s:%s" % (parts[2], parts[1]) - apair = parts[0], parts[1] - else: - # default namespace - qname = parts[1] - apair = tuple(parts) - - newattrs[apair] = value - qnames[apair] = qname - - self._cont_handler.startElementNS(pair, None, - AttributesNSImpl(newattrs, qnames)) - - def end_element_ns(self, name): - pair = name.split() - if len(pair) == 1: - pair = (None, name) - elif len(pair) == 3: - pair = pair[0], pair[1] - else: - pair = tuple(pair) - - self._cont_handler.endElementNS(pair, None) - - # this is not used (call directly to ContentHandler) - def processing_instruction(self, target, data): - self._cont_handler.processingInstruction(target, data) - - # this is not used (call directly to ContentHandler) - def character_data(self, data): - self._cont_handler.characters(data) - - def start_namespace_decl(self, prefix, uri): - self._cont_handler.startPrefixMapping(prefix, uri) - - def end_namespace_decl(self, prefix): - self._cont_handler.endPrefixMapping(prefix) - - def start_doctype_decl(self, name, sysid, pubid, has_internal_subset): - self._lex_handler_prop.startDTD(name, pubid, sysid) - - def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name): - self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name) - - def notation_decl(self, name, base, sysid, pubid): - self._dtd_handler.notationDecl(name, pubid, sysid) - - def external_entity_ref(self, context, base, sysid, pubid): - if not self._external_ges: - return 1 - - source = self._ent_handler.resolveEntity(pubid, sysid) - source = saxutils.prepare_input_source(source, - self._source.getSystemId() or - "") - - self._entity_stack.append((self._parser, self._source)) - self._parser = self._parser.ExternalEntityParserCreate(context) - self._source = source - - try: - xmlreader.IncrementalParser.parse(self, source) - except: - return 0 # FIXME: save error info here? - - (self._parser, self._source) = self._entity_stack[-1] - del self._entity_stack[-1] - return 1 - - def skipped_entity_handler(self, name, is_pe): - if is_pe: - # The SAX spec requires to report skipped PEs with a '%' - name = '%'+name - self._cont_handler.skippedEntity(name) - -# --- - -def create_parser(*args, **kwargs): - return ExpatParser(*args, **kwargs) - -# --- - -if __name__ == "__main__": - import xml.sax.saxutils - p = create_parser() - p.setContentHandler(xml.sax.saxutils.XMLGenerator()) - p.setErrorHandler(xml.sax.ErrorHandler()) - p.parse("http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml") diff --git a/modules/language/python/module/xml/sax/handler.py~ b/modules/language/python/module/xml/sax/handler.py~ deleted file mode 100644 index 481733d..0000000 --- a/modules/language/python/module/xml/sax/handler.py~ +++ /dev/null @@ -1,342 +0,0 @@ -""" -This module contains the core classes of version 2.0 of SAX for Python. -This file provides only default classes with absolutely minimum -functionality, from which drivers and applications can be subclassed. - -Many of these classes are empty and are included only as documentation -of the interfaces. - -$Id$ -""" - -version = '2.0beta' - -#============================================================================ -# -# HANDLER INTERFACES -# -#============================================================================ - -# ===== ERRORHANDLER ===== - -class ErrorHandler: - """Basic interface for SAX error handlers. - - If you create an object that implements this interface, then - register the object with your XMLReader, the parser will call the - methods in your object to report all warnings and errors. There - are three levels of errors available: warnings, (possibly) - recoverable errors, and unrecoverable errors. All methods take a - SAXParseException as the only parameter.""" - - def error(self, exception): - "Handle a recoverable error." - raise exception - - def fatalError(self, exception): - "Handle a non-recoverable error." - raise exception - - def warning(self, exception): - "Handle a warning." - print(exception) - - -# ===== CONTENTHANDLER ===== - -class ContentHandler: - """Interface for receiving logical document content events. - - This is the main callback interface in SAX, and the one most - important to applications. The order of events in this interface - mirrors the order of the information in the document.""" - - def __init__(self): - self._locator = None - - def setDocumentLocator(self, locator): - """Called by the parser to give the application a locator for - locating the origin of document events. - - SAX parsers are strongly encouraged (though not absolutely - required) to supply a locator: if it does so, it must supply - the locator to the application by invoking this method before - invoking any of the other methods in the DocumentHandler - interface. - - The locator allows the application to determine the end - position of any document-related event, even if the parser is - not reporting an error. Typically, the application will use - this information for reporting its own errors (such as - character content that does not match an application's - business rules). The information returned by the locator is - probably not sufficient for use with a search engine. - - Note that the locator will return correct information only - during the invocation of the events in this interface. The - application should not attempt to use it at any other time.""" - self._locator = locator - - def startDocument(self): - """Receive notification of the beginning of a document. - - The SAX parser will invoke this method only once, before any - other methods in this interface or in DTDHandler (except for - setDocumentLocator).""" - - def endDocument(self): - """Receive notification of the end of a document. - - The SAX parser will invoke this method only once, and it will - be the last method invoked during the parse. The parser shall - not invoke this method until it has either abandoned parsing - (because of an unrecoverable error) or reached the end of - input.""" - - def startPrefixMapping(self, prefix, uri): - """Begin the scope of a prefix-URI Namespace mapping. - - The information from this event is not necessary for normal - Namespace processing: the SAX XML reader will automatically - replace prefixes for element and attribute names when the - http://xml.org/sax/features/namespaces feature is true (the - default). - - There are cases, however, when applications need to use - prefixes in character data or in attribute values, where they - cannot safely be expanded automatically; the - start/endPrefixMapping event supplies the information to the - application to expand prefixes in those contexts itself, if - necessary. - - Note that start/endPrefixMapping events are not guaranteed to - be properly nested relative to each-other: all - startPrefixMapping events will occur before the corresponding - startElement event, and all endPrefixMapping events will occur - after the corresponding endElement event, but their order is - not guaranteed.""" - - def endPrefixMapping(self, prefix): - """End the scope of a prefix-URI mapping. - - See startPrefixMapping for details. This event will always - occur after the corresponding endElement event, but the order - of endPrefixMapping events is not otherwise guaranteed.""" - - def startElement(self, name, attrs): - """Signals the start of an element in non-namespace mode. - - The name parameter contains the raw XML 1.0 name of the - element type as a string and the attrs parameter holds an - instance of the Attributes class containing the attributes of - the element.""" - - def endElement(self, name): - """Signals the end of an element in non-namespace mode. - - The name parameter contains the name of the element type, just - as with the startElement event.""" - - def startElementNS(self, name, qname, attrs): - """Signals the start of an element in namespace mode. - - The name parameter contains the name of the element type as a - (uri, localname) tuple, the qname parameter the raw XML 1.0 - name used in the source document, and the attrs parameter - holds an instance of the Attributes class containing the - attributes of the element. - - The uri part of the name tuple is None for elements which have - no namespace.""" - - def endElementNS(self, name, qname): - """Signals the end of an element in namespace mode. - - The name parameter contains the name of the element type, just - as with the startElementNS event.""" - - def characters(self, content): - """Receive notification of character data. - - The Parser will call this method to report each chunk of - character data. SAX parsers may return all contiguous - character data in a single chunk, or they may split it into - several chunks; however, all of the characters in any single - event must come from the same external entity so that the - Locator provides useful information.""" - - def ignorableWhitespace(self, whitespace): - """Receive notification of ignorable whitespace in element content. - - Validating Parsers must use this method to report each chunk - of ignorable whitespace (see the W3C XML 1.0 recommendation, - section 2.10): non-validating parsers may also use this method - if they are capable of parsing and using content models. - - SAX parsers may return all contiguous whitespace in a single - chunk, or they may split it into several chunks; however, all - of the characters in any single event must come from the same - external entity, so that the Locator provides useful - information.""" - - def processingInstruction(self, target, data): - """Receive notification of a processing instruction. - - The Parser will invoke this method once for each processing - instruction found: note that processing instructions may occur - before or after the main document element. - - A SAX parser should never report an XML declaration (XML 1.0, - section 2.8) or a text declaration (XML 1.0, section 4.3.1) - using this method.""" - - def skippedEntity(self, name): - """Receive notification of a skipped entity. - - The Parser will invoke this method once for each entity - skipped. Non-validating processors may skip entities if they - have not seen the declarations (because, for example, the - entity was declared in an external DTD subset). All processors - may skip external entities, depending on the values of the - http://xml.org/sax/features/external-general-entities and the - http://xml.org/sax/features/external-parameter-entities - properties.""" - - -# ===== DTDHandler ===== - -class DTDHandler: - """Handle DTD events. - - This interface specifies only those DTD events required for basic - parsing (unparsed entities and attributes).""" - - def notationDecl(self, name, publicId, systemId): - "Handle a notation declaration event." - - def unparsedEntityDecl(self, name, publicId, systemId, ndata): - "Handle an unparsed entity declaration event." - - -# ===== ENTITYRESOLVER ===== - -class EntityResolver: - """Basic interface for resolving entities. If you create an object - implementing this interface, then register the object with your - Parser, the parser will call the method in your object to - resolve all external entities. Note that DefaultHandler implements - this interface with the default behaviour.""" - - def resolveEntity(self, publicId, systemId): - """Resolve the system identifier of an entity and return either - the system identifier to read from as a string, or an InputSource - to read from.""" - return systemId - - -#============================================================================ -# -# CORE FEATURES -# -#============================================================================ - -feature_namespaces = "http://xml.org/sax/features/namespaces" -# true: Perform Namespace processing (default). -# false: Optionally do not perform Namespace processing -# (implies namespace-prefixes). -# access: (parsing) read-only; (not parsing) read/write - -feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes" -# true: Report the original prefixed names and attributes used for Namespace -# declarations. -# false: Do not report attributes used for Namespace declarations, and -# optionally do not report original prefixed names (default). -# access: (parsing) read-only; (not parsing) read/write - -feature_string_interning = "http://xml.org/sax/features/string-interning" -# true: All element names, prefixes, attribute names, Namespace URIs, and -# local names are interned using the built-in intern function. -# false: Names are not necessarily interned, although they may be (default). -# access: (parsing) read-only; (not parsing) read/write - -feature_validation = "http://xml.org/sax/features/validation" -# true: Report all validation errors (implies external-general-entities and -# external-parameter-entities). -# false: Do not report validation errors. -# access: (parsing) read-only; (not parsing) read/write - -feature_external_ges = "http://xml.org/sax/features/external-general-entities" -# true: Include all external general (text) entities. -# false: Do not include external general entities. -# access: (parsing) read-only; (not parsing) read/write - -feature_external_pes = "http://xml.org/sax/features/external-parameter-entities" -# true: Include all external parameter entities, including the external -# DTD subset. -# false: Do not include any external parameter entities, even the external -# DTD subset. -# access: (parsing) read-only; (not parsing) read/write - -all_features = [feature_namespaces, - feature_namespace_prefixes, - feature_string_interning, - feature_validation, - feature_external_ges, - feature_external_pes] - - -#============================================================================ -# -# CORE PROPERTIES -# -#============================================================================ - -property_lexical_handler = "http://xml.org/sax/properties/lexical-handler" -# data type: xml.sax.sax2lib.LexicalHandler -# description: An optional extension handler for lexical events like comments. -# access: read/write - -property_declaration_handler = "http://xml.org/sax/properties/declaration-handler" -# data type: xml.sax.sax2lib.DeclHandler -# description: An optional extension handler for DTD-related events other -# than notations and unparsed entities. -# access: read/write - -property_dom_node = "http://xml.org/sax/properties/dom-node" -# data type: org.w3c.dom.Node -# description: When parsing, the current DOM node being visited if this is -# a DOM iterator; when not parsing, the root DOM node for -# iteration. -# access: (parsing) read-only; (not parsing) read/write - -property_xml_string = "http://xml.org/sax/properties/xml-string" -# data type: String -# description: The literal string of characters that was the source for -# the current event. -# access: read-only - -property_encoding = "http://www.python.org/sax/properties/encoding" -# data type: String -# description: The name of the encoding to assume for input data. -# access: write: set the encoding, e.g. established by a higher-level -# protocol. May change during parsing (e.g. after -# processing a META tag) -# read: return the current encoding (possibly established through -# auto-detection. -# initial value: UTF-8 -# - -property_interning_dict = "http://www.python.org/sax/properties/interning-dict" -# data type: Dictionary -# description: The dictionary used to intern common strings in the document -# access: write: Request that the parser uses a specific dictionary, to -# allow interning across different documents -# read: return the current interning dictionary, or None -# - -all_properties = [property_lexical_handler, - property_dom_node, - property_declaration_handler, - property_xml_string, - property_encoding, - property_interning_dict] diff --git a/modules/language/python/module/xml/sax/saxutils.py~ b/modules/language/python/module/xml/sax/saxutils.py~ deleted file mode 100644 index a69c7f7..0000000 --- a/modules/language/python/module/xml/sax/saxutils.py~ +++ /dev/null @@ -1,368 +0,0 @@ -"""\ -A library of useful helper classes to the SAX classes, for the -convenience of application and driver writers. -""" - -import os, urllib.parse, urllib.request -import io -import codecs -from . import handler -from . import xmlreader - -def __dict_replace(s, d): - """Replace substrings of a string using a dictionary.""" - for key, value in d.items(): - s = s.replace(key, value) - return s - -def escape(data, entities={}): - """Escape &, <, and > in a string of data. - - You can escape other strings of data by passing a dictionary as - the optional entities parameter. The keys and values must all be - strings; each key will be replaced with its corresponding value. - """ - - # must do ampersand first - data = data.replace("&", "&") - data = data.replace(">", ">") - data = data.replace("<", "<") - if entities: - data = __dict_replace(data, entities) - return data - -def unescape(data, entities={}): - """Unescape &, <, and > in a string of data. - - You can unescape other strings of data by passing a dictionary as - the optional entities parameter. The keys and values must all be - strings; each key will be replaced with its corresponding value. - """ - data = data.replace("<", "<") - data = data.replace(">", ">") - if entities: - data = __dict_replace(data, entities) - # must do ampersand last - return data.replace("&", "&") - -def quoteattr(data, entities={}): - """Escape and quote an attribute value. - - Escape &, <, and > in a string of data, then quote it for use as - an attribute value. The \" character will be escaped as well, if - necessary. - - You can escape other strings of data by passing a dictionary as - the optional entities parameter. The keys and values must all be - strings; each key will be replaced with its corresponding value. - """ - entities = entities.copy() - entities.update({'\n': ' ', '\r': ' ', '\t':'	'}) - data = escape(data, entities) - if '"' in data: - if "'" in data: - data = '"%s"' % data.replace('"', """) - else: - data = "'%s'" % data - else: - data = '"%s"' % data - return data - - -def _gettextwriter(out, encoding): - if out is None: - import sys - return sys.stdout - - if isinstance(out, io.TextIOBase): - # use a text writer as is - return out - - if isinstance(out, (codecs.StreamWriter, codecs.StreamReaderWriter)): - # use a codecs stream writer as is - return out - - # wrap a binary writer with TextIOWrapper - if isinstance(out, io.RawIOBase): - # Keep the original file open when the TextIOWrapper is - # destroyed - class _wrapper: - __class__ = out.__class__ - def __getattr__(self, name): - return getattr(out, name) - buffer = _wrapper() - buffer.close = lambda: None - else: - # This is to handle passed objects that aren't in the - # IOBase hierarchy, but just have a write method - buffer = io.BufferedIOBase() - buffer.writable = lambda: True - buffer.write = out.write - try: - # TextIOWrapper uses this methods to determine - # if BOM (for UTF-16, etc) should be added - buffer.seekable = out.seekable - buffer.tell = out.tell - except AttributeError: - pass - return io.TextIOWrapper(buffer, encoding=encoding, - errors='xmlcharrefreplace', - newline='\n', - write_through=True) - -class XMLGenerator(handler.ContentHandler): - - def __init__(self, out=None, encoding="iso-8859-1", short_empty_elements=False): - handler.ContentHandler.__init__(self) - out = _gettextwriter(out, encoding) - self._write = out.write - self._flush = out.flush - self._ns_contexts = [{}] # contains uri -> prefix dicts - self._current_context = self._ns_contexts[-1] - self._undeclared_ns_maps = [] - self._encoding = encoding - self._short_empty_elements = short_empty_elements - self._pending_start_element = False - - def _qname(self, name): - """Builds a qualified name from a (ns_url, localname) pair""" - if name[0]: - # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is - # bound by definition to http://www.w3.org/XML/1998/namespace. It - # does not need to be declared and will not usually be found in - # self._current_context. - if 'http://www.w3.org/XML/1998/namespace' == name[0]: - return 'xml:' + name[1] - # The name is in a non-empty namespace - prefix = self._current_context[name[0]] - if prefix: - # If it is not the default namespace, prepend the prefix - return prefix + ":" + name[1] - # Return the unqualified name - return name[1] - - def _finish_pending_start_element(self,endElement=False): - if self._pending_start_element: - self._write('>') - self._pending_start_element = False - - # ContentHandler methods - - def startDocument(self): - self._write('<?xml version="1.0" encoding="%s"?>\n' % - self._encoding) - - def endDocument(self): - self._flush() - - def startPrefixMapping(self, prefix, uri): - self._ns_contexts.append(self._current_context.copy()) - self._current_context[uri] = prefix - self._undeclared_ns_maps.append((prefix, uri)) - - def endPrefixMapping(self, prefix): - self._current_context = self._ns_contexts[-1] - del self._ns_contexts[-1] - - def startElement(self, name, attrs): - self._finish_pending_start_element() - self._write('<' + name) - for (name, value) in attrs.items(): - self._write(' %s=%s' % (name, quoteattr(value))) - if self._short_empty_elements: - self._pending_start_element = True - else: - self._write(">") - - def endElement(self, name): - if self._pending_start_element: - self._write('/>') - self._pending_start_element = False - else: - self._write('</%s>' % name) - - def startElementNS(self, name, qname, attrs): - self._finish_pending_start_element() - self._write('<' + self._qname(name)) - - for prefix, uri in self._undeclared_ns_maps: - if prefix: - self._write(' xmlns:%s="%s"' % (prefix, uri)) - else: - self._write(' xmlns="%s"' % uri) - self._undeclared_ns_maps = [] - - for (name, value) in attrs.items(): - self._write(' %s=%s' % (self._qname(name), quoteattr(value))) - if self._short_empty_elements: - self._pending_start_element = True - else: - self._write(">") - - def endElementNS(self, name, qname): - if self._pending_start_element: - self._write('/>') - self._pending_start_element = False - else: - self._write('</%s>' % self._qname(name)) - - def characters(self, content): - if content: - self._finish_pending_start_element() - if not isinstance(content, str): - content = str(content, self._encoding) - self._write(escape(content)) - - def ignorableWhitespace(self, content): - if content: - self._finish_pending_start_element() - if not isinstance(content, str): - content = str(content, self._encoding) - self._write(content) - - def processingInstruction(self, target, data): - self._finish_pending_start_element() - self._write('<?%s %s?>' % (target, data)) - - -class XMLFilterBase(xmlreader.XMLReader): - """This class is designed to sit between an XMLReader and the - client application's event handlers. By default, it does nothing - but pass requests up to the reader and events on to the handlers - unmodified, but subclasses can override specific methods to modify - the event stream or the configuration requests as they pass - through.""" - - def __init__(self, parent = None): - xmlreader.XMLReader.__init__(self) - self._parent = parent - - # ErrorHandler methods - - def error(self, exception): - self._err_handler.error(exception) - - def fatalError(self, exception): - self._err_handler.fatalError(exception) - - def warning(self, exception): - self._err_handler.warning(exception) - - # ContentHandler methods - - def setDocumentLocator(self, locator): - self._cont_handler.setDocumentLocator(locator) - - def startDocument(self): - self._cont_handler.startDocument() - - def endDocument(self): - self._cont_handler.endDocument() - - def startPrefixMapping(self, prefix, uri): - self._cont_handler.startPrefixMapping(prefix, uri) - - def endPrefixMapping(self, prefix): - self._cont_handler.endPrefixMapping(prefix) - - def startElement(self, name, attrs): - self._cont_handler.startElement(name, attrs) - - def endElement(self, name): - self._cont_handler.endElement(name) - - def startElementNS(self, name, qname, attrs): - self._cont_handler.startElementNS(name, qname, attrs) - - def endElementNS(self, name, qname): - self._cont_handler.endElementNS(name, qname) - - def characters(self, content): - self._cont_handler.characters(content) - - def ignorableWhitespace(self, chars): - self._cont_handler.ignorableWhitespace(chars) - - def processingInstruction(self, target, data): - self._cont_handler.processingInstruction(target, data) - - def skippedEntity(self, name): - self._cont_handler.skippedEntity(name) - - # DTDHandler methods - - def notationDecl(self, name, publicId, systemId): - self._dtd_handler.notationDecl(name, publicId, systemId) - - def unparsedEntityDecl(self, name, publicId, systemId, ndata): - self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata) - - # EntityResolver methods - - def resolveEntity(self, publicId, systemId): - return self._ent_handler.resolveEntity(publicId, systemId) - - # XMLReader methods - - def parse(self, source): - self._parent.setContentHandler(self) - self._parent.setErrorHandler(self) - self._parent.setEntityResolver(self) - self._parent.setDTDHandler(self) - self._parent.parse(source) - - def setLocale(self, locale): - self._parent.setLocale(locale) - - def getFeature(self, name): - return self._parent.getFeature(name) - - def setFeature(self, name, state): - self._parent.setFeature(name, state) - - def getProperty(self, name): - return self._parent.getProperty(name) - - def setProperty(self, name, value): - self._parent.setProperty(name, value) - - # XMLFilter methods - - def getParent(self): - return self._parent - - def setParent(self, parent): - self._parent = parent - -# --- Utility functions - -def prepare_input_source(source, base=""): - """This function takes an InputSource and an optional base URL and - returns a fully resolved InputSource object ready for reading.""" - - if isinstance(source, str): - source = xmlreader.InputSource(source) - elif hasattr(source, "read"): - f = source - source = xmlreader.InputSource() - if isinstance(f.read(0), str): - source.setCharacterStream(f) - else: - source.setByteStream(f) - if hasattr(f, "name") and isinstance(f.name, str): - source.setSystemId(f.name) - - if source.getCharacterStream() is None and source.getByteStream() is None: - sysid = source.getSystemId() - basehead = os.path.dirname(os.path.normpath(base)) - sysidfilename = os.path.join(basehead, sysid) - if os.path.isfile(sysidfilename): - source.setSystemId(sysidfilename) - f = open(sysidfilename, "rb") - else: - source.setSystemId(urllib.parse.urljoin(base, sysid)) - f = urllib.request.urlopen(source.getSystemId()) - - source.setByteStream(f) - - return source diff --git a/modules/language/python/module/xml/sax/xmlreader.py~ b/modules/language/python/module/xml/sax/xmlreader.py~ deleted file mode 100644 index 716f228..0000000 --- a/modules/language/python/module/xml/sax/xmlreader.py~ +++ /dev/null @@ -1,380 +0,0 @@ -"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers -should be based on this code. """ - -from . import handler - -from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException - - -# ===== XMLREADER ===== - -class XMLReader: - """Interface for reading an XML document using callbacks. - - XMLReader is the interface that an XML parser's SAX2 driver must - implement. This interface allows an application to set and query - features and properties in the parser, to register event handlers - for document processing, and to initiate a document parse. - - All SAX interfaces are assumed to be synchronous: the parse - methods must not return until parsing is complete, and readers - must wait for an event-handler callback to return before reporting - the next event.""" - - def __init__(self): - self._cont_handler = handler.ContentHandler() - self._dtd_handler = handler.DTDHandler() - self._ent_handler = handler.EntityResolver() - self._err_handler = handler.ErrorHandler() - - def parse(self, source): - "Parse an XML document from a system identifier or an InputSource." - raise NotImplementedError("This method must be implemented!") - - def getContentHandler(self): - "Returns the current ContentHandler." - return self._cont_handler - - def setContentHandler(self, handler): - "Registers a new object to receive document content events." - self._cont_handler = handler - - def getDTDHandler(self): - "Returns the current DTD handler." - return self._dtd_handler - - def setDTDHandler(self, handler): - "Register an object to receive basic DTD-related events." - self._dtd_handler = handler - - def getEntityResolver(self): - "Returns the current EntityResolver." - return self._ent_handler - - def setEntityResolver(self, resolver): - "Register an object to resolve external entities." - self._ent_handler = resolver - - def getErrorHandler(self): - "Returns the current ErrorHandler." - return self._err_handler - - def setErrorHandler(self, handler): - "Register an object to receive error-message events." - self._err_handler = handler - - def setLocale(self, locale): - """Allow an application to set the locale for errors and warnings. - - SAX parsers are not required to provide localization for errors - and warnings; if they cannot support the requested locale, - however, they must raise a SAX exception. Applications may - request a locale change in the middle of a parse.""" - raise SAXNotSupportedException("Locale support not implemented") - - def getFeature(self, name): - "Looks up and returns the state of a SAX2 feature." - raise SAXNotRecognizedException("Feature '%s' not recognized" % name) - - def setFeature(self, name, state): - "Sets the state of a SAX2 feature." - raise SAXNotRecognizedException("Feature '%s' not recognized" % name) - - def getProperty(self, name): - "Looks up and returns the value of a SAX2 property." - raise SAXNotRecognizedException("Property '%s' not recognized" % name) - - def setProperty(self, name, value): - "Sets the value of a SAX2 property." - raise SAXNotRecognizedException("Property '%s' not recognized" % name) - -class IncrementalParser(XMLReader): - """This interface adds three extra methods to the XMLReader - interface that allow XML parsers to support incremental - parsing. Support for this interface is optional, since not all - underlying XML parsers support this functionality. - - When the parser is instantiated it is ready to begin accepting - data from the feed method immediately. After parsing has been - finished with a call to close the reset method must be called to - make the parser ready to accept new data, either from feed or - using the parse method. - - Note that these methods must _not_ be called during parsing, that - is, after parse has been called and before it returns. - - By default, the class also implements the parse method of the XMLReader - interface using the feed, close and reset methods of the - IncrementalParser interface as a convenience to SAX 2.0 driver - writers.""" - - def __init__(self, bufsize=2**16): - self._bufsize = bufsize - XMLReader.__init__(self) - - def parse(self, source): - from . import saxutils - source = saxutils.prepare_input_source(source) - - self.prepareParser(source) - file = source.getCharacterStream() - if file is None: - file = source.getByteStream() - buffer = file.read(self._bufsize) - while buffer: - self.feed(buffer) - buffer = file.read(self._bufsize) - self.close() - - def feed(self, data): - """This method gives the raw XML data in the data parameter to - the parser and makes it parse the data, emitting the - corresponding events. It is allowed for XML constructs to be - split across several calls to feed. - - feed may raise SAXException.""" - raise NotImplementedError("This method must be implemented!") - - def prepareParser(self, source): - """This method is called by the parse implementation to allow - the SAX 2.0 driver to prepare itself for parsing.""" - raise NotImplementedError("prepareParser must be overridden!") - - def close(self): - """This method is called when the entire XML document has been - passed to the parser through the feed method, to notify the - parser that there are no more data. This allows the parser to - do the final checks on the document and empty the internal - data buffer. - - The parser will not be ready to parse another document until - the reset method has been called. - - close may raise SAXException.""" - raise NotImplementedError("This method must be implemented!") - - def reset(self): - """This method is called after close has been called to reset - the parser so that it is ready to parse new documents. The - results of calling parse or feed after close without calling - reset are undefined.""" - raise NotImplementedError("This method must be implemented!") - -# ===== LOCATOR ===== - -class Locator: - """Interface for associating a SAX event with a document - location. A locator object will return valid results only during - calls to DocumentHandler methods; at any other time, the - results are unpredictable.""" - - def getColumnNumber(self): - "Return the column number where the current event ends." - return -1 - - def getLineNumber(self): - "Return the line number where the current event ends." - return -1 - - def getPublicId(self): - "Return the public identifier for the current event." - return None - - def getSystemId(self): - "Return the system identifier for the current event." - return None - -# ===== INPUTSOURCE ===== - -class InputSource: - """Encapsulation of the information needed by the XMLReader to - read entities. - - This class may include information about the public identifier, - system identifier, byte stream (possibly with character encoding - information) and/or the character stream of an entity. - - Applications will create objects of this class for use in the - XMLReader.parse method and for returning from - EntityResolver.resolveEntity. - - An InputSource belongs to the application, the XMLReader is not - allowed to modify InputSource objects passed to it from the - application, although it may make copies and modify those.""" - - def __init__(self, system_id = None): - self.__system_id = system_id - self.__public_id = None - self.__encoding = None - self.__bytefile = None - self.__charfile = None - - def setPublicId(self, public_id): - "Sets the public identifier of this InputSource." - self.__public_id = public_id - - def getPublicId(self): - "Returns the public identifier of this InputSource." - return self.__public_id - - def setSystemId(self, system_id): - "Sets the system identifier of this InputSource." - self.__system_id = system_id - - def getSystemId(self): - "Returns the system identifier of this InputSource." - return self.__system_id - - def setEncoding(self, encoding): - """Sets the character encoding of this InputSource. - - The encoding must be a string acceptable for an XML encoding - declaration (see section 4.3.3 of the XML recommendation). - - The encoding attribute of the InputSource is ignored if the - InputSource also contains a character stream.""" - self.__encoding = encoding - - def getEncoding(self): - "Get the character encoding of this InputSource." - return self.__encoding - - def setByteStream(self, bytefile): - """Set the byte stream (a Python file-like object which does - not perform byte-to-character conversion) for this input - source. - - The SAX parser will ignore this if there is also a character - stream specified, but it will use a byte stream in preference - to opening a URI connection itself. - - If the application knows the character encoding of the byte - stream, it should set it with the setEncoding method.""" - self.__bytefile = bytefile - - def getByteStream(self): - """Get the byte stream for this input source. - - The getEncoding method will return the character encoding for - this byte stream, or None if unknown.""" - return self.__bytefile - - def setCharacterStream(self, charfile): - """Set the character stream for this input source. (The stream - must be a Python 2.0 Unicode-wrapped file-like that performs - conversion to Unicode strings.) - - If there is a character stream specified, the SAX parser will - ignore any byte stream and will not attempt to open a URI - connection to the system identifier.""" - self.__charfile = charfile - - def getCharacterStream(self): - "Get the character stream for this input source." - return self.__charfile - -# ===== ATTRIBUTESIMPL ===== - -class AttributesImpl: - - def __init__(self, attrs): - """Non-NS-aware implementation. - - attrs should be of the form {name : value}.""" - self._attrs = attrs - - def getLength(self): - return len(self._attrs) - - def getType(self, name): - return "CDATA" - - def getValue(self, name): - return self._attrs[name] - - def getValueByQName(self, name): - return self._attrs[name] - - def getNameByQName(self, name): - if name not in self._attrs: - raise KeyError(name) - return name - - def getQNameByName(self, name): - if name not in self._attrs: - raise KeyError(name) - return name - - def getNames(self): - return list(self._attrs.keys()) - - def getQNames(self): - return list(self._attrs.keys()) - - def __len__(self): - return len(self._attrs) - - def __getitem__(self, name): - return self._attrs[name] - - def keys(self): - return list(self._attrs.keys()) - - def __contains__(self, name): - return name in self._attrs - - def get(self, name, alternative=None): - return self._attrs.get(name, alternative) - - def copy(self): - return self.__class__(self._attrs) - - def items(self): - return list(self._attrs.items()) - - def values(self): - return list(self._attrs.values()) - -# ===== ATTRIBUTESNSIMPL ===== - -class AttributesNSImpl(AttributesImpl): - - def __init__(self, attrs, qnames): - """NS-aware implementation. - - attrs should be of the form {(ns_uri, lname): value, ...}. - qnames of the form {(ns_uri, lname): qname, ...}.""" - self._attrs = attrs - self._qnames = qnames - - def getValueByQName(self, name): - for (nsname, qname) in self._qnames.items(): - if qname == name: - return self._attrs[nsname] - - raise KeyError(name) - - def getNameByQName(self, name): - for (nsname, qname) in self._qnames.items(): - if qname == name: - return nsname - - raise KeyError(name) - - def getQNameByName(self, name): - return self._qnames[name] - - def getQNames(self): - return list(self._qnames.values()) - - def copy(self): - return self.__class__(self._attrs, self._qnames) - - -def _test(): - XMLReader() - IncrementalParser() - Locator() - -if __name__ == "__main__": - _test() |