diff options
author | Stefan Israelsson Tampe <stefan.itampe@gmail.com> | 2018-09-11 20:49:02 +0200 |
---|---|---|
committer | Stefan Israelsson Tampe <stefan.itampe@gmail.com> | 2018-09-11 20:49:02 +0200 |
commit | c75be2215da7dcc993ca8450bafcbab609132871 (patch) | |
tree | 02dd34ba28787b8d98ad50433c41339639d60cfc | |
parent | f75533465dbe70b153c75c85297422964b1a598d (diff) |
final hhtp code
-rw-r--r-- | modules/language/python/list.scm | 17 | ||||
-rw-r--r-- | modules/language/python/module/email/mime/application.py | 39 | ||||
-rw-r--r-- | modules/language/python/module/email/mime/multipart.py | 48 | ||||
-rw-r--r-- | modules/language/python/module/http/client.py | 5 | ||||
-rw-r--r-- | modules/language/python/module/http/cookiejar.py | 2100 | ||||
-rw-r--r-- | modules/language/python/module/http/cookies.py | 637 | ||||
-rw-r--r-- | modules/language/python/module/http/server.py | 1214 | ||||
-rw-r--r-- | modules/language/python/module/json.py | 4 | ||||
-rw-r--r-- | modules/language/python/module/os.scm | 5 | ||||
-rw-r--r-- | modules/language/python/module/os/path.scm | 23 | ||||
-rw-r--r-- | modules/language/python/module/re/parser.scm | 8 | ||||
-rw-r--r-- | modules/language/python/module/urllib/parse.py | 7 | ||||
-rw-r--r-- | modules/language/python/module/urllib/request.py | 10 | ||||
-rw-r--r-- | modules/language/python/number.scm | 18 | ||||
-rw-r--r-- | modules/oop/pf-objects.scm | 2 |
15 files changed, 4108 insertions, 29 deletions
diff --git a/modules/language/python/list.scm b/modules/language/python/list.scm index a60703c..c656edf 100644 --- a/modules/language/python/list.scm +++ b/modules/language/python/list.scm @@ -50,8 +50,11 @@ (slot-ref o 'n) (vector->list (slot-ref o 'vec))))) +(define (int x) x) + (define-method (pylist-delete! (o <py-list>) k) (let* ((n (slot-ref o 'n)) + (k (int k)) (k (if (< k 0) (+ k n) k))) (pylist-subset! o k (+ k 1) None pylist-null))) @@ -154,31 +157,33 @@ ;;; REF (define-method (pylist-ref (o <py-list>) nin) (define N (slot-ref o 'n)) - (define n (if (< nin 0) (+ N nin) nin)) + (define n0 (int nin)) + (define n (if (< n0 0) (+ N n0) n0)) (if (and (>= n 0) (< n (slot-ref o 'n))) (vector-ref (slot-ref o 'vec) n) (raise IndexError))) (defpair (pylist-ref o n) - (list-ref o (if (< n 0) (+ (length o) n) n))) + (list-ref o (let ((n (int n))) (if (< n 0) (+ (length o) n) n)))) (define-method (pylist-ref (o <vector>) n) - (vector-ref o n)) + (vector-ref o (let ((n (int n))) (if (< n 0) (+ (vector-length o) n) n)))) ;;; SET (define-method (pylist-set! (o <py-list>) nin val) (define N (slot-ref o 'n)) - (define n (if (< nin 0) (+ N nin) nin)) + (define n0 (int nin)) + (define n (if (< n0 0) (+ N n0) n0)) (if (and (>= n 0) (< n (slot-ref o 'n))) (vector-set! (slot-ref o 'vec) n val) (raise IndexError))) (defpair (pylist-set! o n val) - (list-set! o n val)) + (list-set! o (let ((n (int n))) (if (< n 0) (+ (length o) n) n)) val)) (define-method (pylist-set! (o <vector>) n val) - (vector-set! o n val)) + (vector-set! o (let ((n (int n))) (if (< n 0) (+ (length o) n) n)) val)) ;;SLICE diff --git a/modules/language/python/module/email/mime/application.py b/modules/language/python/module/email/mime/application.py new file mode 100644 index 0000000..8eec2c3 --- /dev/null +++ b/modules/language/python/module/email/mime/application.py @@ -0,0 +1,39 @@ +module(email,mime,application) + +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Keith Dart +# Contact: email-sig@python.org + +"""Class representing application/* type MIME documents.""" + +__all__ = ["MIMEApplication"] + +import email.encoders as encoders +from email.mime.nonmultipart import MIMENonMultipart + + +class MIMEApplication(MIMENonMultipart): + """Class for generating application/* MIME documents.""" + + def __init__(self, _data, _subtype='octet-stream', + _encoder=encoders.encode_base64, *, policy=None, **_params): + """Create an application/* type MIME document. + + _data is a string containing the raw application data. + + _subtype is the MIME content type subtype, defaulting to + 'octet-stream'. + + _encoder is a function which will perform the actual encoding for + transport of the application data, defaulting to base64 encoding. + + Any additional keyword arguments are passed to the base class + constructor, which turns them into parameters on the Content-Type + header. + """ + if _subtype is None: + raise TypeError('Invalid application MIME subtype') + MIMENonMultipart.__init__(self, 'application', _subtype, policy=policy, + **_params) + self.set_payload(_data) + _encoder(self) diff --git a/modules/language/python/module/email/mime/multipart.py b/modules/language/python/module/email/mime/multipart.py new file mode 100644 index 0000000..d79225b --- /dev/null +++ b/modules/language/python/module/email/mime/multipart.py @@ -0,0 +1,48 @@ +module(email,mime,multipart) + +# Copyright (C) 2002-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Base class for MIME multipart/* type messages.""" + +__all__ = ['MIMEMultipart'] + +from email.mime.base import MIMEBase + +class MIMEMultipart(MIMEBase): + """Base class for MIME multipart/* type messages.""" + + def __init__(self, _subtype='mixed', boundary=None, _subparts=None, + *, policy=None, + **_params): + """Creates a multipart/* type message. + + By default, creates a multipart/mixed message, with proper + Content-Type and MIME-Version headers. + + _subtype is the subtype of the multipart content type, defaulting to + `mixed'. + + boundary is the multipart boundary string. By default it is + calculated as needed. + + _subparts is a sequence of initial subparts for the payload. It + must be an iterable object, such as a list. You can always + attach new subparts to the message by using the attach() method. + + Additional parameters for the Content-Type header are taken from the + keyword arguments (or passed into the _params argument). + """ + MIMEBase.__init__(self, 'multipart', _subtype, policy=policy, **_params) + + # Initialise _payload to an empty list as the Message superclass's + # implementation of is_multipart assumes that _payload is a list for + # multipart messages. + self._payload = [] + + if _subparts: + for p in _subparts: + self.attach(p) + if boundary: + self.set_boundary(boundary) diff --git a/modules/language/python/module/http/client.py b/modules/language/python/module/http/client.py index 8dd6028..8f152f6 100644 --- a/modules/language/python/module/http/client.py +++ b/modules/language/python/module/http/client.py @@ -105,9 +105,9 @@ globals().update(http.HTTPStatus.__members__) # another hack to maintain backwards compatibility # Mapping status codes to official W3C names -pk(6) + responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()} -pk(7) + # maximal amount of data to read at one time in _safe_read MAXAMOUNT = 1048576 @@ -147,7 +147,6 @@ _is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search # servers will otherwise respond with a 411 _METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'} -pk(8) def _encode(data, name='data'): """Call data.encode("latin-1") but show a better error message.""" try: diff --git a/modules/language/python/module/http/cookiejar.py b/modules/language/python/module/http/cookiejar.py new file mode 100644 index 0000000..a37db20 --- /dev/null +++ b/modules/language/python/module/http/cookiejar.py @@ -0,0 +1,2100 @@ +module(http,cookiejar) + +r"""HTTP cookie handling for web clients. + +This module has (now fairly distant) origins in Gisle Aas' Perl module +HTTP::Cookies, from the libwww-perl library. + +Docstrings, comments and debug strings in this code refer to the +attributes of the HTTP cookie system as cookie-attributes, to distinguish +them clearly from Python attributes. + +Class diagram (note that BSDDBCookieJar and the MSIE* classes are not +distributed with the Python standard library, but are available from +http://wwwsearch.sf.net/): + + CookieJar____ + / \ \ + FileCookieJar \ \ + / | \ \ \ + MozillaCookieJar | LWPCookieJar \ \ + | | \ + | ---MSIEBase | \ + | / | | \ + | / MSIEDBCookieJar BSDDBCookieJar + |/ + MSIECookieJar + +""" + +__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy', + 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar'] + +import copy +import datetime +import re +import time +import urllib.parse, urllib.request +try: + import threading as _threading +except ImportError: + import dummy_threading as _threading +import http.client # only for the default HTTP port +from calendar import timegm + +debug = False # set to True to enable debugging via the logging module +logger = None + +def _debug(*args): + if not debug: + return + global logger + if not logger: + import logging + logger = logging.getLogger("http.cookiejar") + return logger.debug(*args) + + +DEFAULT_HTTP_PORT = str(http.client.HTTP_PORT) +MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " + "instance initialised with one)") + +def _warn_unhandled_exception(): + # There are a few catch-all except: statements in this module, for + # catching input that's bad in unexpected ways. Warn if any + # exceptions are caught there. + import io, warnings, traceback + f = io.StringIO() + traceback.print_exc(None, f) + msg = f.getvalue() + warnings.warn("http.cookiejar bug!\n%s" % msg, stacklevel=2) + + +# Date/time conversion +# ----------------------------------------------------------------------------- + +EPOCH_YEAR = 1970 +def _timegm(tt): + year, month, mday, hour, min, sec = tt[:6] + if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and + (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)): + return timegm(tt) + else: + return None + +DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] +MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] +MONTHS_LOWER = [] +for month in MONTHS: MONTHS_LOWER.append(month.lower()) + +def time2isoz(t=None): + """Return a string representing time in seconds since epoch, t. + + If the function is called without an argument, it will use the current + time. + + The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ", + representing Universal Time (UTC, aka GMT). An example of this format is: + + 1994-11-24 08:49:37Z + + """ + if t is None: + dt = datetime.datetime.utcnow() + else: + dt = datetime.datetime.utcfromtimestamp(t) + return "%04d-%02d-%02d %02d:%02d:%02dZ" % ( + dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second) + +def time2netscape(t=None): + """Return a string representing time in seconds since epoch, t. + + If the function is called without an argument, it will use the current + time. + + The format of the returned string is like this: + + Wed, DD-Mon-YYYY HH:MM:SS GMT + + """ + if t is None: + dt = datetime.datetime.utcnow() + else: + dt = datetime.datetime.utcfromtimestamp(t) + return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % ( + DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1], + dt.year, dt.hour, dt.minute, dt.second) + + +UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None} + +TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII) +def offset_from_tz_string(tz): + offset = None + if tz in UTC_ZONES: + offset = 0 + else: + m = TIMEZONE_RE.search(tz) + if m: + offset = 3600 * int(m.group(2)) + if m.group(3): + offset = offset + 60 * int(m.group(3)) + if m.group(1) == '-': + offset = -offset + return offset + +def _str2time(day, mon, yr, hr, min, sec, tz): + yr = int(yr) + if yr > datetime.MAXYEAR: + return None + + # translate month name to number + # month numbers start with 1 (January) + try: + mon = MONTHS_LOWER.index(mon.lower())+1 + except ValueError: + # maybe it's already a number + try: + imon = int(mon) + except ValueError: + return None + if 1 <= imon <= 12: + mon = imon + else: + return None + + # make sure clock elements are defined + if hr is None: hr = 0 + if min is None: min = 0 + if sec is None: sec = 0 + + day = int(day) + hr = int(hr) + min = int(min) + sec = int(sec) + + if yr < 1000: + # find "obvious" year + cur_yr = time.localtime(time.time())[0] + m = cur_yr % 100 + tmp = yr + yr = yr + cur_yr - m + m = m - tmp + if abs(m) > 50: + if m > 0: yr = yr + 100 + else: yr = yr - 100 + + # convert UTC time tuple to seconds since epoch (not timezone-adjusted) + t = _timegm((yr, mon, day, hr, min, sec, tz)) + + if t is not None: + # adjust time using timezone string, to get absolute time since epoch + if tz is None: + tz = "UTC" + tz = tz.upper() + offset = offset_from_tz_string(tz) + if offset is None: + return None + t = t - offset + + return t + +STRICT_DATE_RE = re.compile( + r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " + r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII) +WEEKDAY_RE = re.compile( + r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII) +LOOSE_HTTP_DATE_RE = re.compile( + r"""^ + (\d\d?) # day + (?:\s+|[-\/]) + (\w+) # month + (?:\s+|[-\/]) + (\d+) # year + (?: + (?:\s+|:) # separator before clock + (\d\d?):(\d\d) # hour:min + (?::(\d\d))? # optional seconds + )? # optional clock + \s* + ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone + \s* + (?:\(\w+\))? # ASCII representation of timezone in parens. + \s*$""", re.X | re.ASCII) +def http2time(text): + """Returns time in seconds since epoch of time represented by a string. + + Return value is an integer. + + None is returned if the format of str is unrecognized, the time is outside + the representable range, or the timezone string is not recognized. If the + string contains no timezone, UTC is assumed. + + The timezone in the string may be numerical (like "-0800" or "+0100") or a + string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the + timezone strings equivalent to UTC (zero offset) are known to the function. + + The function loosely parses the following formats: + + Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format + Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format + Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format + 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday) + 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday) + 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday) + + The parser ignores leading and trailing whitespace. The time may be + absent. + + If the year is given with only 2 digits, the function will select the + century that makes the year closest to the current date. + + """ + # fast exit for strictly conforming string + m = STRICT_DATE_RE.search(text) + if m: + g = m.groups() + mon = MONTHS_LOWER.index(g[1].lower()) + 1 + tt = (int(g[2]), mon, int(g[0]), + int(g[3]), int(g[4]), float(g[5])) + return _timegm(tt) + + # No, we need some messy parsing... + + # clean up + text = text.lstrip() + text = WEEKDAY_RE.sub("", text, 1) # Useless weekday + + # tz is time zone specifier string + day, mon, yr, hr, min, sec, tz = [None]*7 + + # loose regexp parse + m = LOOSE_HTTP_DATE_RE.search(text) + if m is not None: + day, mon, yr, hr, min, sec, tz = m.groups() + else: + return None # bad format + + return _str2time(day, mon, yr, hr, min, sec, tz) + +ISO_DATE_RE = re.compile( + r"""^ + (\d{4}) # year + [-\/]? + (\d\d?) # numerical month + [-\/]? + (\d\d?) # day + (?: + (?:\s+|[-:Tt]) # separator before clock + (\d\d?):?(\d\d) # hour:min + (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional) + )? # optional clock + \s* + ([-+]?\d\d?:?(:?\d\d)? + |Z|z)? # timezone (Z is "zero meridian", i.e. GMT) + \s*$""", re.X | re. ASCII) +def iso2time(text): + """ + As for http2time, but parses the ISO 8601 formats: + + 1994-02-03 14:15:29 -0100 -- ISO 8601 format + 1994-02-03 14:15:29 -- zone is optional + 1994-02-03 -- only date + 1994-02-03T14:15:29 -- Use T as separator + 19940203T141529Z -- ISO 8601 compact format + 19940203 -- only date + + """ + # clean up + text = text.lstrip() + + # tz is time zone specifier string + day, mon, yr, hr, min, sec, tz = [None]*7 + + # loose regexp parse + m = ISO_DATE_RE.search(text) + if m is not None: + # XXX there's an extra bit of the timezone I'm ignoring here: is + # this the right thing to do? + yr, mon, day, hr, min, sec, tz, _ = m.groups() + else: + return None # bad format + + return _str2time(day, mon, yr, hr, min, sec, tz) + + +# Header parsing +# ----------------------------------------------------------------------------- + +def unmatched(match): + """Return unmatched part of re.Match object.""" + start, end = match.span(0) + return match.string[:start]+match.string[end:] + +HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)") +HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"") +HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)") +HEADER_ESCAPE_RE = re.compile(r"\\(.)") +def split_header_words(header_values): + r"""Parse header values into a list of lists containing key,value pairs. + + The function knows how to deal with ",", ";" and "=" as well as quoted + values after "=". A list of space separated tokens are parsed as if they + were separated by ";". + + If the header_values passed as argument contains multiple values, then they + are treated as if they were a single value separated by comma ",". + + This means that this function is useful for parsing header fields that + follow this syntax (BNF as from the HTTP/1.1 specification, but we relax + the requirement for tokens). + + headers = #header + header = (token | parameter) *( [";"] (token | parameter)) + + token = 1*<any CHAR except CTLs or separators> + separators = "(" | ")" | "<" | ">" | "@" + | "," | ";" | ":" | "\" | <"> + | "/" | "[" | "]" | "?" | "=" + | "{" | "}" | SP | HT + + quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) + qdtext = <any TEXT except <">> + quoted-pair = "\" CHAR + + parameter = attribute "=" value + attribute = token + value = token | quoted-string + + Each header is represented by a list of key/value pairs. The value for a + simple token (not part of a parameter) is None. Syntactically incorrect + headers will not necessarily be parsed as you would want. + + This is easier to describe with some examples: + + >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz']) + [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]] + >>> split_header_words(['text/html; charset="iso-8859-1"']) + [[('text/html', None), ('charset', 'iso-8859-1')]] + >>> split_header_words([r'Basic realm="\"foo\bar\""']) + [[('Basic', None), ('realm', '"foobar"')]] + + """ + assert not isinstance(header_values, str) + result = [] + for text in header_values: + orig_text = text + pairs = [] + while text: + m = HEADER_TOKEN_RE.search(text) + if m: + text = unmatched(m) + name = m.group(1) + m = HEADER_QUOTED_VALUE_RE.search(text) + if m: # quoted value + text = unmatched(m) + value = m.group(1) + value = HEADER_ESCAPE_RE.sub(r"\1", value) + else: + m = HEADER_VALUE_RE.search(text) + if m: # unquoted value + text = unmatched(m) + value = m.group(1) + value = value.rstrip() + else: + # no value, a lone token + value = None + pairs.append((name, value)) + elif text.lstrip().startswith(","): + # concatenated headers, as per RFC 2616 section 4.2 + text = text.lstrip()[1:] + if pairs: result.append(pairs) + pairs = [] + else: + # skip junk + non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text) + assert nr_junk_chars > 0, ( + "split_header_words bug: '%s', '%s', %s" % + (orig_text, text, pairs)) + text = non_junk + if pairs: result.append(pairs) + return result + +HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])") +def join_header_words(lists): + """Do the inverse (almost) of the conversion done by split_header_words. + + Takes a list of lists of (key, value) pairs and produces a single header + value. Attribute values are quoted if needed. + + >>> join_header_words([[("text/plain", None), ("charset", "iso-8859-1")]]) + 'text/plain; charset="iso-8859-1"' + >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859-1")]]) + 'text/plain, charset="iso-8859-1"' + + """ + headers = [] + for pairs in lists: + attr = [] + for k, v in pairs: + if v is not None: + if not re.search(r"^\w+$", v): + v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \ + v = '"%s"' % v + k = "%s=%s" % (k, v) + attr.append(k) + if attr: headers.append("; ".join(attr)) + return ", ".join(headers) + +def strip_quotes(text): + if text.startswith('"'): + text = text[1:] + if text.endswith('"'): + text = text[:-1] + return text + +def parse_ns_headers(ns_headers): + """Ad-hoc parser for Netscape protocol cookie-attributes. + + The old Netscape cookie format for Set-Cookie can for instance contain + an unquoted "," in the expires field, so we have to use this ad-hoc + parser instead of split_header_words. + + XXX This may not make the best possible effort to parse all the crap + that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient + parser is probably better, so could do worse than following that if + this ever gives any trouble. + + Currently, this is also used for parsing RFC 2109 cookies. + + """ + known_attrs = ("expires", "domain", "path", "secure", + # RFC 2109 attrs (may turn up in Netscape cookies, too) + "version", "port", "max-age") + + result = [] + for ns_header in ns_headers: + pairs = [] + version_set = False + + # XXX: The following does not strictly adhere to RFCs in that empty + # names and values are legal (the former will only appear once and will + # be overwritten if multiple occurrences are present). This is + # mostly to deal with backwards compatibility. + for ii, param in enumerate(ns_header.split(';')): + param = param.strip() + + key, sep, val = param.partition('=') + key = key.strip() + + if not key: + if ii == 0: + break + else: + continue + + # allow for a distinction between present and empty and missing + # altogether + val = val.strip() if sep else None + + if ii != 0: + lc = key.lower() + if lc in known_attrs: + key = lc + + if key == "version": + # This is an RFC 2109 cookie. + if val is not None: + val = strip_quotes(val) + version_set = True + elif key == "expires": + # convert expires date to seconds since epoch + if val is not None: + val = http2time(strip_quotes(val)) # None if invalid + pairs.append((key, val)) + + if pairs: + if not version_set: + pairs.append(("version", "0")) + result.append(pairs) + + return result + + +IPV4_RE = re.compile(r"\.\d+$", re.ASCII) +def is_HDN(text): + """Return True if text is a host domain name.""" + # XXX + # This may well be wrong. Which RFC is HDN defined in, if any (for + # the purposes of RFC 2965)? + # For the current implementation, what about IPv6? Remember to look + # at other uses of IPV4_RE also, if change this. + if IPV4_RE.search(text): + return False + if text == "": + return False + if text[0] == "." or text[-1] == ".": + return False + return True + +def domain_match(A, B): + """Return True if domain A domain-matches domain B, according to RFC 2965. + + A and B may be host domain names or IP addresses. + + RFC 2965, section 1: + + Host names can be specified either as an IP address or a HDN string. + Sometimes we compare one host name with another. (Such comparisons SHALL + be case-insensitive.) Host A's name domain-matches host B's if + + * their host name strings string-compare equal; or + + * A is a HDN string and has the form NB, where N is a non-empty + name string, B has the form .B', and B' is a HDN string. (So, + x.y.com domain-matches .Y.com but not Y.com.) + + Note that domain-match is not a commutative operation: a.b.c.com + domain-matches .c.com, but not the reverse. + + """ + # Note that, if A or B are IP addresses, the only relevant part of the + # definition of the domain-match algorithm is the direct string-compare. + A = A.lower() + B = B.lower() + if A == B: + return True + if not is_HDN(A): + return False + i = A.rfind(B) + if i == -1 or i == 0: + # A does not have form NB, or N is the empty string + return False + if not B.startswith("."): + return False + if not is_HDN(B[1:]): + return False + return True + +def liberal_is_HDN(text): + """Return True if text is a sort-of-like a host domain name. + + For accepting/blocking domains. + + """ + if IPV4_RE.search(text): + return False + return True + +def user_domain_match(A, B): + """For blocking/accepting domains. + + A and B may be host domain names or IP addresses. + + """ + A = A.lower() + B = B.lower() + if not (liberal_is_HDN(A) and liberal_is_HDN(B)): + if A == B: + # equal IP addresses + return True + return False + initial_dot = B.startswith(".") + if initial_dot and A.endswith(B): + return True + if not initial_dot and A == B: + return True + return False + +cut_port_re = re.compile(r":\d+$", re.ASCII) +def request_host(request): + """Return request-host, as defined by RFC 2965. + + Variation from RFC: returned value is lowercased, for convenient + comparison. + + """ + url = request.get_full_url() + host = urllib.parse.urlparse(url)[1] + if host == "": + host = request.get_header("Host", "") + + # remove port, if present + host = cut_port_re.sub("", host, 1) + return host.lower() + +def eff_request_host(request): + """Return a tuple (request-host, effective request-host name). + + As defined by RFC 2965, except both are lowercased. + + """ + erhn = req_host = request_host(request) + if req_host.find(".") == -1 and not IPV4_RE.search(req_host): + erhn = req_host + ".local" + return req_host, erhn + +def request_path(request): + """Path component of request-URI, as defined by RFC 2965.""" + url = request.get_full_url() + parts = urllib.parse.urlsplit(url) + path = escape_path(parts.path) + if not path.startswith("/"): + # fix bad RFC 2396 absoluteURI + path = "/" + path + return path + +def request_port(request): + host = request.host + i = host.find(':') + if i >= 0: + port = host[i+1:] + try: + int(port) + except ValueError: + _debug("nonnumeric port: '%s'", port) + return None + else: + port = DEFAULT_HTTP_PORT + return port + +# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't +# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). +HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()" +ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])") +def uppercase_escaped_char(match): + return "%%%s" % match.group(1).upper() +def escape_path(path): + """Escape any invalid characters in HTTP URL, and uppercase all escapes.""" + # There's no knowing what character encoding was used to create URLs + # containing %-escapes, but since we have to pick one to escape invalid + # path characters, we pick UTF-8, as recommended in the HTML 4.0 + # specification: + # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1 + # And here, kind of: draft-fielding-uri-rfc2396bis-03 + # (And in draft IRI specification: draft-duerst-iri-05) + # (And here, for new URI schemes: RFC 2718) + path = urllib.parse.quote(path, HTTP_PATH_SAFE) + path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) + return path + +def reach(h): + """Return reach of host h, as defined by RFC 2965, section 1. + + The reach R of a host name H is defined as follows: + + * If + + - H is the host domain name of a host; and, + + - H has the form A.B; and + + - A has no embedded (that is, interior) dots; and + + - B has at least one embedded dot, or B is the string "local". + then the reach of H is .B. + + * Otherwise, the reach of H is H. + + >>> reach("www.acme.com") + '.acme.com' + >>> reach("acme.com") + 'acme.com' + >>> reach("acme.local") + '.local' + + """ + i = h.find(".") + if i >= 0: + #a = h[:i] # this line is only here to show what a is + b = h[i+1:] + i = b.find(".") + if is_HDN(h) and (i >= 0 or b == "local"): + return "."+b + return h + +def is_third_party(request): + """ + + RFC 2965, section 3.3.6: + + An unverifiable transaction is to a third-party host if its request- + host U does not domain-match the reach R of the request-host O in the + origin transaction. + + """ + req_host = request_host(request) + if not domain_match(req_host, reach(request.origin_req_host)): + return True + else: + return False + + +class Cookie: + """HTTP Cookie. + + This class represents both Netscape and RFC 2965 cookies. + + This is deliberately a very simple class. It just holds attributes. It's + possible to construct Cookie instances that don't comply with the cookie + standards. CookieJar.make_cookies is the factory function for Cookie + objects -- it deals with cookie parsing, supplying defaults, and + normalising to the representation used in this class. CookiePolicy is + responsible for checking them to see whether they should be accepted from + and returned to the server. + + Note that the port may be present in the headers, but unspecified ("Port" + rather than"Port=80", for example); if this is the case, port is None. + + """ + + def __init__(self, version, name, value, + port, port_specified, + domain, domain_specified, domain_initial_dot, + path, path_specified, + secure, + expires, + discard, + comment, + comment_url, + rest, + rfc2109=False + ): + + if version is not None: version = int(version) + if expires is not None: expires = int(float(expires)) + if port is None and port_specified is True: + raise ValueError("if port is None, port_specified must be false") + + self.version = version + self.name = name + self.value = value + self.port = port + self.port_specified = port_specified + # normalise case, as per RFC 2965 section 3.3.3 + self.domain = domain.lower() + self.domain_specified = domain_specified + # Sigh. We need to know whether the domain given in the + # cookie-attribute had an initial dot, in order to follow RFC 2965 + # (as clarified in draft errata). Needed for the returned $Domain + # value. + self.domain_initial_dot = domain_initial_dot + self.path = path + self.path_specified = path_specified + self.secure = secure + self.expires = expires + self.discard = discard + self.comment = comment + self.comment_url = comment_url + self.rfc2109 = rfc2109 + + self._rest = copy.copy(rest) + + def has_nonstandard_attr(self, name): + return name in self._rest + def get_nonstandard_attr(self, name, default=None): + return self._rest.get(name, default) + def set_nonstandard_attr(self, name, value): + self._rest[name] = value + + def is_expired(self, now=None): + if now is None: now = time.time() + if (self.expires is not None) and (self.expires <= now): + return True + return False + + def __str__(self): + if self.port is None: p = "" + else: p = ":"+self.port + limit = self.domain + p + self.path + if self.value is not None: + namevalue = "%s=%s" % (self.name, self.value) + else: + namevalue = self.name + return "<Cookie %s for %s>" % (namevalue, limit) + + def __repr__(self): + args = [] + for name in ("version", "name", "value", + "port", "port_specified", + "domain", "domain_specified", "domain_initial_dot", + "path", "path_specified", + "secure", "expires", "discard", "comment", "comment_url", + ): + attr = getattr(self, name) + args.append("%s=%s" % (name, repr(attr))) + args.append("rest=%s" % repr(self._rest)) + args.append("rfc2109=%s" % repr(self.rfc2109)) + return "%s(%s)" % (self.__class__.__name__, ", ".join(args)) + + +class CookiePolicy: + """Defines which cookies get accepted from and returned to server. + + May also modify cookies, though this is probably a bad idea. + + The subclass DefaultCookiePolicy defines the standard rules for Netscape + and RFC 2965 cookies -- override that if you want a customized policy. + + """ + def set_ok(self, cookie, request): + """Return true if (and only if) cookie should be accepted from server. + + Currently, pre-expired cookies never get this far -- the CookieJar + class deletes such cookies itself. + + """ + raise NotImplementedError() + + def return_ok(self, cookie, request): + """Return true if (and only if) cookie should be returned to server.""" + raise NotImplementedError() + + def domain_return_ok(self, domain, request): + """Return false if cookies should not be returned, given cookie domain. + """ + return True + + def path_return_ok(self, path, request): + """Return false if cookies should not be returned, given cookie path. + """ + return True + + +class DefaultCookiePolicy(CookiePolicy): + """Implements the standard rules for accepting and returning cookies.""" + + DomainStrictNoDots = 1 + DomainStrictNonDomain = 2 + DomainRFC2965Match = 4 + + DomainLiberal = 0 + DomainStrict = DomainStrictNoDots|DomainStrictNonDomain + + def __init__(self, + blocked_domains=None, allowed_domains=None, + netscape=True, rfc2965=False, + rfc2109_as_netscape=None, + hide_cookie2=False, + strict_domain=False, + strict_rfc2965_unverifiable=True, + strict_ns_unverifiable=False, + strict_ns_domain=DomainLiberal, + strict_ns_set_initial_dollar=False, + strict_ns_set_path=False + ): + """Constructor arguments should be passed as keyword arguments only.""" + self.netscape = netscape + self.rfc2965 = rfc2965 + self.rfc2109_as_netscape = rfc2109_as_netscape + self.hide_cookie2 = hide_cookie2 + self.strict_domain = strict_domain + self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable + self.strict_ns_unverifiable = strict_ns_unverifiable + self.strict_ns_domain = strict_ns_domain + self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar + self.strict_ns_set_path = strict_ns_set_path + + if blocked_domains is not None: + self._blocked_domains = tuple(blocked_domains) + else: + self._blocked_domains = () + + if allowed_domains is not None: + allowed_domains = tuple(allowed_domains) + self._allowed_domains = allowed_domains + + def blocked_domains(self): + """Return the sequence of blocked domains (as a tuple).""" + return self._blocked_domains + def set_blocked_domains(self, blocked_domains): + """Set the sequence of blocked domains.""" + self._blocked_domains = tuple(blocked_domains) + + def is_blocked(self, domain): + for blocked_domain in self._blocked_domains: + if user_domain_match(domain, blocked_domain): + return True + return False + + def allowed_domains(self): + """Return None, or the sequence of allowed domains (as a tuple).""" + return self._allowed_domains + def set_allowed_domains(self, allowed_domains): + """Set the sequence of allowed domains, or None.""" + if allowed_domains is not None: + allowed_domains = tuple(allowed_domains) + self._allowed_domains = allowed_domains + + def is_not_allowed(self, domain): + if self._allowed_domains is None: + return False + for allowed_domain in self._allowed_domains: + if user_domain_match(domain, allowed_domain): + return False + return True + + def set_ok(self, cookie, request): + """ + If you override .set_ok(), be sure to call this method. If it returns + false, so should your subclass (assuming your subclass wants to be more + strict about which cookies to accept). + + """ + _debug(" - checking cookie %s=%s", cookie.name, cookie.value) + + assert cookie.name is not None + + for n in "version", "verifiability", "name", "path", "domain", "port": + fn_name = "set_ok_"+n + fn = getattr(self, fn_name) + if not fn(cookie, request): + return False + + return True + + def set_ok_version(self, cookie, request): + if cookie.version is None: + # Version is always set to 0 by parse_ns_headers if it's a Netscape + # cookie, so this must be an invalid RFC 2965 cookie. + _debug(" Set-Cookie2 without version attribute (%s=%s)", + cookie.name, cookie.value) + return False + if cookie.version > 0 and not self.rfc2965: + _debug(" RFC 2965 cookies are switched off") + return False + elif cookie.version == 0 and not self.netscape: + _debug(" Netscape cookies are switched off") + return False + return True + + def set_ok_verifiability(self, cookie, request): + if request.unverifiable and is_third_party(request): + if cookie.version > 0 and self.strict_rfc2965_unverifiable: + _debug(" third-party RFC 2965 cookie during " + "unverifiable transaction") + return False + elif cookie.version == 0 and self.strict_ns_unverifiable: + _debug(" third-party Netscape cookie during " + "unverifiable transaction") + return False + return True + + def set_ok_name(self, cookie, request): + # Try and stop servers setting V0 cookies designed to hack other + # servers that know both V0 and V1 protocols. + if (cookie.version == 0 and self.strict_ns_set_initial_dollar and + cookie.name.startswith("$")): + _debug(" illegal name (starts with '$'): '%s'", cookie.name) + return False + return True + + def set_ok_path(self, cookie, request): + if cookie.path_specified: + req_path = request_path(request) + if ((cookie.version > 0 or + (cookie.version == 0 and self.strict_ns_set_path)) and + not req_path.startswith(cookie.path)): + _debug(" path attribute %s is not a prefix of request " + "path %s", cookie.path, req_path) + return False + return True + + def set_ok_domain(self, cookie, request): + if self.is_blocked(cookie.domain): + _debug(" domain %s is in user block-list", cookie.domain) + return False + if self.is_not_allowed(cookie.domain): + _debug(" domain %s is not in user allow-list", cookie.domain) + return False + if cookie.domain_specified: + req_host, erhn = eff_request_host(request) + domain = cookie.domain + if self.strict_domain and (domain.count(".") >= 2): + # XXX This should probably be compared with the Konqueror + # (kcookiejar.cpp) and Mozilla implementations, but it's a + # losing battle. + i = domain.rfind(".") + j = domain.rfind(".", 0, i) + if j == 0: # domain like .foo.bar + tld = domain[i+1:] + sld = domain[j+1:i] + if sld.lower() in ("co", "ac", "com", "edu", "org", "net", + "gov", "mil", "int", "aero", "biz", "cat", "coop", + "info", "jobs", "mobi", "museum", "name", "pro", + "travel", "eu") and len(tld) == 2: + # domain like .co.uk + _debug(" country-code second level domain %s", domain) + return False + if domain.startswith("."): + undotted_domain = domain[1:] + else: + undotted_domain = domain + embedded_dots = (undotted_domain.find(".") >= 0) + if not embedded_dots and domain != ".local": + _debug(" non-local domain %s contains no embedded dot", + domain) + return False + if cookie.version == 0: + if (not erhn.endswith(domain) and + (not erhn.startswith(".") and + not ("."+erhn).endswith(domain))): + _debug(" effective request-host %s (even with added " + "initial dot) does not end with %s", + erhn, domain) + return False + if (cookie.version > 0 or + (self.strict_ns_domain & self.DomainRFC2965Match)): + if not domain_match(erhn, domain): + _debug(" effective request-host %s does not domain-match " + "%s", erhn, domain) + return False + if (cookie.version > 0 or + (self.strict_ns_domain & self.DomainStrictNoDots)): + host_prefix = req_host[:-len(domain)] + if (host_prefix.find(".") >= 0 and + not IPV4_RE.search(req_host)): + _debug(" host prefix %s for domain %s contains a dot", + host_prefix, domain) + return False + return True + + def set_ok_port(self, cookie, request): + if cookie.port_specified: + req_port = request_port(request) + if req_port is None: + req_port = "80" + else: + req_port = str(req_port) + for p in cookie.port.split(","): + try: + int(p) + except ValueError: + _debug(" bad port %s (not numeric)", p) + return False + if p == req_port: + break + else: + _debug(" request port (%s) not found in %s", + req_port, cookie.port) + return False + return True + + def return_ok(self, cookie, request): + """ + If you override .return_ok(), be sure to call this method. If it + returns false, so should your subclass (assuming your subclass wants to + be more strict about which cookies to return). + + """ + # Path has already been checked by .path_return_ok(), and domain + # blocking done by .domain_return_ok(). + _debug(" - checking cookie %s=%s", cookie.name, cookie.value) + + for n in "version", "verifiability", "secure", "expires", "port", "domain": + fn_name = "return_ok_"+n + fn = getattr(self, fn_name) + if not fn(cookie, request): + return False + return True + + def return_ok_version(self, cookie, request): + if cookie.version > 0 and not self.rfc2965: + _debug(" RFC 2965 cookies are switched off") + return False + elif cookie.version == 0 and not self.netscape: + _debug(" Netscape cookies are switched off") + return False + return True + + def return_ok_verifiability(self, cookie, request): + if request.unverifiable and is_third_party(request): + if cookie.version > 0 and self.strict_rfc2965_unverifiable: + _debug(" third-party RFC 2965 cookie during unverifiable " + "transaction") + return False + elif cookie.version == 0 and self.strict_ns_unverifiable: + _debug(" third-party Netscape cookie during unverifiable " + "transaction") + return False + return True + + def return_ok_secure(self, cookie, request): + if cookie.secure and request.type != "https": + _debug(" secure cookie with non-secure request") + return False + return True + + def return_ok_expires(self, cookie, request): + if cookie.is_expired(self._now): + _debug(" cookie expired") + return False + return True + + def return_ok_port(self, cookie, request): + if cookie.port: + req_port = request_port(request) + if req_port is None: + req_port = "80" + for p in cookie.port.split(","): + if p == req_port: + break + else: + _debug(" request port %s does not match cookie port %s", + req_port, cookie.port) + return False + return True + + def return_ok_domain(self, cookie, request): + req_host, erhn = eff_request_host(request) + domain = cookie.domain + + # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't + if (cookie.version == 0 and + (self.strict_ns_domain & self.DomainStrictNonDomain) and + not cookie.domain_specified and domain != erhn): + _debug(" cookie with unspecified domain does not string-compare " + "equal to request domain") + return False + + if cookie.version > 0 and not domain_match(erhn, domain): + _debug(" effective request-host name %s does not domain-match " + "RFC 2965 cookie domain %s", erhn, domain) + return False + if cookie.version == 0 and not ("."+erhn).endswith(domain): + _debug(" request-host %s does not match Netscape cookie domain " + "%s", req_host, domain) + return False + return True + + def domain_return_ok(self, domain, request): + # Liberal check of. This is here as an optimization to avoid + # having to load lots of MSIE cookie files unless necessary. + req_host, erhn = eff_request_host(request) + if not req_host.startswith("."): + req_host = "."+req_host + if not erhn.startswith("."): + erhn = "."+erhn + if not (req_host.endswith(domain) or erhn.endswith(domain)): + #_debug(" request domain %s does not match cookie domain %s", + # req_host, domain) + return False + + if self.is_blocked(domain): + _debug(" domain %s is in user block-list", domain) + return False + if self.is_not_allowed(domain): + _debug(" domain %s is not in user allow-list", domain) + return False + + return True + + def path_return_ok(self, path, request): + _debug("- checking cookie path=%s", path) + req_path = request_path(request) + if not req_path.startswith(path): + _debug(" %s does not path-match %s", req_path, path) + return False + return True + + +def vals_sorted_by_key(adict): + keys = sorted(adict.keys()) + return map(adict.get, keys) + +def deepvalues(mapping): + """Iterates over nested mapping, depth-first, in sorted order by key.""" + values = vals_sorted_by_key(mapping) + for obj in values: + mapping = False + try: + obj.items + except AttributeError: + pass + else: + mapping = True + yield from deepvalues(obj) + if not mapping: + yield obj + + +# Used as second parameter to dict.get() method, to distinguish absent +# dict key from one with a None value. +class Absent: pass + +class CookieJar: + """Collection of HTTP cookies. + + You may not need to know about this class: try + urllib.request.build_opener(HTTPCookieProcessor).open(url). + """ + + non_word_re = re.compile(r"\W") + quote_re = re.compile(r"([\"\\])") + strict_domain_re = re.compile(r"\.?[^.]*") + domain_re = re.compile(r"[^.]*") + dots_re = re.compile(r"^\.+") + + magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII) + + def __init__(self, policy=None): + if policy is None: + policy = DefaultCookiePolicy() + self._policy = policy + + self._cookies_lock = _threading.RLock() + self._cookies = {} + + def set_policy(self, policy): + self._policy = policy + + def _cookies_for_domain(self, domain, request): + cookies = [] + if not self._policy.domain_return_ok(domain, request): + return [] + _debug("Checking %s for cookies to return", domain) + cookies_by_path = self._cookies[domain] + for path in cookies_by_path.keys(): + if not self._policy.path_return_ok(path, request): + continue + cookies_by_name = cookies_by_path[path] + for cookie in cookies_by_name.values(): + if not self._policy.return_ok(cookie, request): + _debug(" not returning cookie") + continue + _debug(" it's a match") + cookies.append(cookie) + return cookies + + def _cookies_for_request(self, request): + """Return a list of cookies to be returned to server.""" + cookies = [] + for domain in self._cookies.keys(): + cookies.extend(self._cookies_for_domain(domain, request)) + return cookies + + def _cookie_attrs(self, cookies): + """Return a list of cookie-attributes to be returned to server. + + like ['foo="bar"; $Path="/"', ...] + + The $Version attribute is also added when appropriate (currently only + once per request). + + """ + # add cookies in order of most specific (ie. longest) path first + cookies.sort(key=lambda a: len(a.path), reverse=True) + + version_set = False + + attrs = [] + for cookie in cookies: + # set version of Cookie header + # XXX + # What should it be if multiple matching Set-Cookie headers have + # different versions themselves? + # Answer: there is no answer; was supposed to be settled by + # RFC 2965 errata, but that may never appear... + version = cookie.version + if not version_set: + version_set = True + if version > 0: + attrs.append("$Version=%s" % version) + + # quote cookie value if necessary + # (not for Netscape protocol, which already has any quotes + # intact, due to the poorly-specified Netscape Cookie: syntax) + if ((cookie.value is not None) and + self.non_word_re.search(cookie.value) and version > 0): + value = self.quote_re.sub(r"\\\1", cookie.value) + else: + value = cookie.value + + # add cookie-attributes to be returned in Cookie header + if cookie.value is None: + attrs.append(cookie.name) + else: + attrs.append("%s=%s" % (cookie.name, value)) + if version > 0: + if cookie.path_specified: + attrs.append('$Path="%s"' % cookie.path) + if cookie.domain.startswith("."): + domain = cookie.domain + if (not cookie.domain_initial_dot and + domain.startswith(".")): + domain = domain[1:] + attrs.append('$Domain="%s"' % domain) + if cookie.port is not None: + p = "$Port" + if cookie.port_specified: + p = p + ('="%s"' % cookie.port) + attrs.append(p) + + return attrs + + def add_cookie_header(self, request): + """Add correct Cookie: header to request (urllib.request.Request object). + + The Cookie2 header is also added unless policy.hide_cookie2 is true. + + """ + _debug("add_cookie_header") + self._cookies_lock.acquire() + try: + + self._policy._now = self._now = int(time.time()) + + cookies = self._cookies_for_request(request) + + attrs = self._cookie_attrs(cookies) + if attrs: + if not request.has_header("Cookie"): + request.add_unredirected_header( + "Cookie", "; ".join(attrs)) + + # if necessary, advertise that we know RFC 2965 + if (self._policy.rfc2965 and not self._policy.hide_cookie2 and + not request.has_header("Cookie2")): + for cookie in cookies: + if cookie.version != 1: + request.add_unredirected_header("Cookie2", '$Version="1"') + break + + finally: + self._cookies_lock.release() + + self.clear_expired_cookies() + + def _normalized_cookie_tuples(self, attrs_set): + """Return list of tuples containing normalised cookie information. + + attrs_set is the list of lists of key,value pairs extracted from + the Set-Cookie or Set-Cookie2 headers. + + Tuples are name, value, standard, rest, where name and value are the + cookie name and value, standard is a dictionary containing the standard + cookie-attributes (discard, secure, version, expires or max-age, + domain, path and port) and rest is a dictionary containing the rest of + the cookie-attributes. + + """ + cookie_tuples = [] + + boolean_attrs = "discard", "secure" + value_attrs = ("version", + "expires", "max-age", + "domain", "path", "port", + "comment", "commenturl") + + for cookie_attrs in attrs_set: + name, value = cookie_attrs[0] + + # Build dictionary of standard cookie-attributes (standard) and + # dictionary of other cookie-attributes (rest). + + # Note: expiry time is normalised to seconds since epoch. V0 + # cookies should have the Expires cookie-attribute, and V1 cookies + # should have Max-Age, but since V1 includes RFC 2109 cookies (and + # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we + # accept either (but prefer Max-Age). + max_age_set = False + + bad_cookie = False + + standard = {} + rest = {} + for k, v in cookie_attrs[1:]: + lc = k.lower() + # don't lose case distinction for unknown fields + if lc in value_attrs or lc in boolean_attrs: + k = lc + if k in boolean_attrs and v is None: + # boolean cookie-attribute is present, but has no value + # (like "discard", rather than "port=80") + v = True + if k in standard: + # only first value is significant + continue + if k == "domain": + if v is None: + _debug(" missing value for domain attribute") + bad_cookie = True + break + # RFC 2965 section 3.3.3 + v = v.lower() + if k == "expires": + if max_age_set: + # Prefer max-age to expires (like Mozilla) + continue + if v is None: + _debug(" missing or invalid value for expires " + "attribute: treating as session cookie") + continue + if k == "max-age": + max_age_set = True + try: + v = int(v) + except ValueError: + _debug(" missing or invalid (non-numeric) value for " + "max-age attribute") + bad_cookie = True + break + # convert RFC 2965 Max-Age to seconds since epoch + # XXX Strictly you're supposed to follow RFC 2616 + # age-calculation rules. Remember that zero Max-Age + # is a request to discard (old and new) cookie, though. + k = "expires" + v = self._now + v + if (k in value_attrs) or (k in boolean_attrs): + if (v is None and + k not in ("port", "comment", "commenturl")): + _debug(" missing value for %s attribute" % k) + bad_cookie = True + break + standard[k] = v + else: + rest[k] = v + + if bad_cookie: + continue + + cookie_tuples.append((name, value, standard, rest)) + + return cookie_tuples + + def _cookie_from_cookie_tuple(self, tup, request): + # standard is dict of standard cookie-attributes, rest is dict of the + # rest of them + name, value, standard, rest = tup + + domain = standard.get("domain", Absent) + path = standard.get("path", Absent) + port = standard.get("port", Absent) + expires = standard.get("expires", Absent) + + # set the easy defaults + version = standard.get("version", None) + if version is not None: + try: + version = int(version) + except ValueError: + return None # invalid version, ignore cookie + secure = standard.get("secure", False) + # (discard is also set if expires is Absent) + discard = standard.get("discard", False) + comment = standard.get("comment", None) + comment_url = standard.get("commenturl", None) + + # set default path + if path is not Absent and path != "": + path_specified = True + path = escape_path(path) + else: + path_specified = False + path = request_path(request) + i = path.rfind("/") + if i != -1: + if version == 0: + # Netscape spec parts company from reality here + path = path[:i] + else: + path = path[:i+1] + if len(path) == 0: path = "/" + + # set default domain + domain_specified = domain is not Absent + # but first we have to remember whether it starts with a dot + domain_initial_dot = False + if domain_specified: + domain_initial_dot = bool(domain.startswith(".")) + if domain is Absent: + req_host, erhn = eff_request_host(request) + domain = erhn + elif not domain.startswith("."): + domain = "."+domain + + # set default port + port_specified = False + if port is not Absent: + if port is None: + # Port attr present, but has no value: default to request port. + # Cookie should then only be sent back on that port. + port = request_port(request) + else: + port_specified = True + port = re.sub(r"\s+", "", port) + else: + # No port attr present. Cookie can be sent back on any port. + port = None + + # set default expires and discard + if expires is Absent: + expires = None + discard = True + elif expires <= self._now: + # Expiry date in past is request to delete cookie. This can't be + # in DefaultCookiePolicy, because can't delete cookies there. + try: + self.clear(domain, path, name) + except KeyError: + pass + _debug("Expiring cookie, domain='%s', path='%s', name='%s'", + domain, path, name) + return None + + return Cookie(version, + name, value, + port, port_specified, + domain, domain_specified, domain_initial_dot, + path, path_specified, + secure, + expires, + discard, + comment, + comment_url, + rest) + + def _cookies_from_attrs_set(self, attrs_set, request): + cookie_tuples = self._normalized_cookie_tuples(attrs_set) + + cookies = [] + for tup in cookie_tuples: + cookie = self._cookie_from_cookie_tuple(tup, request) + if cookie: cookies.append(cookie) + return cookies + + def _process_rfc2109_cookies(self, cookies): + rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None) + if rfc2109_as_ns is None: + rfc2109_as_ns = not self._policy.rfc2965 + for cookie in cookies: + if cookie.version == 1: + cookie.rfc2109 = True + if rfc2109_as_ns: + # treat 2109 cookies as Netscape cookies rather than + # as RFC2965 cookies + cookie.version = 0 + + def make_cookies(self, response, request): + """Return sequence of Cookie objects extracted from response object.""" + # get cookie-attributes for RFC 2965 and Netscape protocols + headers = response.info() + rfc2965_hdrs = headers.get_all("Set-Cookie2", []) + ns_hdrs = headers.get_all("Set-Cookie", []) + + rfc2965 = self._policy.rfc2965 + netscape = self._policy.netscape + + if ((not rfc2965_hdrs and not ns_hdrs) or + (not ns_hdrs and not rfc2965) or + (not rfc2965_hdrs and not netscape) or + (not netscape and not rfc2965)): + return [] # no relevant cookie headers: quick exit + + try: + cookies = self._cookies_from_attrs_set( + split_header_words(rfc2965_hdrs), request) + except Exception: + _warn_unhandled_exception() + cookies = [] + + if ns_hdrs and netscape: + try: + # RFC 2109 and Netscape cookies + ns_cookies = self._cookies_from_attrs_set( + parse_ns_headers(ns_hdrs), request) + except Exception: + _warn_unhandled_exception() + ns_cookies = [] + self._process_rfc2109_cookies(ns_cookies) + + # Look for Netscape cookies (from Set-Cookie headers) that match + # corresponding RFC 2965 cookies (from Set-Cookie2 headers). + # For each match, keep the RFC 2965 cookie and ignore the Netscape + # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are + # bundled in with the Netscape cookies for this purpose, which is + # reasonable behaviour. + if rfc2965: + lookup = {} + for cookie in cookies: + lookup[(cookie.domain, cookie.path, cookie.name)] = None + + def no_matching_rfc2965(ns_cookie, lookup=lookup): + key = ns_cookie.domain, ns_cookie.path, ns_cookie.name + return key not in lookup + ns_cookies = filter(no_matching_rfc2965, ns_cookies) + + if ns_cookies: + cookies.extend(ns_cookies) + + return cookies + + def set_cookie_if_ok(self, cookie, request): + """Set a cookie if policy says it's OK to do so.""" + self._cookies_lock.acquire() + try: + self._policy._now = self._now = int(time.time()) + + if self._policy.set_ok(cookie, request): + self.set_cookie(cookie) + + + finally: + self._cookies_lock.release() + + def set_cookie(self, cookie): + """Set a cookie, without checking whether or not it should be set.""" + c = self._cookies + self._cookies_lock.acquire() + try: + if cookie.domain not in c: c[cookie.domain] = {} + c2 = c[cookie.domain] + if cookie.path not in c2: c2[cookie.path] = {} + c3 = c2[cookie.path] + c3[cookie.name] = cookie + finally: + self._cookies_lock.release() + + def extract_cookies(self, response, request): + """Extract cookies from response, where allowable given the request.""" + _debug("extract_cookies: %s", response.info()) + self._cookies_lock.acquire() + try: + self._policy._now = self._now = int(time.time()) + + for cookie in self.make_cookies(response, request): + if self._policy.set_ok(cookie, request): + _debug(" setting cookie: %s", cookie) + self.set_cookie(cookie) + finally: + self._cookies_lock.release() + + def clear(self, domain=None, path=None, name=None): + """Clear some cookies. + + Invoking this method without arguments will clear all cookies. If + given a single argument, only cookies belonging to that domain will be + removed. If given two arguments, cookies belonging to the specified + path within that domain are removed. If given three arguments, then + the cookie with the specified name, path and domain is removed. + + Raises KeyError if no matching cookie exists. + + """ + if name is not None: + if (domain is None) or (path is None): + raise ValueError( + "domain and path must be given to remove a cookie by name") + del self._cookies[domain][path][name] + elif path is not None: + if domain is None: + raise ValueError( + "domain must be given to remove cookies by path") + del self._cookies[domain][path] + elif domain is not None: + del self._cookies[domain] + else: + self._cookies = {} + + def clear_session_cookies(self): + """Discard all session cookies. + + Note that the .save() method won't save session cookies anyway, unless + you ask otherwise by passing a true ignore_discard argument. + + """ + self._cookies_lock.acquire() + try: + for cookie in self: + if cookie.discard: + self.clear(cookie.domain, cookie.path, cookie.name) + finally: + self._cookies_lock.release() + + def clear_expired_cookies(self): + """Discard all expired cookies. + + You probably don't need to call this method: expired cookies are never + sent back to the server (provided you're using DefaultCookiePolicy), + this method is called by CookieJar itself every so often, and the + .save() method won't save expired cookies anyway (unless you ask + otherwise by passing a true ignore_expires argument). + + """ + self._cookies_lock.acquire() + try: + now = time.time() + for cookie in self: + if cookie.is_expired(now): + self.clear(cookie.domain, cookie.path, cookie.name) + finally: + self._cookies_lock.release() + + def __iter__(self): + return deepvalues(self._cookies) + + def __len__(self): + """Return number of contained cookies.""" + i = 0 + for cookie in self: i = i + 1 + return i + + def __repr__(self): + r = [] + for cookie in self: r.append(repr(cookie)) + return "<%s[%s]>" % (self.__class__.__name__, ", ".join(r)) + + def __str__(self): + r = [] + for cookie in self: r.append(str(cookie)) + return "<%s[%s]>" % (self.__class__.__name__, ", ".join(r)) + + +# derives from OSError for backwards-compatibility with Python 2.4.0 +class LoadError(OSError): pass + +class FileCookieJar(CookieJar): + """CookieJar that can be loaded from and saved to a file.""" + + def __init__(self, filename=None, delayload=False, policy=None): + """ + Cookies are NOT loaded from the named file until either the .load() or + .revert() method is called. + + """ + CookieJar.__init__(self, policy) + if filename is not None: + try: + filename+"" + except: + raise ValueError("filename must be string-like") + self.filename = filename + self.delayload = bool(delayload) + + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + """Save cookies to a file.""" + raise NotImplementedError() + + def load(self, filename=None, ignore_discard=False, ignore_expires=False): + """Load cookies from a file.""" + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + with open(filename) as f: + self._really_load(f, filename, ignore_discard, ignore_expires) + + def revert(self, filename=None, + ignore_discard=False, ignore_expires=False): + """Clear all cookies and reload cookies from a saved file. + + Raises LoadError (or OSError) if reversion is not successful; the + object's state will not be altered if this happens. + + """ + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + self._cookies_lock.acquire() + try: + + old_state = copy.deepcopy(self._cookies) + self._cookies = {} + try: + self.load(filename, ignore_discard, ignore_expires) + except OSError: + self._cookies = old_state + raise + + finally: + self._cookies_lock.release() + + +def lwp_cookie_str(cookie): + """Return string representation of Cookie in the LWP cookie file format. + + Actually, the format is extended a bit -- see module docstring. + + """ + h = [(cookie.name, cookie.value), + ("path", cookie.path), + ("domain", cookie.domain)] + if cookie.port is not None: h.append(("port", cookie.port)) + if cookie.path_specified: h.append(("path_spec", None)) + if cookie.port_specified: h.append(("port_spec", None)) + if cookie.domain_initial_dot: h.append(("domain_dot", None)) + if cookie.secure: h.append(("secure", None)) + if cookie.expires: h.append(("expires", + time2isoz(float(cookie.expires)))) + if cookie.discard: h.append(("discard", None)) + if cookie.comment: h.append(("comment", cookie.comment)) + if cookie.comment_url: h.append(("commenturl", cookie.comment_url)) + + keys = sorted(cookie._rest.keys()) + for k in keys: + h.append((k, str(cookie._rest[k]))) + + h.append(("version", str(cookie.version))) + + return join_header_words([h]) + +class LWPCookieJar(FileCookieJar): + """ + The LWPCookieJar saves a sequence of "Set-Cookie3" lines. + "Set-Cookie3" is the format used by the libwww-perl library, not known + to be compatible with any browser, but which is easy to read and + doesn't lose information about RFC 2965 cookies. + + Additional methods + + as_lwp_str(ignore_discard=True, ignore_expired=True) + + """ + + def as_lwp_str(self, ignore_discard=True, ignore_expires=True): + """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers. + + ignore_discard and ignore_expires: see docstring for FileCookieJar.save + + """ + now = time.time() + r = [] + for cookie in self: + if not ignore_discard and cookie.discard: + continue + if not ignore_expires and cookie.is_expired(now): + continue + r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie)) + return "\n".join(r+[""]) + + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + with open(filename, "w") as f: + # There really isn't an LWP Cookies 2.0 format, but this indicates + # that there is extra information in here (domain_dot and + # port_spec) while still being compatible with libwww-perl, I hope. + f.write("#LWP-Cookies-2.0\n") + f.write(self.as_lwp_str(ignore_discard, ignore_expires)) + + def _really_load(self, f, filename, ignore_discard, ignore_expires): + magic = f.readline() + if not self.magic_re.search(magic): + msg = ("%r does not look like a Set-Cookie3 (LWP) format " + "file" % filename) + raise LoadError(msg) + + now = time.time() + + header = "Set-Cookie3:" + boolean_attrs = ("port_spec", "path_spec", "domain_dot", + "secure", "discard") + value_attrs = ("version", + "port", "path", "domain", + "expires", + "comment", "commenturl") + + try: + while 1: + line = f.readline() + if line == "": break + if not line.startswith(header): + continue + line = line[len(header):].strip() + + for data in split_header_words([line]): + name, value = data[0] + standard = {} + rest = {} + for k in boolean_attrs: + standard[k] = False + for k, v in data[1:]: + if k is not None: + lc = k.lower() + else: + lc = None + # don't lose case distinction for unknown fields + if (lc in value_attrs) or (lc in boolean_attrs): + k = lc + if k in boolean_attrs: + if v is None: v = True + standard[k] = v + elif k in value_attrs: + standard[k] = v + else: + rest[k] = v + + h = standard.get + expires = h("expires") + discard = h("discard") + if expires is not None: + expires = iso2time(expires) + if expires is None: + discard = True + domain = h("domain") + domain_specified = domain.startswith(".") + c = Cookie(h("version"), name, value, + h("port"), h("port_spec"), + domain, domain_specified, h("domain_dot"), + h("path"), h("path_spec"), + h("secure"), + expires, + discard, + h("comment"), + h("commenturl"), + rest) + if not ignore_discard and c.discard: + continue + if not ignore_expires and c.is_expired(now): + continue + self.set_cookie(c) + except OSError: + raise + except Exception: + _warn_unhandled_exception() + raise LoadError("invalid Set-Cookie3 format file %r: %r" % + (filename, line)) + + +class MozillaCookieJar(FileCookieJar): + """ + + WARNING: you may want to backup your browser's cookies file if you use + this class to save cookies. I *think* it works, but there have been + bugs in the past! + + This class differs from CookieJar only in the format it uses to save and + load cookies to and from a file. This class uses the Mozilla/Netscape + `cookies.txt' format. lynx uses this file format, too. + + Don't expect cookies saved while the browser is running to be noticed by + the browser (in fact, Mozilla on unix will overwrite your saved cookies if + you change them on disk while it's running; on Windows, you probably can't + save at all while the browser is running). + + Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to + Netscape cookies on saving. + + In particular, the cookie version and port number information is lost, + together with information about whether or not Path, Port and Discard were + specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the + domain as set in the HTTP header started with a dot (yes, I'm aware some + domains in Netscape files start with a dot and some don't -- trust me, you + really don't want to know any more about this). + + Note that though Mozilla and Netscape use the same format, they use + slightly different headers. The class saves cookies using the Netscape + header by default (Mozilla can cope with that). + + """ + magic_re = re.compile("#( Netscape)? HTTP Cookie File") + header = """\ +# Netscape HTTP Cookie File +# http://curl.haxx.se/rfc/cookie_spec.html +# This is a generated file! Do not edit. + +""" + + def _really_load(self, f, filename, ignore_discard, ignore_expires): + now = time.time() + + magic = f.readline() + if not self.magic_re.search(magic): + raise LoadError( + "%r does not look like a Netscape format cookies file" % + filename) + + try: + while 1: + line = f.readline() + if line == "": break + + # last field may be absent, so keep any trailing tab + if line.endswith("\n"): line = line[:-1] + + # skip comments and blank lines XXX what is $ for? + if (line.strip().startswith(("#", "$")) or + line.strip() == ""): + continue + + domain, domain_specified, path, secure, expires, name, value = \ + line.split("\t") + secure = (secure == "TRUE") + domain_specified = (domain_specified == "TRUE") + if name == "": + # cookies.txt regards 'Set-Cookie: foo' as a cookie + # with no name, whereas http.cookiejar regards it as a + # cookie with no value. + name = value + value = None + + initial_dot = domain.startswith(".") + assert domain_specified == initial_dot + + discard = False + if expires == "": + expires = None + discard = True + + # assume path_specified is false + c = Cookie(0, name, value, + None, False, + domain, domain_specified, initial_dot, + path, False, + secure, + expires, + discard, + None, + None, + {}) + if not ignore_discard and c.discard: + continue + if not ignore_expires and c.is_expired(now): + continue + self.set_cookie(c) + + except OSError: + raise + except Exception: + _warn_unhandled_exception() + raise LoadError("invalid Netscape format cookies file %r: %r" % + (filename, line)) + + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + with open(filename, "w") as f: + f.write(self.header) + now = time.time() + for cookie in self: + if not ignore_discard and cookie.discard: + continue + if not ignore_expires and cookie.is_expired(now): + continue + if cookie.secure: secure = "TRUE" + else: secure = "FALSE" + if cookie.domain.startswith("."): initial_dot = "TRUE" + else: initial_dot = "FALSE" + if cookie.expires is not None: + expires = str(cookie.expires) + else: + expires = "" + if cookie.value is None: + # cookies.txt regards 'Set-Cookie: foo' as a cookie + # with no name, whereas http.cookiejar regards it as a + # cookie with no value. + name = "" + value = cookie.name + else: + name = cookie.name + value = cookie.value + f.write( + "\t".join([cookie.domain, initial_dot, cookie.path, + secure, expires, name, value])+ + "\n") diff --git a/modules/language/python/module/http/cookies.py b/modules/language/python/module/http/cookies.py new file mode 100644 index 0000000..ef08bcc --- /dev/null +++ b/modules/language/python/module/http/cookies.py @@ -0,0 +1,637 @@ +module(http,cookies) + +#### +# Copyright 2000 by Timothy O'Malley <timo@alum.mit.edu> +# +# All Rights Reserved +# +# Permission to use, copy, modify, and distribute this software +# and its documentation for any purpose and without fee is hereby +# granted, provided that the above copyright notice appear in all +# copies and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Timothy O'Malley not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS +# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR +# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +# PERFORMANCE OF THIS SOFTWARE. +# +#### +# +# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp +# by Timothy O'Malley <timo@alum.mit.edu> +# +# Cookie.py is a Python module for the handling of HTTP +# cookies as a Python dictionary. See RFC 2109 for more +# information on cookies. +# +# The original idea to treat Cookies as a dictionary came from +# Dave Mitchell (davem@magnet.com) in 1995, when he released the +# first version of nscookie.py. +# +#### + +r""" +Here's a sample session to show how to use this module. +At the moment, this is the only documentation. + +The Basics +---------- + +Importing is easy... + + >>> from http import cookies + +Most of the time you start by creating a cookie. + + >>> C = cookies.SimpleCookie() + +Once you've created your Cookie, you can add values just as if it were +a dictionary. + + >>> C = cookies.SimpleCookie() + >>> C["fig"] = "newton" + >>> C["sugar"] = "wafer" + >>> C.output() + 'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer' + +Notice that the printable representation of a Cookie is the +appropriate format for a Set-Cookie: header. This is the +default behavior. You can change the header and printed +attributes by using the .output() function + + >>> C = cookies.SimpleCookie() + >>> C["rocky"] = "road" + >>> C["rocky"]["path"] = "/cookie" + >>> print(C.output(header="Cookie:")) + Cookie: rocky=road; Path=/cookie + >>> print(C.output(attrs=[], header="Cookie:")) + Cookie: rocky=road + +The load() method of a Cookie extracts cookies from a string. In a +CGI script, you would use this method to extract the cookies from the +HTTP_COOKIE environment variable. + + >>> C = cookies.SimpleCookie() + >>> C.load("chips=ahoy; vienna=finger") + >>> C.output() + 'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger' + +The load() method is darn-tootin smart about identifying cookies +within a string. Escaped quotation marks, nested semicolons, and other +such trickeries do not confuse it. + + >>> C = cookies.SimpleCookie() + >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') + >>> print(C) + Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" + +Each element of the Cookie also supports all of the RFC 2109 +Cookie attributes. Here's an example which sets the Path +attribute. + + >>> C = cookies.SimpleCookie() + >>> C["oreo"] = "doublestuff" + >>> C["oreo"]["path"] = "/" + >>> print(C) + Set-Cookie: oreo=doublestuff; Path=/ + +Each dictionary element has a 'value' attribute, which gives you +back the value associated with the key. + + >>> C = cookies.SimpleCookie() + >>> C["twix"] = "none for you" + >>> C["twix"].value + 'none for you' + +The SimpleCookie expects that all values should be standard strings. +Just to be sure, SimpleCookie invokes the str() builtin to convert +the value to a string, when the values are set dictionary-style. + + >>> C = cookies.SimpleCookie() + >>> C["number"] = 7 + >>> C["string"] = "seven" + >>> C["number"].value + '7' + >>> C["string"].value + 'seven' + >>> C.output() + 'Set-Cookie: number=7\r\nSet-Cookie: string=seven' + +Finis. +""" + +# +# Import our required modules +# +import re +import string + +__all__ = ["CookieError", "BaseCookie", "SimpleCookie"] + +_nulljoin = ''.join +_semispacejoin = '; '.join +_spacejoin = ' '.join + +def _warn_deprecated_setter(setter): + import warnings + msg = ('The .%s setter is deprecated. The attribute will be read-only in ' + 'future releases. Please use the set() method instead.' % setter) + warnings.warn(msg, DeprecationWarning, stacklevel=3) + +# +# Define an exception visible to External modules +# +class CookieError(Exception): + pass + + +# These quoting routines conform to the RFC2109 specification, which in +# turn references the character definitions from RFC2068. They provide +# a two-way quoting algorithm. Any non-text character is translated +# into a 4 character sequence: a forward-slash followed by the +# three-digit octal equivalent of the character. Any '\' or '"' is +# quoted with a preceding '\' slash. +# Because of the way browsers really handle cookies (as opposed to what +# the RFC says) we also encode "," and ";". +# +# These are taken from RFC2068 and RFC2109. +# _LegalChars is the list of chars which don't require "'s +# _Translator hash-table for fast quoting +# +_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~:" +_UnescapedChars = _LegalChars + ' ()/<=>?@[]{}' + +_Translator = {n: '\\%03o' % n + for n in set(range(256)) - set(map(ord, _UnescapedChars))} +_Translator.update({ + ord('"'): '\\"', + ord('\\'): '\\\\', +}) + +_is_legal_key = re.compile('[%s]+' % re.escape(_LegalChars)).fullmatch + +def _quote(str): + r"""Quote a string for use in a cookie header. + + If the string does not need to be double-quoted, then just return the + string. Otherwise, surround the string in doublequotes and quote + (with a \) special characters. + """ + if str is None or _is_legal_key(str): + return str + else: + return '"' + str.translate(_Translator) + '"' + + +_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") +_QuotePatt = re.compile(r"[\\].") + +def _unquote(str): + # If there aren't any doublequotes, + # then there can't be any special characters. See RFC 2109. + if str is None or len(str) < 2: + return str + if str[0] != '"' or str[-1] != '"': + return str + + # We have to assume that we must decode this string. + # Down to work. + + # Remove the "s + str = str[1:-1] + + # Check for special sequences. Examples: + # \012 --> \n + # \" --> " + # + i = 0 + n = len(str) + res = [] + while 0 <= i < n: + o_match = _OctalPatt.search(str, i) + q_match = _QuotePatt.search(str, i) + if not o_match and not q_match: # Neither matched + res.append(str[i:]) + break + # else: + j = k = -1 + if o_match: + j = o_match.start(0) + if q_match: + k = q_match.start(0) + if q_match and (not o_match or k < j): # QuotePatt matched + res.append(str[i:k]) + res.append(str[k+1]) + i = k + 2 + else: # OctalPatt matched + res.append(str[i:j]) + res.append(chr(int(str[j+1:j+4], 8))) + i = j + 4 + return _nulljoin(res) + +# The _getdate() routine is used to set the expiration time in the cookie's HTTP +# header. By default, _getdate() returns the current time in the appropriate +# "expires" format for a Set-Cookie header. The one optional argument is an +# offset from now, in seconds. For example, an offset of -3600 means "one hour +# ago". The offset may be a floating point number. +# + +_weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] + +_monthname = [None, + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + +def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname): + from time import gmtime, time + now = time() + year, month, day, hh, mm, ss, wd, y, z = gmtime(now + future) + return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % \ + (weekdayname[wd], day, monthname[month], year, hh, mm, ss) + + +class Morsel(dict): + """A class to hold ONE (key, value) pair. + + In a cookie, each such pair may have several attributes, so this class is + used to keep the attributes associated with the appropriate key,value pair. + This class also includes a coded_value attribute, which is used to hold + the network representation of the value. This is most useful when Python + objects are pickled for network transit. + """ + # RFC 2109 lists these attributes as reserved: + # path comment domain + # max-age secure version + # + # For historical reasons, these attributes are also reserved: + # expires + # + # This is an extension from Microsoft: + # httponly + # + # This dictionary provides a mapping from the lowercase + # variant on the left to the appropriate traditional + # formatting on the right. + _reserved = { + "expires" : "expires", + "path" : "Path", + "comment" : "Comment", + "domain" : "Domain", + "max-age" : "Max-Age", + "secure" : "Secure", + "httponly" : "HttpOnly", + "version" : "Version", + } + + _flags = {'secure', 'httponly'} + + def __init__(self): + # Set defaults + self._key = self._value = self._coded_value = None + + # Set default attributes + for key in self._reserved: + dict.__setitem__(self, key, "") + + @property + def key(self): + return self._key + + @key.setter + def key(self, key): + _warn_deprecated_setter('key') + self._key = key + + @property + def value(self): + return self._value + + @value.setter + def value(self, value): + _warn_deprecated_setter('value') + self._value = value + + @property + def coded_value(self): + return self._coded_value + + @coded_value.setter + def coded_value(self, coded_value): + _warn_deprecated_setter('coded_value') + self._coded_value = coded_value + + def __setitem__(self, K, V): + K = K.lower() + if not K in self._reserved: + raise CookieError("Invalid attribute %r" % (K,)) + dict.__setitem__(self, K, V) + + def setdefault(self, key, val=None): + key = key.lower() + if key not in self._reserved: + raise CookieError("Invalid attribute %r" % (key,)) + return dict.setdefault(self, key, val) + + def __eq__(self, morsel): + if not isinstance(morsel, Morsel): + return NotImplemented + return (dict.__eq__(self, morsel) and + self._value == morsel._value and + self._key == morsel._key and + self._coded_value == morsel._coded_value) + + __ne__ = object.__ne__ + + def copy(self): + morsel = Morsel() + dict.update(morsel, self) + morsel.__dict__.update(self.__dict__) + return morsel + + def update(self, values): + data = {} + for key, val in dict(values).items(): + key = key.lower() + if key not in self._reserved: + raise CookieError("Invalid attribute %r" % (key,)) + data[key] = val + dict.update(self, data) + + def isReservedKey(self, K): + return K.lower() in self._reserved + + def set(self, key, val, coded_val, LegalChars=_LegalChars): + if LegalChars != _LegalChars: + import warnings + warnings.warn( + 'LegalChars parameter is deprecated, ignored and will ' + 'be removed in future versions.', DeprecationWarning, + stacklevel=2) + + if key.lower() in self._reserved: + raise CookieError('Attempt to set a reserved key %r' % (key,)) + if not _is_legal_key(key): + raise CookieError('Illegal key %r' % (key,)) + + # It's a good key, so save it. + self._key = key + self._value = val + self._coded_value = coded_val + + def __getstate__(self): + return { + 'key': self._key, + 'value': self._value, + 'coded_value': self._coded_value, + } + + def __setstate__(self, state): + self._key = state['key'] + self._value = state['value'] + self._coded_value = state['coded_value'] + + def output(self, attrs=None, header="Set-Cookie:"): + return "%s %s" % (header, self.OutputString(attrs)) + + __str__ = output + + def __repr__(self): + return '<%s: %s>' % (self.__class__.__name__, self.OutputString()) + + def js_output(self, attrs=None): + # Print javascript + return """ + <script type="text/javascript"> + <!-- begin hiding + document.cookie = \"%s\"; + // end hiding --> + </script> + """ % (self.OutputString(attrs).replace('"', r'\"')) + + def OutputString(self, attrs=None): + # Build up our result + # + result = [] + append = result.append + + # First, the key=value pair + append("%s=%s" % (self.key, self.coded_value)) + + # Now add any defined attributes + if attrs is None: + attrs = self._reserved + items = sorted(self.items()) + for key, value in items: + if value == "": + continue + if key not in attrs: + continue + if key == "expires" and isinstance(value, int): + append("%s=%s" % (self._reserved[key], _getdate(value))) + elif key == "max-age" and isinstance(value, int): + append("%s=%d" % (self._reserved[key], value)) + elif key in self._flags: + if value: + append(str(self._reserved[key])) + else: + append("%s=%s" % (self._reserved[key], value)) + + # Return the result + return _semispacejoin(result) + + +# +# Pattern for finding cookie +# +# This used to be strict parsing based on the RFC2109 and RFC2068 +# specifications. I have since discovered that MSIE 3.0x doesn't +# follow the character rules outlined in those specs. As a +# result, the parsing rules here are less strict. +# + +_LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=" +_LegalValueChars = _LegalKeyChars + r'\[\]' +_CookiePattern = re.compile(r""" + \s* # Optional whitespace at start of cookie + (?P<key> # Start of group 'key' + [""" + _LegalKeyChars + r"""]+? # Any word of at least one letter + ) # End of group 'key' + ( # Optional group: there may not be a value. + \s*=\s* # Equal Sign + (?P<val> # Start of group 'val' + "(?:[^\\"]|\\.)*" # Any doublequoted string + | # or + \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr + | # or + [""" + _LegalValueChars + r"""]* # Any word or empty string + ) # End of group 'val' + )? # End of optional value group + \s* # Any number of spaces. + (\s+|;|$) # Ending either at space, semicolon, or EOS. + """, re.ASCII | re.VERBOSE) # re.ASCII may be removed if safe. + + +# At long last, here is the cookie class. Using this class is almost just like +# using a dictionary. See this module's docstring for example usage. +# +class BaseCookie(dict): + """A container class for a set of Morsels.""" + + def value_decode(self, val): + """real_value, coded_value = value_decode(STRING) + Called prior to setting a cookie's value from the network + representation. The VALUE is the value read from HTTP + header. + Override this function to modify the behavior of cookies. + """ + return val, val + + def value_encode(self, val): + """real_value, coded_value = value_encode(VALUE) + Called prior to setting a cookie's value from the dictionary + representation. The VALUE is the value being assigned. + Override this function to modify the behavior of cookies. + """ + strval = str(val) + return strval, strval + + def __init__(self, input=None): + if input: + self.load(input) + + def __set(self, key, real_value, coded_value): + """Private method for setting a cookie's value""" + M = self.get(key, Morsel()) + M.set(key, real_value, coded_value) + dict.__setitem__(self, key, M) + + def __setitem__(self, key, value): + """Dictionary style assignment.""" + if isinstance(value, Morsel): + # allow assignment of constructed Morsels (e.g. for pickling) + dict.__setitem__(self, key, value) + else: + rval, cval = self.value_encode(value) + self.__set(key, rval, cval) + + def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"): + """Return a string suitable for HTTP.""" + result = [] + items = sorted(self.items()) + for key, value in items: + result.append(value.output(attrs, header)) + return sep.join(result) + + __str__ = output + + def __repr__(self): + l = [] + items = sorted(self.items()) + for key, value in items: + l.append('%s=%s' % (key, repr(value.value))) + return '<%s: %s>' % (self.__class__.__name__, _spacejoin(l)) + + def js_output(self, attrs=None): + """Return a string suitable for JavaScript.""" + result = [] + items = sorted(self.items()) + for key, value in items: + result.append(value.js_output(attrs)) + return _nulljoin(result) + + def load(self, rawdata): + """Load cookies from a string (presumably HTTP_COOKIE) or + from a dictionary. Loading cookies from a dictionary 'd' + is equivalent to calling: + map(Cookie.__setitem__, d.keys(), d.values()) + """ + if isinstance(rawdata, str): + self.__parse_string(rawdata) + else: + # self.update() wouldn't call our custom __setitem__ + for key, value in rawdata.items(): + self[key] = value + return + + def __parse_string(self, str, patt=_CookiePattern): + i = 0 # Our starting point + n = len(str) # Length of string + parsed_items = [] # Parsed (type, key, value) triples + morsel_seen = False # A key=value pair was previously encountered + + TYPE_ATTRIBUTE = 1 + TYPE_KEYVALUE = 2 + + # We first parse the whole cookie string and reject it if it's + # syntactically invalid (this helps avoid some classes of injection + # attacks). + while 0 <= i < n: + # Start looking for a cookie + match = patt.match(str, i) + if not match: + # No more cookies + break + + key, value = match.group("key"), match.group("val") + i = match.end(0) + + if key[0] == "$": + if not morsel_seen: + # We ignore attributes which pertain to the cookie + # mechanism as a whole, such as "$Version". + # See RFC 2965. (Does anyone care?) + continue + parsed_items.append((TYPE_ATTRIBUTE, key[1:], value)) + elif key.lower() in Morsel._reserved: + if not morsel_seen: + # Invalid cookie string + return + if value is None: + if key.lower() in Morsel._flags: + parsed_items.append((TYPE_ATTRIBUTE, key, True)) + else: + # Invalid cookie string + return + else: + parsed_items.append((TYPE_ATTRIBUTE, key, _unquote(value))) + elif value is not None: + parsed_items.append((TYPE_KEYVALUE, key, self.value_decode(value))) + morsel_seen = True + else: + # Invalid cookie string + return + + # The cookie string is valid, apply it. + M = None # current morsel + for tp, key, value in parsed_items: + if tp == TYPE_ATTRIBUTE: + assert M is not None + M[key] = value + else: + assert tp == TYPE_KEYVALUE + rval, cval = value + self.__set(key, rval, cval) + M = self[key] + + +class SimpleCookie(BaseCookie): + """ + SimpleCookie supports strings as cookie values. When setting + the value using the dictionary assignment notation, SimpleCookie + calls the builtin str() to convert the value to a string. Values + received from HTTP are kept as strings. + """ + def value_decode(self, val): + return _unquote(val), val + + def value_encode(self, val): + strval = str(val) + return strval, _quote(strval) diff --git a/modules/language/python/module/http/server.py b/modules/language/python/module/http/server.py new file mode 100644 index 0000000..7799221 --- /dev/null +++ b/modules/language/python/module/http/server.py @@ -0,0 +1,1214 @@ +module(http,server) + +"""HTTP server classes. + +Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see +SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST, +and CGIHTTPRequestHandler for CGI scripts. + +It does, however, optionally implement HTTP/1.1 persistent connections, +as of version 0.3. + +Notes on CGIHTTPRequestHandler +------------------------------ + +This class implements GET and POST requests to cgi-bin scripts. + +If the os.fork() function is not present (e.g. on Windows), +subprocess.Popen() is used as a fallback, with slightly altered semantics. + +In all cases, the implementation is intentionally naive -- all +requests are executed synchronously. + +SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL +-- it may execute arbitrary Python code or external programs. + +Note that status code 200 is sent prior to execution of a CGI script, so +scripts cannot send other status codes such as 302 (redirect). + +XXX To do: + +- log requests even later (to capture byte count) +- log user-agent header and other interesting goodies +- send error log to separate file +""" + + +# See also: +# +# HTTP Working Group T. Berners-Lee +# INTERNET-DRAFT R. T. Fielding +# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen +# Expires September 8, 1995 March 8, 1995 +# +# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt +# +# and +# +# Network Working Group R. Fielding +# Request for Comments: 2616 et al +# Obsoletes: 2068 June 1999 +# Category: Standards Track +# +# URL: http://www.faqs.org/rfcs/rfc2616.html + +# Log files +# --------- +# +# Here's a quote from the NCSA httpd docs about log file format. +# +# | The logfile format is as follows. Each line consists of: +# | +# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb +# | +# | host: Either the DNS name or the IP number of the remote client +# | rfc931: Any information returned by identd for this person, +# | - otherwise. +# | authuser: If user sent a userid for authentication, the user name, +# | - otherwise. +# | DD: Day +# | Mon: Month (calendar name) +# | YYYY: Year +# | hh: hour (24-hour format, the machine's timezone) +# | mm: minutes +# | ss: seconds +# | request: The first line of the HTTP request as sent by the client. +# | ddd: the status code returned by the server, - if not available. +# | bbbb: the total number of bytes sent, +# | *not including the HTTP/1.0 header*, - if not available +# | +# | You can determine the name of the file accessed through request. +# +# (Actually, the latter is only true if you know the server configuration +# at the time the request was made!) + +__version__ = "0.6" + +__all__ = [ + "HTTPServer", "BaseHTTPRequestHandler", + "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler", +] + +import email.utils +import html +import http.client +import io +import mimetypes +import os +import posixpath +import select +import shutil +import socket # For gethostbyaddr() +import socketserver +import sys +import time +import urllib.parse +import copy +import argparse +from http import HTTPStatus + + +# Default error message template +DEFAULT_ERROR_MESSAGE = """\ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" + "http://www.w3.org/TR/html4/strict.dtd"> +<html> + <head> + <meta http-equiv="Content-Type" content="text/html;charset=utf-8"> + <title>Error response</title> + </head> + <body> + <h1>Error response</h1> + <p>Error code: %(code)d</p> + <p>Message: %(message)s.</p> + <p>Error code explanation: %(code)s - %(explain)s.</p> + </body> +</html> +""" + +DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" + +class HTTPServer(socketserver.TCPServer): + + allow_reuse_address = 1 # Seems to make sense in testing environment + + def server_bind(self): + """Override server_bind to store the server name.""" + socketserver.TCPServer.server_bind(self) + host, port = self.server_address[:2] + self.server_name = socket.getfqdn(host) + self.server_port = port + + +class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): + + """HTTP request handler base class. + + The following explanation of HTTP serves to guide you through the + code as well as to expose any misunderstandings I may have about + HTTP (so you don't need to read the code to figure out I'm wrong + :-). + + HTTP (HyperText Transfer Protocol) is an extensible protocol on + top of a reliable stream transport (e.g. TCP/IP). The protocol + recognizes three parts to a request: + + 1. One line identifying the request type and path + 2. An optional set of RFC-822-style headers + 3. An optional data part + + The headers and data are separated by a blank line. + + The first line of the request has the form + + <command> <path> <version> + + where <command> is a (case-sensitive) keyword such as GET or POST, + <path> is a string containing path information for the request, + and <version> should be the string "HTTP/1.0" or "HTTP/1.1". + <path> is encoded using the URL encoding scheme (using %xx to signify + the ASCII character with hex code xx). + + The specification specifies that lines are separated by CRLF but + for compatibility with the widest range of clients recommends + servers also handle LF. Similarly, whitespace in the request line + is treated sensibly (allowing multiple spaces between components + and allowing trailing whitespace). + + Similarly, for output, lines ought to be separated by CRLF pairs + but most clients grok LF characters just fine. + + If the first line of the request has the form + + <command> <path> + + (i.e. <version> is left out) then this is assumed to be an HTTP + 0.9 request; this form has no optional headers and data part and + the reply consists of just the data. + + The reply form of the HTTP 1.x protocol again has three parts: + + 1. One line giving the response code + 2. An optional set of RFC-822-style headers + 3. The data + + Again, the headers and data are separated by a blank line. + + The response code line has the form + + <version> <responsecode> <responsestring> + + where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"), + <responsecode> is a 3-digit response code indicating success or + failure of the request, and <responsestring> is an optional + human-readable string explaining what the response code means. + + This server parses the request and the headers, and then calls a + function specific to the request type (<command>). Specifically, + a request SPAM will be handled by a method do_SPAM(). If no + such method exists the server sends an error response to the + client. If it exists, it is called with no arguments: + + do_SPAM() + + Note that the request name is case sensitive (i.e. SPAM and spam + are different requests). + + The various request details are stored in instance variables: + + - client_address is the client IP address in the form (host, + port); + + - command, path and version are the broken-down request line; + + - headers is an instance of email.message.Message (or a derived + class) containing the header information; + + - rfile is a file object open for reading positioned at the + start of the optional input data part; + + - wfile is a file object open for writing. + + IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! + + The first thing to be written must be the response line. Then + follow 0 or more header lines, then a blank line, and then the + actual data (if any). The meaning of the header lines depends on + the command executed by the server; in most cases, when data is + returned, there should be at least one header line of the form + + Content-type: <type>/<subtype> + + where <type> and <subtype> should be registered MIME types, + e.g. "text/html" or "text/plain". + + """ + + # The Python system version, truncated to its first component. + sys_version = "Python/" + sys.version.split()[0] + + # The server software version. You may want to override this. + # The format is multiple whitespace-separated strings, + # where each string is of the form name[/version]. + server_version = "BaseHTTP/" + __version__ + + error_message_format = DEFAULT_ERROR_MESSAGE + error_content_type = DEFAULT_ERROR_CONTENT_TYPE + + # The default request version. This only affects responses up until + # the point where the request line is parsed, so it mainly decides what + # the client gets back when sending a malformed request line. + # Most web servers default to HTTP 0.9, i.e. don't send a status line. + default_request_version = "HTTP/0.9" + + def parse_request(self): + """Parse a request (internal). + + The request should be stored in self.raw_requestline; the results + are in self.command, self.path, self.request_version and + self.headers. + + Return True for success, False for failure; on failure, an + error is sent back. + + """ + self.command = None # set in case of error on the first line + self.request_version = version = self.default_request_version + self.close_connection = True + requestline = str(self.raw_requestline, 'iso-8859-1') + requestline = requestline.rstrip('\r\n') + self.requestline = requestline + words = requestline.split() + if len(words) == 3: + command, path, version = words + try: + if version[:5] != 'HTTP/': + raise ValueError + base_version_number = version.split('/', 1)[1] + version_number = base_version_number.split(".") + # RFC 2145 section 3.1 says there can be only one "." and + # - major and minor numbers MUST be treated as + # separate integers; + # - HTTP/2.4 is a lower version than HTTP/2.13, which in + # turn is lower than HTTP/12.3; + # - Leading zeros MUST be ignored by recipients. + if len(version_number) != 2: + raise ValueError + version_number = int(version_number[0]), int(version_number[1]) + except (ValueError, IndexError): + self.send_error( + HTTPStatus.BAD_REQUEST, + "Bad request version (%r)" % version) + return False + if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1": + self.close_connection = False + if version_number >= (2, 0): + self.send_error( + HTTPStatus.HTTP_VERSION_NOT_SUPPORTED, + "Invalid HTTP version (%s)" % base_version_number) + return False + elif len(words) == 2: + command, path = words + self.close_connection = True + if command != 'GET': + self.send_error( + HTTPStatus.BAD_REQUEST, + "Bad HTTP/0.9 request type (%r)" % command) + return False + elif not words: + return False + else: + self.send_error( + HTTPStatus.BAD_REQUEST, + "Bad request syntax (%r)" % requestline) + return False + self.command, self.path, self.request_version = command, path, version + + # Examine the headers and look for a Connection directive. + try: + self.headers = http.client.parse_headers(self.rfile, + _class=self.MessageClass) + except http.client.LineTooLong as err: + self.send_error( + HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, + "Line too long", + str(err)) + return False + except http.client.HTTPException as err: + self.send_error( + HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, + "Too many headers", + str(err) + ) + return False + + conntype = self.headers.get('Connection', "") + if conntype.lower() == 'close': + self.close_connection = True + elif (conntype.lower() == 'keep-alive' and + self.protocol_version >= "HTTP/1.1"): + self.close_connection = False + # Examine the headers and look for an Expect directive + expect = self.headers.get('Expect', "") + if (expect.lower() == "100-continue" and + self.protocol_version >= "HTTP/1.1" and + self.request_version >= "HTTP/1.1"): + if not self.handle_expect_100(): + return False + return True + + def handle_expect_100(self): + """Decide what to do with an "Expect: 100-continue" header. + + If the client is expecting a 100 Continue response, we must + respond with either a 100 Continue or a final response before + waiting for the request body. The default is to always respond + with a 100 Continue. You can behave differently (for example, + reject unauthorized requests) by overriding this method. + + This method should either return True (possibly after sending + a 100 Continue response) or send an error response and return + False. + + """ + self.send_response_only(HTTPStatus.CONTINUE) + self.end_headers() + return True + + def handle_one_request(self): + """Handle a single HTTP request. + + You normally don't need to override this method; see the class + __doc__ string for information on how to handle specific HTTP + commands such as GET and POST. + + """ + try: + self.raw_requestline = self.rfile.readline(65537) + if len(self.raw_requestline) > 65536: + self.requestline = '' + self.request_version = '' + self.command = '' + self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG) + return + if not self.raw_requestline: + self.close_connection = True + return + if not self.parse_request(): + # An error code has been sent, just exit + return + mname = 'do_' + self.command + if not hasattr(self, mname): + self.send_error( + HTTPStatus.NOT_IMPLEMENTED, + "Unsupported method (%r)" % self.command) + return + method = getattr(self, mname) + method() + self.wfile.flush() #actually send the response if not already done. + except socket.timeout as e: + #a read or a write timed out. Discard this connection + self.log_error("Request timed out: %r", e) + self.close_connection = True + return + + def handle(self): + """Handle multiple requests if necessary.""" + self.close_connection = True + + self.handle_one_request() + while not self.close_connection: + self.handle_one_request() + + def send_error(self, code, message=None, explain=None): + """Send and log an error reply. + + Arguments are + * code: an HTTP error code + 3 digits + * message: a simple optional 1 line reason phrase. + *( HTAB / SP / VCHAR / %x80-FF ) + defaults to short entry matching the response code + * explain: a detailed message defaults to the long entry + matching the response code. + + This sends an error response (so it must be called before any + output has been generated), logs the error, and finally sends + a piece of HTML explaining the error to the user. + + """ + + try: + shortmsg, longmsg = self.responses[code] + except KeyError: + shortmsg, longmsg = '???', '???' + if message is None: + message = shortmsg + if explain is None: + explain = longmsg + self.log_error("code %d, message %s", code, message) + self.send_response(code, message) + self.send_header('Connection', 'close') + + # Message body is omitted for cases described in: + # - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified) + # - RFC7231: 6.3.6. 205(Reset Content) + body = None + if (code >= 200 and + code not in (HTTPStatus.NO_CONTENT, + HTTPStatus.RESET_CONTENT, + HTTPStatus.NOT_MODIFIED)): + # HTML encode to prevent Cross Site Scripting attacks + # (see bug #1100201) + content = (self.error_message_format % { + 'code': code, + 'message': html.escape(message, quote=False), + 'explain': html.escape(explain, quote=False) + }) + body = content.encode('UTF-8', 'replace') + self.send_header("Content-Type", self.error_content_type) + self.send_header('Content-Length', int(len(body))) + self.end_headers() + + if self.command != 'HEAD' and body: + self.wfile.write(body) + + def send_response(self, code, message=None): + """Add the response header to the headers buffer and log the + response code. + + Also send two standard headers with the server software + version and the current date. + + """ + self.log_request(code) + self.send_response_only(code, message) + self.send_header('Server', self.version_string()) + self.send_header('Date', self.date_time_string()) + + def send_response_only(self, code, message=None): + """Send the response header only.""" + if self.request_version != 'HTTP/0.9': + if message is None: + if code in self.responses: + message = self.responses[code][0] + else: + message = '' + if not hasattr(self, '_headers_buffer'): + self._headers_buffer = [] + self._headers_buffer.append(("%s %d %s\r\n" % + (self.protocol_version, code, message)).encode( + 'latin-1', 'strict')) + + def send_header(self, keyword, value): + """Send a MIME header to the headers buffer.""" + if self.request_version != 'HTTP/0.9': + if not hasattr(self, '_headers_buffer'): + self._headers_buffer = [] + self._headers_buffer.append( + ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict')) + + if keyword.lower() == 'connection': + if value.lower() == 'close': + self.close_connection = True + elif value.lower() == 'keep-alive': + self.close_connection = False + + def end_headers(self): + """Send the blank line ending the MIME headers.""" + if self.request_version != 'HTTP/0.9': + self._headers_buffer.append(b"\r\n") + self.flush_headers() + + def flush_headers(self): + if hasattr(self, '_headers_buffer'): + self.wfile.write(b"".join(self._headers_buffer)) + self._headers_buffer = [] + + def log_request(self, code='-', size='-'): + """Log an accepted request. + + This is called by send_response(). + + """ + if isinstance(code, HTTPStatus): + code = code.value + self.log_message('"%s" %s %s', + self.requestline, str(code), str(size)) + + def log_error(self, format, *args): + """Log an error. + + This is called when a request cannot be fulfilled. By + default it passes the message on to log_message(). + + Arguments are the same as for log_message(). + + XXX This should go to the separate error log. + + """ + + self.log_message(format, *args) + + def log_message(self, format, *args): + """Log an arbitrary message. + + This is used by all other logging functions. Override + it if you have specific logging wishes. + + The first argument, FORMAT, is a format string for the + message to be logged. If the format string contains + any % escapes requiring parameters, they should be + specified as subsequent arguments (it's just like + printf!). + + The client ip and current date/time are prefixed to + every message. + + """ + + sys.stderr.write("%s - - [%s] %s\n" % + (self.address_string(), + self.log_date_time_string(), + format%args)) + + def version_string(self): + """Return the server software version string.""" + return self.server_version + ' ' + self.sys_version + + def date_time_string(self, timestamp=None): + """Return the current date and time formatted for a message header.""" + if timestamp is None: + timestamp = time.time() + return email.utils.formatdate(timestamp, usegmt=True) + + def log_date_time_string(self): + """Return the current time formatted for logging.""" + now = time.time() + year, month, day, hh, mm, ss, x, y, z = time.localtime(now) + s = "%02d/%3s/%04d %02d:%02d:%02d" % ( + day, self.monthname[month], year, hh, mm, ss) + return s + + weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] + + monthname = [None, + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + + def address_string(self): + """Return the client address.""" + + return self.client_address[0] + + # Essentially static class variables + + # The version of the HTTP protocol we support. + # Set this to HTTP/1.1 to enable automatic keepalive + protocol_version = "HTTP/1.0" + + # MessageClass used to parse headers + MessageClass = http.client.HTTPMessage + + # hack to maintain backwards compatibility + responses = { + v: (v.phrase, v.description) + for v in HTTPStatus.__members__.values() + } + + +class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): + + """Simple HTTP request handler with GET and HEAD commands. + + This serves files from the current directory and any of its + subdirectories. The MIME type for files is determined by + calling the .guess_type() method. + + The GET and HEAD requests are identical except that the HEAD + request omits the actual contents of the file. + + """ + + server_version = "SimpleHTTP/" + __version__ + + def do_GET(self): + """Serve a GET request.""" + f = self.send_head() + if f: + try: + self.copyfile(f, self.wfile) + finally: + f.close() + + def do_HEAD(self): + """Serve a HEAD request.""" + f = self.send_head() + if f: + f.close() + + def send_head(self): + """Common code for GET and HEAD commands. + + This sends the response code and MIME headers. + + Return value is either a file object (which has to be copied + to the outputfile by the caller unless the command was HEAD, + and must be closed by the caller under all circumstances), or + None, in which case the caller has nothing further to do. + + """ + path = self.translate_path(self.path) + f = None + if os.path.isdir(path): + parts = urllib.parse.urlsplit(self.path) + if not parts.path.endswith('/'): + # redirect browser - doing basically what apache does + self.send_response(HTTPStatus.MOVED_PERMANENTLY) + new_parts = (parts[0], parts[1], parts[2] + '/', + parts[3], parts[4]) + new_url = urllib.parse.urlunsplit(new_parts) + self.send_header("Location", new_url) + self.end_headers() + return None + for index in "index.html", "index.htm": + index = os.path.join(path, index) + if os.path.exists(index): + path = index + break + else: + return self.list_directory(path) + ctype = self.guess_type(path) + try: + f = open(path, 'rb') + except OSError: + self.send_error(HTTPStatus.NOT_FOUND, "File not found") + return None + try: + self.send_response(HTTPStatus.OK) + self.send_header("Content-type", ctype) + fs = os.fstat(f.fileno()) + self.send_header("Content-Length", str(fs[6])) + self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) + self.end_headers() + return f + except: + f.close() + raise + + def list_directory(self, path): + """Helper to produce a directory listing (absent index.html). + + Return value is either a file object, or None (indicating an + error). In either case, the headers are sent, making the + interface the same as for send_head(). + + """ + try: + list = os.listdir(path) + except OSError: + self.send_error( + HTTPStatus.NOT_FOUND, + "No permission to list directory") + return None + list.sort(key=lambda a: a.lower()) + r = [] + try: + displaypath = urllib.parse.unquote(self.path, + errors='surrogatepass') + except UnicodeDecodeError: + displaypath = urllib.parse.unquote(path) + displaypath = html.escape(displaypath, quote=False) + enc = sys.getfilesystemencoding() + title = 'Directory listing for %s' % displaypath + r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" ' + '"http://www.w3.org/TR/html4/strict.dtd">') + r.append('<html>\n<head>') + r.append('<meta http-equiv="Content-Type" ' + 'content="text/html; charset=%s">' % enc) + r.append('<title>%s</title>\n</head>' % title) + r.append('<body>\n<h1>%s</h1>' % title) + r.append('<hr>\n<ul>') + for name in list: + fullname = os.path.join(path, name) + displayname = linkname = name + # Append / for directories or @ for symbolic links + if os.path.isdir(fullname): + displayname = name + "/" + linkname = name + "/" + if os.path.islink(fullname): + displayname = name + "@" + # Note: a link to a directory displays with @ and links with / + r.append('<li><a href="%s">%s</a></li>' + % (urllib.parse.quote(linkname, + errors='surrogatepass'), + html.escape(displayname, quote=False))) + r.append('</ul>\n<hr>\n</body>\n</html>\n') + encoded = '\n'.join(r).encode(enc, 'surrogateescape') + f = io.BytesIO() + f.write(encoded) + f.seek(0) + self.send_response(HTTPStatus.OK) + self.send_header("Content-type", "text/html; charset=%s" % enc) + self.send_header("Content-Length", str(len(encoded))) + self.end_headers() + return f + + def translate_path(self, path): + """Translate a /-separated PATH to the local filename syntax. + + Components that mean special things to the local file system + (e.g. drive or directory names) are ignored. (XXX They should + probably be diagnosed.) + + """ + # abandon query parameters + path = path.split('?',1)[0] + path = path.split('#',1)[0] + # Don't forget explicit trailing slash when normalizing. Issue17324 + trailing_slash = path.rstrip().endswith('/') + try: + path = urllib.parse.unquote(path, errors='surrogatepass') + except UnicodeDecodeError: + path = urllib.parse.unquote(path) + path = posixpath.normpath(path) + words = path.split('/') + words = filter(None, words) + path = os.getcwd() + for word in words: + if os.path.dirname(word) or word in (os.curdir, os.pardir): + # Ignore components that are not a simple file/directory name + continue + path = os.path.join(path, word) + if trailing_slash: + path += '/' + return path + + def copyfile(self, source, outputfile): + """Copy all data between two file objects. + + The SOURCE argument is a file object open for reading + (or anything with a read() method) and the DESTINATION + argument is a file object open for writing (or + anything with a write() method). + + The only reason for overriding this would be to change + the block size or perhaps to replace newlines by CRLF + -- note however that this the default server uses this + to copy binary data as well. + + """ + shutil.copyfileobj(source, outputfile) + + def guess_type(self, path): + """Guess the type of a file. + + Argument is a PATH (a filename). + + Return value is a string of the form type/subtype, + usable for a MIME Content-type header. + + The default implementation looks the file's extension + up in the table self.extensions_map, using application/octet-stream + as a default; however it would be permissible (if + slow) to look inside the data to make a better guess. + + """ + + base, ext = posixpath.splitext(path) + if ext in self.extensions_map: + return self.extensions_map[ext] + ext = ext.lower() + if ext in self.extensions_map: + return self.extensions_map[ext] + else: + return self.extensions_map[''] + + if not mimetypes.inited: + mimetypes.init() # try to read system mime.types + + extensions_map = mimetypes.types_map.copy() + + extensions_map.update({ + '': 'application/octet-stream', # Default + '.py': 'text/plain', + '.c': 'text/plain', + '.h': 'text/plain', + }) + + +# Utilities for CGIHTTPRequestHandler + +def _url_collapse_path(path): + """ + Given a URL path, remove extra '/'s and '.' path elements and collapse + any '..' references and returns a collapsed path. + + Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. + The utility of this function is limited to is_cgi method and helps + preventing some security attacks. + + Returns: The reconstituted URL, which will always start with a '/'. + + Raises: IndexError if too many '..' occur within the path. + + """ + # Query component should not be involved. + path, _, query = path.partition('?') + path = urllib.parse.unquote(path) + + # Similar to os.path.split(os.path.normpath(path)) but specific to URL + # path semantics rather than local operating system semantics. + path_parts = path.split('/') + head_parts = [] + for part in path_parts[:-1]: + if part == '..': + head_parts.pop() # IndexError if more '..' than prior parts + elif part and part != '.': + head_parts.append( part ) + if path_parts: + tail_part = path_parts.pop() + if tail_part: + if tail_part == '..': + head_parts.pop() + tail_part = '' + elif tail_part == '.': + tail_part = '' + else: + tail_part = '' + + if query: + tail_part = '?'.join((tail_part, query)) + + splitpath = ('/' + '/'.join(head_parts), tail_part) + collapsed_path = "/".join(splitpath) + + return collapsed_path + + + +nobody = None + +def nobody_uid(): + """Internal routine to get nobody's uid""" + global nobody + if nobody: + return nobody + try: + import pwd + except ImportError: + return -1 + try: + nobody = pwd.getpwnam('nobody')[2] + except KeyError: + nobody = 1 + max(x[2] for x in pwd.getpwall()) + return nobody + + +def executable(path): + """Test for executable file.""" + return os.access(path, os.X_OK) + + +class CGIHTTPRequestHandler(SimpleHTTPRequestHandler): + + """Complete HTTP server with GET, HEAD and POST commands. + + GET and HEAD also support running CGI scripts. + + The POST command is *only* implemented for CGI scripts. + + """ + + # Determine platform specifics + have_fork = hasattr(os, 'fork') + + # Make rfile unbuffered -- we need to read one line and then pass + # the rest to a subprocess, so we can't use buffered input. + rbufsize = 0 + + def do_POST(self): + """Serve a POST request. + + This is only implemented for CGI scripts. + + """ + + if self.is_cgi(): + self.run_cgi() + else: + self.send_error( + HTTPStatus.NOT_IMPLEMENTED, + "Can only POST to CGI scripts") + + def send_head(self): + """Version of send_head that support CGI scripts""" + if self.is_cgi(): + return self.run_cgi() + else: + return SimpleHTTPRequestHandler.send_head(self) + + def is_cgi(self): + """Test whether self.path corresponds to a CGI script. + + Returns True and updates the cgi_info attribute to the tuple + (dir, rest) if self.path requires running a CGI script. + Returns False otherwise. + + If any exception is raised, the caller should assume that + self.path was rejected as invalid and act accordingly. + + The default implementation tests whether the normalized url + path begins with one of the strings in self.cgi_directories + (and the next character is a '/' or the end of the string). + + """ + collapsed_path = _url_collapse_path(self.path) + dir_sep = collapsed_path.find('/', 1) + head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:] + if head in self.cgi_directories: + self.cgi_info = head, tail + return True + return False + + + cgi_directories = ['/cgi-bin', '/htbin'] + + def is_executable(self, path): + """Test whether argument path is an executable file.""" + return executable(path) + + def is_python(self, path): + """Test whether argument path is a Python script.""" + head, tail = os.path.splitext(path) + return tail.lower() in (".py", ".pyw") + + def run_cgi(self): + """Execute a CGI script.""" + dir, rest = self.cgi_info + path = dir + '/' + rest + i = path.find('/', len(dir)+1) + while i >= 0: + nextdir = path[:i] + nextrest = path[i+1:] + + scriptdir = self.translate_path(nextdir) + if os.path.isdir(scriptdir): + dir, rest = nextdir, nextrest + i = path.find('/', len(dir)+1) + else: + break + + # find an explicit query string, if present. + rest, _, query = rest.partition('?') + + # dissect the part after the directory name into a script name & + # a possible additional path, to be stored in PATH_INFO. + i = rest.find('/') + if i >= 0: + script, rest = rest[:i], rest[i:] + else: + script, rest = rest, '' + + scriptname = dir + '/' + script + scriptfile = self.translate_path(scriptname) + if not os.path.exists(scriptfile): + self.send_error( + HTTPStatus.NOT_FOUND, + "No such CGI script (%r)" % scriptname) + return + if not os.path.isfile(scriptfile): + self.send_error( + HTTPStatus.FORBIDDEN, + "CGI script is not a plain file (%r)" % scriptname) + return + ispy = self.is_python(scriptname) + if self.have_fork or not ispy: + if not self.is_executable(scriptfile): + self.send_error( + HTTPStatus.FORBIDDEN, + "CGI script is not executable (%r)" % scriptname) + return + + # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html + # XXX Much of the following could be prepared ahead of time! + env = copy.deepcopy(os.environ) + env['SERVER_SOFTWARE'] = self.version_string() + env['SERVER_NAME'] = self.server.server_name + env['GATEWAY_INTERFACE'] = 'CGI/1.1' + env['SERVER_PROTOCOL'] = self.protocol_version + env['SERVER_PORT'] = str(self.server.server_port) + env['REQUEST_METHOD'] = self.command + uqrest = urllib.parse.unquote(rest) + env['PATH_INFO'] = uqrest + env['PATH_TRANSLATED'] = self.translate_path(uqrest) + env['SCRIPT_NAME'] = scriptname + if query: + env['QUERY_STRING'] = query + env['REMOTE_ADDR'] = self.client_address[0] + authorization = self.headers.get("authorization") + if authorization: + authorization = authorization.split() + if len(authorization) == 2: + import base64, binascii + env['AUTH_TYPE'] = authorization[0] + if authorization[0].lower() == "basic": + try: + authorization = authorization[1].encode('ascii') + authorization = base64.decodebytes(authorization).\ + decode('ascii') + except (binascii.Error, UnicodeError): + pass + else: + authorization = authorization.split(':') + if len(authorization) == 2: + env['REMOTE_USER'] = authorization[0] + # XXX REMOTE_IDENT + if self.headers.get('content-type') is None: + env['CONTENT_TYPE'] = self.headers.get_content_type() + else: + env['CONTENT_TYPE'] = self.headers['content-type'] + length = self.headers.get('content-length') + if length: + env['CONTENT_LENGTH'] = length + referer = self.headers.get('referer') + if referer: + env['HTTP_REFERER'] = referer + accept = [] + for line in self.headers.getallmatchingheaders('accept'): + if line[:1] in "\t\n\r ": + accept.append(line.strip()) + else: + accept = accept + line[7:].split(',') + env['HTTP_ACCEPT'] = ','.join(accept) + ua = self.headers.get('user-agent') + if ua: + env['HTTP_USER_AGENT'] = ua + co = filter(None, self.headers.get_all('cookie', [])) + cookie_str = ', '.join(co) + if cookie_str: + env['HTTP_COOKIE'] = cookie_str + # XXX Other HTTP_* headers + # Since we're setting the env in the parent, provide empty + # values to override previously set values + for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', + 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): + env.setdefault(k, "") + + self.send_response(HTTPStatus.OK, "Script output follows") + self.flush_headers() + + decoded_query = query.replace('+', ' ') + + if self.have_fork: + # Unix -- fork as we should + args = [script] + if '=' not in decoded_query: + args.append(decoded_query) + nobody = nobody_uid() + self.wfile.flush() # Always flush before forking + pid = os.fork() + if pid != 0: + # Parent + pid, sts = os.waitpid(pid, 0) + # throw away additional data [see bug #427345] + while select.select([self.rfile], [], [], 0)[0]: + if not self.rfile.read(1): + break + if sts: + self.log_error("CGI script exit status %#x", sts) + return + # Child + try: + try: + os.setuid(nobody) + except OSError: + pass + os.dup2(self.rfile.fileno(), 0) + os.dup2(self.wfile.fileno(), 1) + os.execve(scriptfile, args, env) + except: + self.server.handle_error(self.request, self.client_address) + os._exit(127) + + else: + # Non-Unix -- use subprocess + import subprocess + cmdline = [scriptfile] + if self.is_python(scriptfile): + interp = sys.executable + if interp.lower().endswith("w.exe"): + # On Windows, use python.exe, not pythonw.exe + interp = interp[:-5] + interp[-4:] + cmdline = [interp, '-u'] + cmdline + if '=' not in query: + cmdline.append(query) + self.log_message("command: %s", subprocess.list2cmdline(cmdline)) + try: + nbytes = int(length) + except (TypeError, ValueError): + nbytes = 0 + p = subprocess.Popen(cmdline, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env = env + ) + if self.command.lower() == "post" and nbytes > 0: + data = self.rfile.read(nbytes) + else: + data = None + # throw away additional data [see bug #427345] + while select.select([self.rfile._sock], [], [], 0)[0]: + if not self.rfile._sock.recv(1): + break + stdout, stderr = p.communicate(data) + self.wfile.write(stdout) + if stderr: + self.log_error('%s', stderr) + p.stderr.close() + p.stdout.close() + status = p.returncode + if status: + self.log_error("CGI script exit status %#x", status) + else: + self.log_message("CGI script exited OK") + + +def test(HandlerClass=BaseHTTPRequestHandler, + ServerClass=HTTPServer, protocol="HTTP/1.0", port=8000, bind=""): + """Test the HTTP request handler class. + + This runs an HTTP server on port 8000 (or the port argument). + + """ + server_address = (bind, port) + + HandlerClass.protocol_version = protocol + with ServerClass(server_address, HandlerClass) as httpd: + sa = httpd.socket.getsockname() + serve_message = "Serving HTTP on {host} port {port} (http://{host}:{port}/) ..." + print(serve_message.format(host=sa[0], port=sa[1])) + try: + httpd.serve_forever() + except KeyboardInterrupt: + print("\nKeyboard interrupt received, exiting.") + sys.exit(0) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--cgi', action='store_true', + help='Run as CGI Server') + parser.add_argument('--bind', '-b', default='', metavar='ADDRESS', + help='Specify alternate bind address ' + '[default: all interfaces]') + parser.add_argument('port', action='store', + default=8000, type=int, + nargs='?', + help='Specify alternate port [default: 8000]') + args = parser.parse_args() + if args.cgi: + handler_class = CGIHTTPRequestHandler + else: + handler_class = SimpleHTTPRequestHandler + test(HandlerClass=handler_class, port=args.port, bind=args.bind) diff --git a/modules/language/python/module/json.py b/modules/language/python/module/json.py index 0d6951f..93a7b1c 100644 --- a/modules/language/python/module/json.py +++ b/modules/language/python/module/json.py @@ -108,7 +108,7 @@ __author__ = 'Bob Ippolito <bob@redivi.com>' from json.decoder import JSONDecoder, JSONDecodeError from json.encoder import JSONEncoder import codecs -pk(1) + _default_encoder = JSONEncoder( skipkeys=False, ensure_ascii=True, @@ -118,7 +118,7 @@ _default_encoder = JSONEncoder( separators=None, default=None, ) -pk(2) + def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, default=None, sort_keys=False, **kw): diff --git a/modules/language/python/module/os.scm b/modules/language/python/module/os.scm index 764c701..7823633 100644 --- a/modules/language/python/module/os.scm +++ b/modules/language/python/module/os.scm @@ -1399,7 +1399,10 @@ (yield top dirs nondirs))))))) (define (path:islink p) - (ca (S_ISLNK (stat:mode ((@ (guile) stat) (path-it p)))))) + (catch #t + (lambda () + (ca (S_ISLNK (stat:mode ((@ (guile) stat) (path-it p)))))) + (lambda x #f))) (define (path:samestat s1 s2) (and (equal? (ref s1 'st_dev) (ref s2 'st_dev)) diff --git a/modules/language/python/module/os/path.scm b/modules/language/python/module/os/path.scm index 1853c86..8547a1b 100644 --- a/modules/language/python/module/os/path.scm +++ b/modules/language/python/module/os/path.scm @@ -170,17 +170,26 @@ (define (isfile p) - (ca (S_ISREG (stat:mode ((@ (guile) stat) (path-it p)))))) + (catch #t + (lambda () + (ca (S_ISREG (stat:mode ((@ (guile) stat) (path-it p)))))) + (lambda x #f))) (define (isdir p) - (ca (S_ISDIR (stat:mode ((@ (guile) stat) (path-it p)))))) + (catch #t + (lambda () + (ca (S_ISDIR (stat:mode ((@ (guile) stat) (path-it p)))))) + (lambda x #f))) (define (ismount p) - (ca - (let* ((p (path-it p)) - (q (string-append p "/.."))) - (not (= (stat:dev ((@ (guile) stat) p)) - (stat:dev ((@ (guile) stat) q))))))) + (catch #t + (lambda () + (ca + (let* ((p (path-it p)) + (q (string-append p "/.."))) + (not (= (stat:dev ((@ (guile) stat) p)) + (stat:dev ((@ (guile) stat) q))))))) + (lambda x #f))) (define (normcase x) x) diff --git a/modules/language/python/module/re/parser.scm b/modules/language/python/module/re/parser.scm index 765500d..5af54e8 100644 --- a/modules/language/python/module/re/parser.scm +++ b/modules/language/python/module/re/parser.scm @@ -14,7 +14,7 @@ (mk q+? #:+? "+?") (define subexpr (f-list #:sub - (f-seq (f-tag "(") (Ds ee-) (f-tag ")")))) + (f-seq (f-tag "(") (Ds ee) (f-tag ")")))) (define f-back (f-or (f-list #:class (mk-token (f-reg! "[AZbBdDsSwntr]"))) @@ -89,14 +89,14 @@ lookh lookh! rev rev! f-^ f-$ flags)) (define spec (f-list #:op atom (f-or! q+? q?? q*? q* q? q+ repn? repnm? repn repnm))) (define aatom (f-or! spec atom)) -(define f-com (f-seq (f-tag "#") (f* (f-not (f-or! f-eof f-nl))))) +(define f-com (f-seq (f-tag "#") + (f-reg "[ \t]") + (f* (f-not (f-or! f-eof f-nl))))) (define ws (Ds (if (fluid-ref pretty) (f* (f-or! f-com f-nl (f-reg "[ \t\r]"))) f-true))) (define line (f-cons* #:seq ws aatom ws (ff* (f-seq ws aatom ws) ))) -(define line- (f-cons* #:seq aatom (ff* (f-seq aatom) ))) (define ee (f-cons* #:or line (ff* (f-seq f-bar line)))) -(define ee- (f-cons* #:or line- (ff* (f-seq f-bar line-)))) (define pretty (make-fluid #f)) (define (parse-reg str) (with-fluids ((*whitespace* ws)) diff --git a/modules/language/python/module/urllib/parse.py b/modules/language/python/module/urllib/parse.py index 3bfe63c..97747ce 100644 --- a/modules/language/python/module/urllib/parse.py +++ b/modules/language/python/module/urllib/parse.py @@ -35,8 +35,11 @@ import collections __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", "urlsplit", "urlunsplit", "urlencode", "parse_qs", "parse_qsl", "quote", "quote_plus", "quote_from_bytes", - "unquote", "unquote_plus", "unquote_to_bytes", - "DefragResult", "ParseResult", "SplitResult", + "unquote", "unquote_plus", "unquote_to_bytes","unwrap", + "DefragResult", "ParseResult", "SplitResult","splittype", + "splithost", "splitport", "splituser", "splitpasswd", + "splitattr", "splitquery", "splitvalue", "splittag", + "to_bytes", "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"] # A classification of schemes. diff --git a/modules/language/python/module/urllib/request.py b/modules/language/python/module/urllib/request.py index d5372e5..5df7af8 100644 --- a/modules/language/python/module/urllib/request.py +++ b/modules/language/python/module/urllib/request.py @@ -82,7 +82,7 @@ f = urllib.request.urlopen('http://www.python.org/') # Possible extensions: # complex proxies XXX not sure what exactly was meant by this # abstract factory for opener -pk(1) + import base64 import bisect import email @@ -100,7 +100,7 @@ import collections import tempfile import contextlib import warnings -pk(2) + from urllib.error import URLError, HTTPError, ContentTooShortError from urllib.parse import ( @@ -109,7 +109,7 @@ from urllib.parse import ( splitattr, splitquery, splitvalue, splittag, to_bytes, unquote_to_bytes, urlunparse) from urllib.response import addinfourl, addclosehook -pk(3) + # check for SSL try: import ssl @@ -1647,7 +1647,9 @@ MAXFTPCACHE = 10 # Trim the ftp cache beyond this size # Helper for non-unix systems if os.name == 'nt': - from nturl2path import url2pathname, pathname2url + from nturl2path import url2pathname_, pathname2url_ + url2pathname = url2pathname_ + pathname2url = pathname2url_ else: def url2pathname(pathname): """OS-specific conversion from a relative URL of the 'file' scheme diff --git a/modules/language/python/number.scm b/modules/language/python/number.scm index 089095d..3744fec 100644 --- a/modules/language/python/number.scm +++ b/modules/language/python/number.scm @@ -23,6 +23,23 @@ py-ilogior py-ilogand py-ipow py-isub py-i/ py-irshift py-ilogxor)) +(define-method (> (o <boolean>) x) + (> (if o 1 0) x)) +(define-method (> x (o <boolean>)) + (> x (if o 1 0))) +(define-method (>= (o <boolean>) x) + (>= (if o 1 0) x)) +(define-method (>= x (o <boolean>)) + (>= x (if o 1 0))) +(define-method (< (o <boolean>) x) + (< (if o 1 0) x)) +(define-method (< x (o <boolean>)) + (< x (if o 1 0))) +(define-method (<= (o <boolean>) x) + (<= (if o 1 0) x)) +(define-method (<= x (o <boolean>)) + (<= x (if o 1 0))) + (define-syntax-rule (aif it p x y) (let ((it p)) (if it x y))) (define-syntax-rule (mki py-iadd __iadd__) @@ -617,3 +634,4 @@ (+ "0b" (number->string (py-index o) 2))) +(set! (@@ (language python list) int) int) diff --git a/modules/oop/pf-objects.scm b/modules/oop/pf-objects.scm index cf8ded6..382cdea 100644 --- a/modules/oop/pf-objects.scm +++ b/modules/oop/pf-objects.scm @@ -1661,6 +1661,8 @@ explicitly tell it to not update etc. (define __getattribute__ attr) (define __setattr__ (object-method *setattr*)) (define __str__ *str*) + (define __ne__ (object-method + (lambda (self x) (not (equal? self x))))) (define __format__ (lambda (self x) (*str* self))) (define __reduce_ex__ (lambda x (error "not implemented"))) (define __weakref__ (lambda (self) self)))) |