author     Stefan Israelsson Tampe <stefan.itampe@gmail.com>  2018-09-04 16:19:39 +0200
committer  Stefan Israelsson Tampe <stefan.itampe@gmail.com>  2018-09-04 16:19:39 +0200
commit     e37613527a05fc56f8ee5886a868c948b3ee4cfc
tree       38c51006b81a165133fbd44566940decb06fae5d
parent     8223db0b1a660ad10830e9a5a2fe71858cf52481
pipes
-rw-r--r--  modules/language/python/compile.scm             |   34
-rw-r--r--  modules/language/python/exceptions.scm          |    4
-rw-r--r--  modules/language/python/module/binascii.scm     |   32
-rw-r--r--  modules/language/python/module/mimetypes.py     |  553
-rw-r--r--  modules/language/python/module/pipes.py         |  248
-rw-r--r--  modules/language/python/module/re/compile.scm   |   16
-rw-r--r--  modules/language/python/module/shlex.py         |  329
-rw-r--r--  modules/language/python/module/urllib/parse.py  | 1022
8 files changed, 2223 insertions, 15 deletions
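
Most of the added lines below are straight ports of CPython standard-library modules (mimetypes, pipes, shlex, urllib.parse) into the Guile Python layer. As a quick smoke test of the new pipes port, the classic usage from the pipes docstring should work once the module compiles. This is a minimal sketch, assuming the Guile port keeps CPython's pipes API; the 'tr' command and the file names are illustrative only:

    from pipes import Template

    t = Template()
    # '--': this step reads stdin and writes stdout, so it can sit
    # anywhere in the pipeline.
    t.append('tr a-z A-Z', '--')
    # Run infile.txt through the pipeline into outfile.txt; the return
    # value is the exit status of the generated shell pipeline.
    sts = t.copy('infile.txt', 'outfile.txt')
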
diff --git a/modules/language/python/compile.scm b/modules/language/python/compile.scm index 3f02f38..a83a781 100644 --- a/modules/language/python/compile.scm +++ b/modules/language/python/compile.scm @@ -1879,7 +1879,9 @@ (map (lambda (x) (exp '() x)) arglist)) - + + (define name (string-join (map symbol->string args) ".")) + `((define-module (language python module ,@args) #:pure #:use-module ((guile) #:select @@ -1890,6 +1892,7 @@ #:use-module ((language python compile) #:select (pks)) #:use-module (language python exceptions)) (,(G 'define) __doc__ #f) + (,(G 'define) __name__ ,name) (,(G 'define) __module__ (,(G 'quote) (language python module ,@args)))))) (x '()))) @@ -2580,7 +2583,7 @@ (define-syntax ref-x (lambda (x) - (syntax-case x (quote __dict__) + (syntax-case x (@) ((_ v) #'v) ((_ v (#:fastfkn-ref f tag) . l) @@ -2593,20 +2596,29 @@ (apply f v x))) . l)))) ((_ v (#:fast-id f _) . l) #'(ref-x (f v) . l)) - ((_ v (#:identifier '__dict__) . l) + + ((_ v (#:identifier ((@ x q) dict)) . l) + (equal? (syntax->datum #'dict) '__dict__) #'(ref-x (py-dict v) . l)) + ((_ v (#:identifier x) . l) #'(ref-x (wr x (ref v x miss)) . l)) + ((_ v (#:call-obj x) . l) #'(ref-x (x v) . l)) + ((_ v (#:call x ...) . l) #'(ref-x (v x ...) . l)) + ((_ v (#:apply x ...) . l) #'(ref-x (py-apply v x ...) . l)) + ((_ v (#:apply x ...) . l) #'(ref-x (py-apply v x ...) . l)) + ((_ v (#:vecref x) . l) #'(ref-x (pylist-ref v x) . l)) + ((_ v (#:vecsub . x) . l) #'(ref-x (pylist-slice v . x) . l))))) @@ -2692,7 +2704,7 @@ (define-syntax boolit - (syntax-rules (and eq? equal? or not < <= > >=) + (syntax-rules (@ and eq? equal? or not < <= > >=) ((_ (and x y)) (and (boolit x) (boolit y))) ((_ (or x y)) (or (boolit x) (boolit y))) ((_ (not x )) (not (boolit x))) @@ -2741,18 +2753,28 @@ (define-syntax qset! (lambda (x) - (syntax-case x () + (pkkk x) + (syntax-case x (@@ @) ((_ (cons x y) v) (equal? (syntax->datum #'cons) '(@ (guile) cons)) #'(let ((w v)) (qset! x (car w)) (qset! y (cdr w)))) - ((_ '() v) + ((_ ((@ (guile) q) ()) v) + (equal? (syntax->datum #'q) 'quote) #'(if (not (null? v)) (raise (ValueError "too many values to unpack")) (values))) + + ((_ ((@@ u li) x) v) + (equal? (syntax->datum #'li) 'to-pylist) + #'(let ((w (to-list v))) + (qset! x w))) + ((_ (ref v a ...) w) + #'(set-x v (a ...) w)) + ((_ x v) #'(set! 
x v))))) diff --git a/modules/language/python/exceptions.scm b/modules/language/python/exceptions.scm index f2cceee..5b58f9e 100644 --- a/modules/language/python/exceptions.scm +++ b/modules/language/python/exceptions.scm @@ -17,7 +17,8 @@ UnicodeDecodeError LookupError IndentationError KeyboardInterrupt MemoryError NameError EOFError UnicodeError UnicodeEncodeError - FileExistsError FileNotFoundError IsADirectoryError )) + FileExistsError FileNotFoundError IsADirectoryError + EnvironmentError)) (define-syntax-rule (aif it p x y) (let ((it p)) (if it x y))) @@ -69,6 +70,7 @@ (define StopIteration 'StopIteration) (define GeneratorExit 'GeneratorExit) +(define-er EnvironmentError 'EnvironmentError) (define-er UnicodeEncodeError 'UnicodeEncodeError) (define-er FileExistsError 'FileExistsError) (define-er FileNotFoundError 'FileNotFoundError) diff --git a/modules/language/python/module/binascii.scm b/modules/language/python/module/binascii.scm index ab3dd69..2fc0e62 100644 --- a/modules/language/python/module/binascii.scm +++ b/modules/language/python/module/binascii.scm @@ -7,7 +7,7 @@ #:use-module (rnrs bytevectors) #:use-module (oop pf-objects) #:export (Error Incomplete a2b_uu b2a_uu a2b_base64 b2a_base64 a2b_qp b2a_qp - a2b_hex b2a_hex)) + a2b_hex b2a_hex crc32 crc_hqx)) (define-python-class Error (Exception)) (define-python-class Incomplete (Exception)) @@ -385,4 +385,32 @@ (x (logior a1 (ash a2 4)))) (lp (+ i 2) (cons x r))) (bytes (reverse r))))))) - + +(define (id x) x) +(define-syntax-rule (mkcrc crc_hqx high xor mask) + (def (crc_hqx data (= value 0)) + (let ((n (len data)) + (d (bv-scm data))) + (let lp ((i 0) (v value)) + (if (< i n) + (let ((b (id (bytevector-u8-ref d i)))) + (let lp2 ((j 0) (x 1) (v v)) + (if (> j -8) + (let ((bit (ash (logand x b) j)) + (hbit (logand v high))) + (if (= hbit 0) + (lp2 (- j 1) (ash x 1) (logior bit (ash v 1))) + (lp2 (- j 1) (ash x 1) (logxor + xor + (logand mask + (logior + bit + (ash v 1))))))) + (lp (+ i 1) v)))) + v))))) + + +(mkcrc crc_hqx #x8000 #x1021 #xffff) +(mkcrc crc32 #x80000000 #x04c11db7 #xffffffff) + + diff --git a/modules/language/python/module/mimetypes.py b/modules/language/python/module/mimetypes.py new file mode 100644 index 0000000..76a5e87 --- /dev/null +++ b/modules/language/python/module/mimetypes.py @@ -0,0 +1,553 @@ +module(mimetypes) +"""Guess the MIME type of a file. + +This module defines two useful functions: + +guess_type(url, strict=True) -- guess the MIME type and encoding of a URL. + +guess_extension(type, strict=True) -- guess the extension for a given MIME type. 
+ +It also contains the following, for tuning the behavior: + +Data: + +knownfiles -- list of files to parse +inited -- flag set when init() has been called +suffix_map -- dictionary mapping suffixes to suffixes +encodings_map -- dictionary mapping suffixes to encodings +types_map -- dictionary mapping suffixes to types + +Functions: + +init([files]) -- parse a list of files, default knownfiles (on Windows, the + default values are taken from the registry) +read_mime_types(file) -- parse one file, return a dictionary or None +""" + +import os +import sys +import posixpath +import urllib.parse +try: + import winreg as _winreg +except ImportError: + _winreg = None + +__all__ = [ + "knownfiles", "inited", "MimeTypes", + "guess_type", "guess_all_extensions", "guess_extension", + "add_type", "init", "read_mime_types", + "suffix_map", "encodings_map", "types_map", "common_types" +] + +knownfiles = [ + "/etc/mime.types", + "/etc/httpd/mime.types", # Mac OS X + "/etc/httpd/conf/mime.types", # Apache + "/etc/apache/mime.types", # Apache 1 + "/etc/apache2/mime.types", # Apache 2 + "/usr/local/etc/httpd/conf/mime.types", + "/usr/local/lib/netscape/mime.types", + "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2 + "/usr/local/etc/mime.types", # Apache 1.3 + ] + +inited = False +_db = None + + +class MimeTypes: + """MIME-types datastore. + + This datastore can handle information from mime.types-style files + and supports basic determination of MIME type from a filename or + URL, and can guess a reasonable extension given a MIME type. + """ + + def __init__(self, filenames=(), strict=True): + if not inited: + init() + self.encodings_map = encodings_map.copy() + self.suffix_map = suffix_map.copy() + self.types_map = ({}, {}) # dict for (non-strict, strict) + self.types_map_inv = ({}, {}) + for (ext, type) in types_map.items(): + self.add_type(type, ext, True) + for (ext, type) in common_types.items(): + self.add_type(type, ext, False) + for name in filenames: + self.read(name, strict) + + def add_type(self, type, ext, strict=True): + """Add a mapping between a type and an extension. + + When the extension is already known, the new + type will replace the old one. When the type + is already known the extension will be added + to the list of known extensions. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + self.types_map[strict][ext] = type + exts = self.types_map_inv[strict].setdefault(type, []) + if ext not in exts: + exts.append(ext) + + def guess_type(self, url, strict=True): + """Guess the type of a file based on its URL. + + Return value is a tuple (type, encoding) where type is None if + the type can't be guessed (no or unknown suffix) or a string + of the form type/subtype, usable for a MIME Content-type + header; and encoding is None for no encoding or the name of + the program used to encode (e.g. compress or gzip). The + mappings are table driven. Encoding suffixes are case + sensitive; type suffixes are first tried case sensitive, then + case insensitive. + + The suffixes .tgz, .taz and .tz (case sensitive!) are all + mapped to '.tar.gz'. (This is table-driven too, using the + dictionary suffix_map.) + + Optional `strict' argument when False adds a bunch of commonly found, + but non-standard types. 
+ """ + scheme, url = urllib.parse.splittype(url) + if scheme == 'data': + # syntax of data URLs: + # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data + # mediatype := [ type "/" subtype ] *( ";" parameter ) + # data := *urlchar + # parameter := attribute "=" value + # type/subtype defaults to "text/plain" + comma = url.find(',') + if comma < 0: + # bad data URL + return None, None + semi = url.find(';', 0, comma) + if semi >= 0: + type = url[:semi] + else: + type = url[:comma] + if '=' in type or '/' not in type: + type = 'text/plain' + return type, None # never compressed, so encoding is None + base, ext = posixpath.splitext(url) + while ext in self.suffix_map: + base, ext = posixpath.splitext(base + self.suffix_map[ext]) + if ext in self.encodings_map: + encoding = self.encodings_map[ext] + base, ext = posixpath.splitext(base) + else: + encoding = None + types_map = self.types_map[True] + if ext in types_map: + return types_map[ext], encoding + elif ext.lower() in types_map: + return types_map[ext.lower()], encoding + elif strict: + return None, encoding + types_map = self.types_map[False] + if ext in types_map: + return types_map[ext], encoding + elif ext.lower() in types_map: + return types_map[ext.lower()], encoding + else: + return None, encoding + + def guess_all_extensions(self, type, strict=True): + """Guess the extensions for a file based on its MIME type. + + Return value is a list of strings giving the possible filename + extensions, including the leading dot ('.'). The extension is not + guaranteed to have been associated with any particular data stream, + but would be mapped to the MIME type `type' by guess_type(). + + Optional `strict' argument when false adds a bunch of commonly found, + but non-standard types. + """ + type = type.lower() + extensions = self.types_map_inv[True].get(type, []) + if not strict: + for ext in self.types_map_inv[False].get(type, []): + if ext not in extensions: + extensions.append(ext) + return extensions + + def guess_extension(self, type, strict=True): + """Guess the extension for a file based on its MIME type. + + Return value is a string giving a filename extension, + including the leading dot ('.'). The extension is not + guaranteed to have been associated with any particular data + stream, but would be mapped to the MIME type `type' by + guess_type(). If no extension can be guessed for `type', None + is returned. + + Optional `strict' argument when false adds a bunch of commonly found, + but non-standard types. + """ + extensions = self.guess_all_extensions(type, strict) + if not extensions: + return None + return extensions[0] + + def read(self, filename, strict=True): + """ + Read a single mime.types-format file, specified by pathname. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + with open(filename, encoding='utf-8') as fp: + self.readfp(fp, strict) + + def readfp(self, fp, strict=True): + """ + Read a single mime.types-format file. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + while 1: + line = fp.readline() + if not line: + break + words = line.split() + for i in range(len(words)): + if words[i][0] == '#': + del words[i:] + break + if not words: + continue + type, suffixes = words[0], words[1:] + for suff in suffixes: + self.add_type(type, '.' + suff, strict) + + def read_windows_registry(self, strict=True): + """ + Load the MIME types database from Windows registry. 
+ + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + + # Windows only + if not _winreg: + return + + def enum_types(mimedb): + i = 0 + while True: + try: + ctype = _winreg.EnumKey(mimedb, i) + except EnvironmentError: + break + else: + if '\0' not in ctype: + yield ctype + i += 1 + + with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '') as hkcr: + for subkeyname in enum_types(hkcr): + try: + with _winreg.OpenKey(hkcr, subkeyname) as subkey: + # Only check file extensions + if not subkeyname.startswith("."): + continue + # raises EnvironmentError if no 'Content Type' value + mimetype, datatype = _winreg.QueryValueEx( + subkey, 'Content Type') + if datatype != _winreg.REG_SZ: + continue + self.add_type(mimetype, subkeyname, strict) + except EnvironmentError: + continue + +def guess_type(url, strict=True): + """Guess the type of a file based on its URL. + + Return value is a tuple (type, encoding) where type is None if the + type can't be guessed (no or unknown suffix) or a string of the + form type/subtype, usable for a MIME Content-type header; and + encoding is None for no encoding or the name of the program used + to encode (e.g. compress or gzip). The mappings are table + driven. Encoding suffixes are case sensitive; type suffixes are + first tried case sensitive, then case insensitive. + + The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped + to ".tar.gz". (This is table-driven too, using the dictionary + suffix_map). + + Optional `strict' argument when false adds a bunch of commonly found, but + non-standard types. + """ + if _db is None: + init() + return _db.guess_type(url, strict) + + +def guess_all_extensions(type, strict=True): + """Guess the extensions for a file based on its MIME type. + + Return value is a list of strings giving the possible filename + extensions, including the leading dot ('.'). The extension is not + guaranteed to have been associated with any particular data + stream, but would be mapped to the MIME type `type' by + guess_type(). If no extension can be guessed for `type', None + is returned. + + Optional `strict' argument when false adds a bunch of commonly found, + but non-standard types. + """ + if _db is None: + init() + return _db.guess_all_extensions(type, strict) + +def guess_extension(type, strict=True): + """Guess the extension for a file based on its MIME type. + + Return value is a string giving a filename extension, including the + leading dot ('.'). The extension is not guaranteed to have been + associated with any particular data stream, but would be mapped to the + MIME type `type' by guess_type(). If no extension can be guessed for + `type', None is returned. + + Optional `strict' argument when false adds a bunch of commonly found, + but non-standard types. + """ + if _db is None: + init() + return _db.guess_extension(type, strict) + +def add_type(type, ext, strict=True): + """Add a mapping between a type and an extension. + + When the extension is already known, the new + type will replace the old one. When the type + is already known the extension will be added + to the list of known extensions. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. 
+ """ + if _db is None: + init() + return _db.add_type(type, ext, strict) + + +def init(files=None): + global suffix_map, types_map, encodings_map, common_types + global inited, _db + inited = True # so that MimeTypes.__init__() doesn't call us again + db = MimeTypes() + if files is None: + if _winreg: + db.read_windows_registry() + files = knownfiles + for file in files: + if os.path.isfile(file): + db.read(file) + encodings_map = db.encodings_map + suffix_map = db.suffix_map + types_map = db.types_map[True] + common_types = db.types_map[False] + # Make the DB a global variable now that it is fully initialized + _db = db + + +def read_mime_types(file): + try: + f = open(file) + except OSError: + return None + with f: + db = MimeTypes() + db.readfp(f, True) + return db.types_map[True] + + +def _default_mime_types(): + global suffix_map + global encodings_map + global types_map + global common_types + + suffix_map = { + '.svgz': '.svg.gz', + '.tgz': '.tar.gz', + '.taz': '.tar.gz', + '.tz': '.tar.gz', + '.tbz2': '.tar.bz2', + '.txz': '.tar.xz', + } + + encodings_map = { + '.gz': 'gzip', + '.Z': 'compress', + '.bz2': 'bzip2', + '.xz': 'xz', + } + + # Before adding new types, make sure they are either registered with IANA, + # at http://www.iana.org/assignments/media-types + # or extensions, i.e. using the x- prefix + + # If you add to these, please keep them sorted! + types_map = { + '.a' : 'application/octet-stream', + '.ai' : 'application/postscript', + '.aif' : 'audio/x-aiff', + '.aifc' : 'audio/x-aiff', + '.aiff' : 'audio/x-aiff', + '.au' : 'audio/basic', + '.avi' : 'video/x-msvideo', + '.bat' : 'text/plain', + '.bcpio' : 'application/x-bcpio', + '.bin' : 'application/octet-stream', + '.bmp' : 'image/x-ms-bmp', + '.c' : 'text/plain', + # Duplicates :( + '.cdf' : 'application/x-cdf', + '.cdf' : 'application/x-netcdf', + '.cpio' : 'application/x-cpio', + '.csh' : 'application/x-csh', + '.css' : 'text/css', + '.csv' : 'text/csv', + '.dll' : 'application/octet-stream', + '.doc' : 'application/msword', + '.dot' : 'application/msword', + '.dvi' : 'application/x-dvi', + '.eml' : 'message/rfc822', + '.eps' : 'application/postscript', + '.etx' : 'text/x-setext', + '.exe' : 'application/octet-stream', + '.gif' : 'image/gif', + '.gtar' : 'application/x-gtar', + '.h' : 'text/plain', + '.hdf' : 'application/x-hdf', + '.htm' : 'text/html', + '.html' : 'text/html', + '.ico' : 'image/vnd.microsoft.icon', + '.ief' : 'image/ief', + '.jpe' : 'image/jpeg', + '.jpeg' : 'image/jpeg', + '.jpg' : 'image/jpeg', + '.js' : 'application/javascript', + '.json' : 'application/json', + '.ksh' : 'text/plain', + '.latex' : 'application/x-latex', + '.m1v' : 'video/mpeg', + '.m3u' : 'application/vnd.apple.mpegurl', + '.m3u8' : 'application/vnd.apple.mpegurl', + '.man' : 'application/x-troff-man', + '.me' : 'application/x-troff-me', + '.mht' : 'message/rfc822', + '.mhtml' : 'message/rfc822', + '.mif' : 'application/x-mif', + '.mov' : 'video/quicktime', + '.movie' : 'video/x-sgi-movie', + '.mp2' : 'audio/mpeg', + '.mp3' : 'audio/mpeg', + '.mp4' : 'video/mp4', + '.mpa' : 'video/mpeg', + '.mpe' : 'video/mpeg', + '.mpeg' : 'video/mpeg', + '.mpg' : 'video/mpeg', + '.ms' : 'application/x-troff-ms', + '.nc' : 'application/x-netcdf', + '.nws' : 'message/rfc822', + '.o' : 'application/octet-stream', + '.obj' : 'application/octet-stream', + '.oda' : 'application/oda', + '.p12' : 'application/x-pkcs12', + '.p7c' : 'application/pkcs7-mime', + '.pbm' : 'image/x-portable-bitmap', + '.pdf' : 'application/pdf', + '.pfx' : 
'application/x-pkcs12', + '.pgm' : 'image/x-portable-graymap', + '.pl' : 'text/plain', + '.png' : 'image/png', + '.pnm' : 'image/x-portable-anymap', + '.pot' : 'application/vnd.ms-powerpoint', + '.ppa' : 'application/vnd.ms-powerpoint', + '.ppm' : 'image/x-portable-pixmap', + '.pps' : 'application/vnd.ms-powerpoint', + '.ppt' : 'application/vnd.ms-powerpoint', + '.ps' : 'application/postscript', + '.pwz' : 'application/vnd.ms-powerpoint', + '.py' : 'text/x-python', + '.pyc' : 'application/x-python-code', + '.pyo' : 'application/x-python-code', + '.qt' : 'video/quicktime', + '.ra' : 'audio/x-pn-realaudio', + '.ram' : 'application/x-pn-realaudio', + '.ras' : 'image/x-cmu-raster', + '.rdf' : 'application/xml', + '.rgb' : 'image/x-rgb', + '.roff' : 'application/x-troff', + '.rtx' : 'text/richtext', + '.sgm' : 'text/x-sgml', + '.sgml' : 'text/x-sgml', + '.sh' : 'application/x-sh', + '.shar' : 'application/x-shar', + '.snd' : 'audio/basic', + '.so' : 'application/octet-stream', + '.src' : 'application/x-wais-source', + '.sv4cpio': 'application/x-sv4cpio', + '.sv4crc' : 'application/x-sv4crc', + '.svg' : 'image/svg+xml', + '.swf' : 'application/x-shockwave-flash', + '.t' : 'application/x-troff', + '.tar' : 'application/x-tar', + '.tcl' : 'application/x-tcl', + '.tex' : 'application/x-tex', + '.texi' : 'application/x-texinfo', + '.texinfo': 'application/x-texinfo', + '.tif' : 'image/tiff', + '.tiff' : 'image/tiff', + '.tr' : 'application/x-troff', + '.tsv' : 'text/tab-separated-values', + '.txt' : 'text/plain', + '.ustar' : 'application/x-ustar', + '.vcf' : 'text/x-vcard', + '.wav' : 'audio/x-wav', + '.webm' : 'video/webm', + '.wiz' : 'application/msword', + '.wsdl' : 'application/xml', + '.xbm' : 'image/x-xbitmap', + '.xlb' : 'application/vnd.ms-excel', + # Duplicates :( + '.xls' : 'application/excel', + '.xls' : 'application/vnd.ms-excel', + '.xml' : 'text/xml', + '.xpdl' : 'application/xml', + '.xpm' : 'image/x-xpixmap', + '.xsl' : 'application/xml', + '.xwd' : 'image/x-xwindowdump', + '.zip' : 'application/zip', + } + + # These are non-standard types, commonly found in the wild. They will + # only match if strict=0 flag is given to the API methods. + + # Please sort these too + common_types = { + '.jpg' : 'image/jpg', + '.mid' : 'audio/midi', + '.midi': 'audio/midi', + '.pct' : 'image/pict', + '.pic' : 'image/pict', + '.pict': 'image/pict', + '.rtf' : 'application/rtf', + '.xul' : 'text/xul' + } + + +_default_mime_types() diff --git a/modules/language/python/module/pipes.py b/modules/language/python/module/pipes.py new file mode 100644 index 0000000..1285eac --- /dev/null +++ b/modules/language/python/module/pipes.py @@ -0,0 +1,248 @@ +module(pipes) +"""Conversion pipeline templates. + +The problem: +------------ + +Suppose you have some data that you want to convert to another format, +such as from GIF image format to PPM image format. Maybe the +conversion involves several steps (e.g. piping it through compress or +uuencode). Some of the conversion steps may require that their input +is a disk file, others may be able to read standard input; similar for +their output. The input to the entire conversion may also be read +from a disk file or from an open file, and similar for its output. + +The module lets you construct a pipeline template by sticking one or +more conversion steps together. It will take care of creating and +removing temporary files if they are necessary to hold intermediate +data. 
You can then use the template to do conversions from many +different sources to many different destinations. The temporary +file names used are different each time the template is used. + +The templates are objects so you can create templates for many +different conversion steps and store them in a dictionary, for +instance. + + +Directions: +----------- + +To create a template: + t = Template() + +To add a conversion step to a template: + t.append(command, kind) +where kind is a string of two characters: the first is '-' if the +command reads its standard input or 'f' if it requires a file; the +second likewise for the output. The command must be valid /bin/sh +syntax. If input or output files are required, they are passed as +$IN and $OUT; otherwise, it must be possible to use the command in +a pipeline. + +To add a conversion step at the beginning: + t.prepend(command, kind) + +To convert a file to another file using a template: + sts = t.copy(infile, outfile) +If infile or outfile are the empty string, standard input is read or +standard output is written, respectively. The return value is the +exit status of the conversion pipeline. + +To open a file for reading or writing through a conversion pipeline: + fp = t.open(file, mode) +where mode is 'r' to read the file, or 'w' to write it -- just like +for the built-in function open() or for os.popen(). + +To create a new template object initialized to a given one: + t2 = t.clone() +""" # ' + + +import re +import os +import tempfile +# we import the quote function rather than the module for backward compat +# (quote used to be an undocumented but used function in pipes) +from shlex import quote + +__all__ = ["Template"] + +# Conversion step kinds + +FILEIN_FILEOUT = 'ff' # Must read & write real files +STDIN_FILEOUT = '-f' # Must write a real file +FILEIN_STDOUT = 'f-' # Must read a real file +STDIN_STDOUT = '--' # Normal pipeline element +SOURCE = '.-' # Must be first, writes stdout +SINK = '-.' 
# Must be last, reads stdin + +stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT, \ + SOURCE, SINK] + + +class Template: + """Class representing a pipeline template.""" + + def __init__(self): + """Template() returns a fresh pipeline template.""" + self.debugging = 0 + self.reset() + + def __repr__(self): + """t.__repr__() implements repr(t).""" + return '<Template instance, steps=%r>' % (self.steps,) + + def reset(self): + """t.reset() restores a pipeline template to its initial state.""" + self.steps = [] + + def clone(self): + """t.clone() returns a new pipeline template with identical + initial state as the current one.""" + t = Template() + t.steps = self.steps[:] + t.debugging = self.debugging + return t + + def debug(self, flag): + """t.debug(flag) turns debugging on or off.""" + self.debugging = flag + + def append(self, cmd, kind): + """t.append(cmd, kind) adds a new step at the end.""" + if type(cmd) is not type(''): + raise TypeError('Template.append: cmd must be a string') + if kind not in stepkinds: + raise ValueError('Template.append: bad kind %r' % (kind,)) + if kind == SOURCE: + raise ValueError('Template.append: SOURCE can only be prepended') + if self.steps and self.steps[-1][1] == SINK: + raise ValueError('Template.append: already ends with SINK') + if kind[0] == 'f' and not re.search(r'\$IN\b', cmd): + raise ValueError('Template.append: missing $IN in cmd') + if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd): + raise ValueError('Template.append: missing $OUT in cmd') + self.steps.append((cmd, kind)) + + def prepend(self, cmd, kind): + """t.prepend(cmd, kind) adds a new step at the front.""" + if type(cmd) is not type(''): + raise TypeError('Template.prepend: cmd must be a string') + if kind not in stepkinds: + raise ValueError('Template.prepend: bad kind %r' % (kind,)) + if kind == SINK: + raise ValueError('Template.prepend: SINK can only be appended') + if self.steps and self.steps[0][1] == SOURCE: + raise ValueError('Template.prepend: already begins with SOURCE') + if kind[0] == 'f' and not re.search(r'\$IN\b', cmd): + raise ValueError('Template.prepend: missing $IN in cmd') + if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd): + raise ValueError('Template.prepend: missing $OUT in cmd') + self.steps.insert(0, (cmd, kind)) + + def open(self, file, rw): + """t.open(file, rw) returns a pipe or file object open for + reading or writing; the file is the other end of the pipeline.""" + if rw == 'r': + return self.open_r(file) + if rw == 'w': + return self.open_w(file) + raise ValueError('Template.open: rw must be \'r\' or \'w\', not %r' + % (rw,)) + + def open_r(self, file): + """t.open_r(file) and t.open_w(file) implement + t.open(file, 'r') and t.open(file, 'w') respectively.""" + if not self.steps: + return open(file, 'r') + if self.steps[-1][1] == SINK: + raise ValueError('Template.open_r: pipeline ends with SINK') + cmd = self.makepipeline(file, '') + return os.popen(cmd, 'r') + + def open_w(self, file): + if not self.steps: + return open(file, 'w') + if self.steps[0][1] == SOURCE: + raise ValueError('Template.open_w: pipeline begins with SOURCE') + cmd = self.makepipeline('', file) + return os.popen(cmd, 'w') + + def copy(self, infile, outfile): + return os.system(self.makepipeline(infile, outfile)) + + def makepipeline(self, infile, outfile): + cmd = makepipeline(infile, self.steps, outfile) + if self.debugging: + print(cmd) + cmd = 'set -x; ' + cmd + return cmd + + +def makepipeline(infile, steps, outfile): + # Build a list with for each
command: + # [input filename or '', command string, kind, output filename or ''] + + list = [] + for cmd, kind in steps: + list.append(['', cmd, kind, '']) + # + # Make sure there is at least one step + # + if not list: + list.append(['', 'cat', '--', '']) + # + # Take care of the input and output ends + # + [cmd, kind] = list[0][1:3] + if kind[0] == 'f' and not infile: + list.insert(0, ['', 'cat', '--', '']) + list[0][0] = infile + # + [cmd, kind] = list[-1][1:3] + if kind[1] == 'f' and not outfile: + list.append(['', 'cat', '--', '']) + list[-1][-1] = outfile + # + # Invent temporary files to connect stages that need files + # + garbage = [] + for i in range(1, len(list)): + lkind = list[i-1][2] + rkind = list[i][2] + if lkind[1] == 'f' or rkind[0] == 'f': + (fd, temp) = tempfile.mkstemp() + os.close(fd) + garbage.append(temp) + list[i-1][-1] = list[i][0] = temp + # + for item in list: + [inf, cmd, kind, outf] = item + if kind[1] == 'f': + cmd = 'OUT=' + quote(outf) + '; ' + cmd + if kind[0] == 'f': + cmd = 'IN=' + quote(inf) + '; ' + cmd + if kind[0] == '-' and inf: + cmd = cmd + ' <' + quote(inf) + if kind[1] == '-' and outf: + cmd = cmd + ' >' + quote(outf) + item[1] = cmd + # + cmdlist = list[0][1] + for item in list[1:]: + [cmd, kind] = item[1:3] + if item[0] == '': + if 'f' in kind: + cmd = '{ ' + cmd + '; }' + cmdlist = cmdlist + ' |\n' + cmd + else: + cmdlist = cmdlist + '\n' + cmd + # + if garbage: + rmcmd = 'rm -f' + for file in garbage: + rmcmd = rmcmd + ' ' + quote(file) + trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15' + cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd + # + return cmdlist diff --git a/modules/language/python/module/re/compile.scm b/modules/language/python/module/re/compile.scm index f92a8dd..d23ea71 100644 --- a/modules/language/python/module/re/compile.scm +++ b/modules/language/python/module/re/compile.scm @@ -451,12 +451,16 @@ (let ((f (apply f-or! (map (lambda (x) (match x - ((#:range ch1 ch2) - (f-reg! (format #f "[~a-~a]" ch1 ch2))) - ((#:ch (#:class ch)) - (get-class ch)) - ((#:ch ch) - (get-ch ch)))) + ((#:range ch1 ch2) + (if (and (<= (char->integer ch1) 10) + (>= (char->integer ch2) 10)) + (f-or! f-nl! + (f-reg! (format #f "[~a-~a]" ch1 ch2))) + (f-reg! (format #f "[~a-~a]" ch1 ch2)))) + ((#:ch (#:class ch)) + (get-class ch)) + ((#:ch ch) + (get-ch ch)))) ch)))) (trace `brack (fw diff --git a/modules/language/python/module/shlex.py b/modules/language/python/module/shlex.py new file mode 100644 index 0000000..afdd35d --- /dev/null +++ b/modules/language/python/module/shlex.py @@ -0,0 +1,329 @@ +module(shlex) +"""A lexical analyzer class for simple shell-like syntaxes.""" + +# Module and documentation by Eric S. Raymond, 21 Dec 1998 +# Input stacking and error message cleanup added by ESR, March 2000 +# push_source() and pop_source() made explicit by ESR, January 2001. +# Posix compliance, split(), string arguments, and +# iterator interface by Gustavo Niemeyer, April 2003. +# changes to tokenize more like Posix shells by Vinay Sajip, July 2016. + +import os +import re +import sys +from collections import deque + +from io import StringIO + +__all__ = ["shlex", "split", "quote"] + +class shlex: + "A lexical analyzer class for simple shell-like syntaxes." 
+ def __init__(self, instream=None, infile=None, posix=False, + punctuation_chars=False): + if isinstance(instream, str): + instream = StringIO(instream) + if instream is not None: + self.instream = instream + self.infile = infile + else: + self.instream = sys.stdin + self.infile = None + self.posix = posix + if posix: + self.eof = None + else: + self.eof = '' + self.commenters = '#' + self.wordchars = ('abcdfeghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_') + if self.posix: + self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ' + 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ') + self.whitespace = ' \t\r\n' + self.whitespace_split = False + self.quotes = '\'"' + self.escape = '\\' + self.escapedquotes = '"' + self.state = ' ' + self.pushback = deque() + self.lineno = 1 + self.debug = 0 + self.token = '' + self.filestack = deque() + self.source = None + if not punctuation_chars: + punctuation_chars = '' + elif punctuation_chars is True: + punctuation_chars = '();<>|&' + self.punctuation_chars = punctuation_chars + if punctuation_chars: + # _pushback_chars is a push back queue used by lookahead logic + self._pushback_chars = deque() + # these chars added because allowed in file names, args, wildcards + self.wordchars += '~-./*?=' + #remove any punctuation chars from wordchars + t = self.wordchars.maketrans(dict.fromkeys(punctuation_chars)) + self.wordchars = self.wordchars.translate(t) + + def push_token(self, tok): + "Push a token onto the stack popped by the get_token method" + if self.debug >= 1: + print("shlex: pushing token " + repr(tok)) + self.pushback.appendleft(tok) + + def push_source(self, newstream, newfile=None): + "Push an input source onto the lexer's input source stack." + if isinstance(newstream, str): + newstream = StringIO(newstream) + self.filestack.appendleft((self.infile, self.instream, self.lineno)) + self.infile = newfile + self.instream = newstream + self.lineno = 1 + if self.debug: + if newfile is not None: + print('shlex: pushing to file %s' % (self.infile,)) + else: + print('shlex: pushing to stream %s' % (self.instream,)) + + def pop_source(self): + "Pop the input source stack." + self.instream.close() + (self.infile, self.instream, self.lineno) = self.filestack.popleft() + if self.debug: + print('shlex: popping to %s, line %d' \ + % (self.instream, self.lineno)) + self.state = ' ' + + def get_token(self): + "Get a token from the input stream (or from stack if it's nonempty)" + if self.pushback: + tok = self.pushback.popleft() + if self.debug >= 1: + print("shlex: popping token " + repr(tok)) + return tok + # No pushback. Get a token. + raw = self.read_token() + # Handle inclusions + if self.source is not None: + while raw == self.source: + spec = self.sourcehook(self.read_token()) + if spec: + (newfile, newstream) = spec + self.push_source(newstream, newfile) + raw = self.get_token() + # Maybe we got EOF instead? 
+ while raw == self.eof: + if not self.filestack: + return self.eof + else: + self.pop_source() + raw = self.get_token() + # Neither inclusion nor EOF + if self.debug >= 1: + if raw != self.eof: + print("shlex: token=" + repr(raw)) + else: + print("shlex: token=EOF") + return raw + + def read_token(self): + quoted = False + escapedstate = ' ' + while True: + if self.punctuation_chars and self._pushback_chars: + nextchar = self._pushback_chars.pop() + else: + nextchar = self.instream.read(1) + if nextchar == '\n': + self.lineno += 1 + if self.debug >= 3: + print("shlex: in state %r I see character: %r" % (self.state, + nextchar)) + if self.state is None: + self.token = '' # past end of file + break + elif self.state == ' ': + if not nextchar: + self.state = None # end of file + break + elif nextchar in self.whitespace: + if self.debug >= 2: + print("shlex: I see whitespace in whitespace state") + if self.token or (self.posix and quoted): + break # emit current token + else: + continue + elif nextchar in self.commenters: + self.instream.readline() + self.lineno += 1 + elif self.posix and nextchar in self.escape: + escapedstate = 'a' + self.state = nextchar + elif nextchar in self.wordchars: + self.token = nextchar + self.state = 'a' + elif nextchar in self.punctuation_chars: + self.token = nextchar + self.state = 'c' + elif nextchar in self.quotes: + if not self.posix: + self.token = nextchar + self.state = nextchar + elif self.whitespace_split: + self.token = nextchar + self.state = 'a' + else: + self.token = nextchar + if self.token or (self.posix and quoted): + break # emit current token + else: + continue + elif self.state in self.quotes: + quoted = True + if not nextchar: # end of file + if self.debug >= 2: + print("shlex: I see EOF in quotes state") + # XXX what error should be raised here? + raise ValueError("No closing quotation") + if nextchar == self.state: + if not self.posix: + self.token += nextchar + self.state = ' ' + break + else: + self.state = 'a' + elif (self.posix and nextchar in self.escape and self.state + in self.escapedquotes): + escapedstate = self.state + self.state = nextchar + else: + self.token += nextchar + elif self.state in self.escape: + if not nextchar: # end of file + if self.debug >= 2: + print("shlex: I see EOF in escape state") + # XXX what error should be raised here? + raise ValueError("No escaped character") + # In posix shells, only the quote itself or the escape + # character may be escaped within quotes. 
+ if (escapedstate in self.quotes and + nextchar != self.state and nextchar != escapedstate): + self.token += self.state + self.token += nextchar + self.state = escapedstate + elif self.state in ('a', 'c'): + if not nextchar: + self.state = None # end of file + break + elif nextchar in self.whitespace: + if self.debug >= 2: + print("shlex: I see whitespace in word state") + self.state = ' ' + if self.token or (self.posix and quoted): + break # emit current token + else: + continue + elif nextchar in self.commenters: + self.instream.readline() + self.lineno += 1 + if self.posix: + self.state = ' ' + if self.token or (self.posix and quoted): + break # emit current token + else: + continue + elif self.state == 'c': + if nextchar in self.punctuation_chars: + self.token += nextchar + else: + if nextchar not in self.whitespace: + self._pushback_chars.append(nextchar) + self.state = ' ' + break + elif self.posix and nextchar in self.quotes: + self.state = nextchar + elif self.posix and nextchar in self.escape: + escapedstate = 'a' + self.state = nextchar + elif (nextchar in self.wordchars or nextchar in self.quotes + or self.whitespace_split): + self.token += nextchar + else: + if self.punctuation_chars: + self._pushback_chars.append(nextchar) + else: + self.pushback.appendleft(nextchar) + if self.debug >= 2: + print("shlex: I see punctuation in word state") + self.state = ' ' + if self.token or (self.posix and quoted): + break # emit current token + else: + continue + result = self.token + self.token = '' + if self.posix and not quoted and result == '': + result = None + if self.debug > 1: + if result: + print("shlex: raw token=" + repr(result)) + else: + print("shlex: raw token=EOF") + return result + + def sourcehook(self, newfile): + "Hook called on a filename to be sourced." + if newfile[0] == '"': + newfile = newfile[1:-1] + # This implements cpp-like semantics for relative-path inclusion. + if isinstance(self.infile, str) and not os.path.isabs(newfile): + newfile = os.path.join(os.path.dirname(self.infile), newfile) + return (newfile, open(newfile, "r")) + + def error_leader(self, infile=None, lineno=None): + "Emit a C-compiler-like, Emacs-friendly error-message leader." + if infile is None: + infile = self.infile + if lineno is None: + lineno = self.lineno + return "\"%s\", line %d: " % (infile, lineno) + + def __iter__(self): + return self + + def __next__(self): + token = self.get_token() + if token == self.eof: + raise StopIteration + return token + +def split(s, comments=False, posix=True): + lex = shlex(s, posix=posix) + lex.whitespace_split = True + if not comments: + lex.commenters = '' + return list(lex) + + +_find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search + +def quote(s): + """Return a shell-escaped version of the string *s*.""" + if not s: + return "''" + if _find_unsafe(s) is None: + return s + + # use single quotes, and put single quotes into double quotes + # the string $'b is then quoted as '$'"'"'b' + return "'" + s.replace("'", "'\"'\"'") + "'" + + +def _print_tokens(lexer): + while 1: + tt = lexer.get_token() + if not tt: + break + print("Token: " + repr(tt)) + diff --git a/modules/language/python/module/urllib/parse.py b/modules/language/python/module/urllib/parse.py new file mode 100644 index 0000000..3bfe63c --- /dev/null +++ b/modules/language/python/module/urllib/parse.py @@ -0,0 +1,1022 @@ +module(urllib,parse) +"""Parse (absolute and relative) URLs. + +urlparse module is based upon the following RFC specifications. 
+ +RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding +and L. Masinter, January 2005. + +RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter +and L.Masinter, December 1999. + +RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T. +Berners-Lee, R. Fielding, and L. Masinter, August 1998. + +RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998. + +RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June +1995. + +RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. +McCahill, December 1994 + +RFC 3986 is considered the current standard and any future changes to +urlparse module should conform with it. The urlparse module is +currently not entirely compliant with this RFC due to defacto +scenarios for parsing, and for backward compatibility purposes, some +parsing quirks from older RFCs are retained. The testcases in +test_urlparse.py provides a good indicator of parsing behavior. +""" + +import re +import sys +import collections + +__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", + "urlsplit", "urlunsplit", "urlencode", "parse_qs", + "parse_qsl", "quote", "quote_plus", "quote_from_bytes", + "unquote", "unquote_plus", "unquote_to_bytes", + "DefragResult", "ParseResult", "SplitResult", + "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"] + +# A classification of schemes. +# The empty string classifies URLs with no scheme specified, +# being the default value returned by “urlsplit” and “urlparse”. + +uses_relative = ['', 'ftp', 'http', 'gopher', 'nntp', 'imap', + 'wais', 'file', 'https', 'shttp', 'mms', + 'prospero', 'rtsp', 'rtspu', 'sftp', + 'svn', 'svn+ssh', 'ws', 'wss'] + +uses_netloc = ['', 'ftp', 'http', 'gopher', 'nntp', 'telnet', + 'imap', 'wais', 'file', 'mms', 'https', 'shttp', + 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', + 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh', + 'ws', 'wss'] + +uses_params = ['', 'ftp', 'hdl', 'prospero', 'http', 'imap', + 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', + 'mms', 'sftp', 'tel'] + +# These are not actually used anymore, but should stay for backwards +# compatibility. (They are undocumented, but have a public-looking name.) + +non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', + 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] + +uses_query = ['', 'http', 'wais', 'imap', 'https', 'shttp', 'mms', + 'gopher', 'rtsp', 'rtspu', 'sip', 'sips'] + +uses_fragment = ['', 'ftp', 'hdl', 'http', 'gopher', 'news', + 'nntp', 'wais', 'https', 'shttp', 'snews', + 'file', 'prospero'] + +# Characters valid in scheme names +scheme_chars = ('abcdefghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + '0123456789' + '+-.') + +# XXX: Consider replacing with functools.lru_cache +MAX_CACHE_SIZE = 20 +_parse_cache = {} + +def clear_cache(): + """Clear the parse cache and the quoters cache.""" + _parse_cache.clear() + _safe_quoters.clear() + + +# Helpers for bytes handling +# For 3.2, we deliberately require applications that +# handle improperly quoted URLs to do their own +# decoding and encoding. 
If valid use cases are +# presented, we may relax this by using latin-1 +# decoding internally for 3.3 +_implicit_encoding = 'ascii' +_implicit_errors = 'strict' + +def _noop(obj): + return obj + +def _encode_result(obj, encoding=_implicit_encoding, + errors=_implicit_errors): + return obj.encode(encoding, errors) + +def _decode_args(args, encoding=_implicit_encoding, + errors=_implicit_errors): + return tuple(x.decode(encoding, errors) if x else '' for x in args) + +def _coerce_args(*args): + # Invokes decode if necessary to create str args + # and returns the coerced inputs along with + # an appropriate result coercion function + # - noop for str inputs + # - encoding function otherwise + str_input = isinstance(args[0], str) + for arg in args[1:]: + # We special-case the empty string to support the + # "scheme=''" default argument to some functions + if arg and isinstance(arg, str) != str_input: + raise TypeError("Cannot mix str and non-str arguments") + if str_input: + return args + (_noop,) + return _decode_args(args) + (_encode_result,) + +# Result objects are more helpful than simple tuples +class _ResultMixinStr(object): + """Standard approach to encoding parsed results from str to bytes""" + __slots__ = () + + def encode(self, encoding='ascii', errors='strict'): + return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self)) + + +class _ResultMixinBytes(object): + """Standard approach to decoding parsed results from bytes to str""" + __slots__ = () + + def decode(self, encoding='ascii', errors='strict'): + return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self)) + + +class _NetlocResultMixinBase(object): + """Shared methods for the parsed result objects containing a netloc element""" + __slots__ = () + + @property + def username(self): + return self._userinfo[0] + + @property + def password(self): + return self._userinfo[1] + + @property + def hostname(self): + hostname = self._hostinfo[0] + if not hostname: + hostname = None + elif hostname is not None: + hostname = hostname.lower() + return hostname + + @property + def port(self): + port = self._hostinfo[1] + if port is not None: + port = int(port, 10) + if not ( 0 <= port <= 65535): + raise ValueError("Port out of range 0-65535") + return port + + +class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr): + __slots__ = () + + @property + def _userinfo(self): + netloc = self.netloc + userinfo, have_info, hostinfo = netloc.rpartition('@') + if have_info: + username, have_password, password = userinfo.partition(':') + if not have_password: + password = None + else: + username = password = None + return username, password + + @property + def _hostinfo(self): + netloc = self.netloc + _, _, hostinfo = netloc.rpartition('@') + _, have_open_br, bracketed = hostinfo.partition('[') + if have_open_br: + hostname, _, port = bracketed.partition(']') + _, _, port = port.partition(':') + else: + hostname, _, port = hostinfo.partition(':') + if not port: + port = None + return hostname, port + + +class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes): + __slots__ = () + + @property + def _userinfo(self): + netloc = self.netloc + userinfo, have_info, hostinfo = netloc.rpartition(b'@') + if have_info: + username, have_password, password = userinfo.partition(b':') + if not have_password: + password = None + else: + username = password = None + return username, password + + @property + def _hostinfo(self): + netloc = self.netloc + _, _, hostinfo = netloc.rpartition(b'@') + _, 
have_open_br, bracketed = hostinfo.partition(b'[') + if have_open_br: + hostname, _, port = bracketed.partition(b']') + _, _, port = port.partition(b':') + else: + hostname, _, port = hostinfo.partition(b':') + if not port: + port = None + return hostname, port + + +from collections import namedtuple + +_DefragResultBase = namedtuple('DefragResult', 'url fragment') +_SplitResultBase = namedtuple( + 'SplitResult', 'scheme netloc path query fragment') +_ParseResultBase = namedtuple( + 'ParseResult', 'scheme netloc path params query fragment') + +_DefragResultBase.__doc__ = """ +DefragResult(url, fragment) + +A 2-tuple that contains the url without fragment identifier and the fragment +identifier as a separate argument. +""" + +#_DefragResultBase.url.__doc__ = """The URL with no fragment identifier.""" + +#_DefragResultBase.fragment.__doc__ = """ +#Fragment identifier separated from URL, that allows indirect identification of a +#secondary resource by reference to a primary resource and additional identifying +#information. +#""" + +_SplitResultBase.__doc__ = """ +SplitResult(scheme, netloc, path, query, fragment) + +A 5-tuple that contains the different components of a URL. Similar to +ParseResult, but does not split params. +""" + +#_SplitResultBase.scheme.__doc__ = """Specifies URL scheme for the request.""" + +#_SplitResultBase.netloc.__doc__ = """ +#Network location where the request is made to. +#""" + +#_SplitResultBase.path.__doc__ = """ +#The hierarchical path, such as the path to a file to download. +#""" + +#_SplitResultBase.query.__doc__ = """ +#The query component, that contains non-hierarchical data, that along with data +#in path component, identifies a resource in the scope of URI's scheme and +#network location. +#""" + +#_SplitResultBase.fragment.__doc__ = """ +#Fragment identifier, that allows indirect identification of a secondary resource +#by reference to a primary resource and additional identifying information. +#""" + +_ParseResultBase.__doc__ = """ +ParseResult(scheme, netloc, path, params, query, fragment) + +A 6-tuple that contains components of a parsed URL. +""" + +#_ParseResultBase.scheme.__doc__ = _SplitResultBase.scheme.__doc__ +#_ParseResultBase.netloc.__doc__ = _SplitResultBase.netloc.__doc__ +#_ParseResultBase.path.__doc__ = _SplitResultBase.path.__doc__ +#_ParseResultBase.params.__doc__ = """ +#Parameters for last path element used to dereference the URI in order to provide +#access to perform some operation on the resource. 
+#""" + +#_ParseResultBase.query.__doc__ = _SplitResultBase.query.__doc__ +#_ParseResultBase.fragment.__doc__ = _SplitResultBase.fragment.__doc__ + + +# For backwards compatibility, alias _NetlocResultMixinStr +# ResultBase is no longer part of the documented API, but it is +# retained since deprecating it isn't worth the hassle +ResultBase = _NetlocResultMixinStr + +# Structured result objects for string data +class DefragResult(_DefragResultBase, _ResultMixinStr): + __slots__ = () + def geturl(self): + if self.fragment: + return self.url + '#' + self.fragment + else: + return self.url + +class SplitResult(_SplitResultBase, _NetlocResultMixinStr): + __slots__ = () + def geturl(self): + return urlunsplit(self) + +class ParseResult(_ParseResultBase, _NetlocResultMixinStr): + __slots__ = () + def geturl(self): + return urlunparse(self) + +# Structured result objects for bytes data +class DefragResultBytes(_DefragResultBase, _ResultMixinBytes): + __slots__ = () + def geturl(self): + if self.fragment: + return self.url + b'#' + self.fragment + else: + return self.url + +class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes): + __slots__ = () + def geturl(self): + return urlunsplit(self) + +class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes): + __slots__ = () + def geturl(self): + return urlunparse(self) + +# Set up the encode/decode result pairs +def _fix_result_transcoding(): + _result_pairs = ( + (DefragResult, DefragResultBytes), + (SplitResult, SplitResultBytes), + (ParseResult, ParseResultBytes), + ) + for _decoded, _encoded in _result_pairs: + _decoded._encoded_counterpart = _encoded + _encoded._decoded_counterpart = _decoded + +_fix_result_transcoding() +del _fix_result_transcoding + +def urlparse(url, scheme='', allow_fragments=True): + """Parse a URL into 6 components: + <scheme>://<netloc>/<path>;<params>?<query>#<fragment> + Return a 6-tuple: (scheme, netloc, path, params, query, fragment). + Note that we don't break the components up in smaller bits + (e.g. netloc is a single string) and we don't expand % escapes.""" + url, scheme, _coerce_result = _coerce_args(url, scheme) + splitresult = urlsplit(url, scheme, allow_fragments) + scheme, netloc, url, query, fragment = splitresult + if scheme in uses_params and ';' in url: + url, params = _splitparams(url) + else: + params = '' + result = ParseResult(scheme, netloc, url, params, query, fragment) + return _coerce_result(result) + +def _splitparams(url): + if '/' in url: + i = url.find(';', url.rfind('/')) + if i < 0: + return url, '' + else: + i = url.find(';') + return url[:i], url[i+1:] + +def _splitnetloc(url, start=0): + delim = len(url) # position of end of domain part of url, default is end + for c in '/?#': # look for delimiters; the order is NOT important + wdelim = url.find(c, start) # find first of this delim + if wdelim >= 0: # if found + delim = min(delim, wdelim) # use earliest delim position + return url[start:delim], url[delim:] # return (domain, rest) + +def urlsplit(url, scheme='', allow_fragments=True): + """Parse a URL into 5 components: + <scheme>://<netloc>/<path>?<query>#<fragment> + Return a 5-tuple: (scheme, netloc, path, query, fragment). + Note that we don't break the components up in smaller bits + (e.g. 
netloc is a single string) and we don't expand % escapes.""" + url, scheme, _coerce_result = _coerce_args(url, scheme) + allow_fragments = bool(allow_fragments) + key = url, scheme, allow_fragments, type(url), type(scheme) + cached = _parse_cache.get(key, None) + if cached: + return _coerce_result(cached) + if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth + clear_cache() + netloc = query = fragment = '' + i = url.find(':') + if i > 0: + if url[:i] == 'http': # optimize the common case + scheme = url[:i].lower() + url = url[i+1:] + if url[:2] == '//': + netloc, url = _splitnetloc(url, 2) + if (('[' in netloc and ']' not in netloc) or + (']' in netloc and '[' not in netloc)): + raise ValueError("Invalid IPv6 URL") + if allow_fragments and '#' in url: + url, fragment = url.split('#', 1) + if '?' in url: + url, query = url.split('?', 1) + v = SplitResult(scheme, netloc, url, query, fragment) + _parse_cache[key] = v + return _coerce_result(v) + for c in url[:i]: + if c not in scheme_chars: + break + else: + # make sure "url" is not actually a port number (in which case + # "scheme" is really part of the path) + rest = url[i+1:] + if not rest or any(c not in '0123456789' for c in rest): + # not a port number + scheme, url = url[:i].lower(), rest + + if url[:2] == '//': + netloc, url = _splitnetloc(url, 2) + if (('[' in netloc and ']' not in netloc) or + (']' in netloc and '[' not in netloc)): + raise ValueError("Invalid IPv6 URL") + if allow_fragments and '#' in url: + url, fragment = url.split('#', 1) + if '?' in url: + url, query = url.split('?', 1) + v = SplitResult(scheme, netloc, url, query, fragment) + _parse_cache[key] = v + return _coerce_result(v) + +def urlunparse(components): + """Put a parsed URL back together again. This may result in a + slightly different, but equivalent URL, if the URL that was parsed + originally had redundant delimiters, e.g. a ? with an empty query + (the draft states that these are equivalent).""" + scheme, netloc, url, params, query, fragment, _coerce_result = ( + _coerce_args(*components)) + if params: + url = "%s;%s" % (url, params) + return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment))) + +def urlunsplit(components): + """Combine the elements of a tuple as returned by urlsplit() into a + complete URL as a string. The data argument can be any five-item iterable. + This may result in a slightly different, but equivalent URL, if the URL that + was parsed originally had unnecessary delimiters (for example, a ? with an + empty query; the RFC states that these are equivalent).""" + scheme, netloc, url, query, fragment, _coerce_result = ( + _coerce_args(*components)) + if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): + if url and url[:1] != '/': url = '/' + url + url = '//' + (netloc or '') + url + if scheme: + url = scheme + ':' + url + if query: + url = url + '?' 
+ query + if fragment: + url = url + '#' + fragment + return _coerce_result(url) + +def urljoin(base, url, allow_fragments=True): + """Join a base URL and a possibly relative URL to form an absolute + interpretation of the latter.""" + if not base: + return url + if not url: + return base + + base, url, _coerce_result = _coerce_args(base, url) + bscheme, bnetloc, bpath, bparams, bquery, bfragment = \ + urlparse(base, '', allow_fragments) + scheme, netloc, path, params, query, fragment = \ + urlparse(url, bscheme, allow_fragments) + + if scheme != bscheme or scheme not in uses_relative: + return _coerce_result(url) + if scheme in uses_netloc: + if netloc: + return _coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) + netloc = bnetloc + + if not path and not params: + path = bpath + params = bparams + if not query: + query = bquery + return _coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) + + base_parts = bpath.split('/') + if base_parts[-1] != '': + # the last item is not a directory, so will not be taken into account + # in resolving the relative path + del base_parts[-1] + + # for rfc3986, ignore all base path should the first character be root. + if path[:1] == '/': + segments = path.split('/') + else: + segments = base_parts + path.split('/') + # filter out elements that would cause redundant slashes on re-joining + # the resolved_path + segments[1:-1] = filter(None, segments[1:-1]) + + resolved_path = [] + + for seg in segments: + if seg == '..': + try: + resolved_path.pop() + except IndexError: + # ignore any .. segments that would otherwise cause an IndexError + # when popped from resolved_path if resolving for rfc3986 + pass + elif seg == '.': + continue + else: + resolved_path.append(seg) + + if segments[-1] in ('.', '..'): + # do some post-processing here. if the last segment was a relative dir, + # then we need to append the trailing '/' + resolved_path.append('') + + return _coerce_result(urlunparse((scheme, netloc, '/'.join( + resolved_path) or '/', params, query, fragment))) + + +def urldefrag(url): + """Removes any existing fragment from URL. + + Returns a tuple of the defragmented URL and the fragment. If + the URL contained no fragments, the second element is the + empty string. + """ + url, _coerce_result = _coerce_args(url) + if '#' in url: + s, n, p, a, q, frag = urlparse(url) + defrag = urlunparse((s, n, p, a, q, '')) + else: + frag = '' + defrag = url + return _coerce_result(DefragResult(defrag, frag)) + +_hexdig = '0123456789ABCDEFabcdef' +_hextobyte = None + +def unquote_to_bytes(string): + """unquote_to_bytes('abc%20def') -> b'abc def'.""" + # Note: strings are encoded as UTF-8. This is only an issue if it contains + # unescaped non-ASCII characters, which URIs should not. + if not string: + # Is it a string-like object? 
+_hexdig = '0123456789ABCDEFabcdef'
+_hextobyte = None
+
+def unquote_to_bytes(string):
+    """unquote_to_bytes('abc%20def') -> b'abc def'."""
+    # Note: strings are encoded as UTF-8. This is only an issue if it contains
+    # unescaped non-ASCII characters, which URIs should not.
+    if not string:
+        # Is it a string-like object?
+        string.split
+        return b''
+    if isinstance(string, str):
+        string = string.encode('utf-8')
+    bits = string.split(b'%')
+    if len(bits) == 1:
+        return string
+    res = [bits[0]]
+    append = res.append
+    # Delay the initialization of the table to not waste memory
+    # if the function is never called
+    global _hextobyte
+    if _hextobyte is None:
+        _hextobyte = {(a + b).encode(): bytes([int(a + b, 16)])
+                      for a in _hexdig for b in _hexdig}
+    for item in bits[1:]:
+        try:
+            append(_hextobyte[item[:2]])
+            append(item[2:])
+        except KeyError:
+            append(b'%')
+            append(item)
+    return b''.join(res)
+
+_asciire = re.compile('([\x01-\x7f]+)')
+
+def unquote(string, encoding='utf-8', errors='replace'):
+    """Replace %xx escapes by their single-character equivalent. The optional
+    encoding and errors parameters specify how to decode percent-encoded
+    sequences into Unicode characters, as accepted by the bytes.decode()
+    method.
+    By default, percent-encoded sequences are decoded with UTF-8, and invalid
+    sequences are replaced by a placeholder character.
+
+    unquote('abc%20def') -> 'abc def'.
+    """
+    if '%' not in string:
+        # Is it a string-like object? (duck-typing check, as in
+        # unquote_to_bytes above)
+        string.split
+        return string
+    if encoding is None:
+        encoding = 'utf-8'
+    if errors is None:
+        errors = 'replace'
+    bits = _asciire.split(string)
+    res = [bits[0]]
+    append = res.append
+    for i in range(1, len(bits), 2):
+        append(unquote_to_bytes(bits[i]).decode(encoding, errors))
+        append(bits[i + 1])
+    return ''.join(res)
+
+
+def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
+             encoding='utf-8', errors='replace'):
+    """Parse a query given as a string argument.
+
+    Arguments:
+
+    qs: percent-encoded query string to be parsed
+
+    keep_blank_values: flag indicating whether blank values in
+        percent-encoded queries should be treated as blank strings.
+        A true value indicates that blanks should be retained as
+        blank strings. The default false value indicates that
+        blank values are to be ignored and treated as if they were
+        not included.
+
+    strict_parsing: flag indicating what to do with parsing errors.
+        If false (the default), errors are silently ignored.
+        If true, errors raise a ValueError exception.
+
+    encoding and errors: specify how to decode percent-encoded sequences
+        into Unicode characters, as accepted by the bytes.decode() method.
+
+    Returns a dictionary.
+    """
+    parsed_result = {}
+    pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
+                      encoding=encoding, errors=errors)
+    for name, value in pairs:
+        if name in parsed_result:
+            parsed_result[name].append(value)
+        else:
+            parsed_result[name] = [value]
+    return parsed_result
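A minimal sketch of the dict vs. list behavior (illustrative, assuming CPython-compatible output):

    >>> from urllib.parse import parse_qs, parse_qsl
    >>> parse_qs('a=1&a=2&b=3')              # repeated keys accumulate into lists
    {'a': ['1', '2'], 'b': ['3']}
    >>> parse_qsl('a=1&a=2&b=3')             # the list form keeps order and duplicates
    [('a', '1'), ('a', '2'), ('b', '3')]
    >>> parse_qs('a=&b=3', keep_blank_values=True)
    {'a': [''], 'b': ['3']}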
+ """ + qs, _coerce_result = _coerce_args(qs) + pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] + r = [] + for name_value in pairs: + if not name_value and not strict_parsing: + continue + nv = name_value.split('=', 1) + if len(nv) != 2: + if strict_parsing: + raise ValueError("bad query field: %r" % (name_value,)) + # Handle case of a control-name with no equal sign + if keep_blank_values: + nv.append('') + else: + continue + if len(nv[1]) or keep_blank_values: + name = nv[0].replace('+', ' ') + name = unquote(name, encoding=encoding, errors=errors) + name = _coerce_result(name) + value = nv[1].replace('+', ' ') + value = unquote(value, encoding=encoding, errors=errors) + value = _coerce_result(value) + r.append((name, value)) + return r + +def unquote_plus(string, encoding='utf-8', errors='replace'): + """Like unquote(), but also replace plus signs by spaces, as required for + unquoting HTML form values. + + unquote_plus('%7e/abc+def') -> '~/abc def' + """ + string = string.replace('+', ' ') + return unquote(string, encoding, errors) + +_ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + b'abcdefghijklmnopqrstuvwxyz' + b'0123456789' + b'_.-') +_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE) +_safe_quoters = {} + +class Quoter(collections.defaultdict): + """A mapping from bytes (in range(0,256)) to strings. + + String values are percent-encoded byte values, unless the key < 128, and + in the "safe" set (either the specified safe set, or default set). + """ + # Keeps a cache internally, using defaultdict, for efficiency (lookups + # of cached keys don't call Python code at all). + def __init__(self, safe): + """safe: bytes object.""" + self.safe = _ALWAYS_SAFE.union(safe) + + def __repr__(self): + # Without this, will just display as a defaultdict + return "<%s %r>" % (self.__class__.__name__, dict(self)) + + def __missing__(self, b): + # Handle a cache miss. Store quoted string in cache and return. + res = chr(b) if b in self.safe else '%{:02X}'.format(b) + self[b] = res + return res + +def quote(string, safe='/', encoding=None, errors=None): + """quote('abc def') -> 'abc%20def' + + Each part of a URL, e.g. the path info, the query, etc., has a + different set of reserved characters that must be quoted. + + RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists + the following reserved characters. + + reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | + "$" | "," + + Each of these characters is reserved in some component of a URL, + but not necessarily in all of them. + + By default, the quote function is intended for quoting the path + section of a URL. Thus, it will not encode '/'. This character + is reserved, but in typical usage the quote function is being + called on a path where the existing slash characters are used as + reserved characters. + + string and safe may be either str or bytes objects. encoding and errors + must not be specified if string is a bytes object. + + The optional encoding and errors parameters specify how to deal with + non-ASCII characters, as accepted by the str.encode method. + By default, encoding='utf-8' (characters are encoded with UTF-8), and + errors='strict' (unsupported characters raise a UnicodeEncodeError). 
+ """ + if isinstance(string, str): + if not string: + return string + if encoding is None: + encoding = 'utf-8' + if errors is None: + errors = 'strict' + string = string.encode(encoding, errors) + else: + if encoding is not None: + raise TypeError("quote() doesn't support 'encoding' for bytes") + if errors is not None: + raise TypeError("quote() doesn't support 'errors' for bytes") + return quote_from_bytes(string, safe) + +def quote_plus(string, safe='', encoding=None, errors=None): + """Like quote(), but also replace ' ' with '+', as required for quoting + HTML form values. Plus signs in the original string are escaped unless + they are included in safe. It also does not have safe default to '/'. + """ + # Check if ' ' in string, where string may either be a str or bytes. If + # there are no spaces, the regular quote will produce the right answer. + if ((isinstance(string, str) and ' ' not in string) or + (isinstance(string, bytes) and b' ' not in string)): + return quote(string, safe, encoding, errors) + if isinstance(safe, str): + space = ' ' + else: + space = b' ' + string = quote(string, safe + space, encoding, errors) + return string.replace(' ', '+') + +def quote_from_bytes(bs, safe='/'): + """Like quote(), but accepts a bytes object rather than a str, and does + not perform string-to-bytes encoding. It always returns an ASCII string. + quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3f' + """ + if not isinstance(bs, (bytes, bytearray)): + raise TypeError("quote_from_bytes() expected bytes") + if not bs: + return '' + if isinstance(safe, str): + # Normalize 'safe' by converting to bytes and removing non-ASCII chars + safe = safe.encode('ascii', 'ignore') + else: + safe = bytes([c for c in safe if c < 128]) + if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe): + return bs.decode() + try: + quoter = _safe_quoters[safe] + except KeyError: + _safe_quoters[safe] = quoter = Quoter(safe).__getitem__ + return ''.join([quoter(char) for char in bs]) + +def urlencode(query, doseq=False, safe='', encoding=None, errors=None, + quote_via=quote_plus): + """Encode a dict or sequence of two-element tuples into a URL query string. + + If any values in the query arg are sequences and doseq is true, each + sequence element is converted to a separate parameter. + + If the query arg is a sequence of two-element tuples, the order of the + parameters in the output will match the order of parameters in the + input. + + The components of a query arg may each be either a string or a bytes type. + + The safe, encoding, and errors parameters are passed down to the function + specified by quote_via (encoding and errors only if a component is a str). + """ + + if hasattr(query, "items"): + query = query.items() + else: + # It's a bother at times that strings and string-like objects are + # sequences. + try: + # non-sequence items should not work with len() + # non-empty strings will fail this + if len(query) and not isinstance(query[0], tuple): + raise TypeError + # Zero-length sequences of all types will get here and succeed, + # but that's a minor nit. 
+def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
+              quote_via=quote_plus):
+    """Encode a dict or sequence of two-element tuples into a URL query string.
+
+    If any values in the query arg are sequences and doseq is true, each
+    sequence element is converted to a separate parameter.
+
+    If the query arg is a sequence of two-element tuples, the order of the
+    parameters in the output will match the order of parameters in the
+    input.
+
+    The components of a query arg may each be either a string or a bytes type.
+
+    The safe, encoding, and errors parameters are passed down to the function
+    specified by quote_via (encoding and errors only if a component is a str).
+    """
+
+    if hasattr(query, "items"):
+        query = query.items()
+    else:
+        # It's a bother at times that strings and string-like objects are
+        # sequences.
+        try:
+            # non-sequence items should not work with len()
+            # non-empty strings will fail this
+            if len(query) and not isinstance(query[0], tuple):
+                raise TypeError
+            # Zero-length sequences of all types will get here and succeed,
+            # but that's a minor nit. Since the original implementation
+            # allowed empty dicts that type of behavior probably should be
+            # preserved for consistency
+        except TypeError:
+            ty, va, tb = sys.exc_info()
+            raise TypeError("not a valid non-string sequence "
+                            "or mapping object").with_traceback(tb)
+
+    l = []
+    if not doseq:
+        for k, v in query:
+            if isinstance(k, bytes):
+                k = quote_via(k, safe)
+            else:
+                k = quote_via(str(k), safe, encoding, errors)
+
+            if isinstance(v, bytes):
+                v = quote_via(v, safe)
+            else:
+                v = quote_via(str(v), safe, encoding, errors)
+            l.append(k + '=' + v)
+    else:
+        for k, v in query:
+            if isinstance(k, bytes):
+                k = quote_via(k, safe)
+            else:
+                k = quote_via(str(k), safe, encoding, errors)
+
+            if isinstance(v, bytes):
+                v = quote_via(v, safe)
+                l.append(k + '=' + v)
+            elif isinstance(v, str):
+                v = quote_via(v, safe, encoding, errors)
+                l.append(k + '=' + v)
+            else:
+                try:
+                    # Is this a sufficient test for sequence-ness?
+                    x = len(v)
+                except TypeError:
+                    # not a sequence
+                    v = quote_via(str(v), safe, encoding, errors)
+                    l.append(k + '=' + v)
+                else:
+                    # loop over the sequence
+                    for elt in v:
+                        if isinstance(elt, bytes):
+                            elt = quote_via(elt, safe)
+                        else:
+                            elt = quote_via(str(elt), safe, encoding, errors)
+                        l.append(k + '=' + elt)
+    return '&'.join(l)
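A hedged sketch of both modes; a sequence of pairs avoids relying on dict ordering:

    >>> from urllib.parse import urlencode
    >>> urlencode([('q', 'a b'), ('n', 2)])      # values stringified and quoted
    'q=a+b&n=2'
    >>> urlencode([('name', 'J Doe'), ('tags', ['a', 'b'])], doseq=True)
    'name=J+Doe&tags=a&tags=b'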
+def to_bytes(url):
+    """to_bytes(u"URL") --> 'URL'."""
+    # Most URL schemes require ASCII. If that changes, the conversion
+    # can be relaxed.
+    # XXX get rid of to_bytes()
+    if isinstance(url, str):
+        try:
+            url = url.encode("ASCII").decode()
+        except UnicodeError:
+            raise UnicodeError("URL " + repr(url) +
+                               " contains non-ASCII characters")
+    return url
+
+def unwrap(url):
+    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
+    url = str(url).strip()
+    if url[:1] == '<' and url[-1:] == '>':
+        url = url[1:-1].strip()
+    if url[:4] == 'URL:': url = url[4:].strip()
+    return url
+
+_typeprog = None
+def splittype(url):
+    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
+    global _typeprog
+    if _typeprog is None:
+        _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL)
+
+    match = _typeprog.match(url)
+    if match:
+        scheme, data = match.groups()
+        return scheme.lower(), data
+    return None, url
+
+_hostprog = None
+def splithost(url):
+    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
+    global _hostprog
+    if _hostprog is None:
+        _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)
+
+    match = _hostprog.match(url)
+    if match:
+        host_port, path = match.groups()
+        if path and path[0] != '/':
+            path = '/' + path
+        return host_port, path
+    return None, url
+
+def splituser(host):
+    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
+    user, delim, host = host.rpartition('@')
+    return (user if delim else None), host
+
+def splitpasswd(user):
+    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
+    user, delim, passwd = user.partition(':')
+    return user, (passwd if delim else None)
+
+_portprog = None
+def splitport(host):
+    """splitport('host:port') --> 'host', 'port'."""
+    global _portprog
+    if _portprog is None:
+        _portprog = re.compile('(.*):([0-9]*)$', re.DOTALL)
+
+    match = _portprog.match(host)
+    if match:
+        host, port = match.groups()
+        if port:
+            return host, port
+    return host, None
+
+def splitnport(host, defport=-1):
+    """Split host and port, returning numeric port.
+    Return given default port if no ':' found; defaults to -1.
+    Return numerical port if a valid number is found after ':'.
+    Return None if ':' is present but not followed by a valid number."""
+    host, delim, port = host.rpartition(':')
+    if not delim:
+        host = port
+    elif port:
+        try:
+            nport = int(port)
+        except ValueError:
+            nport = None
+        return host, nport
+    return host, defport
+
+def splitquery(url):
+    """splitquery('/path?query') --> '/path', 'query'."""
+    path, delim, query = url.rpartition('?')
+    if delim:
+        return path, query
+    return url, None
+
+def splittag(url):
+    """splittag('/path#tag') --> '/path', 'tag'."""
+    path, delim, tag = url.rpartition('#')
+    if delim:
+        return path, tag
+    return url, None
+
+def splitattr(url):
+    """splitattr('/path;attr1=value1;attr2=value2;...') ->
+        '/path', ['attr1=value1', 'attr2=value2', ...]."""
+    words = url.split(';')
+    return words[0], words[1:]
+
+def splitvalue(attr):
+    """splitvalue('attr=value') --> 'attr', 'value'."""
+    attr, delim, value = attr.partition('=')
+    return attr, (value if delim else None)
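Finally, a doctest-style sketch of the legacy split helpers (illustrative only; these are undocumented internals in CPython as well):

    >>> splittype('mailto:user@example.com')
    ('mailto', 'user@example.com')
    >>> splithost('//www.example.com:8080/docs')
    ('www.example.com:8080', '/docs')
    >>> splitnport('www.example.com:8080')
    ('www.example.com', 8080)
    >>> splitattr('/path;type=a;len=2')
    ('/path', ['type=a', 'len=2'])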