author     Stefan Israelsson Tampe <stefan.itampe@gmail.com>  2018-09-04 16:19:39 +0200
committer  Stefan Israelsson Tampe <stefan.itampe@gmail.com>  2018-09-04 16:19:39 +0200
commit     e37613527a05fc56f8ee5886a868c948b3ee4cfc
tree       38c51006b81a165133fbd44566940decb06fae5d
parent     8223db0b1a660ad10830e9a5a2fe71858cf52481
pipes
-rw-r--r--  modules/language/python/compile.scm             |   34
-rw-r--r--  modules/language/python/exceptions.scm          |    4
-rw-r--r--  modules/language/python/module/binascii.scm     |   32
-rw-r--r--  modules/language/python/module/mimetypes.py     |  553
-rw-r--r--  modules/language/python/module/pipes.py         |  248
-rw-r--r--  modules/language/python/module/re/compile.scm   |   16
-rw-r--r--  modules/language/python/module/shlex.py         |  329
-rw-r--r--  modules/language/python/module/urllib/parse.py  | 1022
8 files changed, 2223 insertions, 15 deletions
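
Most of the added lines below are straight ports of CPython standard-library modules (mimetypes, pipes, shlex, urllib.parse) into the Guile Python layer. As a quick smoke test of the new pipes port, the classic usage from the pipes docstring should work once the module compiles. This is a minimal sketch, assuming the Guile port keeps CPython's pipes API; the 'tr' command and the file names are illustrative only:

    from pipes import Template

    t = Template()
    # '--': this step reads stdin and writes stdout, so it can sit
    # anywhere in the pipeline.
    t.append('tr a-z A-Z', '--')
    # Run infile.txt through the pipeline into outfile.txt; the return
    # value is the exit status of the generated shell pipeline.
    sts = t.copy('infile.txt', 'outfile.txt')
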
diff --git a/modules/language/python/compile.scm b/modules/language/python/compile.scm index 3f02f38..a83a781 100644 --- a/modules/language/python/compile.scm +++ b/modules/language/python/compile.scm @@ -1879,7 +1879,9 @@ (map (lambda (x) (exp '() x)) arglist)) - + + (define name (string-join (map symbol->string args) ".")) + `((define-module (language python module ,@args) #:pure #:use-module ((guile) #:select @@ -1890,6 +1892,7 @@ #:use-module ((language python compile) #:select (pks)) #:use-module (language python exceptions)) (,(G 'define) __doc__ #f) + (,(G 'define) __name__ ,name) (,(G 'define) __module__ (,(G 'quote) (language python module ,@args)))))) (x '()))) @@ -2580,7 +2583,7 @@ (define-syntax ref-x (lambda (x) - (syntax-case x (quote __dict__) + (syntax-case x (@) ((_ v) #'v) ((_ v (#:fastfkn-ref f tag) . l) @@ -2593,20 +2596,29 @@ (apply f v x))) . l)))) ((_ v (#:fast-id f _) . l) #'(ref-x (f v) . l)) - ((_ v (#:identifier '__dict__) . l) + + ((_ v (#:identifier ((@ x q) dict)) . l) + (equal? (syntax->datum #'dict) '__dict__) #'(ref-x (py-dict v) . l)) + ((_ v (#:identifier x) . l) #'(ref-x (wr x (ref v x miss)) . l)) + ((_ v (#:call-obj x) . l) #'(ref-x (x v) . l)) + ((_ v (#:call x ...) . l) #'(ref-x (v x ...) . l)) + ((_ v (#:apply x ...) . l) #'(ref-x (py-apply v x ...) . l)) + ((_ v (#:apply x ...) . l) #'(ref-x (py-apply v x ...) . l)) + ((_ v (#:vecref x) . l) #'(ref-x (pylist-ref v x) . l)) + ((_ v (#:vecsub . x) . l) #'(ref-x (pylist-slice v . x) . l))))) @@ -2692,7 +2704,7 @@ (define-syntax boolit - (syntax-rules (and eq? equal? or not < <= > >=) + (syntax-rules (@ and eq? equal? or not < <= > >=) ((_ (and x y)) (and (boolit x) (boolit y))) ((_ (or x y)) (or (boolit x) (boolit y))) ((_ (not x )) (not (boolit x))) @@ -2741,18 +2753,28 @@ (define-syntax qset! (lambda (x) - (syntax-case x () + (pkkk x) + (syntax-case x (@@ @) ((_ (cons x y) v) (equal? (syntax->datum #'cons) '(@ (guile) cons)) #'(let ((w v)) (qset! x (car w)) (qset! y (cdr w)))) - ((_ '() v) + ((_ ((@ (guile) q) ()) v) + (equal? (syntax->datum #'q) 'quote) #'(if (not (null? v)) (raise (ValueError "too many values to unpack")) (values))) + + ((_ ((@@ u li) x) v) + (equal? (syntax->datum #'li) 'to-pylist) + #'(let ((w (to-list v))) + (qset! x w))) + ((_ (ref v a ...) w) + #'(set-x v (a ...) w)) + ((_ x v) #'(set! 
x v))))) diff --git a/modules/language/python/exceptions.scm b/modules/language/python/exceptions.scm index f2cceee..5b58f9e 100644 --- a/modules/language/python/exceptions.scm +++ b/modules/language/python/exceptions.scm @@ -17,7 +17,8 @@ UnicodeDecodeError LookupError IndentationError KeyboardInterrupt MemoryError NameError EOFError UnicodeError UnicodeEncodeError - FileExistsError FileNotFoundError IsADirectoryError )) + FileExistsError FileNotFoundError IsADirectoryError + EnvironmentError)) (define-syntax-rule (aif it p x y) (let ((it p)) (if it x y))) @@ -69,6 +70,7 @@ (define StopIteration 'StopIteration) (define GeneratorExit 'GeneratorExit) +(define-er EnvironmentError 'EnvironmentError) (define-er UnicodeEncodeError 'UnicodeEncodeError) (define-er FileExistsError 'FileExistsError) (define-er FileNotFoundError 'FileNotFoundError) diff --git a/modules/language/python/module/binascii.scm b/modules/language/python/module/binascii.scm index ab3dd69..2fc0e62 100644 --- a/modules/language/python/module/binascii.scm +++ b/modules/language/python/module/binascii.scm @@ -7,7 +7,7 @@ #:use-module (rnrs bytevectors) #:use-module (oop pf-objects) #:export (Error Incomplete a2b_uu b2a_uu a2b_base64 b2a_base64 a2b_qp b2a_qp - a2b_hex b2a_hex)) + a2b_hex b2a_hex crc32 crc_hqx)) (define-python-class Error (Exception)) (define-python-class Incomplete (Exception)) @@ -385,4 +385,32 @@ (x (logior a1 (ash a2 4)))) (lp (+ i 2) (cons x r))) (bytes (reverse r))))))) - + +(define (id x) x) +(define-syntax-rule (mkcrc crc_hqx high xor mask) + (def (crc_hqx data (= value 0)) + (let ((n (len data)) + (d (bv-scm data))) + (let lp ((i 0) (v value)) + (if (< i n) + (let ((b (id (bytevector-u8-ref d i)))) + (let lp2 ((j 0) (x 1) (v v)) + (if (> j -8) + (let ((bit (ash (logand x b) j)) + (hbit (logand v high))) + (if (= hbit 0) + (lp2 (- j 1) (ash x 1) (logior bit (ash v 1))) + (lp2 (- j 1) (ash x 1) (logxor + xor + (logand mask + (logior + bit + (ash v 1))))))) + (lp (+ i 1) v)))) + v))))) + + +(mkcrc crc_hqx #x8000 #x1021 #xffff) +(mkcrc crc32 #x80000000 #x04c11db7 #xffffffff) + + diff --git a/modules/language/python/module/mimetypes.py b/modules/language/python/module/mimetypes.py new file mode 100644 index 0000000..76a5e87 --- /dev/null +++ b/modules/language/python/module/mimetypes.py @@ -0,0 +1,553 @@ +module(mimetypes) +"""Guess the MIME type of a file. + +This module defines two useful functions: + +guess_type(url, strict=True) -- guess the MIME type and encoding of a URL. + +guess_extension(type, strict=True) -- guess the extension for a given MIME type. 
+ +It also contains the following, for tuning the behavior: + +Data: + +knownfiles -- list of files to parse +inited -- flag set when init() has been called +suffix_map -- dictionary mapping suffixes to suffixes +encodings_map -- dictionary mapping suffixes to encodings +types_map -- dictionary mapping suffixes to types + +Functions: + +init([files]) -- parse a list of files, default knownfiles (on Windows, the + default values are taken from the registry) +read_mime_types(file) -- parse one file, return a dictionary or None +""" + +import os +import sys +import posixpath +import urllib.parse +try: + import winreg as _winreg +except ImportError: + _winreg = None + +__all__ = [ + "knownfiles", "inited", "MimeTypes", + "guess_type", "guess_all_extensions", "guess_extension", + "add_type", "init", "read_mime_types", + "suffix_map", "encodings_map", "types_map", "common_types" +] + +knownfiles = [ + "/etc/mime.types", + "/etc/httpd/mime.types", # Mac OS X + "/etc/httpd/conf/mime.types", # Apache + "/etc/apache/mime.types", # Apache 1 + "/etc/apache2/mime.types", # Apache 2 + "/usr/local/etc/httpd/conf/mime.types", + "/usr/local/lib/netscape/mime.types", + "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2 + "/usr/local/etc/mime.types", # Apache 1.3 + ] + +inited = False +_db = None + + +class MimeTypes: + """MIME-types datastore. + + This datastore can handle information from mime.types-style files + and supports basic determination of MIME type from a filename or + URL, and can guess a reasonable extension given a MIME type. + """ + + def __init__(self, filenames=(), strict=True): + if not inited: + init() + self.encodings_map = encodings_map.copy() + self.suffix_map = suffix_map.copy() + self.types_map = ({}, {}) # dict for (non-strict, strict) + self.types_map_inv = ({}, {}) + for (ext, type) in types_map.items(): + self.add_type(type, ext, True) + for (ext, type) in common_types.items(): + self.add_type(type, ext, False) + for name in filenames: + self.read(name, strict) + + def add_type(self, type, ext, strict=True): + """Add a mapping between a type and an extension. + + When the extension is already known, the new + type will replace the old one. When the type + is already known the extension will be added + to the list of known extensions. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + self.types_map[strict][ext] = type + exts = self.types_map_inv[strict].setdefault(type, []) + if ext not in exts: + exts.append(ext) + + def guess_type(self, url, strict=True): + """Guess the type of a file based on its URL. + + Return value is a tuple (type, encoding) where type is None if + the type can't be guessed (no or unknown suffix) or a string + of the form type/subtype, usable for a MIME Content-type + header; and encoding is None for no encoding or the name of + the program used to encode (e.g. compress or gzip). The + mappings are table driven. Encoding suffixes are case + sensitive; type suffixes are first tried case sensitive, then + case insensitive. + + The suffixes .tgz, .taz and .tz (case sensitive!) are all + mapped to '.tar.gz'. (This is table-driven too, using the + dictionary suffix_map.) + + Optional `strict' argument when False adds a bunch of commonly found, + but non-standard types. 
+ """ + scheme, url = urllib.parse.splittype(url) + if scheme == 'data': + # syntax of data URLs: + # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data + # mediatype := [ type "/" subtype ] *( ";" parameter ) + # data := *urlchar + # parameter := attribute "=" value + # type/subtype defaults to "text/plain" + comma = url.find(',') + if comma < 0: + # bad data URL + return None, None + semi = url.find(';', 0, comma) + if semi >= 0: + type = url[:semi] + else: + type = url[:comma] + if '=' in type or '/' not in type: + type = 'text/plain' + return type, None # never compressed, so encoding is None + base, ext = posixpath.splitext(url) + while ext in self.suffix_map: + base, ext = posixpath.splitext(base + self.suffix_map[ext]) + if ext in self.encodings_map: + encoding = self.encodings_map[ext] + base, ext = posixpath.splitext(base) + else: + encoding = None + types_map = self.types_map[True] + if ext in types_map: + return types_map[ext], encoding + elif ext.lower() in types_map: + return types_map[ext.lower()], encoding + elif strict: + return None, encoding + types_map = self.types_map[False] + if ext in types_map: + return types_map[ext], encoding + elif ext.lower() in types_map: + return types_map[ext.lower()], encoding + else: + return None, encoding + + def guess_all_extensions(self, type, strict=True): + """Guess the extensions for a file based on its MIME type. + + Return value is a list of strings giving the possible filename + extensions, including the leading dot ('.'). The extension is not + guaranteed to have been associated with any particular data stream, + but would be mapped to the MIME type `type' by guess_type(). + + Optional `strict' argument when false adds a bunch of commonly found, + but non-standard types. + """ + type = type.lower() + extensions = self.types_map_inv[True].get(type, []) + if not strict: + for ext in self.types_map_inv[False].get(type, []): + if ext not in extensions: + extensions.append(ext) + return extensions + + def guess_extension(self, type, strict=True): + """Guess the extension for a file based on its MIME type. + + Return value is a string giving a filename extension, + including the leading dot ('.'). The extension is not + guaranteed to have been associated with any particular data + stream, but would be mapped to the MIME type `type' by + guess_type(). If no extension can be guessed for `type', None + is returned. + + Optional `strict' argument when false adds a bunch of commonly found, + but non-standard types. + """ + extensions = self.guess_all_extensions(type, strict) + if not extensions: + return None + return extensions[0] + + def read(self, filename, strict=True): + """ + Read a single mime.types-format file, specified by pathname. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + with open(filename, encoding='utf-8') as fp: + self.readfp(fp, strict) + + def readfp(self, fp, strict=True): + """ + Read a single mime.types-format file. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + while 1: + line = fp.readline() + if not line: + break + words = line.split() + for i in range(len(words)): + if words[i][0] == '#': + del words[i:] + break + if not words: + continue + type, suffixes = words[0], words[1:] + for suff in suffixes: + self.add_type(type, '.' + suff, strict) + + def read_windows_registry(self, strict=True): + """ + Load the MIME types database from Windows registry. 
+ + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + + # Windows only + if not _winreg: + return + + def enum_types(mimedb): + i = 0 + while True: + try: + ctype = _winreg.EnumKey(mimedb, i) + except EnvironmentError: + break + else: + if '\0' not in ctype: + yield ctype + i += 1 + + with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '') as hkcr: + for subkeyname in enum_types(hkcr): + try: + with _winreg.OpenKey(hkcr, subkeyname) as subkey: + # Only check file extensions + if not subkeyname.startswith("."): + continue + # raises EnvironmentError if no 'Content Type' value + mimetype, datatype = _winreg.QueryValueEx( + subkey, 'Content Type') + if datatype != _winreg.REG_SZ: + continue + self.add_type(mimetype, subkeyname, strict) + except EnvironmentError: + continue + +def guess_type(url, strict=True): + """Guess the type of a file based on its URL. + + Return value is a tuple (type, encoding) where type is None if the + type can't be guessed (no or unknown suffix) or a string of the + form type/subtype, usable for a MIME Content-type header; and + encoding is None for no encoding or the name of the program used + to encode (e.g. compress or gzip). The mappings are table + driven. Encoding suffixes are case sensitive; type suffixes are + first tried case sensitive, then case insensitive. + + The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped + to ".tar.gz". (This is table-driven too, using the dictionary + suffix_map). + + Optional `strict' argument when false adds a bunch of commonly found, but + non-standard types. + """ + if _db is None: + init() + return _db.guess_type(url, strict) + + +def guess_all_extensions(type, strict=True): + """Guess the extensions for a file based on its MIME type. + + Return value is a list of strings giving the possible filename + extensions, including the leading dot ('.'). The extension is not + guaranteed to have been associated with any particular data + stream, but would be mapped to the MIME type `type' by + guess_type(). If no extension can be guessed for `type', None + is returned. + + Optional `strict' argument when false adds a bunch of commonly found, + but non-standard types. + """ + if _db is None: + init() + return _db.guess_all_extensions(type, strict) + +def guess_extension(type, strict=True): + """Guess the extension for a file based on its MIME type. + + Return value is a string giving a filename extension, including the + leading dot ('.'). The extension is not guaranteed to have been + associated with any particular data stream, but would be mapped to the + MIME type `type' by guess_type(). If no extension can be guessed for + `type', None is returned. + + Optional `strict' argument when false adds a bunch of commonly found, + but non-standard types. + """ + if _db is None: + init() + return _db.guess_extension(type, strict) + +def add_type(type, ext, strict=True): + """Add a mapping between a type and an extension. + + When the extension is already known, the new + type will replace the old one. When the type + is already known the extension will be added + to the list of known extensions. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. 
+ """ + if _db is None: + init() + return _db.add_type(type, ext, strict) + + +def init(files=None): + global suffix_map, types_map, encodings_map, common_types + global inited, _db + inited = True # so that MimeTypes.__init__() doesn't call us again + db = MimeTypes() + if files is None: + if _winreg: + db.read_windows_registry() + files = knownfiles + for file in files: + if os.path.isfile(file): + db.read(file) + encodings_map = db.encodings_map + suffix_map = db.suffix_map + types_map = db.types_map[True] + common_types = db.types_map[False] + # Make the DB a global variable now that it is fully initialized + _db = db + + +def read_mime_types(file): + try: + f = open(file) + except OSError: + return None + with f: + db = MimeTypes() + db.readfp(f, True) + return db.types_map[True] + + +def _default_mime_types(): + global suffix_map + global encodings_map + global types_map + global common_types + + suffix_map = { + '.svgz': '.svg.gz', + '.tgz': '.tar.gz', + '.taz': '.tar.gz', + '.tz': '.tar.gz', + '.tbz2': '.tar.bz2', + '.txz': '.tar.xz', + } + + encodings_map = { + '.gz': 'gzip', + '.Z': 'compress', + '.bz2': 'bzip2', + '.xz': 'xz', + } + + # Before adding new types, make sure they are either registered with IANA, + # at http://www.iana.org/assignments/media-types + # or extensions, i.e. using the x- prefix + + # If you add to these, please keep them sorted! + types_map = { + '.a' : 'application/octet-stream', + '.ai' : 'application/postscript', + '.aif' : 'audio/x-aiff', + '.aifc' : 'audio/x-aiff', + '.aiff' : 'audio/x-aiff', + '.au' : 'audio/basic', + '.avi' : 'video/x-msvideo', + '.bat' : 'text/plain', + '.bcpio' : 'application/x-bcpio', + '.bin' : 'application/octet-stream', + '.bmp' : 'image/x-ms-bmp', + '.c' : 'text/plain', + # Duplicates :( + '.cdf' : 'application/x-cdf', + '.cdf' : 'application/x-netcdf', + '.cpio' : 'application/x-cpio', + '.csh' : 'application/x-csh', + '.css' : 'text/css', + '.csv' : 'text/csv', + '.dll' : 'application/octet-stream', + '.doc' : 'application/msword', + '.dot' : 'application/msword', + '.dvi' : 'application/x-dvi', + '.eml' : 'message/rfc822', + '.eps' : 'application/postscript', + '.etx' : 'text/x-setext', + '.exe' : 'application/octet-stream', + '.gif' : 'image/gif', + '.gtar' : 'application/x-gtar', + '.h' : 'text/plain', + '.hdf' : 'application/x-hdf', + '.htm' : 'text/html', + '.html' : 'text/html', + '.ico' : 'image/vnd.microsoft.icon', + '.ief' : 'image/ief', + '.jpe' : 'image/jpeg', + '.jpeg' : 'image/jpeg', + '.jpg' : 'image/jpeg', + '.js' : 'application/javascript', + '.json' : 'application/json', + '.ksh' : 'text/plain', + '.latex' : 'application/x-latex', + '.m1v' : 'video/mpeg', + '.m3u' : 'application/vnd.apple.mpegurl', + '.m3u8' : 'application/vnd.apple.mpegurl', + '.man' : 'application/x-troff-man', + '.me' : 'application/x-troff-me', + '.mht' : 'message/rfc822', + '.mhtml' : 'message/rfc822', + '.mif' : 'application/x-mif', + '.mov' : 'video/quicktime', + '.movie' : 'video/x-sgi-movie', + '.mp2' : 'audio/mpeg', + '.mp3' : 'audio/mpeg', + '.mp4' : 'video/mp4', + '.mpa' : 'video/mpeg', + '.mpe' : 'video/mpeg', + '.mpeg' : 'video/mpeg', + '.mpg' : 'video/mpeg', + '.ms' : 'application/x-troff-ms', + '.nc' : 'application/x-netcdf', + '.nws' : 'message/rfc822', + '.o' : 'application/octet-stream', + '.obj' : 'application/octet-stream', + '.oda' : 'application/oda', + '.p12' : 'application/x-pkcs12', + '.p7c' : 'application/pkcs7-mime', + '.pbm' : 'image/x-portable-bitmap', + '.pdf' : 'application/pdf', + '.pfx' : 
'application/x-pkcs12', + '.pgm' : 'image/x-portable-graymap', + '.pl' : 'text/plain', + '.png' : 'image/png', + '.pnm' : 'image/x-portable-anymap', + '.pot' : 'application/vnd.ms-powerpoint', + '.ppa' : 'application/vnd.ms-powerpoint', + '.ppm' : 'image/x-portable-pixmap', + '.pps' : 'application/vnd.ms-powerpoint', + '.ppt' : 'application/vnd.ms-powerpoint', + '.ps' : 'application/postscript', + '.pwz' : 'application/vnd.ms-powerpoint', + '.py' : 'text/x-python', + '.pyc' : 'application/x-python-code', + '.pyo' : 'application/x-python-code', + '.qt' : 'video/quicktime', + '.ra' : 'audio/x-pn-realaudio', + '.ram' : 'application/x-pn-realaudio', + '.ras' : 'image/x-cmu-raster', + '.rdf' : 'application/xml', + '.rgb' : 'image/x-rgb', + '.roff' : 'application/x-troff', + '.rtx' : 'text/richtext', + '.sgm' : 'text/x-sgml', + '.sgml' : 'text/x-sgml', + '.sh' : 'application/x-sh', + '.shar' : 'application/x-shar', + '.snd' : 'audio/basic', + '.so' : 'application/octet-stream', + '.src' : 'application/x-wais-source', + '.sv4cpio': 'application/x-sv4cpio', + '.sv4crc' : 'application/x-sv4crc', + '.svg' : 'image/svg+xml', + '.swf' : 'application/x-shockwave-flash', + '.t' : 'application/x-troff', + '.tar' : 'application/x-tar', + '.tcl' : 'application/x-tcl', + '.tex' : 'application/x-tex', + '.texi' : 'application/x-texinfo', + '.texinfo': 'application/x-texinfo', + '.tif' : 'image/tiff', + '.tiff' : 'image/tiff', + '.tr' : 'application/x-troff', + '.tsv' : 'text/tab-separated-values', + '.txt' : 'text/plain', + '.ustar' : 'application/x-ustar', + '.vcf' : 'text/x-vcard', + '.wav' : 'audio/x-wav', + '.webm' : 'video/webm', + '.wiz' : 'application/msword', + '.wsdl' : 'application/xml', + '.xbm' : 'image/x-xbitmap', + '.xlb' : 'application/vnd.ms-excel', + # Duplicates :( + '.xls' : 'application/excel', + '.xls' : 'application/vnd.ms-excel', + '.xml' : 'text/xml', + '.xpdl' : 'application/xml', + '.xpm' : 'image/x-xpixmap', + '.xsl' : 'application/xml', + '.xwd' : 'image/x-xwindowdump', + '.zip' : 'application/zip', + } + + # These are non-standard types, commonly found in the wild. They will + # only match if strict=0 flag is given to the API methods. + + # Please sort these too + common_types = { + '.jpg' : 'image/jpg', + '.mid' : 'audio/midi', + '.midi': 'audio/midi', + '.pct' : 'image/pict', + '.pic' : 'image/pict', + '.pict': 'image/pict', + '.rtf' : 'application/rtf', + '.xul' : 'text/xul' + } + + +_default_mime_types() diff --git a/modules/language/python/module/pipes.py b/modules/language/python/module/pipes.py new file mode 100644 index 0000000..1285eac --- /dev/null +++ b/modules/language/python/module/pipes.py @@ -0,0 +1,248 @@ +module(pipes) +"""Conversion pipeline templates. + +The problem: +------------ + +Suppose you have some data that you want to convert to another format, +such as from GIF image format to PPM image format. Maybe the +conversion involves several steps (e.g. piping it through compress or +uuencode). Some of the conversion steps may require that their input +is a disk file, others may be able to read standard input; similar for +their output. The input to the entire conversion may also be read +from a disk file or from an open file, and similar for its output. + +The module lets you construct a pipeline template by sticking one or +more conversion steps together. It will take care of creating and +removing temporary files if they are necessary to hold intermediate +data. 
You can then use the template to do conversions from many +different sources to many different destinations. The temporary +file names used are different each time the template is used. + +The templates are objects so you can create templates for many +different conversion steps and store them in a dictionary, for +instance. + + +Directions: +----------- + +To create a template: + t = Template() + +To add a conversion step to a template: + t.append(command, kind) +where kind is a string of two characters: the first is '-' if the +command reads its standard input or 'f' if it requires a file; the +second likewise for the output. The command must be valid /bin/sh +syntax. If input or output files are required, they are passed as +$IN and $OUT; otherwise, it must be possible to use the command in +a pipeline. + +To add a conversion step at the beginning: + t.prepend(command, kind) + +To convert a file to another file using a template: + sts = t.copy(infile, outfile) +If infile or outfile are the empty string, standard input is read or +standard output is written, respectively. The return value is the +exit status of the conversion pipeline. + +To open a file for reading or writing through a conversion pipeline: + fp = t.open(file, mode) +where mode is 'r' to read the file, or 'w' to write it -- just like +for the built-in function open() or for os.popen(). + +To create a new template object initialized to a given one: + t2 = t.clone() +""" # ' + + +import re +import os +import tempfile +# we import the quote function rather than the module for backward compat +# (quote used to be an undocumented but used function in pipes) +from shlex import quote + +__all__ = ["Template"] + +# Conversion step kinds + +FILEIN_FILEOUT = 'ff' # Must read & write real files +STDIN_FILEOUT = '-f' # Must write a real file +FILEIN_STDOUT = 'f-' # Must read a real file +STDIN_STDOUT = '--' # Normal pipeline element +SOURCE = '.-' # Must be first, writes stdout +SINK = '-.' 
# Must be last, reads stdin + +stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT, \ + SOURCE, SINK] + + +class Template: + """Class representing a pipeline template.""" + + def __init__(self): + """Template() returns a fresh pipeline template.""" + self.debugging = 0 + self.reset() + + def __repr__(self): + """t.__repr__() implements repr(t).""" + return '<Template instance, steps=%r>' % (self.steps,) + + def reset(self): + """t.reset() restores a pipeline template to its initial state.""" + self.steps = [] + + def clone(self): + """t.clone() returns a new pipeline template with identical + initial state as the current one.""" + t = Template() + t.steps = self.steps[:] + t.debugging = self.debugging + return t + + def debug(self, flag): + """t.debug(flag) turns debugging on or off.""" + self.debugging = flag + + def append(self, cmd, kind): + """t.append(cmd, kind) adds a new step at the end.""" + if type(cmd) is not type(''): + raise TypeError('Template.append: cmd must be a string') + if kind not in stepkinds: + raise ValueError('Template.append: bad kind %r' % (kind,)) + if kind == SOURCE: + raise ValueError('Template.append: SOURCE can only be prepended') + if self.steps and self.steps[-1][1] == SINK: + raise ValueError('Template.append: already ends with SINK') + if kind[0] == 'f' and not re.search(r'\$IN\b', cmd): + raise ValueError('Template.append: missing $IN in cmd') + if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd): + raise ValueError('Template.append: missing $OUT in cmd') + self.steps.append((cmd, kind)) + + def prepend(self, cmd, kind): + """t.prepend(cmd, kind) adds a new step at the front.""" + if type(cmd) is not type(''): + raise TypeError('Template.prepend: cmd must be a string') + if kind not in stepkinds: + raise ValueError('Template.prepend: bad kind %r' % (kind,)) + if kind == SINK: + raise ValueError('Template.prepend: SINK can only be appended') + if self.steps and self.steps[0][1] == SOURCE: + raise ValueError('Template.prepend: already begins with SOURCE') + if kind[0] == 'f' and not re.search(r'\$IN\b', cmd): + raise ValueError('Template.prepend: missing $IN in cmd') + if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd): + raise ValueError('Template.prepend: missing $OUT in cmd') + self.steps.insert(0, (cmd, kind)) + + def open(self, file, rw): + """t.open(file, rw) returns a pipe or file object open for + reading or writing; the file is the other end of the pipeline.""" + if rw == 'r': + return self.open_r(file) + if rw == 'w': + return self.open_w(file) + raise ValueError('Template.open: rw must be \'r\' or \'w\', not %r' + % (rw,)) + + def open_r(self, file): + """t.open_r(file) and t.open_w(file) implement + t.open(file, 'r') and t.open(file, 'w') respectively.""" + if not self.steps: + return open(file, 'r') + if self.steps[-1][1] == SINK: + raise ValueError('Template.open_r: pipeline ends with SINK') + cmd = self.makepipeline(file, '') + return os.popen(cmd, 'r') + + def open_w(self, file): + if not self.steps: + return open(file, 'w') + if self.steps[0][1] == SOURCE: + raise ValueError('Template.open_w: pipeline begins with SOURCE') + cmd = self.makepipeline('', file) + return os.popen(cmd, 'w') + + def copy(self, infile, outfile): + return os.system(self.makepipeline(infile, outfile)) + + def makepipeline(self, infile, outfile): + cmd = makepipeline(infile, self.steps, outfile) + if self.debugging: + print(cmd) + cmd = 'set -x; ' + cmd + return cmd + + +def makepipeline(infile, steps, outfile): + # Build a list with for each
command: + # [input filename or '', command string, kind, output filename or ''] + + list = [] + for cmd, kind in steps: + list.append(['', cmd, kind, '']) + # + # Make sure there is at least one step + # + if not list: + list.append(['', 'cat', '--', '']) + # + # Take care of the input and output ends + # + [cmd, kind] = list[0][1:3] + if kind[0] == 'f' and not infile: + list.insert(0, ['', 'cat', '--', '']) + list[0][0] = infile + # + [cmd, kind] = list[-1][1:3] + if kind[1] == 'f' and not outfile: + list.append(['', 'cat', '--', '']) + list[-1][-1] = outfile + # + # Invent temporary files to connect stages that need files + # + garbage = [] + for i in range(1, len(list)): + lkind = list[i-1][2] + rkind = list[i][2] + if lkind[1] == 'f' or rkind[0] == 'f': + (fd, temp) = tempfile.mkstemp() + os.close(fd) + garbage.append(temp) + list[i-1][-1] = list[i][0] = temp + # + for item in list: + [inf, cmd, kind, outf] = item + if kind[1] == 'f': + cmd = 'OUT=' + quote(outf) + '; ' + cmd + if kind[0] == 'f': + cmd = 'IN=' + quote(inf) + '; ' + cmd + if kind[0] == '-' and inf: + cmd = cmd + ' <' + quote(inf) + if kind[1] == '-' and outf: + cmd = cmd + ' >' + quote(outf) + item[1] = cmd + # + cmdlist = list[0][1] + for item in list[1:]: + [cmd, kind] = item[1:3] + if item[0] == '': + if 'f' in kind: + cmd = '{ ' + cmd + '; }' + cmdlist = cmdlist + ' |\n' + cmd + else: + cmdlist = cmdlist + '\n' + cmd + # + if garbage: + rmcmd = 'rm -f' + for file in garbage: + rmcmd = rmcmd + ' ' + quote(file) + trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15' + cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd + # + return cmdlist diff --git a/modules/language/python/module/re/compile.scm b/modules/language/python/module/re/compile.scm index f92a8dd..d23ea71 100644 --- a/modules/language/python/module/re/compile.scm +++ b/modules/language/python/module/re/compile.scm @@ -451,12 +451,16 @@ (let ((f (apply f-or! (map (lambda (x) (match x - ((#:range ch1 ch2) - (f-reg! (format #f "[~a-~a]" ch1 ch2))) - ((#:ch (#:class ch)) - (get-class ch)) - ((#:ch ch) - (get-ch ch)))) + ((#:range ch1 ch2) + (if (and (<= (char->integer ch1) 10) + (>= (char->integer ch2) 10)) + (f-or! f-nl! + (f-reg! (format #f "[~a-~a]" ch1 ch2))) + (f-reg! (format #f "[~a-~a]" ch1 ch2)))) + ((#:ch (#:class ch)) + (get-class ch)) + ((#:ch ch) + (get-ch ch)))) ch)))) (trace `brack (fw diff --git a/modules/language/python/module/shlex.py b/modules/language/python/module/shlex.py new file mode 100644 index 0000000..afdd35d --- /dev/null +++ b/modules/language/python/module/shlex.py @@ -0,0 +1,329 @@ +module(shlex) +"""A lexical analyzer class for simple shell-like syntaxes.""" + +# Module and documentation by Eric S. Raymond, 21 Dec 1998 +# Input stacking and error message cleanup added by ESR, March 2000 +# push_source() and pop_source() made explicit by ESR, January 2001. +# Posix compliance, split(), string arguments, and +# iterator interface by Gustavo Niemeyer, April 2003. +# changes to tokenize more like Posix shells by Vinay Sajip, July 2016. + +import os +import re +import sys +from collections import deque + +from io import StringIO + +__all__ = ["shlex", "split", "quote"] + +class shlex: + "A lexical analyzer class for simple shell-like syntaxes." 
+ def __init__(self, instream=None, infile=None, posix=False, + punctuation_chars=False): + if isinstance(instream, str): + instream = StringIO(instream) + if instream is not None: + self.instream = instream + self.infile = infile + else: + self.instream = sys.stdin + self.infile = None + self.posix = posix + if posix: + self.eof = None + else: + self.eof = '' + self.commenters = '#' + self.wordchars = ('abcdfeghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_') + if self.posix: + self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ' + 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ') + self.whitespace = ' \t\r\n' + self.whitespace_split = False + self.quotes = '\'"' + self.escape = '\\' + self.escapedquotes = '"' + self.state = ' ' + self.pushback = deque() + self.lineno = 1 + self.debug = 0 + self.token = '' + self.filestack = deque() + self.source = None + if not punctuation_chars: + punctuation_chars = '' + elif punctuation_chars is True: + punctuation_chars = '();<>|&' + self.punctuation_chars = punctuation_chars + if punctuation_chars: + # _pushback_chars is a push back queue used by lookahead logic + self._pushback_chars = deque() + # these chars added because allowed in file names, args, wildcards + self.wordchars += '~-./*?=' + #remove any punctuation chars from wordchars + t = self.wordchars.maketrans(dict.fromkeys(punctuation_chars)) + self.wordchars = self.wordchars.translate(t) + + def push_token(self, tok): + "Push a token onto the stack popped by the get_token method" + if self.debug >= 1: + print("shlex: pushing token " + repr(tok)) + self.pushback.appendleft(tok) + + def push_source(self, newstream, newfile=None): + "Push an input source onto the lexer's input source stack." + if isinstance(newstream, str): + newstream = StringIO(newstream) + self.filestack.appendleft((self.infile, self.instream, self.lineno)) + self.infile = newfile + self.instream = newstream + self.lineno = 1 + if self.debug: + if newfile is not None: + print('shlex: pushing to file %s' % (self.infile,)) + else: + print('shlex: pushing to stream %s' % (self.instream,)) + + def pop_source(self): + "Pop the input source stack." + self.instream.close() + (self.infile, self.instream, self.lineno) = self.filestack.popleft() + if self.debug: + print('shlex: popping to %s, line %d' \ + % (self.instream, self.lineno)) + self.state = ' ' + + def get_token(self): + "Get a token from the input stream (or from stack if it's nonempty)" + if self.pushback: + tok = self.pushback.popleft() + if self.debug >= 1: + print("shlex: popping token " + repr(tok)) + return tok + # No pushback. Get a token. + raw = self.read_token() + # Handle inclusions + if self.source is not None: + while raw == self.source: + spec = self.sourcehook(self.read_token()) + if spec: + (newfile, newstream) = spec + self.push_source(newstream, newfile) + raw = self.get_token() + # Maybe we got EOF instead? 
+ while raw == self.eof: + if not self.filestack: + return self.eof + else: + self.pop_source() + raw = self.get_token() + # Neither inclusion nor EOF + if self.debug >= 1: + if raw != self.eof: + print("shlex: token=" + repr(raw)) + else: + print("shlex: token=EOF") + return raw + + def read_token(self): + quoted = False + escapedstate = ' ' + while True: + if self.punctuation_chars and self._pushback_chars: + nextchar = self._pushback_chars.pop() + else: + nextchar = self.instream.read(1) + if nextchar == '\n': + self.lineno += 1 + if self.debug >= 3: + print("shlex: in state %r I see character: %r" % (self.state, + nextchar)) + if self.state is None: + self.token = '' # past end of file + break + elif self.state == ' ': + if not nextchar: + self.state = None # end of file + break + elif nextchar in self.whitespace: + if self.debug >= 2: + print("shlex: I see whitespace in whitespace state") + if self.token or (self.posix and quoted): + break # emit current token + else: + continue + elif nextchar in self.commenters: + self.instream.readline() + self.lineno += 1 + elif self.posix and nextchar in self.escape: + escapedstate = 'a' + self.state = nextchar + elif nextchar in self.wordchars: + self.token = nextchar + self.state = 'a' + elif nextchar in self.punctuation_chars: + self.token = nextchar + self.state = 'c' + elif nextchar in self.quotes: + if not self.posix: + self.token = nextchar + self.state = nextchar + elif self.whitespace_split: + self.token = nextchar + self.state = 'a' + else: + self.token = nextchar + if self.token or (self.posix and quoted): + break # emit current token + else: + continue + elif self.state in self.quotes: + quoted = True + if not nextchar: # end of file + if self.debug >= 2: + print("shlex: I see EOF in quotes state") + # XXX what error should be raised here? + raise ValueError("No closing quotation") + if nextchar == self.state: + if not self.posix: + self.token += nextchar + self.state = ' ' + break + else: + self.state = 'a' + elif (self.posix and nextchar in self.escape and self.state + in self.escapedquotes): + escapedstate = self.state + self.state = nextchar + else: + self.token += nextchar + elif self.state in self.escape: + if not nextchar: # end of file + if self.debug >= 2: + print("shlex: I see EOF in escape state") + # XXX what error should be raised here? + raise ValueError("No escaped character") + # In posix shells, only the quote itself or the escape + # character may be escaped within quotes. 
+ if (escapedstate in self.quotes and + nextchar != self.state and nextchar != escapedstate): + self.token += self.state + self.token += nextchar + self.state = escapedstate + elif self.state in ('a', 'c'): + if not nextchar: + self.state = None # end of file + break + elif nextchar in self.whitespace: + if self.debug >= 2: + print("shlex: I see whitespace in word state") + self.state = ' ' + if self.token or (self.posix and quoted): + break # emit current token + else: + continue + elif nextchar in self.commenters: + self.instream.readline() + self.lineno += 1 + if self.posix: + self.state = ' ' + if self.token or (self.posix and quoted): + break # emit current token + else: + continue + elif self.state == 'c': + if nextchar in self.punctuation_chars: + self.token += nextchar + else: + if nextchar not in self.whitespace: + self._pushback_chars.append(nextchar) + self.state = ' ' + break + elif self.posix and nextchar in self.quotes: + self.state = nextchar + elif self.posix and nextchar in self.escape: + escapedstate = 'a' + self.state = nextchar + elif (nextchar in self.wordchars or nextchar in self.quotes + or self.whitespace_split): + self.token += nextchar + else: + if self.punctuation_chars: + self._pushback_chars.append(nextchar) + else: + self.pushback.appendleft(nextchar) + if self.debug >= 2: + print("shlex: I see punctuation in word state") + self.state = ' ' + if self.token or (self.posix and quoted): + break # emit current token + else: + continue + result = self.token + self.token = '' + if self.posix and not quoted and result == '': + result = None + if self.debug > 1: + if result: + print("shlex: raw token=" + repr(result)) + else: + print("shlex: raw token=EOF") + return result + + def sourcehook(self, newfile): + "Hook called on a filename to be sourced." + if newfile[0] == '"': + newfile = newfile[1:-1] + # This implements cpp-like semantics for relative-path inclusion. + if isinstance(self.infile, str) and not os.path.isabs(newfile): + newfile = os.path.join(os.path.dirname(self.infile), newfile) + return (newfile, open(newfile, "r")) + + def error_leader(self, infile=None, lineno=None): + "Emit a C-compiler-like, Emacs-friendly error-message leader." + if infile is None: + infile = self.infile + if lineno is None: + lineno = self.lineno + return "\"%s\", line %d: " % (infile, lineno) + + def __iter__(self): + return self + + def __next__(self): + token = self.get_token() + if token == self.eof: + raise StopIteration + return token + +def split(s, comments=False, posix=True): + lex = shlex(s, posix=posix) + lex.whitespace_split = True + if not comments: + lex.commenters = '' + return list(lex) + + +_find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search + +def quote(s): + """Return a shell-escaped version of the string *s*.""" + if not s: + return "''" + if _find_unsafe(s) is None: + return s + + # use single quotes, and put single quotes into double quotes + # the string $'b is then quoted as '$'"'"'b' + return "'" + s.replace("'", "'\"'\"'") + "'" + + +def _print_tokens(lexer): + while 1: + tt = lexer.get_token() + if not tt: + break + print("Token: " + repr(tt)) + diff --git a/modules/language/python/module/urllib/parse.py b/modules/language/python/module/urllib/parse.py new file mode 100644 index 0000000..3bfe63c --- /dev/null +++ b/modules/language/python/module/urllib/parse.py @@ -0,0 +1,1022 @@ +module(urllib,parse) +"""Parse (absolute and relative) URLs. + +urlparse module is based upon the following RFC specifications. 
+ +RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding +and L. Masinter, January 2005. + +RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter +and L.Masinter, December 1999. + +RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T. +Berners-Lee, R. Fielding, and L. Masinter, August 1998. + +RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998. + +RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June +1995. + +RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. +McCahill, December 1994 + +RFC 3986 is considered the current standard and any future changes to +urlparse module should conform with it. The urlparse module is +currently not entirely compliant with this RFC due to defacto +scenarios for parsing, and for backward compatibility purposes, some +parsing quirks from older RFCs are retained. The testcases in +test_urlparse.py provides a good indicator of parsing behavior. +""" + +import re +import sys +import collections + +__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", + "urlsplit", "urlunsplit", "urlencode", "parse_qs", + "parse_qsl", "quote", "quote_plus", "quote_from_bytes", + "unquote", "unquote_plus", "unquote_to_bytes", + "DefragResult", "ParseResult", "SplitResult", + "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"] + +# A classification of schemes. +# The empty string classifies URLs with no scheme specified, +# being the default value returned by “urlsplit” and “urlparse”. + +uses_relative = ['', 'ftp', 'http', 'gopher', 'nntp', 'imap', + 'wais', 'file', 'https', 'shttp', 'mms', + 'prospero', 'rtsp', 'rtspu', 'sftp', + 'svn', 'svn+ssh', 'ws', 'wss'] + +uses_netloc = ['', 'ftp', 'http', 'gopher', 'nntp', 'telnet', + 'imap', 'wais', 'file', 'mms', 'https', 'shttp', + 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', + 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh', + 'ws', 'wss'] + +uses_params = ['', 'ftp', 'hdl', 'prospero', 'http', 'imap', + 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', + 'mms', 'sftp', 'tel'] + +# These are not actually used anymore, but should stay for backwards +# compatibility. (They are undocumented, but have a public-looking name.) + +non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', + 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] + +uses_query = ['', 'http', 'wais', 'imap', 'https', 'shttp', 'mms', + 'gopher', 'rtsp', 'rtspu', 'sip', 'sips'] + +uses_fragment = ['', 'ftp', 'hdl', 'http', 'gopher', 'news', + 'nntp', 'wais', 'https', 'shttp', 'snews', + 'file', 'prospero'] + +# Characters valid in scheme names +scheme_chars = ('abcdefghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + '0123456789' + '+-.') + +# XXX: Consider replacing with functools.lru_cache +MAX_CACHE_SIZE = 20 +_parse_cache = {} + +def clear_cache(): + """Clear the parse cache and the quoters cache.""" + _parse_cache.clear() + _safe_quoters.clear() + + +# Helpers for bytes handling +# For 3.2, we deliberately require applications that +# handle improperly quoted URLs to do their own +# decoding and encoding. 
If valid use cases are +# presented, we may relax this by using latin-1 +# decoding internally for 3.3 +_implicit_encoding = 'ascii' +_implicit_errors = 'strict' + +def _noop(obj): + return obj + +def _encode_result(obj, encoding=_implicit_encoding, + errors=_implicit_errors): + return obj.encode(encoding, errors) + +def _decode_args(args, encoding=_implicit_encoding, + errors=_implicit_errors): + return tuple(x.decode(encoding, errors) if x else '' for x in args) + +def _coerce_args(*args): + # Invokes decode if necessary to create str args + # and returns the coerced inputs along with + # an appropriate result coercion function + # - noop for str inputs + # - encoding function otherwise + str_input = isinstance(args[0], str) + for arg in args[1:]: + # We special-case the empty string to support the + # "scheme=''" default argument to some functions + if arg and isinstance(arg, str) != str_input: + raise TypeError("Cannot mix str and non-str arguments") + if str_input: + return args + (_noop,) + return _decode_args(args) + (_encode_result,) + +# Result objects are more helpful than simple tuples +class _ResultMixinStr(object): + """Standard approach to encoding parsed results from str to bytes""" + __slots__ = () + + def encode(self, encoding='ascii', errors='strict'): + return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self)) + + +class _ResultMixinBytes(object): + """Standard approach to decoding parsed results from bytes to str""" + __slots__ = () + + def decode(self, encoding='ascii', errors='strict'): + return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self)) + + +class _NetlocResultMixinBase(object): + """Shared methods for the parsed result objects containing a netloc element""" + __slots__ = () + + @property + def username(self): + return self._userinfo[0] + + @property + def password(self): + return self._userinfo[1] + + @property + def hostname(self): + hostname = self._hostinfo[0] + if not hostname: + hostname = None + elif hostname is not None: + hostname = hostname.lower() + return hostname + + @property + def port(self): + port = self._hostinfo[1] + if port is not None: + port = int(port, 10) + if not ( 0 <= port <= 65535): + raise ValueError("Port out of range 0-65535") + return port + + +class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr): + __slots__ = () + + @property + def _userinfo(self): + netloc = self.netloc + userinfo, have_info, hostinfo = netloc.rpartition('@') + if have_info: + username, have_password, password = userinfo.partition(':') + if not have_password: + password = None + else: + username = password = None + return username, password + + @property + def _hostinfo(self): + netloc = self.netloc + _, _, hostinfo = netloc.rpartition('@') + _, have_open_br, bracketed = hostinfo.partition('[') + if have_open_br: + hostname, _, port = bracketed.partition(']') + _, _, port = port.partition(':') + else: + hostname, _, port = hostinfo.partition(':') + if not port: + port = None + return hostname, port + + +class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes): + __slots__ = () + + @property + def _userinfo(self): + netloc = self.netloc + userinfo, have_info, hostinfo = netloc.rpartition(b'@') + if have_info: + username, have_password, password = userinfo.partition(b':') + if not have_password: + password = None + else: + username = password = None + return username, password + + @property + def _hostinfo(self): + netloc = self.netloc + _, _, hostinfo = netloc.rpartition(b'@') + _, 
have_open_br, bracketed = hostinfo.partition(b'[') + if have_open_br: + hostname, _, port = bracketed.partition(b']') + _, _, port = port.partition(b':') + else: + hostname, _, port = hostinfo.partition(b':') + if not port: + port = None + return hostname, port + + +from collections import namedtuple + +_DefragResultBase = namedtuple('DefragResult', 'url fragment') +_SplitResultBase = namedtuple( + 'SplitResult', 'scheme netloc path query fragment') +_ParseResultBase = namedtuple( + 'ParseResult', 'scheme netloc path params query fragment') + +_DefragResultBase.__doc__ = """ +DefragResult(url, fragment) + +A 2-tuple that contains the url without fragment identifier and the fragment +identifier as a separate argument. +""" + +#_DefragResultBase.url.__doc__ = """The URL with no fragment identifier.""" + +#_DefragResultBase.fragment.__doc__ = """ +#Fragment identifier separated from URL, that allows indirect identification of a +#secondary resource by reference to a primary resource and additional identifying +#information. +#""" + +_SplitResultBase.__doc__ = """ +SplitResult(scheme, netloc, path, query, fragment) + +A 5-tuple that contains the different components of a URL. Similar to +ParseResult, but does not split params. +""" + +#_SplitResultBase.scheme.__doc__ = """Specifies URL scheme for the request.""" + +#_SplitResultBase.netloc.__doc__ = """ +#Network location where the request is made to. +#""" + +#_SplitResultBase.path.__doc__ = """ +#The hierarchical path, such as the path to a file to download. +#""" + +#_SplitResultBase.query.__doc__ = """ +#The query component, that contains non-hierarchical data, that along with data +#in path component, identifies a resource in the scope of URI's scheme and +#network location. +#""" + +#_SplitResultBase.fragment.__doc__ = """ +#Fragment identifier, that allows indirect identification of a secondary resource +#by reference to a primary resource and additional identifying information. +#""" + +_ParseResultBase.__doc__ = """ +ParseResult(scheme, netloc, path, params, query, fragment) + +A 6-tuple that contains components of a parsed URL. +""" + +#_ParseResultBase.scheme.__doc__ = _SplitResultBase.scheme.__doc__ +#_ParseResultBase.netloc.__doc__ = _SplitResultBase.netloc.__doc__ +#_ParseResultBase.path.__doc__ = _SplitResultBase.path.__doc__ +#_ParseResultBase.params.__doc__ = """ +#Parameters for last path element used to dereference the URI in order to provide +#access to perform some operation on the resource. 
+#""" + +#_ParseResultBase.query.__doc__ = _SplitResultBase.query.__doc__ +#_ParseResultBase.fragment.__doc__ = _SplitResultBase.fragment.__doc__ + + +# For backwards compatibility, alias _NetlocResultMixinStr +# ResultBase is no longer part of the documented API, but it is +# retained since deprecating it isn't worth the hassle +ResultBase = _NetlocResultMixinStr + +# Structured result objects for string data +class DefragResult(_DefragResultBase, _ResultMixinStr): + __slots__ = () + def geturl(self): + if self.fragment: + return self.url + '#' + self.fragment + else: + return self.url + +class SplitResult(_SplitResultBase, _NetlocResultMixinStr): + __slots__ = () + def geturl(self): + return urlunsplit(self) + +class ParseResult(_ParseResultBase, _NetlocResultMixinStr): + __slots__ = () + def geturl(self): + return urlunparse(self) + +# Structured result objects for bytes data +class DefragResultBytes(_DefragResultBase, _ResultMixinBytes): + __slots__ = () + def geturl(self): + if self.fragment: + return self.url + b'#' + self.fragment + else: + return self.url + +class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes): + __slots__ = () + def geturl(self): + return urlunsplit(self) + +class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes): + __slots__ = () + def geturl(self): + return urlunparse(self) + +# Set up the encode/decode result pairs +def _fix_result_transcoding(): + _result_pairs = ( + (DefragResult, DefragResultBytes), + (SplitResult, SplitResultBytes), + (ParseResult, ParseResultBytes), + ) + for _decoded, _encoded in _result_pairs: + _decoded._encoded_counterpart = _encoded + _encoded._decoded_counterpart = _decoded + +_fix_result_transcoding() +del _fix_result_transcoding + +def urlparse(url, scheme='', allow_fragments=True): + """Parse a URL into 6 components: + <scheme>://<netloc>/<path>;<params>?<query>#<fragment> + Return a 6-tuple: (scheme, netloc, path, params, query, fragment). + Note that we don't break the components up in smaller bits + (e.g. netloc is a single string) and we don't expand % escapes.""" + url, scheme, _coerce_result = _coerce_args(url, scheme) + splitresult = urlsplit(url, scheme, allow_fragments) + scheme, netloc, url, query, fragment = splitresult + if scheme in uses_params and ';' in url: + url, params = _splitparams(url) + else: + params = '' + result = ParseResult(scheme, netloc, url, params, query, fragment) + return _coerce_result(result) + +def _splitparams(url): + if '/' in url: + i = url.find(';', url.rfind('/')) + if i < 0: + return url, '' + else: + i = url.find(';') + return url[:i], url[i+1:] + +def _splitnetloc(url, start=0): + delim = len(url) # position of end of domain part of url, default is end + for c in '/?#': # look for delimiters; the order is NOT important + wdelim = url.find(c, start) # find first of this delim + if wdelim >= 0: # if found + delim = min(delim, wdelim) # use earliest delim position + return url[start:delim], url[delim:] # return (domain, rest) + +def urlsplit(url, scheme='', allow_fragments=True): + """Parse a URL into 5 components: + <scheme>://<netloc>/<path>?<query>#<fragment> + Return a 5-tuple: (scheme, netloc, path, query, fragment). + Note that we don't break the components up in smaller bits + (e.g. 
netloc is a single string) and we don't expand % escapes.""" + url, scheme, _coerce_result = _coerce_args(url, scheme) + allow_fragments = bool(allow_fragments) + key = url, scheme, allow_fragments, type(url), type(scheme) + cached = _parse_cache.get(key, None) + if cached: + return _coerce_result(cached) + if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth + clear_cache() + netloc = query = fragment = '' + i = url.find(':') + if i > 0: + if url[:i] == 'http': # optimize the common case + scheme = url[:i].lower() + url = url[i+1:] + if url[:2] == '//': + netloc, url = _splitnetloc(url, 2) + if (('[' in netloc and ']' not in netloc) or + (']' in netloc and '[' not in netloc)): + raise ValueError("Invalid IPv6 URL") + if allow_fragments and '#' in url: + url, fragment = url.split('#', 1) + if '?' in url: + url, query = url.split('?', 1) + v = SplitResult(scheme, netloc, url, query, fragment) + _parse_cache[key] = v + return _coerce_result(v) + for c in url[:i]: + if c not in scheme_chars: + break + else: + # make sure "url" is not actually a port number (in which case + # "scheme" is really part of the path) + rest = url[i+1:] + if not rest or any(c not in '0123456789' for c in rest): + # not a port number + scheme, url = url[:i].lower(), rest + + if url[:2] == '//': + netloc, url = _splitnetloc(url, 2) + if (('[' in netloc and ']' not in netloc) or + (']' in netloc and '[' not in netloc)): + raise ValueError("Invalid IPv6 URL") + if allow_fragments and '#' in url: + url, fragment = url.split('#', 1) + if '?' in url: + url, query = url.split('?', 1) + v = SplitResult(scheme, netloc, url, query, fragment) + _parse_cache[key] = v + return _coerce_result(v) + +def urlunparse(components): + """Put a parsed URL back together again. This may result in a + slightly different, but equivalent URL, if the URL that was parsed + originally had redundant delimiters, e.g. a ? with an empty query + (the draft states that these are equivalent).""" + scheme, netloc, url, params, query, fragment, _coerce_result = ( + _coerce_args(*components)) + if params: + url = "%s;%s" % (url, params) + return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment))) + +def urlunsplit(components): + """Combine the elements of a tuple as returned by urlsplit() into a + complete URL as a string. The data argument can be any five-item iterable. + This may result in a slightly different, but equivalent URL, if the URL that + was parsed originally had unnecessary delimiters (for example, a ? with an + empty query; the RFC states that these are equivalent).""" + scheme, netloc, url, query, fragment, _coerce_result = ( + _coerce_args(*components)) + if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): + if url and url[:1] != '/': url = '/' + url + url = '//' + (netloc or '') + url + if scheme: + url = scheme + ':' + url + if query: + url = url + '?' 
+ query + if fragment: + url = url + '#' + fragment + return _coerce_result(url) + +def urljoin(base, url, allow_fragments=True): + """Join a base URL and a possibly relative URL to form an absolute + interpretation of the latter.""" + if not base: + return url + if not url: + return base + + base, url, _coerce_result = _coerce_args(base, url) + bscheme, bnetloc, bpath, bparams, bquery, bfragment = \ + urlparse(base, '', allow_fragments) + scheme, netloc, path, params, query, fragment = \ + urlparse(url, bscheme, allow_fragments) + + if scheme != bscheme or scheme not in uses_relative: + return _coerce_result(url) + if scheme in uses_netloc: + if netloc: + return _coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) + netloc = bnetloc + + if not path and not params: + path = bpath + params = bparams + if not query: + query = bquery + return _coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) + + base_parts = bpath.split('/') + if base_parts[-1] != '': + # the last item is not a directory, so will not be taken into account + # in resolving the relative path + del base_parts[-1] + + # for rfc3986, ignore all base path should the first character be root. + if path[:1] == '/': + segments = path.split('/') + else: + segments = base_parts + path.split('/') + # filter out elements that would cause redundant slashes on re-joining + # the resolved_path + segments[1:-1] = filter(None, segments[1:-1]) + + resolved_path = [] + + for seg in segments: + if seg == '..': + try: + resolved_path.pop() + except IndexError: + # ignore any .. segments that would otherwise cause an IndexError + # when popped from resolved_path if resolving for rfc3986 + pass + elif seg == '.': + continue + else: + resolved_path.append(seg) + + if segments[-1] in ('.', '..'): + # do some post-processing here. if the last segment was a relative dir, + # then we need to append the trailing '/' + resolved_path.append('') + + return _coerce_result(urlunparse((scheme, netloc, '/'.join( + resolved_path) or '/', params, query, fragment))) + + +def urldefrag(url): + """Removes any existing fragment from URL. + + Returns a tuple of the defragmented URL and the fragment. If + the URL contained no fragments, the second element is the + empty string. + """ + url, _coerce_result = _coerce_args(url) + if '#' in url: + s, n, p, a, q, frag = urlparse(url) + defrag = urlunparse((s, n, p, a, q, '')) + else: + frag = '' + defrag = url + return _coerce_result(DefragResult(defrag, frag)) + +_hexdig = '0123456789ABCDEFabcdef' +_hextobyte = None + +def unquote_to_bytes(string): + """unquote_to_bytes('abc%20def') -> b'abc def'.""" + # Note: strings are encoded as UTF-8. This is only an issue if it contains + # unescaped non-ASCII characters, which URIs should not. + if not string: + # Is it a string-like object? 
+_hexdig = '0123456789ABCDEFabcdef'
+_hextobyte = None
+
+def unquote_to_bytes(string):
+    """unquote_to_bytes('abc%20def') -> b'abc def'."""
+    # Note: strings are encoded as UTF-8. This is only an issue if it contains
+    # unescaped non-ASCII characters, which URIs should not.
+    if not string:
+        # Is it a string-like object?
+        string.split
+        return b''
+    if isinstance(string, str):
+        string = string.encode('utf-8')
+    bits = string.split(b'%')
+    if len(bits) == 1:
+        return string
+    res = [bits[0]]
+    append = res.append
+    # Delay the initialization of the table to not waste memory
+    # if the function is never called
+    global _hextobyte
+    if _hextobyte is None:
+        _hextobyte = {(a + b).encode(): bytes([int(a + b, 16)])
+                      for a in _hexdig for b in _hexdig}
+    for item in bits[1:]:
+        try:
+            append(_hextobyte[item[:2]])
+            append(item[2:])
+        except KeyError:
+            append(b'%')
+            append(item)
+    return b''.join(res)
+
+_asciire = re.compile('([\x01-\x7f]+)')
+
+def unquote(string, encoding='utf-8', errors='replace'):
+    """Replace %xx escapes by their single-character equivalent. The optional
+    encoding and errors parameters specify how to decode percent-encoded
+    sequences into Unicode characters, as accepted by the bytes.decode()
+    method.
+    By default, percent-encoded sequences are decoded with UTF-8, and invalid
+    sequences are replaced by a placeholder character.
+
+    unquote('abc%20def') -> 'abc def'.
+    """
+    if '%' not in string:
+        # Is it a string-like object? (duck-typing check, as in
+        # unquote_to_bytes above)
+        string.split
+        return string
+    if encoding is None:
+        encoding = 'utf-8'
+    if errors is None:
+        errors = 'replace'
+    bits = _asciire.split(string)
+    res = [bits[0]]
+    append = res.append
+    for i in range(1, len(bits), 2):
+        append(unquote_to_bytes(bits[i]).decode(encoding, errors))
+        append(bits[i + 1])
+    return ''.join(res)
+
+
+def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
+             encoding='utf-8', errors='replace'):
+    """Parse a query given as a string argument.
+
+    Arguments:
+
+    qs: percent-encoded query string to be parsed
+
+    keep_blank_values: flag indicating whether blank values in
+        percent-encoded queries should be treated as blank strings.
+        A true value indicates that blanks should be retained as
+        blank strings. The default false value indicates that
+        blank values are to be ignored and treated as if they were
+        not included.
+
+    strict_parsing: flag indicating what to do with parsing errors.
+        If false (the default), errors are silently ignored.
+        If true, errors raise a ValueError exception.
+
+    encoding and errors: specify how to decode percent-encoded sequences
+        into Unicode characters, as accepted by the bytes.decode() method.
+
+    Returns a dictionary.
+    """
+    parsed_result = {}
+    pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
+                      encoding=encoding, errors=errors)
+    for name, value in pairs:
+        if name in parsed_result:
+            parsed_result[name].append(value)
+        else:
+            parsed_result[name] = [value]
+    return parsed_result
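A minimal sketch of the dict vs. list behavior (illustrative, assuming CPython-compatible output):

    >>> from urllib.parse import parse_qs, parse_qsl
    >>> parse_qs('a=1&a=2&b=3')              # repeated keys accumulate into lists
    {'a': ['1', '2'], 'b': ['3']}
    >>> parse_qsl('a=1&a=2&b=3')             # the list form keeps order and duplicates
    [('a', '1'), ('a', '2'), ('b', '3')]
    >>> parse_qs('a=&b=3', keep_blank_values=True)
    {'a': [''], 'b': ['3']}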
+ """ + qs, _coerce_result = _coerce_args(qs) + pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] + r = [] + for name_value in pairs: + if not name_value and not strict_parsing: + continue + nv = name_value.split('=', 1) + if len(nv) != 2: + if strict_parsing: + raise ValueError("bad query field: %r" % (name_value,)) + # Handle case of a control-name with no equal sign + if keep_blank_values: + nv.append('') + else: + continue + if len(nv[1]) or keep_blank_values: + name = nv[0].replace('+', ' ') + name = unquote(name, encoding=encoding, errors=errors) + name = _coerce_result(name) + value = nv[1].replace('+', ' ') + value = unquote(value, encoding=encoding, errors=errors) + value = _coerce_result(value) + r.append((name, value)) + return r + +def unquote_plus(string, encoding='utf-8', errors='replace'): + """Like unquote(), but also replace plus signs by spaces, as required for + unquoting HTML form values. + + unquote_plus('%7e/abc+def') -> '~/abc def' + """ + string = string.replace('+', ' ') + return unquote(string, encoding, errors) + +_ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + b'abcdefghijklmnopqrstuvwxyz' + b'0123456789' + b'_.-') +_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE) +_safe_quoters = {} + +class Quoter(collections.defaultdict): + """A mapping from bytes (in range(0,256)) to strings. + + String values are percent-encoded byte values, unless the key < 128, and + in the "safe" set (either the specified safe set, or default set). + """ + # Keeps a cache internally, using defaultdict, for efficiency (lookups + # of cached keys don't call Python code at all). + def __init__(self, safe): + """safe: bytes object.""" + self.safe = _ALWAYS_SAFE.union(safe) + + def __repr__(self): + # Without this, will just display as a defaultdict + return "<%s %r>" % (self.__class__.__name__, dict(self)) + + def __missing__(self, b): + # Handle a cache miss. Store quoted string in cache and return. + res = chr(b) if b in self.safe else '%{:02X}'.format(b) + self[b] = res + return res + +def quote(string, safe='/', encoding=None, errors=None): + """quote('abc def') -> 'abc%20def' + + Each part of a URL, e.g. the path info, the query, etc., has a + different set of reserved characters that must be quoted. + + RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists + the following reserved characters. + + reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | + "$" | "," + + Each of these characters is reserved in some component of a URL, + but not necessarily in all of them. + + By default, the quote function is intended for quoting the path + section of a URL. Thus, it will not encode '/'. This character + is reserved, but in typical usage the quote function is being + called on a path where the existing slash characters are used as + reserved characters. + + string and safe may be either str or bytes objects. encoding and errors + must not be specified if string is a bytes object. + + The optional encoding and errors parameters specify how to deal with + non-ASCII characters, as accepted by the str.encode method. + By default, encoding='utf-8' (characters are encoded with UTF-8), and + errors='strict' (unsupported characters raise a UnicodeEncodeError). 
+ """ + if isinstance(string, str): + if not string: + return string + if encoding is None: + encoding = 'utf-8' + if errors is None: + errors = 'strict' + string = string.encode(encoding, errors) + else: + if encoding is not None: + raise TypeError("quote() doesn't support 'encoding' for bytes") + if errors is not None: + raise TypeError("quote() doesn't support 'errors' for bytes") + return quote_from_bytes(string, safe) + +def quote_plus(string, safe='', encoding=None, errors=None): + """Like quote(), but also replace ' ' with '+', as required for quoting + HTML form values. Plus signs in the original string are escaped unless + they are included in safe. It also does not have safe default to '/'. + """ + # Check if ' ' in string, where string may either be a str or bytes. If + # there are no spaces, the regular quote will produce the right answer. + if ((isinstance(string, str) and ' ' not in string) or + (isinstance(string, bytes) and b' ' not in string)): + return quote(string, safe, encoding, errors) + if isinstance(safe, str): + space = ' ' + else: + space = b' ' + string = quote(string, safe + space, encoding, errors) + return string.replace(' ', '+') + +def quote_from_bytes(bs, safe='/'): + """Like quote(), but accepts a bytes object rather than a str, and does + not perform string-to-bytes encoding. It always returns an ASCII string. + quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3f' + """ + if not isinstance(bs, (bytes, bytearray)): + raise TypeError("quote_from_bytes() expected bytes") + if not bs: + return '' + if isinstance(safe, str): + # Normalize 'safe' by converting to bytes and removing non-ASCII chars + safe = safe.encode('ascii', 'ignore') + else: + safe = bytes([c for c in safe if c < 128]) + if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe): + return bs.decode() + try: + quoter = _safe_quoters[safe] + except KeyError: + _safe_quoters[safe] = quoter = Quoter(safe).__getitem__ + return ''.join([quoter(char) for char in bs]) + +def urlencode(query, doseq=False, safe='', encoding=None, errors=None, + quote_via=quote_plus): + """Encode a dict or sequence of two-element tuples into a URL query string. + + If any values in the query arg are sequences and doseq is true, each + sequence element is converted to a separate parameter. + + If the query arg is a sequence of two-element tuples, the order of the + parameters in the output will match the order of parameters in the + input. + + The components of a query arg may each be either a string or a bytes type. + + The safe, encoding, and errors parameters are passed down to the function + specified by quote_via (encoding and errors only if a component is a str). + """ + + if hasattr(query, "items"): + query = query.items() + else: + # It's a bother at times that strings and string-like objects are + # sequences. + try: + # non-sequence items should not work with len() + # non-empty strings will fail this + if len(query) and not isinstance(query[0], tuple): + raise TypeError + # Zero-length sequences of all types will get here and succeed, + # but that's a minor nit. 
+def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
+              quote_via=quote_plus):
+    """Encode a dict or sequence of two-element tuples into a URL query string.
+
+    If any values in the query arg are sequences and doseq is true, each
+    sequence element is converted to a separate parameter.
+
+    If the query arg is a sequence of two-element tuples, the order of the
+    parameters in the output will match the order of parameters in the
+    input.
+
+    The components of a query arg may each be either a string or a bytes type.
+
+    The safe, encoding, and errors parameters are passed down to the function
+    specified by quote_via (encoding and errors only if a component is a str).
+    """
+
+    if hasattr(query, "items"):
+        query = query.items()
+    else:
+        # It's a bother at times that strings and string-like objects are
+        # sequences.
+        try:
+            # non-sequence items should not work with len()
+            # non-empty strings will fail this
+            if len(query) and not isinstance(query[0], tuple):
+                raise TypeError
+            # Zero-length sequences of all types will get here and succeed,
+            # but that's a minor nit. Since the original implementation
+            # allowed empty dicts that type of behavior probably should be
+            # preserved for consistency
+        except TypeError:
+            ty, va, tb = sys.exc_info()
+            raise TypeError("not a valid non-string sequence "
+                            "or mapping object").with_traceback(tb)
+
+    l = []
+    if not doseq:
+        for k, v in query:
+            if isinstance(k, bytes):
+                k = quote_via(k, safe)
+            else:
+                k = quote_via(str(k), safe, encoding, errors)
+
+            if isinstance(v, bytes):
+                v = quote_via(v, safe)
+            else:
+                v = quote_via(str(v), safe, encoding, errors)
+            l.append(k + '=' + v)
+    else:
+        for k, v in query:
+            if isinstance(k, bytes):
+                k = quote_via(k, safe)
+            else:
+                k = quote_via(str(k), safe, encoding, errors)
+
+            if isinstance(v, bytes):
+                v = quote_via(v, safe)
+                l.append(k + '=' + v)
+            elif isinstance(v, str):
+                v = quote_via(v, safe, encoding, errors)
+                l.append(k + '=' + v)
+            else:
+                try:
+                    # Is this a sufficient test for sequence-ness?
+                    x = len(v)
+                except TypeError:
+                    # not a sequence
+                    v = quote_via(str(v), safe, encoding, errors)
+                    l.append(k + '=' + v)
+                else:
+                    # loop over the sequence
+                    for elt in v:
+                        if isinstance(elt, bytes):
+                            elt = quote_via(elt, safe)
+                        else:
+                            elt = quote_via(str(elt), safe, encoding, errors)
+                        l.append(k + '=' + elt)
+    return '&'.join(l)
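A hedged sketch of both modes; a sequence of pairs avoids relying on dict ordering:

    >>> from urllib.parse import urlencode
    >>> urlencode([('q', 'a b'), ('n', 2)])      # values stringified and quoted
    'q=a+b&n=2'
    >>> urlencode([('name', 'J Doe'), ('tags', ['a', 'b'])], doseq=True)
    'name=J+Doe&tags=a&tags=b'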
+def to_bytes(url):
+    """to_bytes(u"URL") --> 'URL'."""
+    # Most URL schemes require ASCII. If that changes, the conversion
+    # can be relaxed.
+    # XXX get rid of to_bytes()
+    if isinstance(url, str):
+        try:
+            url = url.encode("ASCII").decode()
+        except UnicodeError:
+            raise UnicodeError("URL " + repr(url) +
+                               " contains non-ASCII characters")
+    return url
+
+def unwrap(url):
+    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
+    url = str(url).strip()
+    if url[:1] == '<' and url[-1:] == '>':
+        url = url[1:-1].strip()
+    if url[:4] == 'URL:': url = url[4:].strip()
+    return url
+
+_typeprog = None
+def splittype(url):
+    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
+    global _typeprog
+    if _typeprog is None:
+        _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL)
+
+    match = _typeprog.match(url)
+    if match:
+        scheme, data = match.groups()
+        return scheme.lower(), data
+    return None, url
+
+_hostprog = None
+def splithost(url):
+    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
+    global _hostprog
+    if _hostprog is None:
+        _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)
+
+    match = _hostprog.match(url)
+    if match:
+        host_port, path = match.groups()
+        if path and path[0] != '/':
+            path = '/' + path
+        return host_port, path
+    return None, url
+
+def splituser(host):
+    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
+    user, delim, host = host.rpartition('@')
+    return (user if delim else None), host
+
+def splitpasswd(user):
+    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
+    user, delim, passwd = user.partition(':')
+    return user, (passwd if delim else None)
+
+_portprog = None
+def splitport(host):
+    """splitport('host:port') --> 'host', 'port'."""
+    global _portprog
+    if _portprog is None:
+        _portprog = re.compile('(.*):([0-9]*)$', re.DOTALL)
+
+    match = _portprog.match(host)
+    if match:
+        host, port = match.groups()
+        if port:
+            return host, port
+    return host, None
+
+def splitnport(host, defport=-1):
+    """Split host and port, returning numeric port.
+    Return given default port if no ':' found; defaults to -1.
+    Return numerical port if a valid number is found after ':'.
+    Return None if ':' is present but not followed by a valid number."""
+    host, delim, port = host.rpartition(':')
+    if not delim:
+        host = port
+    elif port:
+        try:
+            nport = int(port)
+        except ValueError:
+            nport = None
+        return host, nport
+    return host, defport
+
+def splitquery(url):
+    """splitquery('/path?query') --> '/path', 'query'."""
+    path, delim, query = url.rpartition('?')
+    if delim:
+        return path, query
+    return url, None
+
+def splittag(url):
+    """splittag('/path#tag') --> '/path', 'tag'."""
+    path, delim, tag = url.rpartition('#')
+    if delim:
+        return path, tag
+    return url, None
+
+def splitattr(url):
+    """splitattr('/path;attr1=value1;attr2=value2;...') ->
+        '/path', ['attr1=value1', 'attr2=value2', ...]."""
+    words = url.split(';')
+    return words[0], words[1:]
+
+def splitvalue(attr):
+    """splitvalue('attr=value') --> 'attr', 'value'."""
+    attr, delim, value = attr.partition('=')
+    return attr, (value if delim else None)
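Finally, a doctest-style sketch of the legacy split helpers (illustrative only; these are undocumented internals in CPython as well):

    >>> splittype('mailto:user@example.com')
    ('mailto', 'user@example.com')
    >>> splithost('//www.example.com:8080/docs')
    ('www.example.com:8080', '/docs')
    >>> splitnport('www.example.com:8080')
    ('www.example.com', 8080)
    >>> splitattr('/path;type=a;len=2')
    ('/path', ['type=a', 'len=2'])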