email policy works mm

author: Stefan Israelsson Tampe <stefan.itampe@gmail.com> 2018-09-06 23:40:42 +0200
committer: Stefan Israelsson Tampe <stefan.itampe@gmail.com> 2018-09-06 23:40:42 +0200
commit: 8733038b4de44a8cfb669420621597457eeef88a (patch)
tree: 9f7345b4b407ea0919320e17955b058cb2f2a5bc
parent: b950c0d70a1d2c95da8d60aca7af02bc50542a2b (diff)
30 files changed, 12018 insertions, 80 deletions
diff --git a/modules/language/python/compile.scm b/modules/language/python/compile.scm
index d4b1011..503a1d8 100644
--- a/modules/language/python/compile.scm
+++ b/modules/language/python/compile.scm
@@ -544,9 +544,6 @@
   (mkfast
    ;; General
    ((__init__)    (O 'py-init))
-   ((__getattr__) (O 'ref))
-   ((__setattr__) (O 'set))
-   ((__delattr__) (O 'del))
    ((__ne__)      (O 'ne))
    ((__eq__)      (O 'equal?))
    ((__repr__)    (O 'repr))
@@ -1221,9 +1218,7 @@
                      (arglist->pkw (clean parents))
                      `(,(G 'cons) (,(G 'quote) ()) (,(G 'quote) ())))
                 ,doc
-                ,(cons
-                  (list 'define '__doc__ doc)
-                  (map (lambda (x) `(define ,x ,(gw-persson x vo))) ls))
+                ,(map (lambda (x) `(define ,x ,(gw-persson x vo))) ls)
                 ,cd)))))))))
  (#:verb
   ((_ x) x))
@@ -1964,7 +1959,7 @@
 
 
 (define (exp vs x)
-  (match (pr x)
+  (match x
     ((e)
      (exp vs e))
     ((tag . l)
@@ -2039,13 +2034,14 @@
                (doc   (cdr e.doc)))
           
 	  `(begin
-             (,(G 'set!) __doc__ ,doc)
 	     ,@start
 	     (,(G 'define) ,fnm (,(G 'make-hash-table)))
 	     ,@(map (lambda (s)
 		      (if (member s (fluid-ref ignore))
 			  `(,cvalues)
-			  `(,(C 'var) ,(cons '__doc__ s)))) globs)
+			  `(,(C 'var) ,s)))
+                    (cons '__doc__ globs))
+             (,(G 'set!) __doc__ ,doc)
 	     ,@e
 	     (,(C 'export-all)))))
 
@@ -2056,17 +2052,17 @@
   
         (if (pair? start)
             (set! x (cdr x)))
-
+        
         (let* ((globs (get-globals x))
                (res   (gensym "res"))
                (e     (map (g globs exp) x)))
           `(,(G 'begin)
-             ,@start
-             ,@(map (lambda (s)
-                      (if (member s (fluid-ref ignore))
-                          `(,cvalues)
-                          `(,(C 'var) ,s))) globs)
-             (,(C 'with-exit) ,@e))))))
+            ,@start
+            ,@(map (lambda (s)
+                     (if (member s (fluid-ref ignore))
+                         `(,cvalues)
+                         `(,(C 'var) ,s))) globs)
+            (,(C 'with-exit) ,@e))))))
 	       
 		     
 
diff --git a/modules/language/python/exceptions.scm b/modules/language/python/exceptions.scm
index 0a3689f..87dd3c5 100644
--- a/modules/language/python/exceptions.scm
+++ b/modules/language/python/exceptions.scm
@@ -1,24 +1,27 @@
 (define-module (language python exceptions)
   #:use-module (oop pf-objects)
   #:use-module (oop goops)
-  #:export (StopIteration GeneratorExit RuntimeError
+  #:export (StopIteration GeneratorExit RuntimeError TabError
                           Exception ValueError TypeError
                           IndexError KeyError AttributeError ArgumentError
                           SyntaxError SystemException
                           OSError ProcessLookupError PermissionError
                           None NotImplemented NotImplementedError
-			  RunTimeError AssertionError ImportError
+			  AssertionError ImportError
                           ModuleNotFoundError BlockingIOError
                           InterruptedError BaseException
-			  ZeroDivisionError ArithmeticError
+			  ZeroDivisionError 
 			  OverflowError RecursionError
 			  Warning DeprecationWarning BytesWarning
-                          ResourceWarning UserWarning
+                          ResourceWarning UserWarning UnicodeTranslateError
                           UnicodeDecodeError LookupError IndentationError
                           KeyboardInterrupt MemoryError NameError
                           EOFError UnicodeError UnicodeEncodeError
                           FileExistsError FileNotFoundError IsADirectoryError
-			  EnvironmentError))
+			  EnvironmentError ConnectionError NotADirectoryError
+                          ConnectionResetError ChildProcessError TimeOutError
+                          BrokenPipeError ConnectionAbortedError
+                          ConnectionRefusedError ArithmeticError))
 
 (define-syntax-rule (aif it p x y) (let ((it p)) (if it x y)))
 
@@ -71,46 +74,67 @@
 (define StopIteration           'StopIteration)
 (define GeneratorExit           'GeneratorExit)
 (define-er EnvironmentError     'EnvironmentError)
-(define-er UnicodeEncodeError   'UnicodeEncodeError)
-(define-er FileExistsError      'FileExistsError)
-(define-er FileNotFoundError    'FileNotFoundError)
-(define-er IsADirectoryError    'IsADirectoryError)
-(define-er UnicodeError         'UnicodeError)
+
 (define-er EOFError             'EOFError)
 (define-er MemoryError          'MemoryError)
 (define-er NameError            'NameError)
-(define-er UnicodeDecodeError   'UnicodeDecodeError)
+
+(define-er ValueError           'ValueError)
+(define-python-class UnicodeError (ValueError))
+(define-python-class UnicodeDecodeError (UnicodeError))
+(define-python-class UnicodeEncodeError (UnicodeError))
+(define-python-class UnicodeTranslateError (UnicodeError))
+
 (define-er LookupError          'LookupError)
-(define-er IndentationError     'IndentationError)
-(define-er OverflowError        'OverflowError)
+  (define-python-class IndexError (LookupError))
+  (define-python-class KeyError   (LookupError))
+
+(define-er ArithmeticError        'OverflowError)
+  (define-python-class OverflowError     (ArithmeticError))
+  (define-python-class ZeroDivisionError (ArithmeticError))
+
+
 (define-er KeyboardInterrupt    'KeyboardInterrupt)
-(define-er RecursionError       'RecursionError)
-(define-er ArithmeticError      'ArithmeticError)
 (define-er BaseException        'BaseException)
-(define-er ZeroDivisionError    'ZeroDivisionError)
 (define-er SystemException      'SystemException)
 (define-er RuntimeError         'RuntimeError)
-(define-er IndexError           'IndexError)
+  (define-python-class NotImplementedError (RuntimeError))
+  (define-python-class RecursionError      (RuntimeError)) 
+
+
 (define-er ArgumentError        'IndexError)
-(define-er ValueError           'ValueError)
 
+
+(define-er OSError              'OSError)
+  (define-python-class BlockingIOError    (OSError))
+  (define-python-class ChildProcessError  (OSError))
+  (define-python-class ConnectionError    (OSError))
+    (define-python-class BrokenPipeError        (ConnectionError))
+    (define-python-class ConnectionAbortedError (ConnectionError))
+    (define-python-class ConnectionRefusedError (ConnectionError))
+    (define-python-class ConnectionResetError   (ConnectionError))
+  (define-python-class FileExistsError    (OSError))
+  (define-python-class FileNotFoundError  (OSError))
+  (define-python-class InterruptedError   (OSError))
+  (define-python-class IsADirectoryError  (OSError))
+  (define-python-class NotADirectoryError (OSError))
+  (define-python-class PermissionError    (OSError))
+  (define-python-class ProcessLookupError (OSError))
+  (define-python-class TimeOutError       (OSError))
 (define None                    'None)
 
-(define-er KeyError             'KeyError)
+
 (define-er TypeError            'TypeError)
 (define-er AttributeError       'AttributeError)
 (define-er SyntaxError          'SyntaxError)
-(define-er OSError              'OSError)
-(define-er ProcessLookupError   'ProcessLookupError)
-(define-er PermissionError      'PermissionError)
-(define-er NotImplementedError  'NotImplementedError)
+ (define-python-class IndentationError (SyntaxError))
+   (define-python-class TabError (IndentationError))
+
 (define-er RunTimeError         'RunTimeError)
 
 (define AssertionError          'AssertionError)
 (define-er ImportError          'ImportError)      
 (define-er ModuleNotFoundError  (ImportError) 'ModuleNotFoundError)
-(define-er BlockingIOError      'BlockingIOError)
-(define-er InterruptedError     'OSError)
 
 (define NotImplemented (list 'NotImplemented))
 
diff --git a/modules/language/python/module/_markupbase.py b/modules/language/python/module/_markupbase.py
new file mode 100644
index 0000000..1c955be
--- /dev/null
+++ b/modules/language/python/module/_markupbase.py
@@ -0,0 +1,397 @@
+module(_markupbase)
+
+"""Shared support for scanning document type declarations in HTML and XHTML.
+
+This module is used as a foundation for the html.parser module.  It has no
+documented public API and should not be used directly.
+
+"""
+
+import re
+
+_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
+_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match
+_commentclose = re.compile(r'--\s*>')
+_markedsectionclose = re.compile(r']\s*]\s*>')
+
+# An analysis of the MS-Word extensions is available at
+# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf
+
+_msmarkedsectionclose = re.compile(r']\s*>')
+
+del re
+
+
+class ParserBase:
+    """Parser base class which provides some common support methods used
+    by the SGML/HTML and XHTML parsers."""
+
+    def __init__(self):
+        if self.__class__ is ParserBase:
+            raise RuntimeError(
+                "_markupbase.ParserBase must be subclassed")
+
+    def error(self, message):
+        raise NotImplementedError(
+            "subclasses of ParserBase must override error()")
+
+    def reset(self):
+        self.lineno = 1
+        self.offset = 0
+
+    def getpos(self):
+        """Return current line number and offset."""
+        return self.lineno, self.offset
+
+    # Internal -- update line number and offset.  This should be
+    # called for each piece of data exactly once, in order -- in other
+    # words the concatenation of all the input strings to this
+    # function should be exactly the entire input.
+    def updatepos(self, i, j):
+        if i >= j:
+            return j
+        rawdata = self.rawdata
+        nlines = rawdata.count("\n", i, j)
+        if nlines:
+            self.lineno = self.lineno + nlines
+            pos = rawdata.rindex("\n", i, j) # Should not fail
+            self.offset = j-(pos+1)
+        else:
+            self.offset = self.offset + j-i
+        return j
+
+    _decl_otherchars = ''
+
+    # Internal -- parse declaration (for use by subclasses).
+    def parse_declaration(self, i):
+        # This is some sort of declaration; in "HTML as
+        # deployed," this should only be the document type
+        # declaration ("<!DOCTYPE html...>").
+        # ISO 8879:1986, however, has more complex
+        # declaration syntax for elements in <!...>, including:
+        # --comment--
+        # [marked section]
+        # name in the following list: ENTITY, DOCTYPE, ELEMENT,
+        # ATTLIST, NOTATION, SHORTREF, USEMAP,
+        # LINKTYPE, LINK, IDLINK, USELINK, SYSTEM
+        rawdata = self.rawdata
+        j = i + 2
+        assert rawdata[i:j] == "<!", "unexpected call to parse_declaration"
+        if rawdata[j:j+1] == ">":
+            # the empty comment <!>
+            return j + 1
+        if rawdata[j:j+1] in ("-", ""):
+            # Start of comment followed by buffer boundary,
+            # or just a buffer boundary.
+            return -1
+        # A simple, practical version could look like: ((name|stringlit) S*) + '>'
+        n = len(rawdata)
+        if rawdata[j:j+2] == '--': #comment
+            # Locate --.*-- as the body of the comment
+            return self.parse_comment(i)
+        elif rawdata[j] == '[': #marked section
+            # Locate [statusWord [...arbitrary SGML...]] as the body of the marked section
+            # Where statusWord is one of TEMP, CDATA, IGNORE, INCLUDE, RCDATA
+            # Note that this is extended by Microsoft Office "Save as Web" function
+            # to include [if...] and [endif].
+            return self.parse_marked_section(i)
+        else: #all other declaration elements
+            decltype, j = self._scan_name(j, i)
+        if j < 0:
+            return j
+        if decltype == "doctype":
+            self._decl_otherchars = ''
+        while j < n:
+            c = rawdata[j]
+            if c == ">":
+                # end of declaration syntax
+                data = rawdata[i+2:j]
+                if decltype == "doctype":
+                    self.handle_decl(data)
+                else:
+                    # According to the HTML5 specs sections "8.2.4.44 Bogus
+                    # comment state" and "8.2.4.45 Markup declaration open
+                    # state", a comment token should be emitted.
+                    # Calling unknown_decl provides more flexibility though.
+                    self.unknown_decl(data)
+                return j + 1
+            if c in "\"'":
+                m = _declstringlit_match(rawdata, j)
+                if not m:
+                    return -1 # incomplete
+                j = m.end()
+            elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
+                name, j = self._scan_name(j, i)
+            elif c in self._decl_otherchars:
+                j = j + 1
+            elif c == "[":
+                # this could be handled in a separate doctype parser
+                if decltype == "doctype":
+                    j = self._parse_doctype_subset(j + 1, i)
+                elif decltype in {"attlist", "linktype", "link", "element"}:
+                    # must tolerate []'d groups in a content model in an element declaration
+                    # also in data attribute specifications of attlist declaration
+                    # also link type declaration subsets in linktype declarations
+                    # also link attribute specification lists in link declarations
+                    self.error("unsupported '[' char in %s declaration" % decltype)
+                else:
+                    self.error("unexpected '[' char in declaration")
+            else:
+                self.error(
+                    "unexpected %r char in declaration" % rawdata[j])
+            if j < 0:
+                return j
+        return -1 # incomplete
+
+    # Internal -- parse a marked section
+    # Override this to handle MS-word extension syntax <![if word]>content<![endif]>
+    def parse_marked_section(self, i, report=1):
+        rawdata= self.rawdata
+        assert rawdata[i:i+3] == '<![', "unexpected call to parse_marked_section()"
+        sectName, j = self._scan_name( i+3, i )
+        if j < 0:
+            return j
+        if sectName in {"temp", "cdata", "ignore", "include", "rcdata"}:
+            # look for standard ]]> ending
+            match= _markedsectionclose.search(rawdata, i+3)
+        elif sectName in {"if", "else", "endif"}:
+            # look for MS Office ]> ending
+            match= _msmarkedsectionclose.search(rawdata, i+3)
+        else:
+            self.error('unknown status keyword %r in marked section' % rawdata[i+3:j])
+        if not match:
+            return -1
+        if report:
+            j = match.start(0)
+            self.unknown_decl(rawdata[i+3: j])
+        return match.end(0)
+
+    # Internal -- parse comment, return length or -1 if not terminated
+    def parse_comment(self, i, report=1):
+        rawdata = self.rawdata
+        if rawdata[i:i+4] != '<!--':
+            self.error('unexpected call to parse_comment()')
+        match = _commentclose.search(rawdata, i+4)
+        if not match:
+            return -1
+        if report:
+            j = match.start(0)
+            self.handle_comment(rawdata[i+4: j])
+        return match.end(0)
+
+    # Internal -- scan past the internal subset in a <!DOCTYPE declaration,
+    # returning the index just past any whitespace following the trailing ']'.
+    def _parse_doctype_subset(self, i, declstartpos):
+        rawdata = self.rawdata
+        n = len(rawdata)
+        j = i
+        while j < n:
+            c = rawdata[j]
+            if c == "<":
+                s = rawdata[j:j+2]
+                if s == "<":
+                    # end of buffer; incomplete
+                    return -1
+                if s != "<!":
+                    self.updatepos(declstartpos, j + 1)
+                    self.error("unexpected char in internal subset (in %r)" % s)
+                if (j + 2) == n:
+                    # end of buffer; incomplete
+                    return -1
+                if (j + 4) > n:
+                    # end of buffer; incomplete
+                    return -1
+                if rawdata[j:j+4] == "<!--":
+                    j = self.parse_comment(j, report=0)
+                    if j < 0:
+                        return j
+                    continue
+                name, j = self._scan_name(j + 2, declstartpos)
+                if j == -1:
+                    return -1
+                if name not in {"attlist", "element", "entity", "notation"}:
+                    self.updatepos(declstartpos, j + 2)
+                    self.error(
+                        "unknown declaration %r in internal subset" % name)
+                # handle the individual names
+                meth = getattr(self, "_parse_doctype_" + name)
+                j = meth(j, declstartpos)
+                if j < 0:
+                    return j
+            elif c == "%":
+                # parameter entity reference
+                if (j + 1) == n:
+                    # end of buffer; incomplete
+                    return -1
+                s, j = self._scan_name(j + 1, declstartpos)
+                if j < 0:
+                    return j
+                if rawdata[j] == ";":
+                    j = j + 1
+            elif c == "]":
+                j = j + 1
+                while j < n and rawdata[j].isspace():
+                    j = j + 1
+                if j < n:
+                    if rawdata[j] == ">":
+                        return j
+                    self.updatepos(declstartpos, j)
+                    self.error("unexpected char after internal subset")
+                else:
+                    return -1
+            elif c.isspace():
+                j = j + 1
+            else:
+                self.updatepos(declstartpos, j)
+                self.error("unexpected char %r in internal subset" % c)
+        # end of buffer reached
+        return -1
+
+    # Internal -- scan past <!ELEMENT declarations
+    def _parse_doctype_element(self, i, declstartpos):
+        name, j = self._scan_name(i, declstartpos)
+        if j == -1:
+            return -1
+        # style content model; just skip until '>'
+        rawdata = self.rawdata
+        if '>' in rawdata[j:]:
+            return rawdata.find(">", j) + 1
+        return -1
+
+    # Internal -- scan past <!ATTLIST declarations
+    def _parse_doctype_attlist(self, i, declstartpos):
+        rawdata = self.rawdata
+        name, j = self._scan_name(i, declstartpos)
+        c = rawdata[j:j+1]
+        if c == "":
+            return -1
+        if c == ">":
+            return j + 1
+        while 1:
+            # scan a series of attribute descriptions; simplified:
+            #   name type [value] [#constraint]
+            name, j = self._scan_name(j, declstartpos)
+            if j < 0:
+                return j
+            c = rawdata[j:j+1]
+            if c == "":
+                return -1
+            if c == "(":
+                # an enumerated type; look for ')'
+                if ")" in rawdata[j:]:
+                    j = rawdata.find(")", j) + 1
+                else:
+                    return -1
+                while rawdata[j:j+1].isspace():
+                    j = j + 1
+                if not rawdata[j:]:
+                    # end of buffer, incomplete
+                    return -1
+            else:
+                name, j = self._scan_name(j, declstartpos)
+            c = rawdata[j:j+1]
+            if not c:
+                return -1
+            if c in "'\"":
+                m = _declstringlit_match(rawdata, j)
+                if m:
+                    j = m.end()
+                else:
+                    return -1
+                c = rawdata[j:j+1]
+                if not c:
+                    return -1
+            if c == "#":
+                if rawdata[j:] == "#":
+                    # end of buffer
+                    return -1
+                name, j = self._scan_name(j + 1, declstartpos)
+                if j < 0:
+                    return j
+                c = rawdata[j:j+1]
+                if not c:
+                    return -1
+            if c == '>':
+                # all done
+                return j + 1
+
+    # Internal -- scan past <!NOTATION declarations
+    def _parse_doctype_notation(self, i, declstartpos):
+        name, j = self._scan_name(i, declstartpos)
+        if j < 0:
+            return j
+        rawdata = self.rawdata
+        while 1:
+            c = rawdata[j:j+1]
+            if not c:
+                # end of buffer; incomplete
+                return -1
+            if c == '>':
+                return j + 1
+            if c in "'\"":
+                m = _declstringlit_match(rawdata, j)
+                if not m:
+                    return -1
+                j = m.end()
+            else:
+                name, j = self._scan_name(j, declstartpos)
+                if j < 0:
+                    return j
+
+    # Internal -- scan past <!ENTITY declarations
+    def _parse_doctype_entity(self, i, declstartpos):
+        rawdata = self.rawdata
+        if rawdata[i:i+1] == "%":
+            j = i + 1
+            while 1:
+                c = rawdata[j:j+1]
+                if not c:
+                    return -1
+                if c.isspace():
+                    j = j + 1
+                else:
+                    break
+        else:
+            j = i
+        name, j = self._scan_name(j, declstartpos)
+        if j < 0:
+            return j
+        while 1:
+            c = self.rawdata[j:j+1]
+            if not c:
+                return -1
+            if c in "'\"":
+                m = _declstringlit_match(rawdata, j)
+                if m:
+                    j = m.end()
+                else:
+                    return -1    # incomplete
+            elif c == ">":
+                return j + 1
+            else:
+                name, j = self._scan_name(j, declstartpos)
+                if j < 0:
+                    return j
+
+    # Internal -- scan a name token and the new position and the token, or
+    # return -1 if we've reached the end of the buffer.
+    def _scan_name(self, i, declstartpos):
+        rawdata = self.rawdata
+        n = len(rawdata)
+        if i == n:
+            return None, -1
+        m = _declname_match(rawdata, i)
+        if m:
+            s = m.group()
+            name = s.strip()
+            if (i + len(s)) == n:
+                return None, -1  # end of buffer
+            return name.lower(), m.end()
+        else:
+            self.updatepos(declstartpos, i)
+            self.error("expected name token at %r"
+                       % rawdata[declstartpos:declstartpos+20])
+
+    # To be overridden -- handlers for unknown objects
+    def unknown_decl(self, data):
+        pass
diff --git a/modules/language/python/module/binascii.scm b/modules/language/python/module/binascii.scm
index 12bdcac..2fc0e62 100644
--- a/modules/language/python/module/binascii.scm
+++ b/modules/language/python/module/binascii.scm
@@ -386,7 +386,7 @@
 		(lp (+ i 2) (cons x r)))
 	      (bytes (reverse r)))))))
 
-(define-inlinable (id x) x)
+(define (id x) x)
 (define-syntax-rule (mkcrc crc_hqx high xor mask)
   (def (crc_hqx data (= value 0))
        (let ((n (len data))
diff --git a/modules/language/python/module/email/_encoded_words.py b/modules/language/python/module/email/_encoded_words.py
new file mode 100644
index 0000000..8c02709
--- /dev/null
+++ b/modules/language/python/module/email/_encoded_words.py
@@ -0,0 +1,222 @@
+module(email,_encoded_words)
+""" Routines for manipulating RFC2047 encoded words.
+
+This is currently a package-private API, but will be considered for promotion
+to a public API if there is demand.
+
+"""
+
+# An ecoded word looks like this:
+#
+#        =?charset[*lang]?cte?encoded_string?=
+#
+# for more information about charset see the charset module.  Here it is one
+# of the preferred MIME charset names (hopefully; you never know when parsing).
+# cte (Content Transfer Encoding) is either 'q' or 'b' (ignoring case).  In
+# theory other letters could be used for other encodings, but in practice this
+# (almost?) never happens.  There could be a public API for adding entries
+# to the CTE tables, but YAGNI for now.  'q' is Quoted Printable, 'b' is
+# Base64.  The meaning of encoded_string should be obvious.  'lang' is optional
+# as indicated by the brackets (they are not part of the syntax) but is almost
+# never encountered in practice.
+#
+# The general interface for a CTE decoder is that it takes the encoded_string
+# as its argument, and returns a tuple (cte_decoded_string, defects).  The
+# cte_decoded_string is the original binary that was encoded using the
+# specified cte.  'defects' is a list of MessageDefect instances indicating any
+# problems encountered during conversion.  'charset' and 'lang' are the
+# corresponding strings extracted from the EW, case preserved.
+#
+# The general interface for a CTE encoder is that it takes a binary sequence
+# as input and returns the cte_encoded_string, which is an ascii-only string.
+#
+# Each decoder must also supply a length function that takes the binary
+# sequence as its argument and returns the length of the resulting encoded
+# string.
+#
+# The main API functions for the module are decode, which calls the decoder
+# referenced by the cte specifier, and encode, which adds the appropriate
+# RFC 2047 "chrome" to the encoded string, and can optionally automatically
+# select the shortest possible encoding.  See their docstrings below for
+# details.
+
+import re
+import base64
+import binascii
+import functools
+from string import ascii_letters, digits
+import email.errors as errors
+
+__all__ = ['decode_q',
+           'encode_q',
+           'decode_b',
+           'encode_b',
+           'len_q',
+           'len_b',
+           'decode',
+           'encode',
+           ]
+
+#
+# Quoted Printable
+#
+
+# regex based decoder.
+_q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub,
+        lambda m: bytes([int(m.group(1), 16)]))
+
+def decode_q(encoded):
+    encoded = encoded.replace(b'_', b' ')
+    return _q_byte_subber(encoded), []
+
+
+# dict mapping bytes to their encoded form
+class _QByteMap(dict):
+
+    safe = b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')
+
+    def __missing__(self, key):
+        if key in self.safe:
+            self[key] = chr(key)
+        else:
+            self[key] = "={:02X}".format(key)
+        return self[key]
+
+_q_byte_map = _QByteMap()
+
+# In headers spaces are mapped to '_'.
+_q_byte_map[ord(' ')] = '_'
+
+def encode_q(bstring):
+    return ''.join(_q_byte_map[x] for x in bstring)
+
+def len_q(bstring):
+    return sum(len(_q_byte_map[x]) for x in bstring)
+
+
+#
+# Base64
+#
+
+def decode_b(encoded):
+    defects = []
+    pad_err = len(encoded) % 4
+    if pad_err:
+        defects.append(errors.InvalidBase64PaddingDefect())
+        padded_encoded = encoded + b'==='[:4-pad_err]
+    else:
+        padded_encoded = encoded
+    try:
+        return base64.b64decode(padded_encoded, validate=True), defects
+    except binascii.Error:
+        # Since we had correct padding, this must an invalid char error.
+        defects = [errors.InvalidBase64CharactersDefect()]
+        # The non-alphabet characters are ignored as far as padding
+        # goes, but we don't know how many there are.  So we'll just
+        # try various padding lengths until something works.
+        for i in 0, 1, 2, 3:
+            try:
+                return base64.b64decode(encoded+b'='*i, validate=False), defects
+            except binascii.Error:
+                if i==0:
+                    defects.append(errors.InvalidBase64PaddingDefect())
+        else:
+            # This should never happen.
+            raise AssertionError("unexpected binascii.Error")
+
+def encode_b(bstring):
+    return base64.b64encode(bstring).decode('ascii')
+
+def len_b(bstring):
+    groups_of_3, leftover = divmod(len(bstring), 3)
+    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
+    return groups_of_3 * 4 + (4 if leftover else 0)
+
+
+_cte_decoders = {
+    'q': decode_q,
+    'b': decode_b,
+    }
+
+def decode(ew):
+    """Decode encoded word and return (string, charset, lang, defects) tuple.
+
+    An RFC 2047/2243 encoded word has the form:
+
+        =?charset*lang?cte?encoded_string?=
+
+    where '*lang' may be omitted but the other parts may not be.
+
+    This function expects exactly such a string (that is, it does not check the
+    syntax and may raise errors if the string is not well formed), and returns
+    the encoded_string decoded first from its Content Transfer Encoding and
+    then from the resulting bytes into unicode using the specified charset.  If
+    the cte-decoded string does not successfully decode using the specified
+    character set, a defect is added to the defects list and the unknown octets
+    are replaced by the unicode 'unknown' character \\uFDFF.
+
+    The specified charset and language are returned.  The default for language,
+    which is rarely if ever encountered, is the empty string.
+
+    """
+    _, charset, cte, cte_string, _ = ew.split('?')
+    charset, _, lang = charset.partition('*')
+    cte = cte.lower()
+    # Recover the original bytes and do CTE decoding.
+    bstring = cte_string.encode('ascii', 'surrogateescape')
+    bstring, defects = _cte_decoders[cte](bstring)
+    # Turn the CTE decoded bytes into unicode.
+    try:
+        string = bstring.decode(charset)
+    except UnicodeError:
+        defects.append(errors.UndecodableBytesDefect("Encoded word "
+            "contains bytes not decodable using {} charset".format(charset)))
+        string = bstring.decode(charset, 'surrogateescape')
+    except LookupError:
+        string = bstring.decode('ascii', 'surrogateescape')
+        if charset.lower() != 'unknown-8bit':
+            defects.append(errors.CharsetError("Unknown charset {} "
+                "in encoded word; decoded as unknown bytes".format(charset)))
+    return string, charset, lang, defects
+
+
+_cte_encoders = {
+    'q': encode_q,
+    'b': encode_b,
+    }
+
+_cte_encode_length = {
+    'q': len_q,
+    'b': len_b,
+    }
+
+def encode(string, charset='utf-8', encoding=None, lang=''):
+    """Encode string using the CTE encoding that produces the shorter result.
+
+    Produces an RFC 2047/2243 encoded word of the form:
+
+        =?charset*lang?cte?encoded_string?=
+
+    where '*lang' is omitted unless the 'lang' parameter is given a value.
+    Optional argument charset (defaults to utf-8) specifies the charset to use
+    to encode the string to binary before CTE encoding it.  Optional argument
+    'encoding' is the cte specifier for the encoding that should be used ('q'
+    or 'b'); if it is None (the default) the encoding which produces the
+    shortest encoded sequence is used, except that 'q' is preferred if it is up
+    to five characters longer.  Optional argument 'lang' (default '') gives the
+    RFC 2243 language string to specify in the encoded word.
+
+    """
+    if charset == 'unknown-8bit':
+        bstring = string.encode('ascii', 'surrogateescape')
+    else:
+        bstring = string.encode(charset)
+    if encoding is None:
+        qlen = _cte_encode_length['q'](bstring)
+        blen = _cte_encode_length['b'](bstring)
+        # Bias toward q.  5 is arbitrary.
+        encoding = 'q' if qlen - blen < 5 else 'b'
+    encoded = _cte_encoders[encoding](bstring)
+    if lang:
+        lang = '*' + lang
+    return "=?{}{}?{}?{}?=".format(charset, lang, encoding, encoded)
diff --git a/modules/language/python/module/email/_header_value_parser.py b/modules/language/python/module/email/_header_value_parser.py
new file mode 100644
index 0000000..0f4a1cb
--- /dev/null
+++ b/modules/language/python/module/email/_header_value_parser.py
@@ -0,0 +1,2820 @@
+module(email,_header_value_parser)
+
+"""Header value parser implementing various email-related RFC parsing rules.
+
+The parsing methods defined in this module implement various email related
+parsing rules.  Principal among them is RFC 5322, which is the followon
+to RFC 2822 and primarily a clarification of the former.  It also implements
+RFC 2047 encoded word decoding.
+
+RFC 5322 goes to considerable trouble to maintain backward compatibility with
+RFC 822 in the parse phase, while cleaning up the structure on the generation
+phase.  This parser supports correct RFC 5322 generation by tagging white space
+as folding white space only when folding is allowed in the non-obsolete rule
+sets.  Actually, the parser is even more generous when accepting input than RFC
+5322 mandates, following the spirit of Postel's Law, which RFC 5322 encourages.
+Where possible deviations from the standard are annotated on the 'defects'
+attribute of tokens that deviate.
+
+The general structure of the parser follows RFC 5322, and uses its terminology
+where there is a direct correspondence.  Where the implementation requires a
+somewhat different structure than that used by the formal grammar, new terms
+that mimic the closest existing terms are used.  Thus, it really helps to have
+a copy of RFC 5322 handy when studying this code.
+
+Input to the parser is a string that has already been unfolded according to
+RFC 5322 rules.  According to the RFC this unfolding is the very first step, and
+this parser leaves the unfolding step to a higher level message parser, which
+will have already detected the line breaks that need unfolding while
+determining the beginning and end of each header.
+
+The output of the parser is a TokenList object, which is a list subclass.  A
+TokenList is a recursive data structure.  The terminal nodes of the structure
+are Terminal objects, which are subclasses of str.  These do not correspond
+directly to terminal objects in the formal grammar, but are instead more
+practical higher level combinations of true terminals.
+
+All TokenList and Terminal objects have a 'value' attribute, which produces the
+semantically meaningful value of that part of the parse subtree.  The value of
+all whitespace tokens (no matter how many sub-tokens they may contain) is a
+single space, as per the RFC rules.  This includes 'CFWS', which is herein
+included in the general class of whitespace tokens.  There is one exception to
+the rule that whitespace tokens are collapsed into single spaces in values: in
+the value of a 'bare-quoted-string' (a quoted-string with no leading or
+trailing whitespace), any whitespace that appeared between the quotation marks
+is preserved in the returned value.  Note that in all Terminal strings quoted
+pairs are turned into their unquoted values.
+
+All TokenList and Terminal objects also have a string value, which attempts to
+be a "canonical" representation of the RFC-compliant form of the substring that
+produced the parsed subtree, including minimal use of quoted pair quoting.
+Whitespace runs are not collapsed.
+
+Comment tokens also have a 'content' attribute providing the string found
+between the parens (including any nested comments) with whitespace preserved.
+
+All TokenList and Terminal objects have a 'defects' attribute which is a
+possibly empty list all of the defects found while creating the token.  Defects
+may appear on any token in the tree, and a composite list of all defects in the
+subtree is available through the 'all_defects' attribute of any node.  (For
+Terminal notes x.defects == x.all_defects.)
+
+Each object in a parse tree is called a 'token', and each has a 'token_type'
+attribute that gives the name from the RFC 5322 grammar that it represents.
+Not all RFC 5322 nodes are produced, and there is one non-RFC 5322 node that
+may be produced: 'ptext'.  A 'ptext' is a string of printable ascii characters.
+It is returned in place of lists of (ctext/quoted-pair) and
+(qtext/quoted-pair).
+
+XXX: provide complete list of token types.
+"""
+
+import re
+import urllib.parse as parse   # For urllib.parse.unquote
+from string import hexdigits
+from collections import OrderedDict
+from operator import itemgetter
+import email._encoded_words as _ew
+import email.errors as errors
+import email.utils as utils
+
+#
+# Useful constants and functions
+#
+
+WSP = set(' \t')
+CFWS_LEADER = WSP | set('(')
+SPECIALS = set(r'()<>@,:;.\"[]')
+ATOM_ENDS = SPECIALS | WSP
+DOT_ATOM_ENDS = ATOM_ENDS - set('.')
+# '.', '"', and '(' do not end phrases in order to support obs-phrase
+PHRASE_ENDS = SPECIALS - set('."(')
+TSPECIALS = (SPECIALS | set('/?=')) - set('.')
+TOKEN_ENDS = TSPECIALS | WSP
+ASPECIALS = TSPECIALS | set("*'%")
+ATTRIBUTE_ENDS = ASPECIALS | WSP
+EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%')
+
+def quote_string(value):
+    return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
+
+#
+# TokenList and its subclasses
+#
+
+class TokenList(list):
+
+    token_type = None
+    syntactic_break = True
+    ew_combine_allowed = True
+
+    def __init__(self, *args, **kw):
+        super().__init__(*args, **kw)
+        self.defects = []
+
+    def __str__(self):
+        return ''.join(str(x) for x in self)
+
+    def __repr__(self):
+        return '{}({})'.format(self.__class__.__name__,
+                             super().__repr__())
+
+    @property
+    def value(self):
+        return ''.join(x.value for x in self if x.value)
+
+    @property
+    def all_defects(self):
+        return sum((x.all_defects for x in self), self.defects)
+
+    def startswith_fws(self):
+        return self[0].startswith_fws()
+
+    @property
+    def as_ew_allowed(self):
+        """True if all top level tokens of this part may be RFC2047 encoded."""
+        return all(part.as_ew_allowed for part in self)
+
+    @property
+    def comments(self):
+        comments = []
+        for token in self:
+            comments.extend(token.comments)
+        return comments
+
+    def fold(self, *, policy):
+        return _refold_parse_tree(self, policy=policy)
+
+    def pprint(self, indent=''):
+        print(self.ppstr(indent=indent))
+
+    def ppstr(self, indent=''):
+        return '\n'.join(self._pp(indent=indent))
+
+    def _pp(self, indent=''):
+        yield '{}{}/{}('.format(
+            indent,
+            self.__class__.__name__,
+            self.token_type)
+        for token in self:
+            if not hasattr(token, '_pp'):
+                yield (indent + '    !! invalid element in token '
+                                        'list: {!r}'.format(token))
+            else:
+                yield from token._pp(indent+'    ')
+        if self.defects:
+            extra = ' Defects: {}'.format(self.defects)
+        else:
+            extra = ''
+        yield '{}){}'.format(indent, extra)
+
+
+class WhiteSpaceTokenList(TokenList):
+
+    @property
+    def value(self):
+        return ' '
+
+    @property
+    def comments(self):
+        return [x.content for x in self if x.token_type=='comment']
+
+
+class UnstructuredTokenList(TokenList):
+
+    token_type = 'unstructured'
+
+
+class Phrase(TokenList):
+
+    token_type = 'phrase'
+
+class Word(TokenList):
+
+    token_type = 'word'
+
+
+class CFWSList(WhiteSpaceTokenList):
+
+    token_type = 'cfws'
+
+
+class Atom(TokenList):
+
+    token_type = 'atom'
+
+
+class Token(TokenList):
+
+    token_type = 'token'
+    encode_as_ew = False
+
+
+class EncodedWord(TokenList):
+
+    token_type = 'encoded-word'
+    cte = None
+    charset = None
+    lang = None
+
+
+class QuotedString(TokenList):
+
+    token_type = 'quoted-string'
+
+    @property
+    def content(self):
+        for x in self:
+            if x.token_type == 'bare-quoted-string':
+                return x.value
+
+    @property
+    def quoted_value(self):
+        res = []
+        for x in self:
+            if x.token_type == 'bare-quoted-string':
+                res.append(str(x))
+            else:
+                res.append(x.value)
+        return ''.join(res)
+
+    @property
+    def stripped_value(self):
+        for token in self:
+            if token.token_type == 'bare-quoted-string':
+                return token.value
+
+
+class BareQuotedString(QuotedString):
+
+    token_type = 'bare-quoted-string'
+
+    def __str__(self):
+        return quote_string(''.join(str(x) for x in self))
+
+    @property
+    def value(self):
+        return ''.join(str(x) for x in self)
+
+
+class Comment(WhiteSpaceTokenList):
+
+    token_type = 'comment'
+
+    def __str__(self):
+        return ''.join(sum([
+                            ["("],
+                            [self.quote(x) for x in self],
+                            [")"],
+                            ], []))
+
+    def quote(self, value):
+        if value.token_type == 'comment':
+            return str(value)
+        return str(value).replace('\\', '\\\\').replace(
+                                  '(', r'\(').replace(
+                                  ')', r'\)')
+
+    @property
+    def content(self):
+        return ''.join(str(x) for x in self)
+
+    @property
+    def comments(self):
+        return [self.content]
+
+class AddressList(TokenList):
+
+    token_type = 'address-list'
+
+    @property
+    def addresses(self):
+        return [x for x in self if x.token_type=='address']
+
+    @property
+    def mailboxes(self):
+        return sum((x.mailboxes
+                    for x in self if x.token_type=='address'), [])
+
+    @property
+    def all_mailboxes(self):
+        return sum((x.all_mailboxes
+                    for x in self if x.token_type=='address'), [])
+
+
+class Address(TokenList):
+
+    token_type = 'address'
+
+    @property
+    def display_name(self):
+        if self[0].token_type == 'group':
+            return self[0].display_name
+
+    @property
+    def mailboxes(self):
+        if self[0].token_type == 'mailbox':
+            return [self[0]]
+        elif self[0].token_type == 'invalid-mailbox':
+            return []
+        return self[0].mailboxes
+
+    @property
+    def all_mailboxes(self):
+        if self[0].token_type == 'mailbox':
+            return [self[0]]
+        elif self[0].token_type == 'invalid-mailbox':
+            return [self[0]]
+        return self[0].all_mailboxes
+
+class MailboxList(TokenList):
+
+    token_type = 'mailbox-list'
+
+    @property
+    def mailboxes(self):
+        return [x for x in self if x.token_type=='mailbox']
+
+    @property
+    def all_mailboxes(self):
+        return [x for x in self
+            if x.token_type in ('mailbox', 'invalid-mailbox')]
+
+
+class GroupList(TokenList):
+
+    token_type = 'group-list'
+
+    @property
+    def mailboxes(self):
+        if not self or self[0].token_type != 'mailbox-list':
+            return []
+        return self[0].mailboxes
+
+    @property
+    def all_mailboxes(self):
+        if not self or self[0].token_type != 'mailbox-list':
+            return []
+        return self[0].all_mailboxes
+
+
+class Group(TokenList):
+
+    token_type = "group"
+
+    @property
+    def mailboxes(self):
+        if self[2].token_type != 'group-list':
+            return []
+        return self[2].mailboxes
+
+    @property
+    def all_mailboxes(self):
+        if self[2].token_type != 'group-list':
+            return []
+        return self[2].all_mailboxes
+
+    @property
+    def display_name(self):
+        return self[0].display_name
+
+
+class NameAddr(TokenList):
+
+    token_type = 'name-addr'
+
+    @property
+    def display_name(self):
+        if len(self) == 1:
+            return None
+        return self[0].display_name
+
+    @property
+    def local_part(self):
+        return self[-1].local_part
+
+    @property
+    def domain(self):
+        return self[-1].domain
+
+    @property
+    def route(self):
+        return self[-1].route
+
+    @property
+    def addr_spec(self):
+        return self[-1].addr_spec
+
+
+class AngleAddr(TokenList):
+
+    token_type = 'angle-addr'
+
+    @property
+    def local_part(self):
+        for x in self:
+            if x.token_type == 'addr-spec':
+                return x.local_part
+
+    @property
+    def domain(self):
+        for x in self:
+            if x.token_type == 'addr-spec':
+                return x.domain
+
+    @property
+    def route(self):
+        for x in self:
+            if x.token_type == 'obs-route':
+                return x.domains
+
+    @property
+    def addr_spec(self):
+        for x in self:
+            if x.token_type == 'addr-spec':
+                if x.local_part:
+                    return x.addr_spec
+                else:
+                    return quote_string(x.local_part) + x.addr_spec
+        else:
+            return '<>'
+
+
+class ObsRoute(TokenList):
+
+    token_type = 'obs-route'
+
+    @property
+    def domains(self):
+        return [x.domain for x in self if x.token_type == 'domain']
+
+
+class Mailbox(TokenList):
+
+    token_type = 'mailbox'
+
+    @property
+    def display_name(self):
+        if self[0].token_type == 'name-addr':
+            return self[0].display_name
+
+    @property
+    def local_part(self):
+        return self[0].local_part
+
+    @property
+    def domain(self):
+        return self[0].domain
+
+    @property
+    def route(self):
+        if self[0].token_type == 'name-addr':
+            return self[0].route
+
+    @property
+    def addr_spec(self):
+        return self[0].addr_spec
+
+
+class InvalidMailbox(TokenList):
+
+    token_type = 'invalid-mailbox'
+
+    @property
+    def display_name(self):
+        return None
+
+    local_part = domain = route = addr_spec = display_name
+
+
+class Domain(TokenList):
+
+    token_type = 'domain'
+    as_ew_allowed = False
+
+    @property
+    def domain(self):
+        return ''.join(super().value.split())
+
+
+class DotAtom(TokenList):
+
+    token_type = 'dot-atom'
+
+
+class DotAtomText(TokenList):
+
+    token_type = 'dot-atom-text'
+    as_ew_allowed = True
+
+
+class AddrSpec(TokenList):
+
+    token_type = 'addr-spec'
+    as_ew_allowed = False
+
+    @property
+    def local_part(self):
+        return self[0].local_part
+
+    @property
+    def domain(self):
+        if len(self) < 3:
+            return None
+        return self[-1].domain
+
+    @property
+    def value(self):
+        if len(self) < 3:
+            return self[0].value
+        return self[0].value.rstrip()+self[1].value+self[2].value.lstrip()
+
+    @property
+    def addr_spec(self):
+        nameset = set(self.local_part)
+        if len(nameset) > len(nameset-DOT_ATOM_ENDS):
+            lp = quote_string(self.local_part)
+        else:
+            lp = self.local_part
+        if self.domain is not None:
+            return lp + '@' + self.domain
+        return lp
+
+
+class ObsLocalPart(TokenList):
+
+    token_type = 'obs-local-part'
+    as_ew_allowed = False
+
+
+class DisplayName(Phrase):
+
+    token_type = 'display-name'
+    ew_combine_allowed = False
+
+    @property
+    def display_name(self):
+        res = TokenList(self)
+        if res[0].token_type == 'cfws':
+            res.pop(0)
+        else:
+            if res[0][0].token_type == 'cfws':
+                res[0] = TokenList(res[0][1:])
+        if res[-1].token_type == 'cfws':
+            res.pop()
+        else:
+            if res[-1][-1].token_type == 'cfws':
+                res[-1] = TokenList(res[-1][:-1])
+        return res.value
+
+    @property
+    def value(self):
+        quote = False
+        if self.defects:
+            quote = True
+        else:
+            for x in self:
+                if x.token_type == 'quoted-string':
+                    quote = True
+        if quote:
+            pre = post = ''
+            if self[0].token_type=='cfws' or self[0][0].token_type=='cfws':
+                pre = ' '
+            if self[-1].token_type=='cfws' or self[-1][-1].token_type=='cfws':
+                post = ' '
+            return pre+quote_string(self.display_name)+post
+        else:
+            return super().value
+
+
+class LocalPart(TokenList):
+
+    token_type = 'local-part'
+    as_ew_allowed = False
+
+    @property
+    def value(self):
+        if self[0].token_type == "quoted-string":
+            return self[0].quoted_value
+        else:
+            return self[0].value
+
+    @property
+    def local_part(self):
+        # Strip whitespace from front, back, and around dots.
+        res = [DOT]
+        last = DOT
+        last_is_tl = False
+        for tok in self[0] + [DOT]:
+            if tok.token_type == 'cfws':
+                continue
+            if (last_is_tl and tok.token_type == 'dot' and
+                    last[-1].token_type == 'cfws'):
+                res[-1] = TokenList(last[:-1])
+            is_tl = isinstance(tok, TokenList)
+            if (is_tl and last.token_type == 'dot' and
+                    tok[0].token_type == 'cfws'):
+                res.append(TokenList(tok[1:]))
+            else:
+                res.append(tok)
+            last = res[-1]
+            last_is_tl = is_tl
+        res = TokenList(res[1:-1])
+        return res.value
+
+
+class DomainLiteral(TokenList):
+
+    token_type = 'domain-literal'
+    as_ew_allowed = False
+
+    @property
+    def domain(self):
+        return ''.join(super().value.split())
+
+    @property
+    def ip(self):
+        for x in self:
+            if x.token_type == 'ptext':
+                return x.value
+
+
+class MIMEVersion(TokenList):
+
+    token_type = 'mime-version'
+    major = None
+    minor = None
+
+
+class Parameter(TokenList):
+
+    token_type = 'parameter'
+    sectioned = False
+    extended = False
+    charset = 'us-ascii'
+
+    @property
+    def section_number(self):
+        # Because the first token, the attribute (name) eats CFWS, the second
+        # token is always the section if there is one.
+        return self[1].number if self.sectioned else 0
+
+    @property
+    def param_value(self):
+        # This is part of the "handle quoted extended parameters" hack.
+        for token in self:
+            if token.token_type == 'value':
+                return token.stripped_value
+            if token.token_type == 'quoted-string':
+                for token in token:
+                    if token.token_type == 'bare-quoted-string':
+                        for token in token:
+                            if token.token_type == 'value':
+                                return token.stripped_value
+        return ''
+
+
+class InvalidParameter(Parameter):
+
+    token_type = 'invalid-parameter'
+
+
+class Attribute(TokenList):
+
+    token_type = 'attribute'
+
+    @property
+    def stripped_value(self):
+        for token in self:
+            if token.token_type.endswith('attrtext'):
+                return token.value
+
+class Section(TokenList):
+
+    token_type = 'section'
+    number = None
+
+
+class Value(TokenList):
+
+    token_type = 'value'
+
+    @property
+    def stripped_value(self):
+        token = self[0]
+        if token.token_type == 'cfws':
+            token = self[1]
+        if token.token_type.endswith(
+                ('quoted-string', 'attribute', 'extended-attribute')):
+            return token.stripped_value
+        return self.value
+
+
+class MimeParameters(TokenList):
+
+    token_type = 'mime-parameters'
+    syntactic_break = False
+
+    @property
+    def params(self):
+        # The RFC specifically states that the ordering of parameters is not
+        # guaranteed and may be reordered by the transport layer.  So we have
+        # to assume the RFC 2231 pieces can come in any order.  However, we
+        # output them in the order that we first see a given name, which gives
+        # us a stable __str__.
+        params = OrderedDict()
+        for token in self:
+            if not token.token_type.endswith('parameter'):
+                continue
+            if token[0].token_type != 'attribute':
+                continue
+            name = token[0].value.strip()
+            if name not in params:
+                params[name] = []
+            params[name].append((token.section_number, token))
+        for name, parts in params.items():
+            parts = sorted(parts, key=itemgetter(0))
+            first_param = parts[0][1]
+            charset = first_param.charset
+            # Our arbitrary error recovery is to ignore duplicate parameters,
+            # to use appearance order if there are duplicate rfc 2231 parts,
+            # and to ignore gaps.  This mimics the error recovery of get_param.
+            if not first_param.extended and len(parts) > 1:
+                if parts[1][0] == 0:
+                    parts[1][1].defects.append(errors.InvalidHeaderDefect(
+                        'duplicate parameter name; duplicate(s) ignored'))
+                    parts = parts[:1]
+                # Else assume the *0* was missing...note that this is different
+                # from get_param, but we registered a defect for this earlier.
+            value_parts = []
+            i = 0
+            for section_number, param in parts:
+                if section_number != i:
+                    # We could get fancier here and look for a complete
+                    # duplicate extended parameter and ignore the second one
+                    # seen.  But we're not doing that.  The old code didn't.
+                    if not param.extended:
+                        param.defects.append(errors.InvalidHeaderDefect(
+                            'duplicate parameter name; duplicate ignored'))
+                        continue
+                    else:
+                        param.defects.append(errors.InvalidHeaderDefect(
+                            "inconsistent RFC2231 parameter numbering"))
+                i += 1
+                value = param.param_value
+                if param.extended:
+                    try:
+                        value = parse.unquote_to_bytes(value)
+                    except UnicodeEncodeError:
+                        # source had surrogate escaped bytes.  What we do now
+                        # is a bit of an open question.  I'm not sure this is
+                        # the best choice, but it is what the old algorithm did
+                        value = parse.unquote(value, encoding='latin-1')
+                    else:
+                        try:
+                            value = value.decode(charset, 'surrogateescape')
+                        except LookupError:
+                            # XXX: there should really be a custom defect for
+                            # unknown character set to make it easy to find,
+                            # because otherwise unknown charset is a silent
+                            # failure.
+                            value = value.decode('us-ascii', 'surrogateescape')
+                        if utils._has_surrogates(value):
+                            param.defects.append(errors.UndecodableBytesDefect())
+                value_parts.append(value)
+            value = ''.join(value_parts)
+            yield name, value
+
+    def __str__(self):
+        params = []
+        for name, value in self.params:
+            if value:
+                params.append('{}={}'.format(name, quote_string(value)))
+            else:
+                params.append(name)
+        params = '; '.join(params)
+        return ' ' + params if params else ''
+
+
+class ParameterizedHeaderValue(TokenList):
+
+    # Set this false so that the value doesn't wind up on a new line even
+    # if it and the parameters would fit there but not on the first line.
+    syntactic_break = False
+
+    @property
+    def params(self):
+        for token in reversed(self):
+            if token.token_type == 'mime-parameters':
+                return token.params
+        return {}
+
+
+class ContentType(ParameterizedHeaderValue):
+
+    token_type = 'content-type'
+    as_ew_allowed = False
+    maintype = 'text'
+    subtype = 'plain'
+
+
+class ContentDisposition(ParameterizedHeaderValue):
+
+    token_type = 'content-disposition'
+    as_ew_allowed = False
+    content_disposition = None
+
+
+class ContentTransferEncoding(TokenList):
+
+    token_type = 'content-transfer-encoding'
+    as_ew_allowed = False
+    cte = '7bit'
+
+
+class HeaderLabel(TokenList):
+
+    token_type = 'header-label'
+    as_ew_allowed = False
+
+
+class Header(TokenList):
+
+    token_type = 'header'
+
+
+#
+# Terminal classes and instances
+#
+
+class Terminal(str):
+
+    as_ew_allowed = True
+    ew_combine_allowed = True
+    syntactic_break = True
+
+    def __new__(cls, value, token_type):
+        self = str.__newobj__(cls, value)
+        self.token_type = token_type
+        self.defects = []
+        return self
+
+    def __repr__(self):
+        return "{}({})".format(self.__class__.__name__, super().__repr__())
+
+    def pprint(self):
+        print(self.__class__.__name__ + '/' + self.token_type)
+
+    @property
+    def all_defects(self):
+        return list(self.defects)
+
+    def _pp(self, indent=''):
+        return ["{}{}/{}({}){}".format(
+            indent,
+            self.__class__.__name__,
+            self.token_type,
+            super().__repr__(),
+            '' if not self.defects else ' {}'.format(self.defects),
+            )]
+
+    def pop_trailing_ws(self):
+        # This terminates the recursion.
+        return None
+
+    @property
+    def comments(self):
+        return []
+
+    def __getnewargs__(self):
+        return(str(self), self.token_type)
+
+
+class WhiteSpaceTerminal(Terminal):
+
+    @property
+    def value(self):
+        return ' '
+
+    def startswith_fws(self):
+        return True
+
+
+class ValueTerminal(Terminal):
+
+    @property
+    def value(self):
+        return self
+
+    def startswith_fws(self):
+        return False
+
+
+class EWWhiteSpaceTerminal(WhiteSpaceTerminal):
+
+    @property
+    def value(self):
+        return ''
+
+    def __str__(self):
+        return ''
+
+
+# XXX these need to become classes and used as instances so
+# that a program can't change them in a parse tree and screw
+# up other parse trees.  Maybe should have  tests for that, too.
+DOT = ValueTerminal('.', 'dot')
+ListSeparator = ValueTerminal(',', 'list-separator')
+RouteComponentMarker = ValueTerminal('@', 'route-component-marker')
+
+#
+# Parser
+#
+
+# Parse strings according to RFC822/2047/2822/5322 rules.
+#
+# This is a stateless parser.  Each get_XXX function accepts a string and
+# returns either a Terminal or a TokenList representing the RFC object named
+# by the method and a string containing the remaining unparsed characters
+# from the input.  Thus a parser method consumes the next syntactic construct
+# of a given type and returns a token representing the construct plus the
+# unparsed remainder of the input string.
+#
+# For example, if the first element of a structured header is a 'phrase',
+# then:
+#
+#     phrase, value = get_phrase(value)
+#
+# returns the complete phrase from the start of the string value, plus any
+# characters left in the string after the phrase is removed.
+
+
+_wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split
+_non_atom_end_matcher = re.compile(r"[^{}]+".format(
+    ''.join(ATOM_ENDS).replace('\\','\\\\').replace(']',r'\]'))).match
+_non_printable_finder = re.compile(r"[\x00-\x20\x7F]").findall
+_non_token_end_matcher = re.compile(r"[^{}]+".format(
+    ''.join(TOKEN_ENDS).replace('\\','\\\\').replace(']',r'\]'))).match
+_non_attribute_end_matcher = re.compile(r"[^{}]+".format(
+    ''.join(ATTRIBUTE_ENDS).replace('\\','\\\\').replace(']',r'\]'))).match
+_non_extended_attribute_end_matcher = re.compile(r"[^{}]+".format(
+    ''.join(EXTENDED_ATTRIBUTE_ENDS).replace(
+                                    '\\','\\\\').replace(']',r'\]'))).match
+
+def _validate_xtext(xtext):
+    """If input token contains ASCII non-printables, register a defect."""
+
+    non_printables = _non_printable_finder(xtext)
+    if non_printables:
+        xtext.defects.append(errors.NonPrintableDefect(non_printables))
+    if utils._has_surrogates(xtext):
+        xtext.defects.append(errors.UndecodableBytesDefect(
+            "Non-ASCII characters found in header token"))
+
+def _get_ptext_to_endchars(value, endchars):
+    """Scan printables/quoted-pairs until endchars and return unquoted ptext.
+
+    This function turns a run of qcontent, ccontent-without-comments, or
+    dtext-with-quoted-printables into a single string by unquoting any
+    quoted printables.  It returns the string, the remaining value, and
+    a flag that is True iff there were any quoted printables decoded.
+
+    """
+    fragment, *remainder = _wsp_splitter(value, 1)
+    vchars = []
+    escape = False
+    had_qp = False
+    for pos in range(len(fragment)):
+        if fragment[pos] == '\\':
+            if escape:
+                escape = False
+                had_qp = True
+            else:
+                escape = True
+                continue
+        if escape:
+            escape = False
+        elif fragment[pos] in endchars:
+            break
+        vchars.append(fragment[pos])
+    else:
+        pos = pos + 1
+    return ''.join(vchars), ''.join([fragment[pos:]] + remainder), had_qp
+
+def get_fws(value):
+    """FWS = 1*WSP
+
+    This isn't the RFC definition.  We're using fws to represent tokens where
+    folding can be done, but when we are parsing the *un*folding has already
+    been done so we don't need to watch out for CRLF.
+
+    """
+    newvalue = value.lstrip()
+    fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
+    return fws, newvalue
+
+def get_encoded_word(value):
+    """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
+
+    """
+    ew = EncodedWord()
+    if not value.startswith('=?'):
+        raise errors.HeaderParseError(
+            "expected encoded word but found {}".format(value))
+    tok, *remainder = value[2:].split('?=', 1)
+    if tok == value[2:]:
+        raise errors.HeaderParseError(
+            "expected encoded word but found {}".format(value))
+    remstr = ''.join(remainder)
+    if len(remstr) > 1 and remstr[0] in hexdigits and remstr[1] in hexdigits:
+        # The ? after the CTE was followed by an encoded word escape (=XX).
+        rest, *remainder = remstr.split('?=', 1)
+        tok = tok + '?=' + rest
+    if len(tok.split()) > 1:
+        ew.defects.append(errors.InvalidHeaderDefect(
+            "whitespace inside encoded word"))
+    ew.cte = value
+    value = ''.join(remainder)
+    try:
+        text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
+    except ValueError:
+        raise errors.HeaderParseError(
+            "encoded word format invalid: '{}'".format(ew.cte))
+    ew.charset = charset
+    ew.lang = lang
+    ew.defects.extend(defects)
+    while text:
+        if text[0] in WSP:
+            token, text = get_fws(text)
+            ew.append(token)
+            continue
+        chars, *remainder = _wsp_splitter(text, 1)
+        vtext = ValueTerminal(chars, 'vtext')
+        _validate_xtext(vtext)
+        ew.append(vtext)
+        text = ''.join(remainder)
+    return ew, value
+
+def get_unstructured(value):
+    """unstructured = (*([FWS] vchar) *WSP) / obs-unstruct
+       obs-unstruct = *((*LF *CR *(obs-utext) *LF *CR)) / FWS)
+       obs-utext = %d0 / obs-NO-WS-CTL / LF / CR
+
+       obs-NO-WS-CTL is control characters except WSP/CR/LF.
+
+    So, basically, we have printable runs, plus control characters or nulls in
+    the obsolete syntax, separated by whitespace.  Since RFC 2047 uses the
+    obsolete syntax in its specification, but requires whitespace on either
+    side of the encoded words, I can see no reason to need to separate the
+    non-printable-non-whitespace from the printable runs if they occur, so we
+    parse this into xtext tokens separated by WSP tokens.
+
+    Because an 'unstructured' value must by definition constitute the entire
+    value, this 'get' routine does not return a remaining value, only the
+    parsed TokenList.
+
+    """
+    # XXX: but what about bare CR and LF?  They might signal the start or
+    # end of an encoded word.  YAGNI for now, since our current parsers
+    # will never send us strings with bare CR or LF.
+
+    unstructured = UnstructuredTokenList()
+    while value:
+        if value[0] in WSP:
+            token, value = get_fws(value)
+            unstructured.append(token)
+            continue
+        if value.startswith('=?'):
+            try:
+                token, value = get_encoded_word(value)
+            except errors.HeaderParseError:
+                # XXX: Need to figure out how to register defects when
+                # appropriate here.
+                pass
+            else:
+                have_ws = True
+                if len(unstructured) > 0:
+                    if unstructured[-1].token_type != 'fws':
+                        unstructured.defects.append(errors.InvalidHeaderDefect(
+                            "missing whitespace before encoded word"))
+                        have_ws = False
+                if have_ws and len(unstructured) > 1:
+                    if unstructured[-2].token_type == 'encoded-word':
+                        unstructured[-1] = EWWhiteSpaceTerminal(
+                            unstructured[-1], 'fws')
+                unstructured.append(token)
+                continue
+        tok, *remainder = _wsp_splitter(value, 1)
+        vtext = ValueTerminal(tok, 'vtext')
+        _validate_xtext(vtext)
+        unstructured.append(vtext)
+        value = ''.join(remainder)
+    return unstructured
+
+def get_qp_ctext(value):
+    r"""ctext = <printable ascii except \ ( )>
+
+    This is not the RFC ctext, since we are handling nested comments in comment
+    and unquoting quoted-pairs here.  We allow anything except the '()'
+    characters, but if we find any ASCII other than the RFC defined printable
+    ASCII, a NonPrintableDefect is added to the token's defects list.  Since
+    quoted pairs are converted to their unquoted values, what is returned is
+    a 'ptext' token.  In this case it is a WhiteSpaceTerminal, so it's value
+    is ' '.
+
+    """
+    ptext, value, _ = _get_ptext_to_endchars(value, '()')
+    ptext = WhiteSpaceTerminal(ptext, 'ptext')
+    _validate_xtext(ptext)
+    return ptext, value
+
+def get_qcontent(value):
+    """qcontent = qtext / quoted-pair
+
+    We allow anything except the DQUOTE character, but if we find any ASCII
+    other than the RFC defined printable ASCII, a NonPrintableDefect is
+    added to the token's defects list.  Any quoted pairs are converted to their
+    unquoted values, so what is returned is a 'ptext' token.  In this case it
+    is a ValueTerminal.
+
+    """
+    ptext, value, _ = _get_ptext_to_endchars(value, '"')
+    ptext = ValueTerminal(ptext, 'ptext')
+    _validate_xtext(ptext)
+    return ptext, value
+
+def get_atext(value):
+    """atext = <matches _atext_matcher>
+
+    We allow any non-ATOM_ENDS in atext, but add an InvalidATextDefect to
+    the token's defects list if we find non-atext characters.
+    """
+    m = _non_atom_end_matcher(value)
+    if not m:
+        raise errors.HeaderParseError(
+            "expected atext but found '{}'".format(value))
+    atext = m.group()
+    value = value[len(atext):]
+    atext = ValueTerminal(atext, 'atext')
+    _validate_xtext(atext)
+    return atext, value
+
+def get_bare_quoted_string(value):
+    """bare-quoted-string = DQUOTE *([FWS] qcontent) [FWS] DQUOTE
+
+    A quoted-string without the leading or trailing white space.  Its
+    value is the text between the quote marks, with whitespace
+    preserved and quoted pairs decoded.
+    """
+    if value[0] != '"':
+        raise errors.HeaderParseError(
+            "expected '\"' but found '{}'".format(value))
+    bare_quoted_string = BareQuotedString()
+    value = value[1:]
+    if value[0] == '"':
+        token, value = get_qcontent(value)
+        bare_quoted_string.append(token)
+    while value and value[0] != '"':
+        if value[0] in WSP:
+            token, value = get_fws(value)
+        elif value[:2] == '=?':
+            try:
+                token, value = get_encoded_word(value)
+                bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
+                    "encoded word inside quoted string"))
+            except errors.HeaderParseError:
+                token, value = get_qcontent(value)
+        else:
+            token, value = get_qcontent(value)
+        bare_quoted_string.append(token)
+    if not value:
+        bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
+            "end of header inside quoted string"))
+        return bare_quoted_string, value
+    return bare_quoted_string, value[1:]
+
+def get_comment(value):
+    """comment = "(" *([FWS] ccontent) [FWS] ")"
+       ccontent = ctext / quoted-pair / comment
+
+    We handle nested comments here, and quoted-pair in our qp-ctext routine.
+    """
+    if value and value[0] != '(':
+        raise errors.HeaderParseError(
+            "expected '(' but found '{}'".format(value))
+    comment = Comment()
+    value = value[1:]
+    while value and value[0] != ")":
+        if value[0] in WSP:
+            token, value = get_fws(value)
+        elif value[0] == '(':
+            token, value = get_comment(value)
+        else:
+            token, value = get_qp_ctext(value)
+        comment.append(token)
+    if not value:
+        comment.defects.append(errors.InvalidHeaderDefect(
+            "end of header inside comment"))
+        return comment, value
+    return comment, value[1:]
+
+def get_cfws(value):
+    """CFWS = (1*([FWS] comment) [FWS]) / FWS
+
+    """
+    cfws = CFWSList()
+    while value and value[0] in CFWS_LEADER:
+        if value[0] in WSP:
+            token, value = get_fws(value)
+        else:
+            token, value = get_comment(value)
+        cfws.append(token)
+    return cfws, value
+
+def get_quoted_string(value):
+    """quoted-string = [CFWS] <bare-quoted-string> [CFWS]
+
+    'bare-quoted-string' is an intermediate class defined by this
+    parser and not by the RFC grammar.  It is the quoted string
+    without any attached CFWS.
+    """
+    quoted_string = QuotedString()
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        quoted_string.append(token)
+    token, value = get_bare_quoted_string(value)
+    quoted_string.append(token)
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        quoted_string.append(token)
+    return quoted_string, value
+
+def get_atom(value):
+    """atom = [CFWS] 1*atext [CFWS]
+
+    An atom could be an rfc2047 encoded word.
+    """
+    atom = Atom()
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        atom.append(token)
+    if value and value[0] in ATOM_ENDS:
+        raise errors.HeaderParseError(
+            "expected atom but found '{}'".format(value))
+    if value.startswith('=?'):
+        try:
+            token, value = get_encoded_word(value)
+        except errors.HeaderParseError:
+            # XXX: need to figure out how to register defects when
+            # appropriate here.
+            token, value = get_atext(value)
+    else:
+        token, value = get_atext(value)
+    atom.append(token)
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        atom.append(token)
+    return atom, value
+
+def get_dot_atom_text(value):
+    """ dot-text = 1*atext *("." 1*atext)
+
+    """
+    dot_atom_text = DotAtomText()
+    if not value or value[0] in ATOM_ENDS:
+        raise errors.HeaderParseError("expected atom at a start of "
+            "dot-atom-text but found '{}'".format(value))
+    while value and value[0] not in ATOM_ENDS:
+        token, value = get_atext(value)
+        dot_atom_text.append(token)
+        if value and value[0] == '.':
+            dot_atom_text.append(DOT)
+            value = value[1:]
+    if dot_atom_text[-1] is DOT:
+        raise errors.HeaderParseError("expected atom at end of dot-atom-text "
+            "but found '{}'".format('.'+value))
+    return dot_atom_text, value
+
+def get_dot_atom(value):
+    """ dot-atom = [CFWS] dot-atom-text [CFWS]
+
+    Any place we can have a dot atom, we could instead have an rfc2047 encoded
+    word.
+    """
+    dot_atom = DotAtom()
+    if value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        dot_atom.append(token)
+    if value.startswith('=?'):
+        try:
+            token, value = get_encoded_word(value)
+        except errors.HeaderParseError:
+            # XXX: need to figure out how to register defects when
+            # appropriate here.
+            token, value = get_dot_atom_text(value)
+    else:
+        token, value = get_dot_atom_text(value)
+    dot_atom.append(token)
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        dot_atom.append(token)
+    return dot_atom, value
+
+def get_word(value):
+    """word = atom / quoted-string
+
+    Either atom or quoted-string may start with CFWS.  We have to peel off this
+    CFWS first to determine which type of word to parse.  Afterward we splice
+    the leading CFWS, if any, into the parsed sub-token.
+
+    If neither an atom or a quoted-string is found before the next special, a
+    HeaderParseError is raised.
+
+    The token returned is either an Atom or a QuotedString, as appropriate.
+    This means the 'word' level of the formal grammar is not represented in the
+    parse tree; this is because having that extra layer when manipulating the
+    parse tree is more confusing than it is helpful.
+
+    """
+    if value[0] in CFWS_LEADER:
+        leader, value = get_cfws(value)
+    else:
+        leader = None
+    if value[0]=='"':
+        token, value = get_quoted_string(value)
+    elif value[0] in SPECIALS:
+        raise errors.HeaderParseError("Expected 'atom' or 'quoted-string' "
+                                      "but found '{}'".format(value))
+    else:
+        token, value = get_atom(value)
+    if leader is not None:
+        token[:0] = [leader]
+    return token, value
+
+def get_phrase(value):
+    """ phrase = 1*word / obs-phrase
+        obs-phrase = word *(word / "." / CFWS)
+
+    This means a phrase can be a sequence of words, periods, and CFWS in any
+    order as long as it starts with at least one word.  If anything other than
+    words is detected, an ObsoleteHeaderDefect is added to the token's defect
+    list.  We also accept a phrase that starts with CFWS followed by a dot;
+    this is registered as an InvalidHeaderDefect, since it is not supported by
+    even the obsolete grammar.
+
+    """
+    phrase = Phrase()
+    try:
+        token, value = get_word(value)
+        phrase.append(token)
+    except errors.HeaderParseError:
+        phrase.defects.append(errors.InvalidHeaderDefect(
+            "phrase does not start with word"))
+    while value and value[0] not in PHRASE_ENDS:
+        if value[0]=='.':
+            phrase.append(DOT)
+            phrase.defects.append(errors.ObsoleteHeaderDefect(
+                "period in 'phrase'"))
+            value = value[1:]
+        else:
+            try:
+                token, value = get_word(value)
+            except errors.HeaderParseError:
+                if value[0] in CFWS_LEADER:
+                    token, value = get_cfws(value)
+                    phrase.defects.append(errors.ObsoleteHeaderDefect(
+                        "comment found without atom"))
+                else:
+                    raise
+            phrase.append(token)
+    return phrase, value
+
+def get_local_part(value):
+    """ local-part = dot-atom / quoted-string / obs-local-part
+
+    """
+    local_part = LocalPart()
+    leader = None
+    if value[0] in CFWS_LEADER:
+        leader, value = get_cfws(value)
+    if not value:
+        raise errors.HeaderParseError(
+            "expected local-part but found '{}'".format(value))
+    try:
+        token, value = get_dot_atom(value)
+    except errors.HeaderParseError:
+        try:
+            token, value = get_word(value)
+        except errors.HeaderParseError:
+            if value[0] != '\\' and value[0] in PHRASE_ENDS:
+                raise
+            token = TokenList()
+    if leader is not None:
+        token[:0] = [leader]
+    local_part.append(token)
+    if value and (value[0]=='\\' or value[0] not in PHRASE_ENDS):
+        obs_local_part, value = get_obs_local_part(str(local_part) + value)
+        if obs_local_part.token_type == 'invalid-obs-local-part':
+            local_part.defects.append(errors.InvalidHeaderDefect(
+                "local-part is not dot-atom, quoted-string, or obs-local-part"))
+        else:
+            local_part.defects.append(errors.ObsoleteHeaderDefect(
+                "local-part is not a dot-atom (contains CFWS)"))
+        local_part[0] = obs_local_part
+    try:
+        local_part.value.encode('ascii')
+    except UnicodeEncodeError:
+        local_part.defects.append(errors.NonASCIILocalPartDefect(
+                "local-part contains non-ASCII characters)"))
+    return local_part, value
+
+def get_obs_local_part(value):
+    """ obs-local-part = word *("." word)
+    """
+    obs_local_part = ObsLocalPart()
+    last_non_ws_was_dot = False
+    while value and (value[0]=='\\' or value[0] not in PHRASE_ENDS):
+        if value[0] == '.':
+            if last_non_ws_was_dot:
+                obs_local_part.defects.append(errors.InvalidHeaderDefect(
+                    "invalid repeated '.'"))
+            obs_local_part.append(DOT)
+            last_non_ws_was_dot = True
+            value = value[1:]
+            continue
+        elif value[0]=='\\':
+            obs_local_part.append(ValueTerminal(value[0],
+                                                'misplaced-special'))
+            value = value[1:]
+            obs_local_part.defects.append(errors.InvalidHeaderDefect(
+                "'\\' character outside of quoted-string/ccontent"))
+            last_non_ws_was_dot = False
+            continue
+        if obs_local_part and obs_local_part[-1].token_type != 'dot':
+            obs_local_part.defects.append(errors.InvalidHeaderDefect(
+                "missing '.' between words"))
+        try:
+            token, value = get_word(value)
+            last_non_ws_was_dot = False
+        except errors.HeaderParseError:
+            if value[0] not in CFWS_LEADER:
+                raise
+            token, value = get_cfws(value)
+        obs_local_part.append(token)
+    if (obs_local_part[0].token_type == 'dot' or
+            obs_local_part[0].token_type=='cfws' and
+            obs_local_part[1].token_type=='dot'):
+        obs_local_part.defects.append(errors.InvalidHeaderDefect(
+            "Invalid leading '.' in local part"))
+    if (obs_local_part[-1].token_type == 'dot' or
+            obs_local_part[-1].token_type=='cfws' and
+            obs_local_part[-2].token_type=='dot'):
+        obs_local_part.defects.append(errors.InvalidHeaderDefect(
+            "Invalid trailing '.' in local part"))
+    if obs_local_part.defects:
+        obs_local_part.token_type = 'invalid-obs-local-part'
+    return obs_local_part, value
+
+def get_dtext(value):
+    r""" dtext = <printable ascii except \ [ ]> / obs-dtext
+        obs-dtext = obs-NO-WS-CTL / quoted-pair
+
+    We allow anything except the excluded characters, but if we find any
+    ASCII other than the RFC defined printable ASCII, a NonPrintableDefect is
+    added to the token's defects list.  Quoted pairs are converted to their
+    unquoted values, so what is returned is a ptext token, in this case a
+    ValueTerminal.  If there were quoted-printables, an ObsoleteHeaderDefect is
+    added to the returned token's defect list.
+
+    """
+    ptext, value, had_qp = _get_ptext_to_endchars(value, '[]')
+    ptext = ValueTerminal(ptext, 'ptext')
+    if had_qp:
+        ptext.defects.append(errors.ObsoleteHeaderDefect(
+            "quoted printable found in domain-literal"))
+    _validate_xtext(ptext)
+    return ptext, value
+
+def _check_for_early_dl_end(value, domain_literal):
+    if value:
+        return False
+    domain_literal.append(errors.InvalidHeaderDefect(
+        "end of input inside domain-literal"))
+    domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
+    return True
+
+def get_domain_literal(value):
+    """ domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
+
+    """
+    domain_literal = DomainLiteral()
+    if value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        domain_literal.append(token)
+    if not value:
+        raise errors.HeaderParseError("expected domain-literal")
+    if value[0] != '[':
+        raise errors.HeaderParseError("expected '[' at start of domain-literal "
+                "but found '{}'".format(value))
+    value = value[1:]
+    if _check_for_early_dl_end(value, domain_literal):
+        return domain_literal, value
+    domain_literal.append(ValueTerminal('[', 'domain-literal-start'))
+    if value[0] in WSP:
+        token, value = get_fws(value)
+        domain_literal.append(token)
+    token, value = get_dtext(value)
+    domain_literal.append(token)
+    if _check_for_early_dl_end(value, domain_literal):
+        return domain_literal, value
+    if value[0] in WSP:
+        token, value = get_fws(value)
+        domain_literal.append(token)
+    if _check_for_early_dl_end(value, domain_literal):
+        return domain_literal, value
+    if value[0] != ']':
+        raise errors.HeaderParseError("expected ']' at end of domain-literal "
+                "but found '{}'".format(value))
+    domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
+    value = value[1:]
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        domain_literal.append(token)
+    return domain_literal, value
+
+def get_domain(value):
+    """ domain = dot-atom / domain-literal / obs-domain
+        obs-domain = atom *("." atom))
+
+    """
+    domain = Domain()
+    leader = None
+    if value[0] in CFWS_LEADER:
+        leader, value = get_cfws(value)
+    if not value:
+        raise errors.HeaderParseError(
+            "expected domain but found '{}'".format(value))
+    if value[0] == '[':
+        token, value = get_domain_literal(value)
+        if leader is not None:
+            token[:0] = [leader]
+        domain.append(token)
+        return domain, value
+    try:
+        token, value = get_dot_atom(value)
+    except errors.HeaderParseError:
+        token, value = get_atom(value)
+    if leader is not None:
+        token[:0] = [leader]
+    domain.append(token)
+    if value and value[0] == '.':
+        domain.defects.append(errors.ObsoleteHeaderDefect(
+            "domain is not a dot-atom (contains CFWS)"))
+        if domain[0].token_type == 'dot-atom':
+            domain[:] = domain[0]
+        while value and value[0] == '.':
+            domain.append(DOT)
+            token, value = get_atom(value[1:])
+            domain.append(token)
+    return domain, value
+
+def get_addr_spec(value):
+    """ addr-spec = local-part "@" domain
+
+    """
+    addr_spec = AddrSpec()
+    token, value = get_local_part(value)
+    addr_spec.append(token)
+    if not value or value[0] != '@':
+        addr_spec.defects.append(errors.InvalidHeaderDefect(
+            "add-spec local part with no domain"))
+        return addr_spec, value
+    addr_spec.append(ValueTerminal('@', 'address-at-symbol'))
+    token, value = get_domain(value[1:])
+    addr_spec.append(token)
+    return addr_spec, value
+
+def get_obs_route(value):
+    """ obs-route = obs-domain-list ":"
+        obs-domain-list = *(CFWS / ",") "@" domain *("," [CFWS] ["@" domain])
+
+        Returns an obs-route token with the appropriate sub-tokens (that is,
+        there is no obs-domain-list in the parse tree).
+    """
+    obs_route = ObsRoute()
+    while value and (value[0]==',' or value[0] in CFWS_LEADER):
+        if value[0] in CFWS_LEADER:
+            token, value = get_cfws(value)
+            obs_route.append(token)
+        elif value[0] == ',':
+            obs_route.append(ListSeparator)
+            value = value[1:]
+    if not value or value[0] != '@':
+        raise errors.HeaderParseError(
+            "expected obs-route domain but found '{}'".format(value))
+    obs_route.append(RouteComponentMarker)
+    token, value = get_domain(value[1:])
+    obs_route.append(token)
+    while value and value[0]==',':
+        obs_route.append(ListSeparator)
+        value = value[1:]
+        if not value:
+            break
+        if value[0] in CFWS_LEADER:
+            token, value = get_cfws(value)
+            obs_route.append(token)
+        if value[0] == '@':
+            obs_route.append(RouteComponentMarker)
+            token, value = get_domain(value[1:])
+            obs_route.append(token)
+    if not value:
+        raise errors.HeaderParseError("end of header while parsing obs-route")
+    if value[0] != ':':
+        raise errors.HeaderParseError( "expected ':' marking end of "
+            "obs-route but found '{}'".format(value))
+    obs_route.append(ValueTerminal(':', 'end-of-obs-route-marker'))
+    return obs_route, value[1:]
+
+def get_angle_addr(value):
+    """ angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr
+        obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS]
+
+    """
+    angle_addr = AngleAddr()
+    if value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        angle_addr.append(token)
+    if not value or value[0] != '<':
+        raise errors.HeaderParseError(
+            "expected angle-addr but found '{}'".format(value))
+    angle_addr.append(ValueTerminal('<', 'angle-addr-start'))
+    value = value[1:]
+    # Although it is not legal per RFC5322, SMTP uses '<>' in certain
+    # circumstances.
+    if value[0] == '>':
+        angle_addr.append(ValueTerminal('>', 'angle-addr-end'))
+        angle_addr.defects.append(errors.InvalidHeaderDefect(
+            "null addr-spec in angle-addr"))
+        value = value[1:]
+        return angle_addr, value
+    try:
+        token, value = get_addr_spec(value)
+    except errors.HeaderParseError:
+        try:
+            token, value = get_obs_route(value)
+            angle_addr.defects.append(errors.ObsoleteHeaderDefect(
+                "obsolete route specification in angle-addr"))
+        except errors.HeaderParseError:
+            raise errors.HeaderParseError(
+                "expected addr-spec or obs-route but found '{}'".format(value))
+        angle_addr.append(token)
+        token, value = get_addr_spec(value)
+    angle_addr.append(token)
+    if value and value[0] == '>':
+        value = value[1:]
+    else:
+        angle_addr.defects.append(errors.InvalidHeaderDefect(
+            "missing trailing '>' on angle-addr"))
+    angle_addr.append(ValueTerminal('>', 'angle-addr-end'))
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        angle_addr.append(token)
+    return angle_addr, value
+
+def get_display_name(value):
+    """ display-name = phrase
+
+    Because this is simply a name-rule, we don't return a display-name
+    token containing a phrase, but rather a display-name token with
+    the content of the phrase.
+
+    """
+    display_name = DisplayName()
+    token, value = get_phrase(value)
+    display_name.extend(token[:])
+    display_name.defects = token.defects[:]
+    return display_name, value
+
+
+def get_name_addr(value):
+    """ name-addr = [display-name] angle-addr
+
+    """
+    name_addr = NameAddr()
+    # Both the optional display name and the angle-addr can start with cfws.
+    leader = None
+    if value[0] in CFWS_LEADER:
+        leader, value = get_cfws(value)
+        if not value:
+            raise errors.HeaderParseError(
+                "expected name-addr but found '{}'".format(leader))
+    if value[0] != '<':
+        if value[0] in PHRASE_ENDS:
+            raise errors.HeaderParseError(
+                "expected name-addr but found '{}'".format(value))
+        token, value = get_display_name(value)
+        if not value:
+            raise errors.HeaderParseError(
+                "expected name-addr but found '{}'".format(token))
+        if leader is not None:
+            token[0][:0] = [leader]
+            leader = None
+        name_addr.append(token)
+    token, value = get_angle_addr(value)
+    if leader is not None:
+        token[:0] = [leader]
+    name_addr.append(token)
+    return name_addr, value
+
+def get_mailbox(value):
+    """ mailbox = name-addr / addr-spec
+
+    """
+    # The only way to figure out if we are dealing with a name-addr or an
+    # addr-spec is to try parsing each one.
+    mailbox = Mailbox()
+    try:
+        token, value = get_name_addr(value)
+    except errors.HeaderParseError:
+        try:
+            token, value = get_addr_spec(value)
+        except errors.HeaderParseError:
+            raise errors.HeaderParseError(
+                "expected mailbox but found '{}'".format(value))
+    if any(isinstance(x, errors.InvalidHeaderDefect)
+                       for x in token.all_defects):
+        mailbox.token_type = 'invalid-mailbox'
+    mailbox.append(token)
+    return mailbox, value
+
+def get_invalid_mailbox(value, endchars):
+    """ Read everything up to one of the chars in endchars.
+
+    This is outside the formal grammar.  The InvalidMailbox TokenList that is
+    returned acts like a Mailbox, but the data attributes are None.
+
+    """
+    invalid_mailbox = InvalidMailbox()
+    while value and value[0] not in endchars:
+        if value[0] in PHRASE_ENDS:
+            invalid_mailbox.append(ValueTerminal(value[0],
+                                                 'misplaced-special'))
+            value = value[1:]
+        else:
+            token, value = get_phrase(value)
+            invalid_mailbox.append(token)
+    return invalid_mailbox, value
+
+def get_mailbox_list(value):
+    """ mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
+        obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS])
+
+    For this routine we go outside the formal grammar in order to improve error
+    handling.  We recognize the end of the mailbox list only at the end of the
+    value or at a ';' (the group terminator).  This is so that we can turn
+    invalid mailboxes into InvalidMailbox tokens and continue parsing any
+    remaining valid mailboxes.  We also allow all mailbox entries to be null,
+    and this condition is handled appropriately at a higher level.
+
+    """
+    mailbox_list = MailboxList()
+    while value and value[0] != ';':
+        try:
+            token, value = get_mailbox(value)
+            mailbox_list.append(token)
+        except errors.HeaderParseError:
+            leader = None
+            if value[0] in CFWS_LEADER:
+                leader, value = get_cfws(value)
+                if not value or value[0] in ',;':
+                    mailbox_list.append(leader)
+                    mailbox_list.defects.append(errors.ObsoleteHeaderDefect(
+                        "empty element in mailbox-list"))
+                else:
+                    token, value = get_invalid_mailbox(value, ',;')
+                    if leader is not None:
+                        token[:0] = [leader]
+                    mailbox_list.append(token)
+                    mailbox_list.defects.append(errors.InvalidHeaderDefect(
+                        "invalid mailbox in mailbox-list"))
+            elif value[0] == ',':
+                mailbox_list.defects.append(errors.ObsoleteHeaderDefect(
+                    "empty element in mailbox-list"))
+            else:
+                token, value = get_invalid_mailbox(value, ',;')
+                if leader is not None:
+                    token[:0] = [leader]
+                mailbox_list.append(token)
+                mailbox_list.defects.append(errors.InvalidHeaderDefect(
+                    "invalid mailbox in mailbox-list"))
+        if value and value[0] not in ',;':
+            # Crap after mailbox; treat it as an invalid mailbox.
+            # The mailbox info will still be available.
+            mailbox = mailbox_list[-1]
+            mailbox.token_type = 'invalid-mailbox'
+            token, value = get_invalid_mailbox(value, ',;')
+            mailbox.extend(token)
+            mailbox_list.defects.append(errors.InvalidHeaderDefect(
+                "invalid mailbox in mailbox-list"))
+        if value and value[0] == ',':
+            mailbox_list.append(ListSeparator)
+            value = value[1:]
+    return mailbox_list, value
+
+
+def get_group_list(value):
+    """ group-list = mailbox-list / CFWS / obs-group-list
+        obs-group-list = 1*([CFWS] ",") [CFWS]
+
+    """
+    group_list = GroupList()
+    if not value:
+        group_list.defects.append(errors.InvalidHeaderDefect(
+            "end of header before group-list"))
+        return group_list, value
+    leader = None
+    if value and value[0] in CFWS_LEADER:
+        leader, value = get_cfws(value)
+        if not value:
+            # This should never happen in email parsing, since CFWS-only is a
+            # legal alternative to group-list in a group, which is the only
+            # place group-list appears.
+            group_list.defects.append(errors.InvalidHeaderDefect(
+                "end of header in group-list"))
+            group_list.append(leader)
+            return group_list, value
+        if value[0] == ';':
+            group_list.append(leader)
+            return group_list, value
+    token, value = get_mailbox_list(value)
+    if len(token.all_mailboxes)==0:
+        if leader is not None:
+            group_list.append(leader)
+        group_list.extend(token)
+        group_list.defects.append(errors.ObsoleteHeaderDefect(
+            "group-list with empty entries"))
+        return group_list, value
+    if leader is not None:
+        token[:0] = [leader]
+    group_list.append(token)
+    return group_list, value
+
+def get_group(value):
+    """ group = display-name ":" [group-list] ";" [CFWS]
+
+    """
+    group = Group()
+    token, value = get_display_name(value)
+    if not value or value[0] != ':':
+        raise errors.HeaderParseError("expected ':' at end of group "
+            "display name but found '{}'".format(value))
+    group.append(token)
+    group.append(ValueTerminal(':', 'group-display-name-terminator'))
+    value = value[1:]
+    if value and value[0] == ';':
+        group.append(ValueTerminal(';', 'group-terminator'))
+        return group, value[1:]
+    token, value = get_group_list(value)
+    group.append(token)
+    if not value:
+        group.defects.append(errors.InvalidHeaderDefect(
+            "end of header in group"))
+    if value[0] != ';':
+        raise errors.HeaderParseError(
+            "expected ';' at end of group but found {}".format(value))
+    group.append(ValueTerminal(';', 'group-terminator'))
+    value = value[1:]
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        group.append(token)
+    return group, value
+
+def get_address(value):
+    """ address = mailbox / group
+
+    Note that counter-intuitively, an address can be either a single address or
+    a list of addresses (a group).  This is why the returned Address object has
+    a 'mailboxes' attribute which treats a single address as a list of length
+    one.  When you need to differentiate between to two cases, extract the single
+    element, which is either a mailbox or a group token.
+
+    """
+    # The formal grammar isn't very helpful when parsing an address.  mailbox
+    # and group, especially when allowing for obsolete forms, start off very
+    # similarly.  It is only when you reach one of @, <, or : that you know
+    # what you've got.  So, we try each one in turn, starting with the more
+    # likely of the two.  We could perhaps make this more efficient by looking
+    # for a phrase and then branching based on the next character, but that
+    # would be a premature optimization.
+    address = Address()
+    try:
+        token, value = get_group(value)
+    except errors.HeaderParseError:
+        try:
+            token, value = get_mailbox(value)
+        except errors.HeaderParseError:
+            raise errors.HeaderParseError(
+                "expected address but found '{}'".format(value))
+    address.append(token)
+    return address, value
+
+def get_address_list(value):
+    """ address_list = (address *("," address)) / obs-addr-list
+        obs-addr-list = *([CFWS] ",") address *("," [address / CFWS])
+
+    We depart from the formal grammar here by continuing to parse until the end
+    of the input, assuming the input to be entirely composed of an
+    address-list.  This is always true in email parsing, and allows us
+    to skip invalid addresses to parse additional valid ones.
+
+    """
+    address_list = AddressList()
+    while value:
+        try:
+            token, value = get_address(value)
+            address_list.append(token)
+        except errors.HeaderParseError as err:
+            leader = None
+            if value[0] in CFWS_LEADER:
+                leader, value = get_cfws(value)
+                if not value or value[0] == ',':
+                    address_list.append(leader)
+                    address_list.defects.append(errors.ObsoleteHeaderDefect(
+                        "address-list entry with no content"))
+                else:
+                    token, value = get_invalid_mailbox(value, ',')
+                    if leader is not None:
+                        token[:0] = [leader]
+                    address_list.append(Address([token]))
+                    address_list.defects.append(errors.InvalidHeaderDefect(
+                        "invalid address in address-list"))
+            elif value[0] == ',':
+                address_list.defects.append(errors.ObsoleteHeaderDefect(
+                    "empty element in address-list"))
+            else:
+                token, value = get_invalid_mailbox(value, ',')
+                if leader is not None:
+                    token[:0] = [leader]
+                address_list.append(Address([token]))
+                address_list.defects.append(errors.InvalidHeaderDefect(
+                    "invalid address in address-list"))
+        if value and value[0] != ',':
+            # Crap after address; treat it as an invalid mailbox.
+            # The mailbox info will still be available.
+            mailbox = address_list[-1][0]
+            mailbox.token_type = 'invalid-mailbox'
+            token, value = get_invalid_mailbox(value, ',')
+            mailbox.extend(token)
+            address_list.defects.append(errors.InvalidHeaderDefect(
+                "invalid address in address-list"))
+        if value:  # Must be a , at this point.
+            address_list.append(ValueTerminal(',', 'list-separator'))
+            value = value[1:]
+    return address_list, value
+
+#
+# XXX: As I begin to add additional header parsers, I'm realizing we probably
+# have two level of parser routines: the get_XXX methods that get a token in
+# the grammar, and parse_XXX methods that parse an entire field value.  So
+# get_address_list above should really be a parse_ method, as probably should
+# be get_unstructured.
+#
+
+def parse_mime_version(value):
+    """ mime-version = [CFWS] 1*digit [CFWS] "." [CFWS] 1*digit [CFWS]
+
+    """
+    # The [CFWS] is implicit in the RFC 2045 BNF.
+    # XXX: This routine is a bit verbose, should factor out a get_int method.
+    mime_version = MIMEVersion()
+    if not value:
+        mime_version.defects.append(errors.HeaderMissingRequiredValue(
+            "Missing MIME version number (eg: 1.0)"))
+        return mime_version
+    if value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        mime_version.append(token)
+        if not value:
+            mime_version.defects.append(errors.HeaderMissingRequiredValue(
+                "Expected MIME version number but found only CFWS"))
+    digits = ''
+    while value and value[0] != '.' and value[0] not in CFWS_LEADER:
+        digits += value[0]
+        value = value[1:]
+    if not digits.isdigit():
+        mime_version.defects.append(errors.InvalidHeaderDefect(
+            "Expected MIME major version number but found {!r}".format(digits)))
+        mime_version.append(ValueTerminal(digits, 'xtext'))
+    else:
+        mime_version.major = int(digits)
+        mime_version.append(ValueTerminal(digits, 'digits'))
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        mime_version.append(token)
+    if not value or value[0] != '.':
+        if mime_version.major is not None:
+            mime_version.defects.append(errors.InvalidHeaderDefect(
+                "Incomplete MIME version; found only major number"))
+        if value:
+            mime_version.append(ValueTerminal(value, 'xtext'))
+        return mime_version
+    mime_version.append(ValueTerminal('.', 'version-separator'))
+    value = value[1:]
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        mime_version.append(token)
+    if not value:
+        if mime_version.major is not None:
+            mime_version.defects.append(errors.InvalidHeaderDefect(
+                "Incomplete MIME version; found only major number"))
+        return mime_version
+    digits = ''
+    while value and value[0] not in CFWS_LEADER:
+        digits += value[0]
+        value = value[1:]
+    if not digits.isdigit():
+        mime_version.defects.append(errors.InvalidHeaderDefect(
+            "Expected MIME minor version number but found {!r}".format(digits)))
+        mime_version.append(ValueTerminal(digits, 'xtext'))
+    else:
+        mime_version.minor = int(digits)
+        mime_version.append(ValueTerminal(digits, 'digits'))
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        mime_version.append(token)
+    if value:
+        mime_version.defects.append(errors.InvalidHeaderDefect(
+            "Excess non-CFWS text after MIME version"))
+        mime_version.append(ValueTerminal(value, 'xtext'))
+    return mime_version
+
+def get_invalid_parameter(value):
+    """ Read everything up to the next ';'.
+
+    This is outside the formal grammar.  The InvalidParameter TokenList that is
+    returned acts like a Parameter, but the data attributes are None.
+
+    """
+    invalid_parameter = InvalidParameter()
+    while value and value[0] != ';':
+        if value[0] in PHRASE_ENDS:
+            invalid_parameter.append(ValueTerminal(value[0],
+                                                   'misplaced-special'))
+            value = value[1:]
+        else:
+            token, value = get_phrase(value)
+            invalid_parameter.append(token)
+    return invalid_parameter, value
+
+def get_ttext(value):
+    """ttext = <matches _ttext_matcher>
+
+    We allow any non-TOKEN_ENDS in ttext, but add defects to the token's
+    defects list if we find non-ttext characters.  We also register defects for
+    *any* non-printables even though the RFC doesn't exclude all of them,
+    because we follow the spirit of RFC 5322.
+
+    """
+    m = _non_token_end_matcher(value)
+    if not m:
+        raise errors.HeaderParseError(
+            "expected ttext but found '{}'".format(value))
+    ttext = m.group()
+    value = value[len(ttext):]
+    ttext = ValueTerminal(ttext, 'ttext')
+    _validate_xtext(ttext)
+    return ttext, value
+
+def get_token(value):
+    """token = [CFWS] 1*ttext [CFWS]
+
+    The RFC equivalent of ttext is any US-ASCII chars except space, ctls, or
+    tspecials.  We also exclude tabs even though the RFC doesn't.
+
+    The RFC implies the CFWS but is not explicit about it in the BNF.
+
+    """
+    mtoken = Token()
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        mtoken.append(token)
+    if value and value[0] in TOKEN_ENDS:
+        raise errors.HeaderParseError(
+            "expected token but found '{}'".format(value))
+    token, value = get_ttext(value)
+    mtoken.append(token)
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        mtoken.append(token)
+    return mtoken, value
+
+def get_attrtext(value):
+    """attrtext = 1*(any non-ATTRIBUTE_ENDS character)
+
+    We allow any non-ATTRIBUTE_ENDS in attrtext, but add defects to the
+    token's defects list if we find non-attrtext characters.  We also register
+    defects for *any* non-printables even though the RFC doesn't exclude all of
+    them, because we follow the spirit of RFC 5322.
+
+    """
+    m = _non_attribute_end_matcher(value)
+    if not m:
+        raise errors.HeaderParseError(
+            "expected attrtext but found {!r}".format(value))
+    attrtext = m.group()
+    value = value[len(attrtext):]
+    attrtext = ValueTerminal(attrtext, 'attrtext')
+    _validate_xtext(attrtext)
+    return attrtext, value
+
+def get_attribute(value):
+    """ [CFWS] 1*attrtext [CFWS]
+
+    This version of the BNF makes the CFWS explicit, and as usual we use a
+    value terminal for the actual run of characters.  The RFC equivalent of
+    attrtext is the token characters, with the subtraction of '*', "'", and '%'.
+    We include tab in the excluded set just as we do for token.
+
+    """
+    attribute = Attribute()
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        attribute.append(token)
+    if value and value[0] in ATTRIBUTE_ENDS:
+        raise errors.HeaderParseError(
+            "expected token but found '{}'".format(value))
+    token, value = get_attrtext(value)
+    attribute.append(token)
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        attribute.append(token)
+    return attribute, value
+
+def get_extended_attrtext(value):
+    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')
+
+    This is a special parsing routine so that we get a value that
+    includes % escapes as a single string (which we decode as a single
+    string later).
+
+    """
+    m = _non_extended_attribute_end_matcher(value)
+    if not m:
+        raise errors.HeaderParseError(
+            "expected extended attrtext but found {!r}".format(value))
+    attrtext = m.group()
+    value = value[len(attrtext):]
+    attrtext = ValueTerminal(attrtext, 'extended-attrtext')
+    _validate_xtext(attrtext)
+    return attrtext, value
+
+def get_extended_attribute(value):
+    """ [CFWS] 1*extended_attrtext [CFWS]
+
+    This is like the non-extended version except we allow % characters, so that
+    we can pick up an encoded value as a single string.
+
+    """
+    # XXX: should we have an ExtendedAttribute TokenList?
+    attribute = Attribute()
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        attribute.append(token)
+    if value and value[0] in EXTENDED_ATTRIBUTE_ENDS:
+        raise errors.HeaderParseError(
+            "expected token but found '{}'".format(value))
+    token, value = get_extended_attrtext(value)
+    attribute.append(token)
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        attribute.append(token)
+    return attribute, value
+
+def get_section(value):
+    """ '*' digits
+
+    The formal BNF is more complicated because leading 0s are not allowed.  We
+    check for that and add a defect.  We also assume no CFWS is allowed between
+    the '*' and the digits, though the RFC is not crystal clear on that.
+    The caller should already have dealt with leading CFWS.
+
+    """
+    section = Section()
+    if not value or value[0] != '*':
+        raise errors.HeaderParseError("Expected section but found {}".format(
+                                        value))
+    section.append(ValueTerminal('*', 'section-marker'))
+    value = value[1:]
+    if not value or not value[0].isdigit():
+        raise errors.HeaderParseError("Expected section number but "
+                                      "found {}".format(value))
+    digits = ''
+    while value and value[0].isdigit():
+        digits += value[0]
+        value = value[1:]
+    if digits[0] == '0' and digits != '0':
+        section.defects.append(errors.InvalidHeaderError("section number"
+            "has an invalid leading 0"))
+    section.number = int(digits)
+    section.append(ValueTerminal(digits, 'digits'))
+    return section, value
+
+
+def get_value(value):
+    """ quoted-string / attribute
+
+    """
+    v = Value()
+    if not value:
+        raise errors.HeaderParseError("Expected value but found end of string")
+    leader = None
+    if value[0] in CFWS_LEADER:
+        leader, value = get_cfws(value)
+    if not value:
+        raise errors.HeaderParseError("Expected value but found "
+                                      "only {}".format(leader))
+    if value[0] == '"':
+        token, value = get_quoted_string(value)
+    else:
+        token, value = get_extended_attribute(value)
+    if leader is not None:
+        token[:0] = [leader]
+    v.append(token)
+    return v, value
+
+def get_parameter(value):
+    """ attribute [section] ["*"] [CFWS] "=" value
+
+    The CFWS is implied by the RFC but not made explicit in the BNF.  This
+    simplified form of the BNF from the RFC is made to conform with the RFC BNF
+    through some extra checks.  We do it this way because it makes both error
+    recovery and working with the resulting parse tree easier.
+    """
+    # It is possible CFWS would also be implicitly allowed between the section
+    # and the 'extended-attribute' marker (the '*') , but we've never seen that
+    # in the wild and we will therefore ignore the possibility.
+    param = Parameter()
+    token, value = get_attribute(value)
+    param.append(token)
+    if not value or value[0] == ';':
+        param.defects.append(errors.InvalidHeaderDefect("Parameter contains "
+            "name ({}) but no value".format(token)))
+        return param, value
+    if value[0] == '*':
+        try:
+            token, value = get_section(value)
+            param.sectioned = True
+            param.append(token)
+        except errors.HeaderParseError:
+            pass
+        if not value:
+            raise errors.HeaderParseError("Incomplete parameter")
+        if value[0] == '*':
+            param.append(ValueTerminal('*', 'extended-parameter-marker'))
+            value = value[1:]
+            param.extended = True
+    if value[0] != '=':
+        raise errors.HeaderParseError("Parameter not followed by '='")
+    param.append(ValueTerminal('=', 'parameter-separator'))
+    value = value[1:]
+    leader = None
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        param.append(token)
+    remainder = None
+    appendto = param
+    if param.extended and value and value[0] == '"':
+        # Now for some serious hackery to handle the common invalid case of
+        # double quotes around an extended value.  We also accept (with defect)
+        # a value marked as encoded that isn't really.
+        qstring, remainder = get_quoted_string(value)
+        inner_value = qstring.stripped_value
+        semi_valid = False
+        if param.section_number == 0:
+            if inner_value and inner_value[0] == "'":
+                semi_valid = True
+            else:
+                token, rest = get_attrtext(inner_value)
+                if rest and rest[0] == "'":
+                    semi_valid = True
+        else:
+            try:
+                token, rest = get_extended_attrtext(inner_value)
+            except:
+                pass
+            else:
+                if not rest:
+                    semi_valid = True
+        if semi_valid:
+            param.defects.append(errors.InvalidHeaderDefect(
+                "Quoted string value for extended parameter is invalid"))
+            param.append(qstring)
+            for t in qstring:
+                if t.token_type == 'bare-quoted-string':
+                    t[:] = []
+                    appendto = t
+                    break
+            value = inner_value
+        else:
+            remainder = None
+            param.defects.append(errors.InvalidHeaderDefect(
+                "Parameter marked as extended but appears to have a "
+                "quoted string value that is non-encoded"))
+    if value and value[0] == "'":
+        token = None
+    else:
+        token, value = get_value(value)
+    if not param.extended or param.section_number > 0:
+        if not value or value[0] != "'":
+            appendto.append(token)
+            if remainder is not None:
+                assert not value, value
+                value = remainder
+            return param, value
+        param.defects.append(errors.InvalidHeaderDefect(
+            "Apparent initial-extended-value but attribute "
+            "was not marked as extended or was not initial section"))
+    if not value:
+        # Assume the charset/lang is missing and the token is the value.
+        param.defects.append(errors.InvalidHeaderDefect(
+            "Missing required charset/lang delimiters"))
+        appendto.append(token)
+        if remainder is None:
+            return param, value
+    else:
+        if token is not None:
+            for t in token:
+                if t.token_type == 'extended-attrtext':
+                    break
+            t.token_type == 'attrtext'
+            appendto.append(t)
+            param.charset = t.value
+        if value[0] != "'":
+            raise errors.HeaderParseError("Expected RFC2231 char/lang encoding "
+                                          "delimiter, but found {!r}".format(value))
+        appendto.append(ValueTerminal("'", 'RFC2231-delimiter'))
+        value = value[1:]
+        if value and value[0] != "'":
+            token, value = get_attrtext(value)
+            appendto.append(token)
+            param.lang = token.value
+            if not value or value[0] != "'":
+                raise errors.HeaderParseError("Expected RFC2231 char/lang encoding "
+                                  "delimiter, but found {}".format(value))
+        appendto.append(ValueTerminal("'", 'RFC2231-delimiter'))
+        value = value[1:]
+    if remainder is not None:
+        # Treat the rest of value as bare quoted string content.
+        v = Value()
+        while value:
+            if value[0] in WSP:
+                token, value = get_fws(value)
+            else:
+                token, value = get_qcontent(value)
+            v.append(token)
+        token = v
+    else:
+        token, value = get_value(value)
+    appendto.append(token)
+    if remainder is not None:
+        assert not value, value
+        value = remainder
+    return param, value
+
+def parse_mime_parameters(value):
+    """ parameter *( ";" parameter )
+
+    That BNF is meant to indicate this routine should only be called after
+    finding and handling the leading ';'.  There is no corresponding rule in
+    the formal RFC grammar, but it is more convenient for us for the set of
+    parameters to be treated as its own TokenList.
+
+    This is 'parse' routine because it consumes the reminaing value, but it
+    would never be called to parse a full header.  Instead it is called to
+    parse everything after the non-parameter value of a specific MIME header.
+
+    """
+    mime_parameters = MimeParameters()
+    while value:
+        try:
+            token, value = get_parameter(value)
+            mime_parameters.append(token)
+        except errors.HeaderParseError as err:
+            leader = None
+            if value[0] in CFWS_LEADER:
+                leader, value = get_cfws(value)
+            if not value:
+                mime_parameters.append(leader)
+                return mime_parameters
+            if value[0] == ';':
+                if leader is not None:
+                    mime_parameters.append(leader)
+                mime_parameters.defects.append(errors.InvalidHeaderDefect(
+                    "parameter entry with no content"))
+            else:
+                token, value = get_invalid_parameter(value)
+                if leader:
+                    token[:0] = [leader]
+                mime_parameters.append(token)
+                mime_parameters.defects.append(errors.InvalidHeaderDefect(
+                    "invalid parameter {!r}".format(token)))
+        if value and value[0] != ';':
+            # Junk after the otherwise valid parameter.  Mark it as
+            # invalid, but it will have a value.
+            param = mime_parameters[-1]
+            param.token_type = 'invalid-parameter'
+            token, value = get_invalid_parameter(value)
+            param.extend(token)
+            mime_parameters.defects.append(errors.InvalidHeaderDefect(
+                "parameter with invalid trailing text {!r}".format(token)))
+        if value:
+            # Must be a ';' at this point.
+            mime_parameters.append(ValueTerminal(';', 'parameter-separator'))
+            value = value[1:]
+    return mime_parameters
+
+def _find_mime_parameters(tokenlist, value):
+    """Do our best to find the parameters in an invalid MIME header
+
+    """
+    while value and value[0] != ';':
+        if value[0] in PHRASE_ENDS:
+            tokenlist.append(ValueTerminal(value[0], 'misplaced-special'))
+            value = value[1:]
+        else:
+            token, value = get_phrase(value)
+            tokenlist.append(token)
+    if not value:
+        return
+    tokenlist.append(ValueTerminal(';', 'parameter-separator'))
+    tokenlist.append(parse_mime_parameters(value[1:]))
+
+def parse_content_type_header(value):
+    """ maintype "/" subtype *( ";" parameter )
+
+    The maintype and substype are tokens.  Theoretically they could
+    be checked against the official IANA list + x-token, but we
+    don't do that.
+    """
+    ctype = ContentType()
+    recover = False
+    if not value:
+        ctype.defects.append(errors.HeaderMissingRequiredValue(
+            "Missing content type specification"))
+        return ctype
+    try:
+        token, value = get_token(value)
+    except errors.HeaderParseError:
+        ctype.defects.append(errors.InvalidHeaderDefect(
+            "Expected content maintype but found {!r}".format(value)))
+        _find_mime_parameters(ctype, value)
+        return ctype
+    ctype.append(token)
+    # XXX: If we really want to follow the formal grammar we should make
+    # mantype and subtype specialized TokenLists here.  Probably not worth it.
+    if not value or value[0] != '/':
+        ctype.defects.append(errors.InvalidHeaderDefect(
+            "Invalid content type"))
+        if value:
+            _find_mime_parameters(ctype, value)
+        return ctype
+    ctype.maintype = token.value.strip().lower()
+    ctype.append(ValueTerminal('/', 'content-type-separator'))
+    value = value[1:]
+    try:
+        token, value = get_token(value)
+    except errors.HeaderParseError:
+        ctype.defects.append(errors.InvalidHeaderDefect(
+            "Expected content subtype but found {!r}".format(value)))
+        _find_mime_parameters(ctype, value)
+        return ctype
+    ctype.append(token)
+    ctype.subtype = token.value.strip().lower()
+    if not value:
+        return ctype
+    if value[0] != ';':
+        ctype.defects.append(errors.InvalidHeaderDefect(
+            "Only parameters are valid after content type, but "
+            "found {!r}".format(value)))
+        # The RFC requires that a syntactically invalid content-type be treated
+        # as text/plain.  Perhaps we should postel this, but we should probably
+        # only do that if we were checking the subtype value against IANA.
+        del ctype.maintype, ctype.subtype
+        _find_mime_parameters(ctype, value)
+        return ctype
+    ctype.append(ValueTerminal(';', 'parameter-separator'))
+    ctype.append(parse_mime_parameters(value[1:]))
+    return ctype
+
+def parse_content_disposition_header(value):
+    """ disposition-type *( ";" parameter )
+
+    """
+    disp_header = ContentDisposition()
+    if not value:
+        disp_header.defects.append(errors.HeaderMissingRequiredValue(
+            "Missing content disposition"))
+        return disp_header
+    try:
+        token, value = get_token(value)
+    except errors.HeaderParseError:
+        disp_header.defects.append(errors.InvalidHeaderDefect(
+            "Expected content disposition but found {!r}".format(value)))
+        _find_mime_parameters(disp_header, value)
+        return disp_header
+    disp_header.append(token)
+    disp_header.content_disposition = token.value.strip().lower()
+    if not value:
+        return disp_header
+    if value[0] != ';':
+        disp_header.defects.append(errors.InvalidHeaderDefect(
+            "Only parameters are valid after content disposition, but "
+            "found {!r}".format(value)))
+        _find_mime_parameters(disp_header, value)
+        return disp_header
+    disp_header.append(ValueTerminal(';', 'parameter-separator'))
+    disp_header.append(parse_mime_parameters(value[1:]))
+    return disp_header
+
+def parse_content_transfer_encoding_header(value):
+    """ mechanism
+
+    """
+    # We should probably validate the values, since the list is fixed.
+    cte_header = ContentTransferEncoding()
+    if not value:
+        cte_header.defects.append(errors.HeaderMissingRequiredValue(
+            "Missing content transfer encoding"))
+        return cte_header
+    try:
+        token, value = get_token(value)
+    except errors.HeaderParseError:
+        cte_header.defects.append(errors.InvalidHeaderDefect(
+            "Expected content transfer encoding but found {!r}".format(value)))
+    else:
+        cte_header.append(token)
+        cte_header.cte = token.value.strip().lower()
+    if not value:
+        return cte_header
+    while value:
+        cte_header.defects.append(errors.InvalidHeaderDefect(
+            "Extra text after content transfer encoding"))
+        if value[0] in PHRASE_ENDS:
+            cte_header.append(ValueTerminal(value[0], 'misplaced-special'))
+            value = value[1:]
+        else:
+            token, value = get_phrase(value)
+            cte_header.append(token)
+    return cte_header
+
+
+#
+# Header folding
+#
+# Header folding is complex, with lots of rules and corner cases.  The
+# following code does its best to obey the rules and handle the corner
+# cases, but you can be sure there are few bugs:)
+#
+# This folder generally canonicalizes as it goes, preferring the stringified
+# version of each token.  The tokens contain information that supports the
+# folder, including which tokens can be encoded in which ways.
+#
+# Folded text is accumulated in a simple list of strings ('lines'), each
+# one of which should be less than policy.max_line_length ('maxlen').
+#
+
+def _steal_trailing_WSP_if_exists(lines):
+    wsp = ''
+    if lines and lines[-1] and lines[-1][-1] in WSP:
+        wsp = lines[-1][-1]
+        lines[-1] = lines[-1][:-1]
+    return wsp
+
+def _refold_parse_tree(parse_tree, *, policy):
+    """Return string of contents of parse_tree folded according to RFC rules.
+
+    """
+    # max_line_length 0/None means no limit, ie: infinitely long.
+    maxlen = policy.max_line_length or float("+inf")
+    encoding = 'utf-8' if policy.utf8 else 'us-ascii'
+    lines = ['']
+    last_ew = None
+    wrap_as_ew_blocked = 0
+    want_encoding = False
+    end_ew_not_allowed = Terminal('', 'wrap_as_ew_blocked')
+    parts = list(parse_tree)
+    while parts:
+        part = parts.pop(0)
+        if part is end_ew_not_allowed:
+            wrap_as_ew_blocked -= 1
+            continue
+        tstr = str(part)
+        try:
+            tstr.encode(encoding)
+            charset = encoding
+        except UnicodeEncodeError:
+            if any(isinstance(x, errors.UndecodableBytesDefect)
+                   for x in part.all_defects):
+                charset = 'unknown-8bit'
+            else:
+                # If policy.utf8 is false this should really be taken from a
+                # 'charset' property on the policy.
+                charset = 'utf-8'
+            want_encoding = True
+        if part.token_type == 'mime-parameters':
+            # Mime parameter folding (using RFC2231) is extra special.
+            _fold_mime_parameters(part, lines, maxlen, encoding)
+            continue
+        if want_encoding and not wrap_as_ew_blocked:
+            if not part.as_ew_allowed:
+                want_encoding = False
+                last_ew = None
+                if part.syntactic_break:
+                    encoded_part = part.fold(policy=policy)[:-1] # strip nl
+                    if policy.linesep not in encoded_part:
+                        # It fits on a single line
+                        if len(encoded_part) > maxlen - len(lines[-1]):
+                            # But not on this one, so start a new one.
+                            newline = _steal_trailing_WSP_if_exists(lines)
+                            # XXX what if encoded_part has no leading FWS?
+                            lines.append(newline)
+                        lines[-1] += encoded_part
+                        continue
+                # Either this is not a major syntactic break, so we don't
+                # want it on a line by itself even if it fits, or it
+                # doesn't fit on a line by itself.  Either way, fall through
+                # to unpacking the subparts and wrapping them.
+            if not hasattr(part, 'encode'):
+                # It's not a Terminal, do each piece individually.
+                parts = list(part) + parts
+            else:
+                # It's a terminal, wrap it as an encoded word, possibly
+                # combining it with previously encoded words if allowed.
+                last_ew = _fold_as_ew(tstr, lines, maxlen, last_ew,
+                                      part.ew_combine_allowed, charset)
+            want_encoding = False
+            continue
+        if len(tstr) <= maxlen - len(lines[-1]):
+            lines[-1] += tstr
+            continue
+        # This part is too long to fit.  The RFC wants us to break at
+        # "major syntactic breaks", so unless we don't consider this
+        # to be one, check if it will fit on the next line by itself.
+        if (part.syntactic_break and
+                len(tstr) + 1 <= maxlen):
+            newline = _steal_trailing_WSP_if_exists(lines)
+            if newline or part.startswith_fws():
+                lines.append(newline + tstr)
+                continue
+        if not hasattr(part, 'encode'):
+            # It's not a terminal, try folding the subparts.
+            newparts = list(part)
+            if not part.as_ew_allowed:
+                wrap_as_ew_blocked += 1
+                newparts.append(end_ew_not_allowed)
+            parts = newparts + parts
+            continue
+        if part.as_ew_allowed and not wrap_as_ew_blocked:
+            # It doesn't need CTE encoding, but encode it anyway so we can
+            # wrap it.
+            parts.insert(0, part)
+            want_encoding = True
+            continue
+        # We can't figure out how to wrap, it, so give up.
+        newline = _steal_trailing_WSP_if_exists(lines)
+        if newline or part.startswith_fws():
+            lines.append(newline + tstr)
+        else:
+            # We can't fold it onto the next line either...
+            lines[-1] += tstr
+    return policy.linesep.join(lines) + policy.linesep
+
+def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset):
+    """Fold string to_encode into lines as encoded word, combining if allowed.
+    Return the new value for last_ew, or None if ew_combine_allowed is False.
+
+    If there is already an encoded word in the last line of lines (indicated by
+    a non-None value for last_ew) and ew_combine_allowed is true, decode the
+    existing ew, combine it with to_encode, and re-encode.  Otherwise, encode
+    to_encode.  In either case, split to_encode as necessary so that the
+    encoded segments fit within maxlen.
+
+    """
+    if last_ew is not None and ew_combine_allowed:
+        to_encode = str(
+            get_unstructured(lines[-1][last_ew:] + to_encode))
+        lines[-1] = lines[-1][:last_ew]
+    if to_encode[0] in WSP:
+        # We're joining this to non-encoded text, so don't encode
+        # the leading blank.
+        leading_wsp = to_encode[0]
+        to_encode = to_encode[1:]
+        if (len(lines[-1]) == maxlen):
+            lines.append(_steal_trailing_WSP_if_exists(lines))
+        lines[-1] += leading_wsp
+    trailing_wsp = ''
+    if to_encode[-1] in WSP:
+        # Likewise for the trailing space.
+        trailing_wsp = to_encode[-1]
+        to_encode = to_encode[:-1]
+    new_last_ew = len(lines[-1]) if last_ew is None else last_ew
+    while to_encode:
+        remaining_space = maxlen - len(lines[-1])
+        # The RFC2047 chrome takes up 7 characters plus the length
+        # of the charset name.
+        encode_as = 'utf-8' if charset == 'us-ascii' else charset
+        text_space = remaining_space - len(encode_as) - 7
+        if text_space <= 0:
+            lines.append(' ')
+            # XXX We'll get an infinite loop here if maxlen is <= 7
+            continue
+        first_part = to_encode[:text_space]
+        ew = _ew.encode(first_part, charset=encode_as)
+        excess = len(ew) - remaining_space
+        if excess > 0:
+            # encode always chooses the shortest encoding, so this
+            # is guaranteed to fit at this point.
+            first_part = first_part[:-excess]
+            ew = _ew.encode(first_part)
+        lines[-1] += ew
+        to_encode = to_encode[len(first_part):]
+        if to_encode:
+            lines.append(' ')
+            new_last_ew = len(lines[-1])
+    lines[-1] += trailing_wsp
+    return new_last_ew if ew_combine_allowed else None
+
+def _fold_mime_parameters(part, lines, maxlen, encoding):
+    """Fold TokenList 'part' into the 'lines' list as mime parameters.
+
+    Using the decoded list of parameters and values, format them according to
+    the RFC rules, including using RFC2231 encoding if the value cannot be
+    expressed in 'encoding' and/or the parameter+value is too long to fit
+    within 'maxlen'.
+
+    """
+    # Special case for RFC2231 encoding: start from decoded values and use
+    # RFC2231 encoding iff needed.
+    #
+    # Note that the 1 and 2s being added to the length calculations are
+    # accounting for the possibly-needed spaces and semicolons we'll be adding.
+    #
+    for name, value in part.params:
+        # XXX What if this ';' puts us over maxlen the first time through the
+        # loop?  We should split the header value onto a newline in that case,
+        # but to do that we need to recognize the need earlier or reparse the
+        # header, so I'm going to ignore that bug for now.  It'll only put us
+        # one character over.
+        if not lines[-1].rstrip().endswith(';'):
+            lines[-1] += ';'
+        charset = encoding
+        error_handler = 'strict'
+        try:
+            value.encode(encoding)
+            encoding_required = False
+        except UnicodeEncodeError:
+            encoding_required = True
+            if utils._has_surrogates(value):
+                charset = 'unknown-8bit'
+                error_handler = 'surrogateescape'
+            else:
+                charset = 'utf-8'
+        if encoding_required:
+            encoded_value = parse.quote(
+                value, safe='', errors=error_handler)
+            tstr = "{}*={}''{}".format(name, charset, encoded_value)
+        else:
+            tstr = '{}={}'.format(name, quote_string(value))
+        if len(lines[-1]) + len(tstr) + 1 < maxlen:
+            lines[-1] = lines[-1] + ' ' + tstr
+            continue
+        elif len(tstr) + 2 <= maxlen:
+            lines.append(' ' + tstr)
+            continue
+        # We need multiple sections.  We are allowed to mix encoded and
+        # non-encoded sections, but we aren't going to.  We'll encode them all.
+        section = 0
+        extra_chrome = charset + "''"
+        while value:
+            chrome_len = len(name) + len(str(section)) + 3 + len(extra_chrome)
+            if maxlen <= chrome_len + 3:
+                # We need room for the leading blank, the trailing semicolon,
+                # and at least one character of the value.  If we don't
+                # have that, we'd be stuck, so in that case fall back to
+                # the RFC standard width.
+                maxlen = 78
+            splitpoint = maxchars = maxlen - chrome_len - 2
+            while True:
+                partial = value[:splitpoint]
+                encoded_value = parse.quote(
+                    partial, safe='', errors=error_handler)
+                if len(encoded_value) <= maxchars:
+                    break
+                splitpoint -= 1
+            lines.append(" {}*{}*={}{}".format(
+                name, section, extra_chrome, encoded_value))
+            extra_chrome = ''
+            section += 1
+            value = value[splitpoint:]
+            if value:
+                lines[-1] += ';'
diff --git a/modules/language/python/module/email/_policybase.py b/modules/language/python/module/email/_policybase.py
index a8633e2..bbc8b3c 100644
--- a/modules/language/python/module/email/_policybase.py
+++ b/modules/language/python/module/email/_policybase.py
@@ -3,7 +3,6 @@ module(email,_policybase)
 
 Allows fine grained feature control of how the package parses and emits data.
 """
-
 import abc
 import email.header as header
 import email.charset as _charset
@@ -13,6 +12,7 @@ __all__ = [
     'Policy',
     'Compat32',
     'compat32',
+    '_extend_docstrings'
     ]
 
 
@@ -101,9 +101,9 @@ def _extend_docstrings(cls):
     if cls.__doc__ and cls.__doc__.startswith('+'):
         cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, cls.__doc__)
     for name, attr in cls.__dict__.items():
-        if attr.__doc__ and attr.__doc__.startswith('+'):
+        if getattr(attr, '__doc__',False) and attr.__doc__.startswith('+'):
             for c in (c for base in cls.__bases__ for c in base.mro()):
-                doc = getattr(getattr(c, name), '__doc__')
+                doc = getattr(getattr(c, name, False), '__doc__', False)
                 if doc:
                     attr.__doc__ = _append_doc(doc, attr.__doc__)
                     break
diff --git a/modules/language/python/module/email/contentmanager.py b/modules/language/python/module/email/contentmanager.py
new file mode 100644
index 0000000..5eaf2ee
--- /dev/null
+++ b/modules/language/python/module/email/contentmanager.py
@@ -0,0 +1,254 @@
+module(email,contentmanager)
+
+__all__=['ContentManager','raw_data_manager']
+
+import binascii
+import email.charset as charset
+import email.message as message
+import email.errors as errors
+import email.quoprimime as quoprimime
+
+class ContentManager:
+
+    def __init__(self):
+        self.get_handlers = {}
+        self.set_handlers = {}
+
+    def add_get_handler(self, key, handler):
+        self.get_handlers[key] = handler
+
+    def get_content(self, msg, *args, **kw):
+        content_type = msg.get_content_type()
+        if content_type in self.get_handlers:
+            return self.get_handlers[content_type](msg, *args, **kw)
+        maintype = msg.get_content_maintype()
+        if maintype in self.get_handlers:
+            return self.get_handlers[maintype](msg, *args, **kw)
+        if '' in self.get_handlers:
+            return self.get_handlers[''](msg, *args, **kw)
+        raise KeyError(content_type)
+
+    def add_set_handler(self, typekey, handler):
+        self.set_handlers[typekey] = handler
+
+    def set_content(self, msg, obj, *args, **kw):
+        if msg.get_content_maintype() == 'multipart':
+            # XXX: is this error a good idea or not?  We can remove it later,
+            # but we can't add it later, so do it for now.
+            raise TypeError("set_content not valid on multipart")
+        handler = self._find_set_handler(msg, obj)
+        msg.clear_content()
+        handler(msg, obj, *args, **kw)
+
+    def _find_set_handler(self, msg, obj):
+        full_path_for_error = None
+        for typ in type(obj).__mro__:
+            if typ in self.set_handlers:
+                return self.set_handlers[typ]
+            qname = typ.__qualname__
+            modname = getattr(typ, '__module__', '')
+            full_path = '.'.join((modname, qname)) if modname else qname
+            if full_path_for_error is None:
+                full_path_for_error = full_path
+            if full_path in self.set_handlers:
+                return self.set_handlers[full_path]
+            if qname in self.set_handlers:
+                return self.set_handlers[qname]
+            name = typ.__name__
+            if name in self.set_handlers:
+                return self.set_handlers[name]
+        if None in self.set_handlers:
+            return self.set_handlers[None]
+        raise KeyError(full_path_for_error)
+
+
+raw_data_manager = ContentManager()
+
+
+def get_text_content(msg, errors='replace'):
+    content = msg.get_payload(decode=True)
+    charset = msg.get_param('charset', 'ASCII')
+    return content.decode(charset, errors=errors)
+raw_data_manager.add_get_handler('text', get_text_content)
+
+
+def get_non_text_content(msg):
+    return msg.get_payload(decode=True)
+for maintype in 'audio image video application'.split():
+    raw_data_manager.add_get_handler(maintype, get_non_text_content)
+
+
+def get_message_content(msg):
+    return msg.get_payload(0)
+for subtype in 'rfc822 external-body'.split():
+    raw_data_manager.add_get_handler('message/'+subtype, get_message_content)
+
+
+def get_and_fixup_unknown_message_content(msg):
+    # If we don't understand a message subtype, we are supposed to treat it as
+    # if it were application/octet-stream, per
+    # tools.ietf.org/html/rfc2046#section-5.2.4.  Feedparser doesn't do that,
+    # so do our best to fix things up.  Note that it is *not* appropriate to
+    # model message/partial content as Message objects, so they are handled
+    # here as well.  (How to reassemble them is out of scope for this comment :)
+    return bytes(msg.get_payload(0))
+raw_data_manager.add_get_handler('message',
+                                 get_and_fixup_unknown_message_content)
+
+
+def _prepare_set(msg, maintype, subtype, headers):
+    msg['Content-Type'] = '/'.join((maintype, subtype))
+    if headers:
+        if not hasattr(headers[0], 'name'):
+            mp = msg.policy
+            headers = [mp.header_factory(*mp.header_source_parse([header]))
+                       for header in headers]
+        try:
+            for header in headers:
+                if header.defects:
+                    raise header.defects[0]
+                msg[header.name] = header
+        except errors.HeaderDefect as exc:
+            raise ValueError("Invalid header: {}".format(
+                                header.fold(policy=msg.policy))) from exc
+
+
+def _finalize_set(msg, disposition, filename, cid, params):
+    if disposition is None and filename is not None:
+        disposition = 'attachment'
+    if disposition is not None:
+        msg['Content-Disposition'] = disposition
+    if filename is not None:
+        msg.set_param('filename',
+                      filename,
+                      header='Content-Disposition',
+                      replace=True)
+    if cid is not None:
+        msg['Content-ID'] = cid
+    if params is not None:
+        for key, value in params.items():
+            msg.set_param(key, value)
+
+
+# XXX: This is a cleaned-up version of base64mime.body_encode (including a bug
+# fix in the calculation of unencoded_bytes_per_line).  It would be nice to
+# drop both this and quoprimime.body_encode in favor of enhanced binascii
+# routines that accepted a max_line_length parameter.
+def _encode_base64(data, max_line_length):
+    encoded_lines = []
+    unencoded_bytes_per_line = max_line_length // 4 * 3
+    for i in range(0, len(data), unencoded_bytes_per_line):
+        thisline = data[i:i+unencoded_bytes_per_line]
+        encoded_lines.append(binascii.b2a_base64(thisline).decode('ascii'))
+    return ''.join(encoded_lines)
+
+
+def _encode_text(string, charset, cte, policy):
+    lines = string.encode(charset).splitlines()
+    linesep = policy.linesep.encode('ascii')
+    def embedded_body(lines): return linesep.join(lines) + linesep
+    def normal_body(lines): return b'\n'.join(lines) + b'\n'
+    if cte==None:
+        # Use heuristics to decide on the "best" encoding.
+        try:
+            return '7bit', normal_body(lines).decode('ascii')
+        except UnicodeDecodeError:
+            pass
+        if (policy.cte_type == '8bit' and
+                max(len(x) for x in lines) <= policy.max_line_length):
+            return '8bit', normal_body(lines).decode('ascii', 'surrogateescape')
+        sniff = embedded_body(lines[:10])
+        sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'),
+                                          policy.max_line_length)
+        sniff_base64 = binascii.b2a_base64(sniff)
+        # This is a little unfair to qp; it includes lineseps, base64 doesn't.
+        if len(sniff_qp) > len(sniff_base64):
+            cte = 'base64'
+        else:
+            cte = 'quoted-printable'
+            if len(lines) <= 10:
+                return cte, sniff_qp
+    if cte == '7bit':
+        data = normal_body(lines).decode('ascii')
+    elif cte == '8bit':
+        data = normal_body(lines).decode('ascii', 'surrogateescape')
+    elif cte == 'quoted-printable':
+        data = quoprimime.body_encode(normal_body(lines).decode('latin-1'),
+                                      policy.max_line_length)
+    elif cte == 'base64':
+        data = _encode_base64(embedded_body(lines), policy.max_line_length)
+    else:
+        raise ValueError("Unknown content transfer encoding {}".format(cte))
+    return cte, data
+
+
+def set_text_content(msg, string, subtype="plain", charset='utf-8', cte=None,
+                     disposition=None, filename=None, cid=None,
+                     params=None, headers=None):
+    _prepare_set(msg, 'text', subtype, headers)
+    cte, payload = _encode_text(string, charset, cte, msg.policy)
+    msg.set_payload(payload)
+    msg.set_param('charset',
+                  charset.ALIASES.get(charset, charset),
+                  replace=True)
+    msg['Content-Transfer-Encoding'] = cte
+    _finalize_set(msg, disposition, filename, cid, params)
+raw_data_manager.add_set_handler(str, set_text_content)
+
+
+def set_message_content(msg, message, subtype="rfc822", cte=None,
+                       disposition=None, filename=None, cid=None,
+                       params=None, headers=None):
+    if subtype == 'partial':
+        raise ValueError("message/partial is not supported for Message objects")
+    if subtype == 'rfc822':
+        if cte not in (None, '7bit', '8bit', 'binary'):
+            # http://tools.ietf.org/html/rfc2046#section-5.2.1 mandate.
+            raise ValueError(
+                "message/rfc822 parts do not support cte={}".format(cte))
+        # 8bit will get coerced on serialization if policy.cte_type='7bit'.  We
+        # may end up claiming 8bit when it isn't needed, but the only negative
+        # result of that should be a gateway that needs to coerce to 7bit
+        # having to look through the whole embedded message to discover whether
+        # or not it actually has to do anything.
+        cte = '8bit' if cte is None else cte
+    elif subtype == 'external-body':
+        if cte not in (None, '7bit'):
+            # http://tools.ietf.org/html/rfc2046#section-5.2.3 mandate.
+            raise ValueError(
+                "message/external-body parts do not support cte={}".format(cte))
+        cte = '7bit'
+    elif cte is None:
+        # http://tools.ietf.org/html/rfc2046#section-5.2.4 says all future
+        # subtypes should be restricted to 7bit, so assume that.
+        cte = '7bit'
+    _prepare_set(msg, 'message', subtype, headers)
+    msg.set_payload([message])
+    msg['Content-Transfer-Encoding'] = cte
+    _finalize_set(msg, disposition, filename, cid, params)
+raw_data_manager.add_set_handler(message.Message, set_message_content)
+
+
+def set_bytes_content(msg, data, maintype, subtype, cte='base64',
+                     disposition=None, filename=None, cid=None,
+                     params=None, headers=None):
+    _prepare_set(msg, maintype, subtype, headers)
+    if cte == 'base64':
+        data = _encode_base64(data, max_line_length=msg.policy.max_line_length)
+    elif cte == 'quoted-printable':
+        # XXX: quoprimime.body_encode won't encode newline characters in data,
+        # so we can't use it.  This means max_line_length is ignored.  Another
+        # bug to fix later.  (Note: encoders.quopri is broken on line ends.)
+        data = binascii.b2a_qp(data, istext=False, header=False, quotetabs=True)
+        data = data.decode('ascii')
+    elif cte == '7bit':
+        # Make sure it really is only ASCII.  The early warning here seems
+        # worth the overhead...if you care write your own content manager :).
+        data.encode('ascii')
+    elif cte in ('8bit', 'binary'):
+        data = data.decode('ascii', 'surrogateescape')
+    msg.set_payload(data)
+    msg['Content-Transfer-Encoding'] = cte
+    _finalize_set(msg, disposition, filename, cid, params)
+for typ in (bytes, bytearray, memoryview):
+    raw_data_manager.add_set_handler(typ, set_bytes_content)
diff --git a/modules/language/python/module/email/feedparser.py b/modules/language/python/module/email/feedparser.py
index f07448a..2f31633 100644
--- a/modules/language/python/module/email/feedparser.py
+++ b/modules/language/python/module/email/feedparser.py
@@ -23,7 +23,6 @@ object's .defects attribute.
 __all__ = ['FeedParser', 'BytesFeedParser']
 
 import re
-
 import email.errors as errors
 from email._policybase import compat32
 from collections import deque
diff --git a/modules/language/python/module/email/generator.py b/modules/language/python/module/email/generator.py
new file mode 100644
index 0000000..2b90fe5
--- /dev/null
+++ b/modules/language/python/module/email/generator.py
@@ -0,0 +1,509 @@
+module(email,generator)
+# Copyright (C) 2001-2010 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Classes to generate plain text from a message object tree."""
+
+__all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator']
+
+import re
+import sys
+import time
+import random
+
+from copy import deepcopy
+from io import StringIO, BytesIO
+from email.utils import _has_surrogates
+
+UNDERSCORE = '_'
+NL = '\n'  # XXX: no longer used by the code below.
+
+NLCRE = re.compile(r'\r\n|\r|\n')
+fcre = re.compile(r'^From ', re.MULTILINE)
+
+
+
+class Generator:
+    """Generates output from a Message object tree.
+
+    This basic generator writes the message to the given file object as plain
+    text.
+    """
+    #
+    # Public interface
+    #
+
+    def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *,
+                 policy=None):
+        """Create the generator for message flattening.
+
+        outfp is the output file-like object for writing the message to.  It
+        must have a write() method.
+
+        Optional mangle_from_ is a flag that, when True (the default if policy
+        is not set), escapes From_ lines in the body of the message by putting
+        a `>' in front of them.
+
+        Optional maxheaderlen specifies the longest length for a non-continued
+        header.  When a header line is longer (in characters, with tabs
+        expanded to 8 spaces) than maxheaderlen, the header will split as
+        defined in the Header class.  Set maxheaderlen to zero to disable
+        header wrapping.  The default is 78, as recommended (but not required)
+        by RFC 2822.
+
+        The policy keyword specifies a policy object that controls a number of
+        aspects of the generator's operation.  If no policy is specified,
+        the policy associated with the Message object passed to the
+        flatten method is used.
+
+        """
+
+        if mangle_from_ is None:
+            mangle_from_ = True if policy is None else policy.mangle_from_
+        self._fp = outfp
+        self._mangle_from_ = mangle_from_
+        self.maxheaderlen = maxheaderlen
+        self.policy = policy
+
+    def write(self, s):
+        # Just delegate to the file object
+        self._fp.write(s)
+
+    def flatten(self, msg, unixfrom=False, linesep=None):
+        r"""Print the message object tree rooted at msg to the output file
+        specified when the Generator instance was created.
+
+        unixfrom is a flag that forces the printing of a Unix From_ delimiter
+        before the first object in the message tree.  If the original message
+        has no From_ delimiter, a `standard' one is crafted.  By default, this
+        is False to inhibit the printing of any From_ delimiter.
+
+        Note that for subobjects, no From_ line is printed.
+
+        linesep specifies the characters used to indicate a new line in
+        the output.  The default value is determined by the policy specified
+        when the Generator instance was created or, if none was specified,
+        from the policy associated with the msg.
+
+        """
+        # We use the _XXX constants for operating on data that comes directly
+        # from the msg, and _encoded_XXX constants for operating on data that
+        # has already been converted (to bytes in the BytesGenerator) and
+        # inserted into a temporary buffer.
+        policy = msg.policy if self.policy is None else self.policy
+        if linesep is not None:
+            policy = policy.clone(linesep=linesep)
+        if self.maxheaderlen is not None:
+            policy = policy.clone(max_line_length=self.maxheaderlen)
+        self._NL = policy.linesep
+        self._encoded_NL = self._encode(self._NL)
+        self._EMPTY = ''
+        self._encoded_EMPTY = self._encode(self._EMPTY)
+        # Because we use clone (below) when we recursively process message
+        # subparts, and because clone uses the computed policy (not None),
+        # submessages will automatically get set to the computed policy when
+        # they are processed by this code.
+        old_gen_policy = self.policy
+        old_msg_policy = msg.policy
+        try:
+            self.policy = policy
+            msg.policy = policy
+            if unixfrom:
+                ufrom = msg.get_unixfrom()
+                if not ufrom:
+                    ufrom = 'From nobody ' + time.ctime(time.time())
+                self.write(ufrom + self._NL)
+            self._write(msg)
+        finally:
+            self.policy = old_gen_policy
+            msg.policy = old_msg_policy
+
+    def clone(self, fp):
+        """Clone this generator with the exact same options."""
+        return self.__class__(fp,
+                              self._mangle_from_,
+                              None, # Use policy setting, which we've adjusted
+                              policy=self.policy)
+
+    #
+    # Protected interface - undocumented ;/
+    #
+
+    # Note that we use 'self.write' when what we are writing is coming from
+    # the source, and self._fp.write when what we are writing is coming from a
+    # buffer (because the Bytes subclass has already had a chance to transform
+    # the data in its write method in that case).  This is an entirely
+    # pragmatic split determined by experiment; we could be more general by
+    # always using write and having the Bytes subclass write method detect when
+    # it has already transformed the input; but, since this whole thing is a
+    # hack anyway this seems good enough.
+
+    def _new_buffer(self):
+        # BytesGenerator overrides this to return BytesIO.
+        return StringIO()
+
+    def _encode(self, s):
+        # BytesGenerator overrides this to encode strings to bytes.
+        return s
+
+    def _write_lines(self, lines):
+        # We have to transform the line endings.
+        if not lines:
+            return
+        lines = NLCRE.split(lines)
+        for line in lines[:-1]:
+            self.write(line)
+            self.write(self._NL)
+        if lines[-1]:
+            self.write(lines[-1])
+        # XXX logic tells me this else should be needed, but the tests fail
+        # with it and pass without it.  (NLCRE.split ends with a blank element
+        # if and only if there was a trailing newline.)
+        #else:
+        #    self.write(self._NL)
+
+    def _write(self, msg):
+        # We can't write the headers yet because of the following scenario:
+        # say a multipart message includes the boundary string somewhere in
+        # its body.  We'd have to calculate the new boundary /before/ we write
+        # the headers so that we can write the correct Content-Type:
+        # parameter.
+        #
+        # The way we do this, so as to make the _handle_*() methods simpler,
+        # is to cache any subpart writes into a buffer.  The we write the
+        # headers and the buffer contents.  That way, subpart handlers can
+        # Do The Right Thing, and can still modify the Content-Type: header if
+        # necessary.
+        oldfp = self._fp
+        try:
+            self._munge_cte = None
+            self._fp = sfp = self._new_buffer()
+            self._dispatch(msg)
+        finally:
+            self._fp = oldfp
+            munge_cte = self._munge_cte
+            del self._munge_cte
+        # If we munged the cte, copy the message again and re-fix the CTE.
+        if munge_cte:
+            msg = deepcopy(msg)
+            msg.replace_header('content-transfer-encoding', munge_cte[0])
+            msg.replace_header('content-type', munge_cte[1])
+        # Write the headers.  First we see if the message object wants to
+        # handle that itself.  If not, we'll do it generically.
+        meth = getattr(msg, '_write_headers', None)
+        if meth is None:
+            self._write_headers(msg)
+        else:
+            meth(self)
+        self._fp.write(sfp.getvalue())
+
+    def _dispatch(self, msg):
+        # Get the Content-Type: for the message, then try to dispatch to
+        # self._handle_<maintype>_<subtype>().  If there's no handler for the
+        # full MIME type, then dispatch to self._handle_<maintype>().  If
+        # that's missing too, then dispatch to self._writeBody().
+        main = msg.get_content_maintype()
+        sub = msg.get_content_subtype()
+        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
+        meth = getattr(self, '_handle_' + specific, None)
+        if meth is None:
+            generic = main.replace('-', '_')
+            meth = getattr(self, '_handle_' + generic, None)
+            if meth is None:
+                meth = self._writeBody
+        meth(msg)
+
+    #
+    # Default handlers
+    #
+
+    def _write_headers(self, msg):
+        for h, v in msg.raw_items():
+            self.write(self.policy.fold(h, v))
+        # A blank line always separates headers from body
+        self.write(self._NL)
+
+    #
+    # Handlers for writing types and subtypes
+    #
+
+    def _handle_text(self, msg):
+        payload = msg.get_payload()
+        if payload is None:
+            return
+        if not isinstance(payload, str):
+            raise TypeError('string payload expected: %s' % type(payload))
+        if _has_surrogates(msg._payload):
+            charset = msg.get_param('charset')
+            if charset is not None:
+                # XXX: This copy stuff is an ugly hack to avoid modifying the
+                # existing message.
+                msg = deepcopy(msg)
+                del msg['content-transfer-encoding']
+                msg.set_payload(payload, charset)
+                payload = msg.get_payload()
+                self._munge_cte = (msg['content-transfer-encoding'],
+                                   msg['content-type'])
+        if self._mangle_from_:
+            payload = fcre.sub('>From ', payload)
+        self._write_lines(payload)
+
+    # Default body handler
+    _writeBody = _handle_text
+
+    def _handle_multipart(self, msg):
+        # The trick here is to write out each part separately, merge them all
+        # together, and then make sure that the boundary we've chosen isn't
+        # present in the payload.
+        msgtexts = []
+        subparts = msg.get_payload()
+        if subparts is None:
+            subparts = []
+        elif isinstance(subparts, str):
+            # e.g. a non-strict parse of a message with no starting boundary.
+            self.write(subparts)
+            return
+        elif not isinstance(subparts, list):
+            # Scalar payload
+            subparts = [subparts]
+        for part in subparts:
+            s = self._new_buffer()
+            g = self.clone(s)
+            g.flatten(part, unixfrom=False, linesep=self._NL)
+            msgtexts.append(s.getvalue())
+        # BAW: What about boundaries that are wrapped in double-quotes?
+        boundary = msg.get_boundary()
+        if not boundary:
+            # Create a boundary that doesn't appear in any of the
+            # message texts.
+            alltext = self._encoded_NL.join(msgtexts)
+            boundary = self._make_boundary(alltext)
+            msg.set_boundary(boundary)
+        # If there's a preamble, write it out, with a trailing CRLF
+        if msg.preamble is not None:
+            if self._mangle_from_:
+                preamble = fcre.sub('>From ', msg.preamble)
+            else:
+                preamble = msg.preamble
+            self._write_lines(preamble)
+            self.write(self._NL)
+        # dash-boundary transport-padding CRLF
+        self.write('--' + boundary + self._NL)
+        # body-part
+        if msgtexts:
+            self._fp.write(msgtexts.pop(0))
+        # *encapsulation
+        # --> delimiter transport-padding
+        # --> CRLF body-part
+        for body_part in msgtexts:
+            # delimiter transport-padding CRLF
+            self.write(self._NL + '--' + boundary + self._NL)
+            # body-part
+            self._fp.write(body_part)
+        # close-delimiter transport-padding
+        self.write(self._NL + '--' + boundary + '--' + self._NL)
+        if msg.epilogue is not None:
+            if self._mangle_from_:
+                epilogue = fcre.sub('>From ', msg.epilogue)
+            else:
+                epilogue = msg.epilogue
+            self._write_lines(epilogue)
+
+    def _handle_multipart_signed(self, msg):
+        # The contents of signed parts has to stay unmodified in order to keep
+        # the signature intact per RFC1847 2.1, so we disable header wrapping.
+        # RDM: This isn't enough to completely preserve the part, but it helps.
+        p = self.policy
+        self.policy = p.clone(max_line_length=0)
+        try:
+            self._handle_multipart(msg)
+        finally:
+            self.policy = p
+
+    def _handle_message_delivery_status(self, msg):
+        # We can't just write the headers directly to self's file object
+        # because this will leave an extra newline between the last header
+        # block and the boundary.  Sigh.
+        blocks = []
+        for part in msg.get_payload():
+            s = self._new_buffer()
+            g = self.clone(s)
+            g.flatten(part, unixfrom=False, linesep=self._NL)
+            text = s.getvalue()
+            lines = text.split(self._encoded_NL)
+            # Strip off the unnecessary trailing empty line
+            if lines and lines[-1] == self._encoded_EMPTY:
+                blocks.append(self._encoded_NL.join(lines[:-1]))
+            else:
+                blocks.append(text)
+        # Now join all the blocks with an empty line.  This has the lovely
+        # effect of separating each block with an empty line, but not adding
+        # an extra one after the last one.
+        self._fp.write(self._encoded_NL.join(blocks))
+
+    def _handle_message(self, msg):
+        s = self._new_buffer()
+        g = self.clone(s)
+        # The payload of a message/rfc822 part should be a multipart sequence
+        # of length 1.  The zeroth element of the list should be the Message
+        # object for the subpart.  Extract that object, stringify it, and
+        # write it out.
+        # Except, it turns out, when it's a string instead, which happens when
+        # and only when HeaderParser is used on a message of mime type
+        # message/rfc822.  Such messages are generated by, for example,
+        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
+        # in that case we just emit the string body.
+        payload = msg._payload
+        if isinstance(payload, list):
+            g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
+            payload = s.getvalue()
+        else:
+            payload = self._encode(payload)
+        self._fp.write(payload)
+
+    # This used to be a module level function; we use a classmethod for this
+    # and _compile_re so we can continue to provide the module level function
+    # for backward compatibility by doing
+    #   _make_boundary = Generator._make_boundary
+    # at the end of the module.  It *is* internal, so we could drop that...
+    @classmethod
+    def _make_boundary(cls, text=None):
+        # Craft a random boundary.  If text is given, ensure that the chosen
+        # boundary doesn't appear in the text.
+        token = random.randrange(sys.maxsize)
+        boundary = ('=' * 15) + (_fmt % token) + '=='
+        if text is None:
+            return boundary
+        b = boundary
+        counter = 0
+        while True:
+            cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
+            if not cre.search(text):
+                break
+            b = boundary + '.' + str(counter)
+            counter += 1
+        return b
+
+    @classmethod
+    def _compile_re(cls, s, flags):
+        return re.compile(s, flags)
+
+
+class BytesGenerator(Generator):
+    """Generates a bytes version of a Message object tree.
+
+    Functionally identical to the base Generator except that the output is
+    bytes and not string.  When surrogates were used in the input to encode
+    bytes, these are decoded back to bytes for output.  If the policy has
+    cte_type set to 7bit, then the message is transformed such that the
+    non-ASCII bytes are properly content transfer encoded, using the charset
+    unknown-8bit.
+
+    The outfp object must accept bytes in its write method.
+    """
+
+    def write(self, s):
+        self._fp.write(s.encode('ascii', 'surrogateescape'))
+
+    def _new_buffer(self):
+        return BytesIO()
+
+    def _encode(self, s):
+        return s.encode('ascii')
+
+    def _write_headers(self, msg):
+        # This is almost the same as the string version, except for handling
+        # strings with 8bit bytes.
+        for h, v in msg.raw_items():
+            self._fp.write(self.policy.fold_binary(h, v))
+        # A blank line always separates headers from body
+        self.write(self._NL)
+
+    def _handle_text(self, msg):
+        # If the string has surrogates the original source was bytes, so
+        # just write it back out.
+        if msg._payload is None:
+            return
+        if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit':
+            if self._mangle_from_:
+                msg._payload = fcre.sub(">From ", msg._payload)
+            self._write_lines(msg._payload)
+        else:
+            super(BytesGenerator,self)._handle_text(msg)
+
+    # Default body handler
+    _writeBody = _handle_text
+
+    @classmethod
+    def _compile_re(cls, s, flags):
+        return re.compile(s.encode('ascii'), flags)
+
+
+
+_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
+
+class DecodedGenerator(Generator):
+    """Generates a text representation of a message.
+
+    Like the Generator base class, except that non-text parts are substituted
+    with a format string representing the part.
+    """
+    def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, fmt=None, *,
+                 policy=None):
+        """Like Generator.__init__() except that an additional optional
+        argument is allowed.
+
+        Walks through all subparts of a message.  If the subpart is of main
+        type `text', then it prints the decoded payload of the subpart.
+
+        Otherwise, fmt is a format string that is used instead of the message
+        payload.  fmt is expanded with the following keywords (in
+        %(keyword)s format):
+
+        type       : Full MIME type of the non-text part
+        maintype   : Main MIME type of the non-text part
+        subtype    : Sub-MIME type of the non-text part
+        filename   : Filename of the non-text part
+        description: Description associated with the non-text part
+        encoding   : Content transfer encoding of the non-text part
+
+        The default value for fmt is None, meaning
+
+        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
+        """
+        Generator.__init__(self, outfp, mangle_from_, maxheaderlen,
+                           policy=policy)
+        if fmt is None:
+            self._fmt = _FMT
+        else:
+            self._fmt = fmt
+
+    def _dispatch(self, msg):
+        for part in msg.walk():
+            maintype = part.get_content_maintype()
+            if maintype == 'text':
+                print(part.get_payload(decode=False), file=self)
+            elif maintype == 'multipart':
+                # Just skip this
+                pass
+            else:
+                print(self._fmt % {
+                    'type'       : part.get_content_type(),
+                    'maintype'   : part.get_content_maintype(),
+                    'subtype'    : part.get_content_subtype(),
+                    'filename'   : part.get_filename('[no filename]'),
+                    'description': part.get('Content-Description',
+                                            '[no description]'),
+                    'encoding'   : part.get('Content-Transfer-Encoding',
+                                            '[no encoding]'),
+                    }, file=self)
+
+
+
+# Helper used by Generator._make_boundary
+_width = len(repr(sys.maxsize-1))
+_fmt = '%%0%dd' % _width
+
+# Backward compatibility
+_make_boundary = Generator._make_boundary
diff --git a/modules/language/python/module/email/headerregistry.py b/modules/language/python/module/email/headerregistry.py
new file mode 100644
index 0000000..6b8615d
--- /dev/null
+++ b/modules/language/python/module/email/headerregistry.py
@@ -0,0 +1,592 @@
+module(email,headerregistry)
+"""Representing and manipulating email headers via custom objects.
+
+This module provides an implementation of the HeaderRegistry API.
+The implementation is designed to flexibly follow RFC5322 rules.
+
+Eventually HeaderRegistry will be a public API, but it isn't yet,
+and will probably change some before that happens.
+
+"""
+
+__all__=['HeaderRegistry']
+
+from types import MappingProxyType
+import email.utils as utils
+import email.errors as errors
+import email._header_value_parser as parser
+
+class Address:
+
+    def __init__(self, display_name='', username='', domain='', addr_spec=None):
+        """Create an object representing a full email address.
+
+        An address can have a 'display_name', a 'username', and a 'domain'.  In
+        addition to specifying the username and domain separately, they may be
+        specified together by using the addr_spec keyword *instead of* the
+        username and domain keywords.  If an addr_spec string is specified it
+        must be properly quoted according to RFC 5322 rules; an error will be
+        raised if it is not.
+
+        An Address object has display_name, username, domain, and addr_spec
+        attributes, all of which are read-only.  The addr_spec and the string
+        value of the object are both quoted according to RFC5322 rules, but
+        without any Content Transfer Encoding.
+
+        """
+        # This clause with its potential 'raise' may only happen when an
+        # application program creates an Address object using an addr_spec
+        # keyword.  The email library code itself must always supply username
+        # and domain.
+        if addr_spec is not None:
+            if username or domain:
+                raise TypeError("addrspec specified when username and/or "
+                                "domain also specified")
+            a_s, rest = parser.get_addr_spec(addr_spec)
+            if rest:
+                raise ValueError("Invalid addr_spec; only '{}' "
+                                 "could be parsed from '{}'".format(
+                                    a_s, addr_spec))
+            if a_s.all_defects:
+                raise a_s.all_defects[0]
+            username = a_s.local_part
+            domain = a_s.domain
+        self._display_name = display_name
+        self._username = username
+        self._domain = domain
+
+    @property
+    def display_name(self):
+        return self._display_name
+
+    @property
+    def username(self):
+        return self._username
+
+    @property
+    def domain(self):
+        return self._domain
+
+    @property
+    def addr_spec(self):
+        """The addr_spec (username@domain) portion of the address, quoted
+        according to RFC 5322 rules, but with no Content Transfer Encoding.
+        """
+        nameset = set(self.username)
+        if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS):
+            lp = parser.quote_string(self.username)
+        else:
+            lp = self.username
+        if self.domain:
+            return lp + '@' + self.domain
+        if not lp:
+            return '<>'
+        return lp
+
+    def __repr__(self):
+        return "{}(display_name={!r}, username={!r}, domain={!r})".format(
+                        self.__class__.__name__,
+                        self.display_name, self.username, self.domain)
+
+    def __str__(self):
+        nameset = set(self.display_name)
+        if len(nameset) > len(nameset-parser.SPECIALS):
+            disp = parser.quote_string(self.display_name)
+        else:
+            disp = self.display_name
+        if disp:
+            addr_spec = '' if self.addr_spec=='<>' else self.addr_spec
+            return "{} <{}>".format(disp, addr_spec)
+        return self.addr_spec
+
+    def __eq__(self, other):
+        if type(other) != type(self):
+            return False
+        return (self.display_name == other.display_name and
+                self.username == other.username and
+                self.domain == other.domain)
+
+
+class Group:
+
+    def __init__(self, display_name=None, addresses=None):
+        """Create an object representing an address group.
+
+        An address group consists of a display_name followed by colon and a
+        list of addresses (see Address) terminated by a semi-colon.  The Group
+        is created by specifying a display_name and a possibly empty list of
+        Address objects.  A Group can also be used to represent a single
+        address that is not in a group, which is convenient when manipulating
+        lists that are a combination of Groups and individual Addresses.  In
+        this case the display_name should be set to None.  In particular, the
+        string representation of a Group whose display_name is None is the same
+        as the Address object, if there is one and only one Address object in
+        the addresses list.
+
+        """
+        self._display_name = display_name
+        self._addresses = tuple(addresses) if addresses else tuple()
+
+    @property
+    def display_name(self):
+        return self._display_name
+
+    @property
+    def addresses(self):
+        return self._addresses
+
+    def __repr__(self):
+        return "{}(display_name={!r}, addresses={!r}".format(
+                 self.__class__.__name__,
+                 self.display_name, self.addresses)
+
+    def __str__(self):
+        if self.display_name is None and len(self.addresses)==1:
+            return str(self.addresses[0])
+        disp = self.display_name
+        if disp is not None:
+            nameset = set(disp)
+            if len(nameset) > len(nameset-parser.SPECIALS):
+                disp = parser.quote_string(disp)
+        adrstr = ", ".join(str(x) for x in self.addresses)
+        adrstr = ' ' + adrstr if adrstr else adrstr
+        return "{}:{};".format(disp, adrstr)
+
+    def __eq__(self, other):
+        if type(other) != type(self):
+            return False
+        return (self.display_name == other.display_name and
+                self.addresses == other.addresses)
+
+
+# Header Classes #
+
+class BaseHeader(str):
+
+    """Base class for message headers.
+
+    Implements generic behavior and provides tools for subclasses.
+
+    A subclass must define a classmethod named 'parse' that takes an unfolded
+    value string and a dictionary as its arguments.  The dictionary will
+    contain one key, 'defects', initialized to an empty list.  After the call
+    the dictionary must contain two additional keys: parse_tree, set to the
+    parse tree obtained from parsing the header, and 'decoded', set to the
+    string value of the idealized representation of the data from the value.
+    (That is, encoded words are decoded, and values that have canonical
+    representations are so represented.)
+
+    The defects key is intended to collect parsing defects, which the message
+    parser will subsequently dispose of as appropriate.  The parser should not,
+    insofar as practical, raise any errors.  Defects should be added to the
+    list instead.  The standard header parsers register defects for RFC
+    compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
+    errors.
+
+    The parse method may add additional keys to the dictionary.  In this case
+    the subclass must define an 'init' method, which will be passed the
+    dictionary as its keyword arguments.  The method should use (usually by
+    setting them as the value of similarly named attributes) and remove all the
+    extra keys added by its parse method, and then use super to call its parent
+    class with the remaining arguments and keywords.
+
+    The subclass should also make sure that a 'max_count' attribute is defined
+    that is either None or 1. XXX: need to better define this API.
+
+    """
+
+    def __new__(cls, name, value):
+        kwds = {'defects': []}
+        cls.parse(value, kwds)
+        if utils._has_surrogates(kwds['decoded']):
+            kwds['decoded'] = utils._sanitize(kwds['decoded'])
+        self = str.__new__(cls, kwds['decoded'])
+        del kwds['decoded']
+        self.init(name, **kwds)
+        return self
+
+    def init(self, name, *, parse_tree, defects):
+        self._name = name
+        self._parse_tree = parse_tree
+        self._defects = defects
+
+    @property
+    def name(self):
+        return self._name
+
+    @property
+    def defects(self):
+        return tuple(self._defects)
+
+    def __reduce__(self):
+        return (
+            _reconstruct_header,
+            (
+                self.__class__.__name__,
+                self.__class__.__bases__,
+                str(self),
+            ),
+            self.__dict__)
+
+    @classmethod
+    def _reconstruct(cls, value):
+        return str.__new__(cls, value)
+
+    def fold(self, *, policy):
+        """Fold header according to policy.
+
+        The parsed representation of the header is folded according to
+        RFC5322 rules, as modified by the policy.  If the parse tree
+        contains surrogateescaped bytes, the bytes are CTE encoded using
+        the charset 'unknown-8bit".
+
+        Any non-ASCII characters in the parse tree are CTE encoded using
+        charset utf-8. XXX: make this a policy setting.
+
+        The returned value is an ASCII-only string possibly containing linesep
+        characters, and ending with a linesep character.  The string includes
+        the header name and the ': ' separator.
+
+        """
+        # At some point we need to put fws here iif it was in the source.
+        header = parser.Header([
+            parser.HeaderLabel([
+                parser.ValueTerminal(self.name, 'header-name'),
+                parser.ValueTerminal(':', 'header-sep')]),
+            ])
+        if self._parse_tree:
+            header.append(
+                parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]))
+        header.append(self._parse_tree)
+        return header.fold(policy=policy)
+
+
+def _reconstruct_header(cls_name, bases, value):
+    return type(cls_name, bases, {})._reconstruct(value)
+
+
+class UnstructuredHeader:
+
+    max_count = None
+    value_parser = staticmethod(parser.get_unstructured)
+
+    @classmethod
+    def parse(cls, value, kwds):
+        kwds['parse_tree'] = cls.value_parser(value)
+        kwds['decoded'] = str(kwds['parse_tree'])
+
+
+class UniqueUnstructuredHeader(UnstructuredHeader):
+
+    max_count = 1
+
+
+class DateHeader:
+
+    """Header whose value consists of a single timestamp.
+
+    Provides an additional attribute, datetime, which is either an aware
+    datetime using a timezone, or a naive datetime if the timezone
+    in the input string is -0000.  Also accepts a datetime as input.
+    The 'value' attribute is the normalized form of the timestamp,
+    which means it is the output of format_datetime on the datetime.
+    """
+
+    max_count = None
+
+    # This is used only for folding, not for creating 'decoded'.
+    value_parser = staticmethod(parser.get_unstructured)
+
+    @classmethod
+    def parse(cls, value, kwds):
+        if not value:
+            kwds['defects'].append(errors.HeaderMissingRequiredValue())
+            kwds['datetime'] = None
+            kwds['decoded'] = ''
+            kwds['parse_tree'] = parser.TokenList()
+            return
+        if isinstance(value, str):
+            value = utils.parsedate_to_datetime(value)
+        kwds['datetime'] = value
+        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
+        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
+
+    def init(self, *args, **kw):
+        self._datetime = kw.pop('datetime')
+        super().init(*args, **kw)
+
+    @property
+    def datetime(self):
+        return self._datetime
+
+
+class UniqueDateHeader(DateHeader):
+
+    max_count = 1
+
+
+class AddressHeader:
+
+    max_count = None
+
+    @staticmethod
+    def value_parser(value):
+        address_list, value = parser.get_address_list(value)
+        assert not value, 'this should not happen'
+        return address_list
+
+    @classmethod
+    def parse(cls, value, kwds):
+        if isinstance(value, str):
+            # We are translating here from the RFC language (address/mailbox)
+            # to our API language (group/address).
+            kwds['parse_tree'] = address_list = cls.value_parser(value)
+            groups = []
+            for addr in address_list.addresses:
+                groups.append(Group(addr.display_name,
+                                    [Address(mb.display_name or '',
+                                             mb.local_part or '',
+                                             mb.domain or '')
+                                     for mb in addr.all_mailboxes]))
+            defects = list(address_list.all_defects)
+        else:
+            # Assume it is Address/Group stuff
+            if not hasattr(value, '__iter__'):
+                value = [value]
+            groups = [Group(None, [item]) if not hasattr(item, 'addresses')
+                                          else item
+                                    for item in value]
+            defects = []
+        kwds['groups'] = groups
+        kwds['defects'] = defects
+        kwds['decoded'] = ', '.join([str(item) for item in groups])
+        if 'parse_tree' not in kwds:
+            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
+
+    def init(self, *args, **kw):
+        self._groups = tuple(kw.pop('groups'))
+        self._addresses = None
+        super().init(*args, **kw)
+
+    @property
+    def groups(self):
+        return self._groups
+
+    @property
+    def addresses(self):
+        if self._addresses is None:
+            self._addresses = tuple([address for group in self._groups
+                                             for address in group.addresses])
+        return self._addresses
+
+
+class UniqueAddressHeader(AddressHeader):
+
+    max_count = 1
+
+
+class SingleAddressHeader(AddressHeader):
+
+    @property
+    def address(self):
+        if len(self.addresses)!=1:
+            raise ValueError(("value of single address header {} is not "
+                "a single address").format(self.name))
+        return self.addresses[0]
+
+
+class UniqueSingleAddressHeader(SingleAddressHeader):
+
+    max_count = 1
+
+
+class MIMEVersionHeader:
+
+    max_count = 1
+
+    value_parser = staticmethod(parser.parse_mime_version)
+
+    @classmethod
+    def parse(cls, value, kwds):
+        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
+        kwds['decoded'] = str(parse_tree)
+        kwds['defects'].extend(parse_tree.all_defects)
+        kwds['major'] = None if parse_tree.minor is None else parse_tree.major
+        kwds['minor'] = parse_tree.minor
+        if parse_tree.minor is not None:
+            kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])
+        else:
+            kwds['version'] = None
+
+    def init(self, *args, **kw):
+        self._version = kw.pop('version')
+        self._major = kw.pop('major')
+        self._minor = kw.pop('minor')
+        super().init(*args, **kw)
+
+    @property
+    def major(self):
+        return self._major
+
+    @property
+    def minor(self):
+        return self._minor
+
+    @property
+    def version(self):
+        return self._version
+
+
+class ParameterizedMIMEHeader:
+
+    # Mixin that handles the params dict.  Must be subclassed and
+    # a property value_parser for the specific header provided.
+
+    max_count = 1
+
+    @classmethod
+    def parse(cls, value, kwds):
+        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
+        kwds['decoded'] = str(parse_tree)
+        kwds['defects'].extend(parse_tree.all_defects)
+        if parse_tree.params is None:
+            kwds['params'] = {}
+        else:
+            # The MIME RFCs specify that parameter ordering is arbitrary.
+            kwds['params'] = {utils._sanitize(name).lower():
+                                    utils._sanitize(value)
+                               for name, value in parse_tree.params}
+
+    def init(self, *args, **kw):
+        self._params = kw.pop('params')
+        super().init(*args, **kw)
+
+    @property
+    def params(self):
+        return MappingProxyType(self._params)
+
+
+class ContentTypeHeader(ParameterizedMIMEHeader):
+
+    value_parser = staticmethod(parser.parse_content_type_header)
+
+    def init(self, *args, **kw):
+        super().init(*args, **kw)
+        self._maintype = utils._sanitize(self._parse_tree.maintype)
+        self._subtype = utils._sanitize(self._parse_tree.subtype)
+
+    @property
+    def maintype(self):
+        return self._maintype
+
+    @property
+    def subtype(self):
+        return self._subtype
+
+    @property
+    def content_type(self):
+        return self.maintype + '/' + self.subtype
+
+
+class ContentDispositionHeader(ParameterizedMIMEHeader):
+
+    value_parser = staticmethod(parser.parse_content_disposition_header)
+
+    def init(self, *args, **kw):
+        super().init(*args, **kw)
+        cd = self._parse_tree.content_disposition
+        self._content_disposition = cd if cd is None else utils._sanitize(cd)
+
+    @property
+    def content_disposition(self):
+        return self._content_disposition
+
+
+class ContentTransferEncodingHeader:
+
+    max_count = 1
+
+    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)
+
+    @classmethod
+    def parse(cls, value, kwds):
+        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
+        kwds['decoded'] = str(parse_tree)
+        kwds['defects'].extend(parse_tree.all_defects)
+
+    def init(self, *args, **kw):
+        super().init(*args, **kw)
+        self._cte = utils._sanitize(self._parse_tree.cte)
+
+    @property
+    def cte(self):
+        return self._cte
+
+
+# The header factory #
+
+_default_header_map = {
+    'subject':                      UniqueUnstructuredHeader,
+    'date':                         UniqueDateHeader,
+    'resent-date':                  DateHeader,
+    'orig-date':                    UniqueDateHeader,
+    'sender':                       UniqueSingleAddressHeader,
+    'resent-sender':                SingleAddressHeader,
+    'to':                           UniqueAddressHeader,
+    'resent-to':                    AddressHeader,
+    'cc':                           UniqueAddressHeader,
+    'resent-cc':                    AddressHeader,
+    'bcc':                          UniqueAddressHeader,
+    'resent-bcc':                   AddressHeader,
+    'from':                         UniqueAddressHeader,
+    'resent-from':                  AddressHeader,
+    'reply-to':                     UniqueAddressHeader,
+    'mime-version':                 MIMEVersionHeader,
+    'content-type':                 ContentTypeHeader,
+    'content-disposition':          ContentDispositionHeader,
+    'content-transfer-encoding':    ContentTransferEncodingHeader,
+    }
+
+class HeaderRegistry:
+
+    """A header_factory and header registry."""
+
+    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
+                       use_default_map=True):
+        """Create a header_factory that works with the Policy API.
+
+        base_class is the class that will be the last class in the created
+        header class's __bases__ list.  default_class is the class that will be
+        used if "name" (see __call__) does not appear in the registry.
+        use_default_map controls whether or not the default mapping of names to
+        specialized classes is copied in to the registry when the factory is
+        created.  The default is True.
+
+        """
+        self.registry = {}
+        self.base_class = base_class
+        self.default_class = default_class
+        if use_default_map:
+            self.registry.update(_default_header_map)
+
+    def map_to_type(self, name, cls):
+        """Register cls as the specialized class for handling "name" headers.
+
+        """
+        self.registry[name.lower()] = cls
+
+    def __getitem__(self, name):
+        cls = self.registry.get(name.lower(), self.default_class)
+        return type('_'+cls.__name__, (cls, self.base_class), {})
+
+    def __call__(self, name, value):
+        """Create a header instance for header 'name' from 'value'.
+
+        Creates a header instance by creating a specialized class for parsing
+        and representing the specified header by combining the factory
+        base_class with a specialized class from the registry or the
+        default_class, and passing the name and value to the constructed
+        class's constructor.
+
+        """
+        return self[name](name, value)
diff --git a/modules/language/python/module/email/iterators.py b/modules/language/python/module/email/iterators.py
new file mode 100644
index 0000000..0773bdf
--- /dev/null
+++ b/modules/language/python/module/email/iterators.py
@@ -0,0 +1,72 @@
+module(email,iterators)
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Various types of useful iterators and generators."""
+
+__all__ = [
+    'body_line_iterator',
+    'typed_subpart_iterator',
+    'walk',
+    # Do not include _structure() since it's part of the debugging API.
+    ]
+
+import sys
+from io import StringIO
+
+
+
+# This function will become a method of the Message class
+def walk(self):
+    """Walk over the message tree, yielding each subpart.
+
+    The walk is performed in depth-first order.  This method is a
+    generator.
+    """
+    yield self
+    if self.is_multipart():
+        for subpart in self.get_payload():
+            yield from subpart.walk()
+
+
+
+# These two functions are imported into the Iterators.py interface module.
+def body_line_iterator(msg, decode=False):
+    """Iterate over the parts, returning string payloads line-by-line.
+
+    Optional decode (default False) is passed through to .get_payload().
+    """
+    for subpart in msg.walk():
+        payload = subpart.get_payload(decode=decode)
+        if isinstance(payload, str):
+            yield from StringIO(payload)
+
+
+def typed_subpart_iterator(msg, maintype='text', subtype=None):
+    """Iterate over the subparts with a given MIME type.
+
+    Use `maintype' as the main MIME type to match against; this defaults to
+    "text".  Optional `subtype' is the MIME subtype to match against; if
+    omitted, only the main type is matched.
+    """
+    for subpart in msg.walk():
+        if subpart.get_content_maintype() == maintype:
+            if subtype is None or subpart.get_content_subtype() == subtype:
+                yield subpart
+
+
+
+def _structure(msg, fp=None, level=0, include_default=False):
+    """A handy debugging aid"""
+    if fp is None:
+        fp = sys.stdout
+    tab = ' ' * (level * 4)
+    print(tab + msg.get_content_type(), end='', file=fp)
+    if include_default:
+        print(' [%s]' % msg.get_default_type(), file=fp)
+    else:
+        print(file=fp)
+    if msg.is_multipart():
+        for subpart in msg.get_payload():
+            _structure(subpart, fp, level+1, include_default)
diff --git a/modules/language/python/module/email/message.py b/modules/language/python/module/email/message.py
new file mode 100644
index 0000000..c73aa3c
--- /dev/null
+++ b/modules/language/python/module/email/message.py
@@ -0,0 +1,1164 @@
+module(email,message)
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Basic message object for the email package object model."""
+
+__all__ = ['Message', 'EmailMessage']
+
+import re
+import uu
+import quopri
+from io import BytesIO, StringIO
+
+# Intrapackage imports
+import email.utils as utils
+import email.errors as errors
+from email._policybase import Policy, compat32
+import email.charset as _charset
+from email._encoded_words import decode_b
+Charset = _charset.Charset
+
+SEMISPACE = '; '
+
+# Regular expression that matches `special' characters in parameters, the
+# existence of which force quoting of the parameter value.
+tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
+
+
+def _splitparam(param):
+    # Split header parameters.  BAW: this may be too simple.  It isn't
+    # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
+    # found in the wild.  We may eventually need a full fledged parser.
+    # RDM: we might have a Header here; for now just stringify it.
+    a, sep, b = str(param).partition(';')
+    if not sep:
+        return a.strip(), None
+    return a.strip(), b.strip()
+
+def _formatparam(param, value=None, quote=True):
+    """Convenience function to format and return a key=value pair.
+
+    This will quote the value if needed or if quote is true.  If value is a
+    three tuple (charset, language, value), it will be encoded according
+    to RFC2231 rules.  If it contains non-ascii characters it will likewise
+    be encoded according to RFC2231 rules, using the utf-8 charset and
+    a null language.
+    """
+    if value is not None and len(value) > 0:
+        # A tuple is used for RFC 2231 encoded parameter values where items
+        # are (charset, language, value).  charset is a string, not a Charset
+        # instance.  RFC 2231 encoded values are never quoted, per RFC.
+        if isinstance(value, tuple):
+            # Encode as per RFC 2231
+            param += '*'
+            value = utils.encode_rfc2231(value[2], value[0], value[1])
+            return '%s=%s' % (param, value)
+        else:
+            try:
+                value.encode('ascii')
+            except UnicodeEncodeError:
+                param += '*'
+                value = utils.encode_rfc2231(value, 'utf-8', '')
+                return '%s=%s' % (param, value)
+        # BAW: Please check this.  I think that if quote is set it should
+        # force quoting even if not necessary.
+        if quote or tspecials.search(value):
+            return '%s="%s"' % (param, utils.quote(value))
+        else:
+            return '%s=%s' % (param, value)
+    else:
+        return param
+
+def _parseparam(s):
+    # RDM This might be a Header, so for now stringify it.
+    s = ';' + str(s)
+    plist = []
+    while s[:1] == ';':
+        s = s[1:]
+        end = s.find(';')
+        while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
+            end = s.find(';', end + 1)
+        if end < 0:
+            end = len(s)
+        f = s[:end]
+        if '=' in f:
+            i = f.index('=')
+            f = f[:i].strip().lower() + '=' + f[i+1:].strip()
+        plist.append(f.strip())
+        s = s[end:]
+    return plist
+
+
+def _unquotevalue(value):
+    # This is different than utils.collapse_rfc2231_value() because it doesn't
+    # try to convert the value to a unicode.  Message.get_param() and
+    # Message.get_params() are both currently defined to return the tuple in
+    # the face of RFC 2231 parameters.
+    if isinstance(value, tuple):
+        return value[0], value[1], utils.unquote(value[2])
+    else:
+        return utils.unquote(value)
+
+
+class Message:
+    """Basic message object.
+
+    A message object is defined as something that has a bunch of RFC 2822
+    headers and a payload.  It may optionally have an envelope header
+    (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
+    multipart or a message/rfc822), then the payload is a list of Message
+    objects, otherwise it is a string.
+
+    Message objects implement part of the `mapping' interface, which assumes
+    there is exactly one occurrence of the header per message.  Some headers
+    do in fact appear multiple times (e.g. Received) and for those headers,
+    you must use the explicit API to set or get all the headers.  Not all of
+    the mapping methods are implemented.
+    """
+    def __init__(self, policy=compat32):
+        self.policy = policy
+        self._headers = []
+        self._unixfrom = None
+        self._payload = None
+        self._charset = None
+        # Defaults for multipart messages
+        self.preamble = self.epilogue = None
+        self.defects = []
+        # Default content type
+        self._default_type = 'text/plain'
+
+    def __str__(self):
+        """Return the entire formatted message as a string.
+        """
+        return self.as_string()
+
+    def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
+        """Return the entire formatted message as a string.
+
+        Optional 'unixfrom', when true, means include the Unix From_ envelope
+        header.  For backward compatibility reasons, if maxheaderlen is
+        not specified it defaults to 0, so you must override it explicitly
+        if you want a different maxheaderlen.  'policy' is passed to the
+        Generator instance used to serialize the mesasge; if it is not
+        specified the policy associated with the message instance is used.
+
+        If the message object contains binary data that is not encoded
+        according to RFC standards, the non-compliant data will be replaced by
+        unicode "unknown character" code points.
+        """
+        from email.generator import Generator
+        policy = self.policy if policy is None else policy
+        fp = StringIO()
+        g = Generator(fp,
+                      mangle_from_=False,
+                      maxheaderlen=maxheaderlen,
+                      policy=policy)
+        g.flatten(self, unixfrom=unixfrom)
+        return fp.getvalue()
+
+    def __bytes__(self):
+        """Return the entire formatted message as a bytes object.
+        """
+        return self.as_bytes()
+
+    def as_bytes(self, unixfrom=False, policy=None):
+        """Return the entire formatted message as a bytes object.
+
+        Optional 'unixfrom', when true, means include the Unix From_ envelope
+        header.  'policy' is passed to the BytesGenerator instance used to
+        serialize the message; if not specified the policy associated with
+        the message instance is used.
+        """
+        from email.generator import BytesGenerator
+        policy = self.policy if policy is None else policy
+        fp = BytesIO()
+        g = BytesGenerator(fp, mangle_from_=False, policy=policy)
+        g.flatten(self, unixfrom=unixfrom)
+        return fp.getvalue()
+
+    def is_multipart(self):
+        """Return True if the message consists of multiple parts."""
+        return isinstance(self._payload, list)
+
+    #
+    # Unix From_ line
+    #
+    def set_unixfrom(self, unixfrom):
+        self._unixfrom = unixfrom
+
+    def get_unixfrom(self):
+        return self._unixfrom
+
+    #
+    # Payload manipulation.
+    #
+    def attach(self, payload):
+        """Add the given payload to the current payload.
+
+        The current payload will always be a list of objects after this method
+        is called.  If you want to set the payload to a scalar object, use
+        set_payload() instead.
+        """
+        if self._payload is None:
+            self._payload = [payload]
+        else:
+            try:
+                self._payload.append(payload)
+            except AttributeError:
+                raise TypeError("Attach is not valid on a message with a"
+                                " non-multipart payload")
+
+    def get_payload(self, i=None, decode=False):
+        """Return a reference to the payload.
+
+        The payload will either be a list object or a string.  If you mutate
+        the list object, you modify the message's payload in place.  Optional
+        i returns that index into the payload.
+
+        Optional decode is a flag indicating whether the payload should be
+        decoded or not, according to the Content-Transfer-Encoding header
+        (default is False).
+
+        When True and the message is not a multipart, the payload will be
+        decoded if this header's value is `quoted-printable' or `base64'.  If
+        some other encoding is used, or the header is missing, or if the
+        payload has bogus data (i.e. bogus base64 or uuencoded data), the
+        payload is returned as-is.
+
+        If the message is a multipart and the decode flag is True, then None
+        is returned.
+        """
+        # Here is the logic table for this code, based on the email5.0.0 code:
+        #   i     decode  is_multipart  result
+        # ------  ------  ------------  ------------------------------
+        #  None   True    True          None
+        #   i     True    True          None
+        #  None   False   True          _payload (a list)
+        #   i     False   True          _payload element i (a Message)
+        #   i     False   False         error (not a list)
+        #   i     True    False         error (not a list)
+        #  None   False   False         _payload
+        #  None   True    False         _payload decoded (bytes)
+        # Note that Barry planned to factor out the 'decode' case, but that
+        # isn't so easy now that we handle the 8 bit data, which needs to be
+        # converted in both the decode and non-decode path.
+        if self.is_multipart():
+            if decode:
+                return None
+            if i is None:
+                return self._payload
+            else:
+                return self._payload[i]
+        # For backward compatibility, Use isinstance and this error message
+        # instead of the more logical is_multipart test.
+        if i is not None and not isinstance(self._payload, list):
+            raise TypeError('Expected list, got %s' % type(self._payload))
+        payload = self._payload
+        # cte might be a Header, so for now stringify it.
+        cte = str(self.get('content-transfer-encoding', '')).lower()
+        # payload may be bytes here.
+        if isinstance(payload, str):
+            if utils._has_surrogates(payload):
+                bpayload = payload.encode('ascii', 'surrogateescape')
+                if not decode:
+                    try:
+                        payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
+                    except LookupError:
+                        payload = bpayload.decode('ascii', 'replace')
+            elif decode:
+                try:
+                    bpayload = payload.encode('ascii')
+                except UnicodeError:
+                    # This won't happen for RFC compliant messages (messages
+                    # containing only ASCII code points in the unicode input).
+                    # If it does happen, turn the string into bytes in a way
+                    # guaranteed not to fail.
+                    bpayload = payload.encode('raw-unicode-escape')
+        if not decode:
+            return payload
+        if cte == 'quoted-printable':
+            return quopri.decodestring(bpayload)
+        elif cte == 'base64':
+            # XXX: this is a bit of a hack; decode_b should probably be factored
+            # out somewhere, but I haven't figured out where yet.
+            value, defects = decode_b(b''.join(bpayload.splitlines()))
+            for defect in defects:
+                self.policy.handle_defect(self, defect)
+            return value
+        elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
+            in_file = BytesIO(bpayload)
+            out_file = BytesIO()
+            try:
+                uu.decode(in_file, out_file, quiet=True)
+                return out_file.getvalue()
+            except uu.Error:
+                # Some decoding problem
+                return bpayload
+        if isinstance(payload, str):
+            return bpayload
+        return payload
+
+    def set_payload(self, payload, charset=None):
+        """Set the payload to the given value.
+
+        Optional charset sets the message's default character set.  See
+        set_charset() for details.
+        """
+        if hasattr(payload, 'encode'):
+            if charset is None:
+                self._payload = payload
+                return
+            if not isinstance(charset, Charset):
+                charset = Charset(charset)
+            payload = payload.encode(charset.output_charset)
+        if hasattr(payload, 'decode'):
+            self._payload = payload.decode('ascii', 'surrogateescape')
+        else:
+            self._payload = payload
+        if charset is not None:
+            self.set_charset(charset)
+
+    def set_charset(self, charset):
+        """Set the charset of the payload to a given character set.
+
+        charset can be a Charset instance, a string naming a character set, or
+        None.  If it is a string it will be converted to a Charset instance.
+        If charset is None, the charset parameter will be removed from the
+        Content-Type field.  Anything else will generate a TypeError.
+
+        The message will be assumed to be of type text/* encoded with
+        charset.input_charset.  It will be converted to charset.output_charset
+        and encoded properly, if needed, when generating the plain text
+        representation of the message.  MIME headers (MIME-Version,
+        Content-Type, Content-Transfer-Encoding) will be added as needed.
+        """
+        if charset is None:
+            self.del_param('charset')
+            self._charset = None
+            return
+        if not isinstance(charset, Charset):
+            charset = Charset(charset)
+        self._charset = charset
+        if 'MIME-Version' not in self:
+            self.add_header('MIME-Version', '1.0')
+        if 'Content-Type' not in self:
+            self.add_header('Content-Type', 'text/plain',
+                            charset=charset.get_output_charset())
+        else:
+            self.set_param('charset', charset.get_output_charset())
+        if charset != charset.get_output_charset():
+            self._payload = charset.body_encode(self._payload)
+        if 'Content-Transfer-Encoding' not in self:
+            cte = charset.get_body_encoding()
+            try:
+                cte(self)
+            except TypeError:
+                # This 'if' is for backward compatibility, it allows unicode
+                # through even though that won't work correctly if the
+                # message is serialized.
+                payload = self._payload
+                if payload:
+                    try:
+                        payload = payload.encode('ascii', 'surrogateescape')
+                    except UnicodeError:
+                        payload = payload.encode(charset.output_charset)
+                self._payload = charset.body_encode(payload)
+                self.add_header('Content-Transfer-Encoding', cte)
+
+    def get_charset(self):
+        """Return the Charset instance associated with the message's payload.
+        """
+        return self._charset
+
+    #
+    # MAPPING INTERFACE (partial)
+    #
+    def __len__(self):
+        """Return the total number of headers, including duplicates."""
+        return len(self._headers)
+
+    def __getitem__(self, name):
+        """Get a header value.
+
+        Return None if the header is missing instead of raising an exception.
+
+        Note that if the header appeared multiple times, exactly which
+        occurrence gets returned is undefined.  Use get_all() to get all
+        the values matching a header field name.
+        """
+        return self.get(name)
+
+    def __setitem__(self, name, val):
+        """Set the value of a header.
+
+        Note: this does not overwrite an existing header with the same field
+        name.  Use __delitem__() first to delete any existing headers.
+        """
+        max_count = self.policy.header_max_count(name)
+        if max_count:
+            lname = name.lower()
+            found = 0
+            for k, v in self._headers:
+                if k.lower() == lname:
+                    found += 1
+                    if found >= max_count:
+                        raise ValueError("There may be at most {} {} headers "
+                                         "in a message".format(max_count, name))
+        self._headers.append(self.policy.header_store_parse(name, val))
+
+    def __delitem__(self, name):
+        """Delete all occurrences of a header, if present.
+
+        Does not raise an exception if the header is missing.
+        """
+        name = name.lower()
+        newheaders = []
+        for k, v in self._headers:
+            if k.lower() != name:
+                newheaders.append((k, v))
+        self._headers = newheaders
+
+    def __contains__(self, name):
+        return name.lower() in [k.lower() for k, v in self._headers]
+
+    def __iter__(self):
+        for field, value in self._headers:
+            yield field
+
+    def keys(self):
+        """Return a list of all the message's header field names.
+
+        These will be sorted in the order they appeared in the original
+        message, or were added to the message, and may contain duplicates.
+        Any fields deleted and re-inserted are always appended to the header
+        list.
+        """
+        return [k for k, v in self._headers]
+
+    def values(self):
+        """Return a list of all the message's header values.
+
+        These will be sorted in the order they appeared in the original
+        message, or were added to the message, and may contain duplicates.
+        Any fields deleted and re-inserted are always appended to the header
+        list.
+        """
+        return [self.policy.header_fetch_parse(k, v)
+                for k, v in self._headers]
+
+    def items(self):
+        """Get all the message's header fields and values.
+
+        These will be sorted in the order they appeared in the original
+        message, or were added to the message, and may contain duplicates.
+        Any fields deleted and re-inserted are always appended to the header
+        list.
+        """
+        return [(k, self.policy.header_fetch_parse(k, v))
+                for k, v in self._headers]
+
+    def get(self, name, failobj=None):
+        """Get a header value.
+
+        Like __getitem__() but return failobj instead of None when the field
+        is missing.
+        """
+        name = name.lower()
+        for k, v in self._headers:
+            if k.lower() == name:
+                return self.policy.header_fetch_parse(k, v)
+        return failobj
+
+    #
+    # "Internal" methods (public API, but only intended for use by a parser
+    # or generator, not normal application code.
+    #
+
+    def set_raw(self, name, value):
+        """Store name and value in the model without modification.
+
+        This is an "internal" API, intended only for use by a parser.
+        """
+        self._headers.append((name, value))
+
+    def raw_items(self):
+        """Return the (name, value) header pairs without modification.
+
+        This is an "internal" API, intended only for use by a generator.
+        """
+        return iter(self._headers.copy())
+
+    #
+    # Additional useful stuff
+    #
+
+    def get_all(self, name, failobj=None):
+        """Return a list of all the values for the named field.
+
+        These will be sorted in the order they appeared in the original
+        message, and may contain duplicates.  Any fields deleted and
+        re-inserted are always appended to the header list.
+
+        If no such fields exist, failobj is returned (defaults to None).
+        """
+        values = []
+        name = name.lower()
+        for k, v in self._headers:
+            if k.lower() == name:
+                values.append(self.policy.header_fetch_parse(k, v))
+        if not values:
+            return failobj
+        return values
+
+    def add_header(self, _name, _value, **_params):
+        """Extended header setting.
+
+        name is the header field to add.  keyword arguments can be used to set
+        additional parameters for the header field, with underscores converted
+        to dashes.  Normally the parameter will be added as key="value" unless
+        value is None, in which case only the key will be added.  If a
+        parameter value contains non-ASCII characters it can be specified as a
+        three-tuple of (charset, language, value), in which case it will be
+        encoded according to RFC2231 rules.  Otherwise it will be encoded using
+        the utf-8 charset and a language of ''.
+
+        Examples:
+
+        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
+        msg.add_header('content-disposition', 'attachment',
+                       filename=('utf-8', '', Fußballer.ppt'))
+        msg.add_header('content-disposition', 'attachment',
+                       filename='Fußballer.ppt'))
+        """
+        parts = []
+        for k, v in _params.items():
+            if v is None:
+                parts.append(k.replace('_', '-'))
+            else:
+                parts.append(_formatparam(k.replace('_', '-'), v))
+        if _value is not None:
+            parts.insert(0, _value)
+        self[_name] = SEMISPACE.join(parts)
+
+    def replace_header(self, _name, _value):
+        """Replace a header.
+
+        Replace the first matching header found in the message, retaining
+        header order and case.  If no matching header was found, a KeyError is
+        raised.
+        """
+        _name = _name.lower()
+        for i, (k, v) in zip(range(len(self._headers)), self._headers):
+            if k.lower() == _name:
+                self._headers[i] = self.policy.header_store_parse(k, _value)
+                break
+        else:
+            raise KeyError(_name)
+
+    #
+    # Use these three methods instead of the three above.
+    #
+
+    def get_content_type(self):
+        """Return the message's content type.
+
+        The returned string is coerced to lower case of the form
+        `maintype/subtype'.  If there was no Content-Type header in the
+        message, the default type as given by get_default_type() will be
+        returned.  Since according to RFC 2045, messages always have a default
+        type this will always return a value.
+
+        RFC 2045 defines a message's default type to be text/plain unless it
+        appears inside a multipart/digest container, in which case it would be
+        message/rfc822.
+        """
+        missing = object()
+        value = self.get('content-type', missing)
+        if value is missing:
+            # This should have no parameters
+            return self.get_default_type()
+        ctype = _splitparam(value)[0].lower()
+        # RFC 2045, section 5.2 says if its invalid, use text/plain
+        if ctype.count('/') != 1:
+            return 'text/plain'
+        return ctype
+
+    def get_content_maintype(self):
+        """Return the message's main content type.
+
+        This is the `maintype' part of the string returned by
+        get_content_type().
+        """
+        ctype = self.get_content_type()
+        return ctype.split('/')[0]
+
+    def get_content_subtype(self):
+        """Returns the message's sub-content type.
+
+        This is the `subtype' part of the string returned by
+        get_content_type().
+        """
+        ctype = self.get_content_type()
+        return ctype.split('/')[1]
+
+    def get_default_type(self):
+        """Return the `default' content type.
+
+        Most messages have a default content type of text/plain, except for
+        messages that are subparts of multipart/digest containers.  Such
+        subparts have a default content type of message/rfc822.
+        """
+        return self._default_type
+
+    def set_default_type(self, ctype):
+        """Set the `default' content type.
+
+        ctype should be either "text/plain" or "message/rfc822", although this
+        is not enforced.  The default content type is not stored in the
+        Content-Type header.
+        """
+        self._default_type = ctype
+
+    def _get_params_preserve(self, failobj, header):
+        # Like get_params() but preserves the quoting of values.  BAW:
+        # should this be part of the public interface?
+        missing = object()
+        value = self.get(header, missing)
+        if value is missing:
+            return failobj
+        params = []
+        for p in _parseparam(value):
+            try:
+                name, val = p.split('=', 1)
+                name = name.strip()
+                val = val.strip()
+            except ValueError:
+                # Must have been a bare attribute
+                name = p.strip()
+                val = ''
+            params.append((name, val))
+        params = utils.decode_params(params)
+        return params
+
+    def get_params(self, failobj=None, header='content-type', unquote=True):
+        """Return the message's Content-Type parameters, as a list.
+
+        The elements of the returned list are 2-tuples of key/value pairs, as
+        split on the `=' sign.  The left hand side of the `=' is the key,
+        while the right hand side is the value.  If there is no `=' sign in
+        the parameter the value is the empty string.  The value is as
+        described in the get_param() method.
+
+        Optional failobj is the object to return if there is no Content-Type
+        header.  Optional header is the header to search instead of
+        Content-Type.  If unquote is True, the value is unquoted.
+        """
+        missing = object()
+        params = self._get_params_preserve(missing, header)
+        if params is missing:
+            return failobj
+        if unquote:
+            return [(k, _unquotevalue(v)) for k, v in params]
+        else:
+            return params
+
+    def get_param(self, param, failobj=None, header='content-type',
+                  unquote=True):
+        """Return the parameter value if found in the Content-Type header.
+
+        Optional failobj is the object to return if there is no Content-Type
+        header, or the Content-Type header has no such parameter.  Optional
+        header is the header to search instead of Content-Type.
+
+        Parameter keys are always compared case insensitively.  The return
+        value can either be a string, or a 3-tuple if the parameter was RFC
+        2231 encoded.  When it's a 3-tuple, the elements of the value are of
+        the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
+        LANGUAGE can be None, in which case you should consider VALUE to be
+        encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
+        The parameter value (either the returned string, or the VALUE item in
+        the 3-tuple) is always unquoted, unless unquote is set to False.
+
+        If your application doesn't care whether the parameter was RFC 2231
+        encoded, it can turn the return value into a string as follows:
+
+            rawparam = msg.get_param('foo')
+            param = email.utils.collapse_rfc2231_value(rawparam)
+
+        """
+        if header not in self:
+            return failobj
+        for k, v in self._get_params_preserve(failobj, header):
+            if k.lower() == param.lower():
+                if unquote:
+                    return _unquotevalue(v)
+                else:
+                    return v
+        return failobj
+
+    def set_param(self, param, value, header='Content-Type', requote=True,
+                  charset=None, language='', replace=False):
+        """Set a parameter in the Content-Type header.
+
+        If the parameter already exists in the header, its value will be
+        replaced with the new value.
+
+        If header is Content-Type and has not yet been defined for this
+        message, it will be set to "text/plain" and the new parameter and
+        value will be appended as per RFC 2045.
+
+        An alternate header can be specified in the header argument, and all
+        parameters will be quoted as necessary unless requote is False.
+
+        If charset is specified, the parameter will be encoded according to RFC
+        2231.  Optional language specifies the RFC 2231 language, defaulting
+        to the empty string.  Both charset and language should be strings.
+        """
+        if not isinstance(value, tuple) and charset:
+            value = (charset, language, value)
+
+        if header not in self and header.lower() == 'content-type':
+            ctype = 'text/plain'
+        else:
+            ctype = self.get(header)
+        if not self.get_param(param, header=header):
+            if not ctype:
+                ctype = _formatparam(param, value, requote)
+            else:
+                ctype = SEMISPACE.join(
+                    [ctype, _formatparam(param, value, requote)])
+        else:
+            ctype = ''
+            for old_param, old_value in self.get_params(header=header,
+                                                        unquote=requote):
+                append_param = ''
+                if old_param.lower() == param.lower():
+                    append_param = _formatparam(param, value, requote)
+                else:
+                    append_param = _formatparam(old_param, old_value, requote)
+                if not ctype:
+                    ctype = append_param
+                else:
+                    ctype = SEMISPACE.join([ctype, append_param])
+        if ctype != self.get(header):
+            if replace:
+                self.replace_header(header, ctype)
+            else:
+                del self[header]
+                self[header] = ctype
+
+    def del_param(self, param, header='content-type', requote=True):
+        """Remove the given parameter completely from the Content-Type header.
+
+        The header will be re-written in place without the parameter or its
+        value. All values will be quoted as necessary unless requote is
+        False.  Optional header specifies an alternative to the Content-Type
+        header.
+        """
+        if header not in self:
+            return
+        new_ctype = ''
+        for p, v in self.get_params(header=header, unquote=requote):
+            if p.lower() != param.lower():
+                if not new_ctype:
+                    new_ctype = _formatparam(p, v, requote)
+                else:
+                    new_ctype = SEMISPACE.join([new_ctype,
+                                                _formatparam(p, v, requote)])
+        if new_ctype != self.get(header):
+            del self[header]
+            self[header] = new_ctype
+
+    def set_type(self, type, header='Content-Type', requote=True):
+        """Set the main type and subtype for the Content-Type header.
+
+        type must be a string in the form "maintype/subtype", otherwise a
+        ValueError is raised.
+
+        This method replaces the Content-Type header, keeping all the
+        parameters in place.  If requote is False, this leaves the existing
+        header's quoting as is.  Otherwise, the parameters will be quoted (the
+        default).
+
+        An alternative header can be specified in the header argument.  When
+        the Content-Type header is set, we'll always also add a MIME-Version
+        header.
+        """
+        # BAW: should we be strict?
+        if not type.count('/') == 1:
+            raise ValueError
+        # Set the Content-Type, you get a MIME-Version
+        if header.lower() == 'content-type':
+            del self['mime-version']
+            self['MIME-Version'] = '1.0'
+        if header not in self:
+            self[header] = type
+            return
+        params = self.get_params(header=header, unquote=requote)
+        del self[header]
+        self[header] = type
+        # Skip the first param; it's the old type.
+        for p, v in params[1:]:
+            self.set_param(p, v, header, requote)
+
+    def get_filename(self, failobj=None):
+        """Return the filename associated with the payload if present.
+
+        The filename is extracted from the Content-Disposition header's
+        `filename' parameter, and it is unquoted.  If that header is missing
+        the `filename' parameter, this method falls back to looking for the
+        `name' parameter.
+        """
+        missing = object()
+        filename = self.get_param('filename', missing, 'content-disposition')
+        if filename is missing:
+            filename = self.get_param('name', missing, 'content-type')
+        if filename is missing:
+            return failobj
+        return utils.collapse_rfc2231_value(filename).strip()
+
+    def get_boundary(self, failobj=None):
+        """Return the boundary associated with the payload if present.
+
+        The boundary is extracted from the Content-Type header's `boundary'
+        parameter, and it is unquoted.
+        """
+        missing = object()
+        boundary = self.get_param('boundary', missing)
+        if boundary is missing:
+            return failobj
+        # RFC 2046 says that boundaries may begin but not end in w/s
+        return utils.collapse_rfc2231_value(boundary).rstrip()
+
+    def set_boundary(self, boundary):
+        """Set the boundary parameter in Content-Type to 'boundary'.
+
+        This is subtly different than deleting the Content-Type header and
+        adding a new one with a new boundary parameter via add_header().  The
+        main difference is that using the set_boundary() method preserves the
+        order of the Content-Type header in the original message.
+
+        HeaderParseError is raised if the message has no Content-Type header.
+        """
+        missing = object()
+        params = self._get_params_preserve(missing, 'content-type')
+        if params is missing:
+            # There was no Content-Type header, and we don't know what type
+            # to set it to, so raise an exception.
+            raise errors.HeaderParseError('No Content-Type header found')
+        newparams = []
+        foundp = False
+        for pk, pv in params:
+            if pk.lower() == 'boundary':
+                newparams.append(('boundary', '"%s"' % boundary))
+                foundp = True
+            else:
+                newparams.append((pk, pv))
+        if not foundp:
+            # The original Content-Type header had no boundary attribute.
+            # Tack one on the end.  BAW: should we raise an exception
+            # instead???
+            newparams.append(('boundary', '"%s"' % boundary))
+        # Replace the existing Content-Type header with the new value
+        newheaders = []
+        for h, v in self._headers:
+            if h.lower() == 'content-type':
+                parts = []
+                for k, v in newparams:
+                    if v == '':
+                        parts.append(k)
+                    else:
+                        parts.append('%s=%s' % (k, v))
+                val = SEMISPACE.join(parts)
+                newheaders.append(self.policy.header_store_parse(h, val))
+
+            else:
+                newheaders.append((h, v))
+        self._headers = newheaders
+
+    def get_content_charset(self, failobj=None):
+        """Return the charset parameter of the Content-Type header.
+
+        The returned string is always coerced to lower case.  If there is no
+        Content-Type header, or if that header has no charset parameter,
+        failobj is returned.
+        """
+        missing = object()
+        charset = self.get_param('charset', missing)
+        if charset is missing:
+            return failobj
+        if isinstance(charset, tuple):
+            # RFC 2231 encoded, so decode it, and it better end up as ascii.
+            pcharset = charset[0] or 'us-ascii'
+            try:
+                # LookupError will be raised if the charset isn't known to
+                # Python.  UnicodeError will be raised if the encoded text
+                # contains a character not in the charset.
+                as_bytes = charset[2].encode('raw-unicode-escape')
+                charset = str(as_bytes, pcharset)
+            except (LookupError, UnicodeError):
+                charset = charset[2]
+        # charset characters must be in us-ascii range
+        try:
+            charset.encode('us-ascii')
+        except UnicodeError:
+            return failobj
+        # RFC 2046, $4.1.2 says charsets are not case sensitive
+        return charset.lower()
+
+    def get_charsets(self, failobj=None):
+        """Return a list containing the charset(s) used in this message.
+
+        The returned list of items describes the Content-Type headers'
+        charset parameter for this message and all the subparts in its
+        payload.
+
+        Each item will either be a string (the value of the charset parameter
+        in the Content-Type header of that part) or the value of the
+        'failobj' parameter (defaults to None), if the part does not have a
+        main MIME type of "text", or the charset is not defined.
+
+        The list will contain one string for each part of the message, plus
+        one for the container message (i.e. self), so that a non-multipart
+        message will still return a list of length 1.
+        """
+        return [part.get_content_charset(failobj) for part in self.walk()]
+
+    def get_content_disposition(self):
+        """Return the message's content-disposition if it exists, or None.
+
+        The return values can be either 'inline', 'attachment' or None
+        according to the rfc2183.
+        """
+        value = self.get('content-disposition')
+        if value is None:
+            return None
+        c_d = _splitparam(value)[0].lower()
+        return c_d
+
+    # I.e. def walk(self): ...
+    from email.iterators import walk
+
+
+class MIMEPart(Message):
+
+    def __init__(self, policy=None):
+        if policy is None:
+            from email.policy import default
+            policy = default
+        Message.__init__(self, policy)
+
+
+    def as_string(self, unixfrom=False, maxheaderlen=None, policy=None):
+        """Return the entire formatted message as a string.
+
+        Optional 'unixfrom', when true, means include the Unix From_ envelope
+        header.  maxheaderlen is retained for backward compatibility with the
+        base Message class, but defaults to None, meaning that the policy value
+        for max_line_length controls the header maximum length.  'policy' is
+        passed to the Generator instance used to serialize the mesasge; if it
+        is not specified the policy associated with the message instance is
+        used.
+        """
+        policy = self.policy if policy is None else policy
+        if maxheaderlen is None:
+            maxheaderlen = policy.max_line_length
+        return super().as_string(maxheaderlen=maxheaderlen, policy=policy)
+
+    def __str__(self):
+        return self.as_string(policy=self.policy.clone(utf8=True))
+
+    def is_attachment(self):
+        c_d = self.get('content-disposition')
+        return False if c_d is None else c_d.content_disposition == 'attachment'
+
+    def _find_body(self, part, preferencelist):
+        if part.is_attachment():
+            return
+        maintype, subtype = part.get_content_type().split('/')
+        if maintype == 'text':
+            if subtype in preferencelist:
+                yield (preferencelist.index(subtype), part)
+            return
+        if maintype != 'multipart':
+            return
+        if subtype != 'related':
+            for subpart in part.iter_parts():
+                yield from self._find_body(subpart, preferencelist)
+            return
+        if 'related' in preferencelist:
+            yield (preferencelist.index('related'), part)
+        candidate = None
+        start = part.get_param('start')
+        if start:
+            for subpart in part.iter_parts():
+                if subpart['content-id'] == start:
+                    candidate = subpart
+                    break
+        if candidate is None:
+            subparts = part.get_payload()
+            candidate = subparts[0] if subparts else None
+        if candidate is not None:
+            yield from self._find_body(candidate, preferencelist)
+
+    def get_body(self, preferencelist=('related', 'html', 'plain')):
+        """Return best candidate mime part for display as 'body' of message.
+
+        Do a depth first search, starting with self, looking for the first part
+        matching each of the items in preferencelist, and return the part
+        corresponding to the first item that has a match, or None if no items
+        have a match.  If 'related' is not included in preferencelist, consider
+        the root part of any multipart/related encountered as a candidate
+        match.  Ignore parts with 'Content-Disposition: attachment'.
+        """
+        best_prio = len(preferencelist)
+        body = None
+        for prio, part in self._find_body(self, preferencelist):
+            if prio < best_prio:
+                best_prio = prio
+                body = part
+                if prio == 0:
+                    break
+        return body
+
+    _body_types = {('text', 'plain'),
+                   ('text', 'html'),
+                   ('multipart', 'related'),
+                   ('multipart', 'alternative')}
+    def iter_attachments(self):
+        """Return an iterator over the non-main parts of a multipart.
+
+        Skip the first of each occurrence of text/plain, text/html,
+        multipart/related, or multipart/alternative in the multipart (unless
+        they have a 'Content-Disposition: attachment' header) and include all
+        remaining subparts in the returned iterator.  When applied to a
+        multipart/related, return all parts except the root part.  Return an
+        empty iterator when applied to a multipart/alternative or a
+        non-multipart.
+        """
+        maintype, subtype = self.get_content_type().split('/')
+        if maintype != 'multipart' or subtype == 'alternative':
+            return
+        parts = self.get_payload().copy()
+        if maintype == 'multipart' and subtype == 'related':
+            # For related, we treat everything but the root as an attachment.
+            # The root may be indicated by 'start'; if there's no start or we
+            # can't find the named start, treat the first subpart as the root.
+            start = self.get_param('start')
+            if start:
+                found = False
+                attachments = []
+                for part in parts:
+                    if part.get('content-id') == start:
+                        found = True
+                    else:
+                        attachments.append(part)
+                if found:
+                    yield from attachments
+                    return
+            parts.pop(0)
+            yield from parts
+            return
+        # Otherwise we more or less invert the remaining logic in get_body.
+        # This only really works in edge cases (ex: non-text related or
+        # alternatives) if the sending agent sets content-disposition.
+        seen = []   # Only skip the first example of each candidate type.
+        for part in parts:
+            maintype, subtype = part.get_content_type().split('/')
+            if ((maintype, subtype) in self._body_types and
+                    not part.is_attachment() and subtype not in seen):
+                seen.append(subtype)
+                continue
+            yield part
+
+    def iter_parts(self):
+        """Return an iterator over all immediate subparts of a multipart.
+
+        Return an empty iterator for a non-multipart.
+        """
+        if self.get_content_maintype() == 'multipart':
+            yield from self.get_payload()
+
+    def get_content(self, *args, content_manager=None, **kw):
+        if content_manager is None:
+            content_manager = self.policy.content_manager
+        return content_manager.get_content(self, *args, **kw)
+
+    def set_content(self, *args, content_manager=None, **kw):
+        if content_manager is None:
+            content_manager = self.policy.content_manager
+        content_manager.set_content(self, *args, **kw)
+
+    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
+        if self.get_content_maintype() == 'multipart':
+            existing_subtype = self.get_content_subtype()
+            disallowed_subtypes = disallowed_subtypes + (subtype,)
+            if existing_subtype in disallowed_subtypes:
+                raise ValueError("Cannot convert {} to {}".format(
+                    existing_subtype, subtype))
+        keep_headers = []
+        part_headers = []
+        for name, value in self._headers:
+            if name.lower().startswith('content-'):
+                part_headers.append((name, value))
+            else:
+                keep_headers.append((name, value))
+        if part_headers:
+            # There is existing content, move it to the first subpart.
+            part = type(self)(policy=self.policy)
+            part._headers = part_headers
+            part._payload = self._payload
+            self._payload = [part]
+        else:
+            self._payload = []
+        self._headers = keep_headers
+        self['Content-Type'] = 'multipart/' + subtype
+        if boundary is not None:
+            self.set_param('boundary', boundary)
+
+    def make_related(self, boundary=None):
+        self._make_multipart('related', ('alternative', 'mixed'), boundary)
+
+    def make_alternative(self, boundary=None):
+        self._make_multipart('alternative', ('mixed',), boundary)
+
+    def make_mixed(self, boundary=None):
+        self._make_multipart('mixed', (), boundary)
+
+    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
+        if (self.get_content_maintype() != 'multipart' or
+                self.get_content_subtype() != _subtype):
+            getattr(self, 'make_' + _subtype)()
+        part = type(self)(policy=self.policy)
+        part.set_content(*args, **kw)
+        if _disp and 'content-disposition' not in part:
+            part['Content-Disposition'] = _disp
+        self.attach(part)
+
+    def add_related(self, *args, **kw):
+        self._add_multipart('related', *args, _disp='inline', **kw)
+
+    def add_alternative(self, *args, **kw):
+        self._add_multipart('alternative', *args, **kw)
+
+    def add_attachment(self, *args, **kw):
+        self._add_multipart('mixed', *args, _disp='attachment', **kw)
+
+    def clear(self):
+        self._headers = []
+        self._payload = None
+
+    def clear_content(self):
+        self._headers = [(n, v) for n, v in self._headers
+                         if not n.lower().startswith('content-')]
+        self._payload = None
+
+
+class EmailMessage(MIMEPart):
+
+    def set_content(self, *args, **kw):
+        super().set_content(*args, **kw)
+        if 'MIME-Version' not in self:
+            self['MIME-Version'] = '1.0'
diff --git a/modules/language/python/module/email/parser.py b/modules/language/python/module/email/parser.py
index 5ec164e..962f9dc 100644
--- a/modules/language/python/module/email/parser.py
+++ b/modules/language/python/module/email/parser.py
@@ -9,7 +9,6 @@ __all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
            'FeedParser', 'BytesFeedParser']
 
 from io import StringIO, TextIOWrapper
-
 from email.feedparser import FeedParser, BytesFeedParser
 from email._policybase import compat32
 
diff --git a/modules/language/python/module/email/policy.py b/modules/language/python/module/email/policy.py
new file mode 100644
index 0000000..3f8551c
--- /dev/null
+++ b/modules/language/python/module/email/policy.py
@@ -0,0 +1,231 @@
+module(email,policy)
+"""This will be the home for the policy that hooks in the new
+code that adds all the email6 features.
+"""
+
+import re
+from email._policybase import Policy, Compat32, compat32, _extend_docstrings
+from email.utils import _has_surrogates
+from email.headerregistry import HeaderRegistry as HeaderRegistry
+from email.contentmanager import raw_data_manager
+from email.message import EmailMessage
+
+__all__ = [
+    'Compat32',
+    'compat32',
+    'Policy',
+    'EmailPolicy',
+    'default',
+    'strict',
+    'SMTP',
+    'HTTP',
+    ]
+
+linesep_splitter = re.compile(r'\n|\r')
+
+@_extend_docstrings
+class EmailPolicy(Policy):
+
+    """+
+    PROVISIONAL
+
+    The API extensions enabled by this policy are currently provisional.
+    Refer to the documentation for details.
+
+    This policy adds new header parsing and folding algorithms.  Instead of
+    simple strings, headers are custom objects with custom attributes
+    depending on the type of the field.  The folding algorithm fully
+    implements RFCs 2047 and 5322.
+
+    In addition to the settable attributes listed above that apply to
+    all Policies, this policy adds the following additional attributes:
+
+    utf8                -- if False (the default) message headers will be
+                           serialized as ASCII, using encoded words to encode
+                           any non-ASCII characters in the source strings.  If
+                           True, the message headers will be serialized using
+                           utf8 and will not contain encoded words (see RFC
+                           6532 for more on this serialization format).
+
+    refold_source       -- if the value for a header in the Message object
+                           came from the parsing of some source, this attribute
+                           indicates whether or not a generator should refold
+                           that value when transforming the message back into
+                           stream form.  The possible values are:
+
+                           none  -- all source values use original folding
+                           long  -- source values that have any line that is
+                                    longer than max_line_length will be
+                                    refolded
+                           all  -- all values are refolded.
+
+                           The default is 'long'.
+
+    header_factory      -- a callable that takes two arguments, 'name' and
+                           'value', where 'name' is a header field name and
+                           'value' is an unfolded header field value, and
+                           returns a string-like object that represents that
+                           header.  A default header_factory is provided that
+                           understands some of the RFC5322 header field types.
+                           (Currently address fields and date fields have
+                           special treatment, while all other fields are
+                           treated as unstructured.  This list will be
+                           completed before the extension is marked stable.)
+
+    content_manager     -- an object with at least two methods: get_content
+                           and set_content.  When the get_content or
+                           set_content method of a Message object is called,
+                           it calls the corresponding method of this object,
+                           passing it the message object as its first argument,
+                           and any arguments or keywords that were passed to
+                           it as additional arguments.  The default
+                           content_manager is
+                           :data:`~email.contentmanager.raw_data_manager`.
+
+    """
+
+    message_factory = EmailMessage
+    utf8 = False
+    refold_source = 'long'
+    header_factory = HeaderRegistry()
+    content_manager = raw_data_manager
+
+    def __init__(self, **kw):
+        # Ensure that each new instance gets a unique header factory
+        # (as opposed to clones, which share the factory).
+
+        if 'header_factory' not in kw:
+            object.__setattr__(self, 'header_factory', HeaderRegistry())
+
+        super().__init__(**kw)
+
+
+    def header_max_count(self, name):
+        """+
+        The implementation for this class returns the max_count attribute from
+        the specialized header class that would be used to construct a header
+        of type 'name'.
+        """
+        return self.header_factory[name].max_count
+
+    # The logic of the next three methods is chosen such that it is possible to
+    # switch a Message object between a Compat32 policy and a policy derived
+    # from this class and have the results stay consistent.  This allows a
+    # Message object constructed with this policy to be passed to a library
+    # that only handles Compat32 objects, or to receive such an object and
+    # convert it to use the newer style by just changing its policy.  It is
+    # also chosen because it postpones the relatively expensive full rfc5322
+    # parse until as late as possible when parsing from source, since in many
+    # applications only a few headers will actually be inspected.
+
+    def header_source_parse(self, sourcelines):
+        """+
+        The name is parsed as everything up to the ':' and returned unmodified.
+        The value is determined by stripping leading whitespace off the
+        remainder of the first line, joining all subsequent lines together, and
+        stripping any trailing carriage return or linefeed characters.  (This
+        is the same as Compat32).
+
+        """
+        name, value = sourcelines[0].split(':', 1)
+        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
+        return (name, value.rstrip('\r\n'))
+
+    def header_store_parse(self, name, value):
+        """+
+        The name is returned unchanged.  If the input value has a 'name'
+        attribute and it matches the name ignoring case, the value is returned
+        unchanged.  Otherwise the name and value are passed to header_factory
+        method, and the resulting custom header object is returned as the
+        value.  In this case a ValueError is raised if the input value contains
+        CR or LF characters.
+
+        """
+        if hasattr(value, 'name') and value.name.lower() == name.lower():
+            return (name, value)
+        if isinstance(value, str) and len(value.splitlines())>1:
+            # XXX this error message isn't quite right when we use splitlines
+            # (see issue 22233), but I'm not sure what should happen here.
+            raise ValueError("Header values may not contain linefeed "
+                             "or carriage return characters")
+        return (name, self.header_factory(name, value))
+
+    def header_fetch_parse(self, name, value):
+        """+
+        If the value has a 'name' attribute, it is returned to unmodified.
+        Otherwise the name and the value with any linesep characters removed
+        are passed to the header_factory method, and the resulting custom
+        header object is returned.  Any surrogateescaped bytes get turned
+        into the unicode unknown-character glyph.
+
+        """
+        if hasattr(value, 'name'):
+            return value
+        # We can't use splitlines here because it splits on more than \r and \n.
+        value = ''.join(linesep_splitter.split(value))
+        return self.header_factory(name, value)
+
+    def fold(self, name, value):
+        """+
+        Header folding is controlled by the refold_source policy setting.  A
+        value is considered to be a 'source value' if and only if it does not
+        have a 'name' attribute (having a 'name' attribute means it is a header
+        object of some sort).  If a source value needs to be refolded according
+        to the policy, it is converted into a custom header object by passing
+        the name and the value with any linesep characters removed to the
+        header_factory method.  Folding of a custom header object is done by
+        calling its fold method with the current policy.
+
+        Source values are split into lines using splitlines.  If the value is
+        not to be refolded, the lines are rejoined using the linesep from the
+        policy and returned.  The exception is lines containing non-ascii
+        binary data.  In that case the value is refolded regardless of the
+        refold_source setting, which causes the binary data to be CTE encoded
+        using the unknown-8bit charset.
+
+        """
+        return self._fold(name, value, refold_binary=True)
+
+    def fold_binary(self, name, value):
+        """+
+        The same as fold if cte_type is 7bit, except that the returned value is
+        bytes.
+
+        If cte_type is 8bit, non-ASCII binary data is converted back into
+        bytes.  Headers with binary data are not refolded, regardless of the
+        refold_header setting, since there is no way to know whether the binary
+        data consists of single byte characters or multibyte characters.
+
+        If utf8 is true, headers are encoded to utf8, otherwise to ascii with
+        non-ASCII unicode rendered as encoded words.
+
+        """
+        folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
+        charset = 'utf8' if self.utf8 else 'ascii'
+        return folded.encode(charset, 'surrogateescape')
+
+    def _fold(self, name, value, refold_binary=False):
+        if hasattr(value, 'name'):
+            return value.fold(policy=self)
+        maxlen = self.max_line_length if self.max_line_length else float('inf')
+        lines = value.splitlines()
+        refold = (self.refold_source == 'all' or
+                  self.refold_source == 'long' and
+                    (lines and len(lines[0])+len(name)+2 > maxlen or
+                     any(len(x) > maxlen for x in lines[1:])))
+        if refold or refold_binary and _has_surrogates(value):
+            return self.header_factory(name, ''.join(lines)).fold(policy=self)
+        return name + ': ' + self.linesep.join(lines) + self.linesep
+
+
+default = EmailPolicy()
+
+# Make the default policy use the class default header_factory
+del default.header_factory
+strict = default.clone(raise_on_defect=True)
+
+SMTP = default.clone(linesep='\r\n')
+
+HTTP = default.clone(linesep='\r\n', max_line_length=None)
+
+SMTPUTF8 = SMTP.clone(utf8=True)
diff --git a/modules/language/python/module/email/utils.py b/modules/language/python/module/email/utils.py
index abe3895..861f1ad 100644
--- a/modules/language/python/module/email/utils.py
+++ b/modules/language/python/module/email/utils.py
@@ -24,6 +24,7 @@ __all__ = [
     '_has_surrogates'
     ]
 
+
 import os
 import re
 import time
diff --git a/modules/language/python/module/html.py b/modules/language/python/module/html.py
new file mode 100644
index 0000000..654ffb7
--- /dev/null
+++ b/modules/language/python/module/html.py
@@ -0,0 +1,134 @@
+module(html)
+
+"""
+General functions for HTML manipulation.
+"""
+
+import re as _re
+from html.entities import html5 as _html5
+
+
+__all__ = ['escape', 'unescape']
+
+
+def escape(s, quote=True):
+    """
+    Replace special characters "&", "<" and ">" to HTML-safe sequences.
+    If the optional flag quote is true (the default), the quotation mark
+    characters, both double quote (") and single quote (') characters are also
+    translated.
+    """
+    s = s.replace("&", "&amp;") # Must be done first!
+    s = s.replace("<", "&lt;")
+    s = s.replace(">", "&gt;")
+    if quote:
+        s = s.replace('"', "&quot;")
+        s = s.replace('\'', "&#x27;")
+    return s
+
+
+# see http://www.w3.org/TR/html5/syntax.html#tokenizing-character-references
+
+_invalid_charrefs = {
+    0x00: '\ufffd',  # REPLACEMENT CHARACTER
+    0x0d: '\r',      # CARRIAGE RETURN
+    0x80: '\u20ac',  # EURO SIGN
+    0x81: '\x81',    # <control>
+    0x82: '\u201a',  # SINGLE LOW-9 QUOTATION MARK
+    0x83: '\u0192',  # LATIN SMALL LETTER F WITH HOOK
+    0x84: '\u201e',  # DOUBLE LOW-9 QUOTATION MARK
+    0x85: '\u2026',  # HORIZONTAL ELLIPSIS
+    0x86: '\u2020',  # DAGGER
+    0x87: '\u2021',  # DOUBLE DAGGER
+    0x88: '\u02c6',  # MODIFIER LETTER CIRCUMFLEX ACCENT
+    0x89: '\u2030',  # PER MILLE SIGN
+    0x8a: '\u0160',  # LATIN CAPITAL LETTER S WITH CARON
+    0x8b: '\u2039',  # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    0x8c: '\u0152',  # LATIN CAPITAL LIGATURE OE
+    0x8d: '\x8d',    # <control>
+    0x8e: '\u017d',  # LATIN CAPITAL LETTER Z WITH CARON
+    0x8f: '\x8f',    # <control>
+    0x90: '\x90',    # <control>
+    0x91: '\u2018',  # LEFT SINGLE QUOTATION MARK
+    0x92: '\u2019',  # RIGHT SINGLE QUOTATION MARK
+    0x93: '\u201c',  # LEFT DOUBLE QUOTATION MARK
+    0x94: '\u201d',  # RIGHT DOUBLE QUOTATION MARK
+    0x95: '\u2022',  # BULLET
+    0x96: '\u2013',  # EN DASH
+    0x97: '\u2014',  # EM DASH
+    0x98: '\u02dc',  # SMALL TILDE
+    0x99: '\u2122',  # TRADE MARK SIGN
+    0x9a: '\u0161',  # LATIN SMALL LETTER S WITH CARON
+    0x9b: '\u203a',  # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    0x9c: '\u0153',  # LATIN SMALL LIGATURE OE
+    0x9d: '\x9d',    # <control>
+    0x9e: '\u017e',  # LATIN SMALL LETTER Z WITH CARON
+    0x9f: '\u0178',  # LATIN CAPITAL LETTER Y WITH DIAERESIS
+}
+
+_invalid_codepoints = {
+    # 0x0001 to 0x0008
+    0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+    # 0x000E to 0x001F
+    0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
+    0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+    # 0x007F to 0x009F
+    0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a,
+    0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
+    0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+    # 0xFDD0 to 0xFDEF
+    0xfdd0, 0xfdd1, 0xfdd2, 0xfdd3, 0xfdd4, 0xfdd5, 0xfdd6, 0xfdd7, 0xfdd8,
+    0xfdd9, 0xfdda, 0xfddb, 0xfddc, 0xfddd, 0xfdde, 0xfddf, 0xfde0, 0xfde1,
+    0xfde2, 0xfde3, 0xfde4, 0xfde5, 0xfde6, 0xfde7, 0xfde8, 0xfde9, 0xfdea,
+    0xfdeb, 0xfdec, 0xfded, 0xfdee, 0xfdef,
+    # others
+    0xb, 0xfffe, 0xffff, 0x1fffe, 0x1ffff, 0x2fffe, 0x2ffff, 0x3fffe, 0x3ffff,
+    0x4fffe, 0x4ffff, 0x5fffe, 0x5ffff, 0x6fffe, 0x6ffff, 0x7fffe, 0x7ffff,
+    0x8fffe, 0x8ffff, 0x9fffe, 0x9ffff, 0xafffe, 0xaffff, 0xbfffe, 0xbffff,
+    0xcfffe, 0xcffff, 0xdfffe, 0xdffff, 0xefffe, 0xeffff, 0xffffe, 0xfffff,
+    0x10fffe, 0x10ffff
+}
+
+
+def _replace_charref(s):
+    s = s.group(1)
+    if s[0] == '#':
+        # numeric charref
+        if s[1] in 'xX':
+            num = int(s[2:].rstrip(';'), 16)
+        else:
+            num = int(s[1:].rstrip(';'))
+        if num in _invalid_charrefs:
+            return _invalid_charrefs[num]
+        if 0xD800 <= num <= 0xDFFF or num > 0x10FFFF:
+            return '\uFFFD'
+        if num in _invalid_codepoints:
+            return ''
+        return chr(num)
+    else:
+        # named charref
+        if s in _html5:
+            return _html5[s]
+        # find the longest matching name (as defined by the standard)
+        for x in range(len(s)-1, 1, -1):
+            if s[:x] in _html5:
+                return _html5[s[:x]] + s[x:]
+        else:
+            return '&' + s
+
+
+_charref = _re.compile(r'&(#[0-9]+;?'
+                       r'|#[xX][0-9a-fA-F]+;?'
+                       r'|[^\t\n\f <&#;]{1,32};?)')
+
+def unescape(s):
+    """
+    Convert all named and numeric character references (e.g. &gt;, &#62;,
+    &x3e;) in the string s to the corresponding unicode characters.
+    This function uses the rules defined by the HTML 5 standard
+    for both valid and invalid character references, and the list of
+    HTML 5 named character references defined in html.entities.html5.
+    """
+    if '&' not in s:
+        return s
+    return _charref.sub(_replace_charref, s)
diff --git a/modules/language/python/module/html/entities.py b/modules/language/python/module/html/entities.py
new file mode 100644
index 0000000..a7a888e
--- /dev/null
+++ b/modules/language/python/module/html/entities.py
@@ -0,0 +1,2511 @@
+module(html,entities)
+
+"""HTML character entity references."""
+
+__all__ = ['html5', 'name2codepoint', 'codepoint2name', 'entitydefs']
+
+
+# maps the HTML entity name to the Unicode code point
+name2codepoint = {
+    'AElig':    0x00c6, # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
+    'Aacute':   0x00c1, # latin capital letter A with acute, U+00C1 ISOlat1
+    'Acirc':    0x00c2, # latin capital letter A with circumflex, U+00C2 ISOlat1
+    'Agrave':   0x00c0, # latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
+    'Alpha':    0x0391, # greek capital letter alpha, U+0391
+    'Aring':    0x00c5, # latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
+    'Atilde':   0x00c3, # latin capital letter A with tilde, U+00C3 ISOlat1
+    'Auml':     0x00c4, # latin capital letter A with diaeresis, U+00C4 ISOlat1
+    'Beta':     0x0392, # greek capital letter beta, U+0392
+    'Ccedil':   0x00c7, # latin capital letter C with cedilla, U+00C7 ISOlat1
+    'Chi':      0x03a7, # greek capital letter chi, U+03A7
+    'Dagger':   0x2021, # double dagger, U+2021 ISOpub
+    'Delta':    0x0394, # greek capital letter delta, U+0394 ISOgrk3
+    'ETH':      0x00d0, # latin capital letter ETH, U+00D0 ISOlat1
+    'Eacute':   0x00c9, # latin capital letter E with acute, U+00C9 ISOlat1
+    'Ecirc':    0x00ca, # latin capital letter E with circumflex, U+00CA ISOlat1
+    'Egrave':   0x00c8, # latin capital letter E with grave, U+00C8 ISOlat1
+    'Epsilon':  0x0395, # greek capital letter epsilon, U+0395
+    'Eta':      0x0397, # greek capital letter eta, U+0397
+    'Euml':     0x00cb, # latin capital letter E with diaeresis, U+00CB ISOlat1
+    'Gamma':    0x0393, # greek capital letter gamma, U+0393 ISOgrk3
+    'Iacute':   0x00cd, # latin capital letter I with acute, U+00CD ISOlat1
+    'Icirc':    0x00ce, # latin capital letter I with circumflex, U+00CE ISOlat1
+    'Igrave':   0x00cc, # latin capital letter I with grave, U+00CC ISOlat1
+    'Iota':     0x0399, # greek capital letter iota, U+0399
+    'Iuml':     0x00cf, # latin capital letter I with diaeresis, U+00CF ISOlat1
+    'Kappa':    0x039a, # greek capital letter kappa, U+039A
+    'Lambda':   0x039b, # greek capital letter lambda, U+039B ISOgrk3
+    'Mu':       0x039c, # greek capital letter mu, U+039C
+    'Ntilde':   0x00d1, # latin capital letter N with tilde, U+00D1 ISOlat1
+    'Nu':       0x039d, # greek capital letter nu, U+039D
+    'OElig':    0x0152, # latin capital ligature OE, U+0152 ISOlat2
+    'Oacute':   0x00d3, # latin capital letter O with acute, U+00D3 ISOlat1
+    'Ocirc':    0x00d4, # latin capital letter O with circumflex, U+00D4 ISOlat1
+    'Ograve':   0x00d2, # latin capital letter O with grave, U+00D2 ISOlat1
+    'Omega':    0x03a9, # greek capital letter omega, U+03A9 ISOgrk3
+    'Omicron':  0x039f, # greek capital letter omicron, U+039F
+    'Oslash':   0x00d8, # latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
+    'Otilde':   0x00d5, # latin capital letter O with tilde, U+00D5 ISOlat1
+    'Ouml':     0x00d6, # latin capital letter O with diaeresis, U+00D6 ISOlat1
+    'Phi':      0x03a6, # greek capital letter phi, U+03A6 ISOgrk3
+    'Pi':       0x03a0, # greek capital letter pi, U+03A0 ISOgrk3
+    'Prime':    0x2033, # double prime = seconds = inches, U+2033 ISOtech
+    'Psi':      0x03a8, # greek capital letter psi, U+03A8 ISOgrk3
+    'Rho':      0x03a1, # greek capital letter rho, U+03A1
+    'Scaron':   0x0160, # latin capital letter S with caron, U+0160 ISOlat2
+    'Sigma':    0x03a3, # greek capital letter sigma, U+03A3 ISOgrk3
+    'THORN':    0x00de, # latin capital letter THORN, U+00DE ISOlat1
+    'Tau':      0x03a4, # greek capital letter tau, U+03A4
+    'Theta':    0x0398, # greek capital letter theta, U+0398 ISOgrk3
+    'Uacute':   0x00da, # latin capital letter U with acute, U+00DA ISOlat1
+    'Ucirc':    0x00db, # latin capital letter U with circumflex, U+00DB ISOlat1
+    'Ugrave':   0x00d9, # latin capital letter U with grave, U+00D9 ISOlat1
+    'Upsilon':  0x03a5, # greek capital letter upsilon, U+03A5 ISOgrk3
+    'Uuml':     0x00dc, # latin capital letter U with diaeresis, U+00DC ISOlat1
+    'Xi':       0x039e, # greek capital letter xi, U+039E ISOgrk3
+    'Yacute':   0x00dd, # latin capital letter Y with acute, U+00DD ISOlat1
+    'Yuml':     0x0178, # latin capital letter Y with diaeresis, U+0178 ISOlat2
+    'Zeta':     0x0396, # greek capital letter zeta, U+0396
+    'aacute':   0x00e1, # latin small letter a with acute, U+00E1 ISOlat1
+    'acirc':    0x00e2, # latin small letter a with circumflex, U+00E2 ISOlat1
+    'acute':    0x00b4, # acute accent = spacing acute, U+00B4 ISOdia
+    'aelig':    0x00e6, # latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
+    'agrave':   0x00e0, # latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
+    'alefsym':  0x2135, # alef symbol = first transfinite cardinal, U+2135 NEW
+    'alpha':    0x03b1, # greek small letter alpha, U+03B1 ISOgrk3
+    'amp':      0x0026, # ampersand, U+0026 ISOnum
+    'and':      0x2227, # logical and = wedge, U+2227 ISOtech
+    'ang':      0x2220, # angle, U+2220 ISOamso
+    'aring':    0x00e5, # latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
+    'asymp':    0x2248, # almost equal to = asymptotic to, U+2248 ISOamsr
+    'atilde':   0x00e3, # latin small letter a with tilde, U+00E3 ISOlat1
+    'auml':     0x00e4, # latin small letter a with diaeresis, U+00E4 ISOlat1
+    'bdquo':    0x201e, # double low-9 quotation mark, U+201E NEW
+    'beta':     0x03b2, # greek small letter beta, U+03B2 ISOgrk3
+    'brvbar':   0x00a6, # broken bar = broken vertical bar, U+00A6 ISOnum
+    'bull':     0x2022, # bullet = black small circle, U+2022 ISOpub
+    'cap':      0x2229, # intersection = cap, U+2229 ISOtech
+    'ccedil':   0x00e7, # latin small letter c with cedilla, U+00E7 ISOlat1
+    'cedil':    0x00b8, # cedilla = spacing cedilla, U+00B8 ISOdia
+    'cent':     0x00a2, # cent sign, U+00A2 ISOnum
+    'chi':      0x03c7, # greek small letter chi, U+03C7 ISOgrk3
+    'circ':     0x02c6, # modifier letter circumflex accent, U+02C6 ISOpub
+    'clubs':    0x2663, # black club suit = shamrock, U+2663 ISOpub
+    'cong':     0x2245, # approximately equal to, U+2245 ISOtech
+    'copy':     0x00a9, # copyright sign, U+00A9 ISOnum
+    'crarr':    0x21b5, # downwards arrow with corner leftwards = carriage return, U+21B5 NEW
+    'cup':      0x222a, # union = cup, U+222A ISOtech
+    'curren':   0x00a4, # currency sign, U+00A4 ISOnum
+    'dArr':     0x21d3, # downwards double arrow, U+21D3 ISOamsa
+    'dagger':   0x2020, # dagger, U+2020 ISOpub
+    'darr':     0x2193, # downwards arrow, U+2193 ISOnum
+    'deg':      0x00b0, # degree sign, U+00B0 ISOnum
+    'delta':    0x03b4, # greek small letter delta, U+03B4 ISOgrk3
+    'diams':    0x2666, # black diamond suit, U+2666 ISOpub
+    'divide':   0x00f7, # division sign, U+00F7 ISOnum
+    'eacute':   0x00e9, # latin small letter e with acute, U+00E9 ISOlat1
+    'ecirc':    0x00ea, # latin small letter e with circumflex, U+00EA ISOlat1
+    'egrave':   0x00e8, # latin small letter e with grave, U+00E8 ISOlat1
+    'empty':    0x2205, # empty set = null set = diameter, U+2205 ISOamso
+    'emsp':     0x2003, # em space, U+2003 ISOpub
+    'ensp':     0x2002, # en space, U+2002 ISOpub
+    'epsilon':  0x03b5, # greek small letter epsilon, U+03B5 ISOgrk3
+    'equiv':    0x2261, # identical to, U+2261 ISOtech
+    'eta':      0x03b7, # greek small letter eta, U+03B7 ISOgrk3
+    'eth':      0x00f0, # latin small letter eth, U+00F0 ISOlat1
+    'euml':     0x00eb, # latin small letter e with diaeresis, U+00EB ISOlat1
+    'euro':     0x20ac, # euro sign, U+20AC NEW
+    'exist':    0x2203, # there exists, U+2203 ISOtech
+    'fnof':     0x0192, # latin small f with hook = function = florin, U+0192 ISOtech
+    'forall':   0x2200, # for all, U+2200 ISOtech
+    'frac12':   0x00bd, # vulgar fraction one half = fraction one half, U+00BD ISOnum
+    'frac14':   0x00bc, # vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
+    'frac34':   0x00be, # vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
+    'frasl':    0x2044, # fraction slash, U+2044 NEW
+    'gamma':    0x03b3, # greek small letter gamma, U+03B3 ISOgrk3
+    'ge':       0x2265, # greater-than or equal to, U+2265 ISOtech
+    'gt':       0x003e, # greater-than sign, U+003E ISOnum
+    'hArr':     0x21d4, # left right double arrow, U+21D4 ISOamsa
+    'harr':     0x2194, # left right arrow, U+2194 ISOamsa
+    'hearts':   0x2665, # black heart suit = valentine, U+2665 ISOpub
+    'hellip':   0x2026, # horizontal ellipsis = three dot leader, U+2026 ISOpub
+    'iacute':   0x00ed, # latin small letter i with acute, U+00ED ISOlat1
+    'icirc':    0x00ee, # latin small letter i with circumflex, U+00EE ISOlat1
+    'iexcl':    0x00a1, # inverted exclamation mark, U+00A1 ISOnum
+    'igrave':   0x00ec, # latin small letter i with grave, U+00EC ISOlat1
+    'image':    0x2111, # blackletter capital I = imaginary part, U+2111 ISOamso
+    'infin':    0x221e, # infinity, U+221E ISOtech
+    'int':      0x222b, # integral, U+222B ISOtech
+    'iota':     0x03b9, # greek small letter iota, U+03B9 ISOgrk3
+    'iquest':   0x00bf, # inverted question mark = turned question mark, U+00BF ISOnum
+    'isin':     0x2208, # element of, U+2208 ISOtech
+    'iuml':     0x00ef, # latin small letter i with diaeresis, U+00EF ISOlat1
+    'kappa':    0x03ba, # greek small letter kappa, U+03BA ISOgrk3
+    'lArr':     0x21d0, # leftwards double arrow, U+21D0 ISOtech
+    'lambda':   0x03bb, # greek small letter lambda, U+03BB ISOgrk3
+    'lang':     0x2329, # left-pointing angle bracket = bra, U+2329 ISOtech
+    'laquo':    0x00ab, # left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
+    'larr':     0x2190, # leftwards arrow, U+2190 ISOnum
+    'lceil':    0x2308, # left ceiling = apl upstile, U+2308 ISOamsc
+    'ldquo':    0x201c, # left double quotation mark, U+201C ISOnum
+    'le':       0x2264, # less-than or equal to, U+2264 ISOtech
+    'lfloor':   0x230a, # left floor = apl downstile, U+230A ISOamsc
+    'lowast':   0x2217, # asterisk operator, U+2217 ISOtech
+    'loz':      0x25ca, # lozenge, U+25CA ISOpub
+    'lrm':      0x200e, # left-to-right mark, U+200E NEW RFC 2070
+    'lsaquo':   0x2039, # single left-pointing angle quotation mark, U+2039 ISO proposed
+    'lsquo':    0x2018, # left single quotation mark, U+2018 ISOnum
+    'lt':       0x003c, # less-than sign, U+003C ISOnum
+    'macr':     0x00af, # macron = spacing macron = overline = APL overbar, U+00AF ISOdia
+    'mdash':    0x2014, # em dash, U+2014 ISOpub
+    'micro':    0x00b5, # micro sign, U+00B5 ISOnum
+    'middot':   0x00b7, # middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
+    'minus':    0x2212, # minus sign, U+2212 ISOtech
+    'mu':       0x03bc, # greek small letter mu, U+03BC ISOgrk3
+    'nabla':    0x2207, # nabla = backward difference, U+2207 ISOtech
+    'nbsp':     0x00a0, # no-break space = non-breaking space, U+00A0 ISOnum
+    'ndash':    0x2013, # en dash, U+2013 ISOpub
+    'ne':       0x2260, # not equal to, U+2260 ISOtech
+    'ni':       0x220b, # contains as member, U+220B ISOtech
+    'not':      0x00ac, # not sign, U+00AC ISOnum
+    'notin':    0x2209, # not an element of, U+2209 ISOtech
+    'nsub':     0x2284, # not a subset of, U+2284 ISOamsn
+    'ntilde':   0x00f1, # latin small letter n with tilde, U+00F1 ISOlat1
+    'nu':       0x03bd, # greek small letter nu, U+03BD ISOgrk3
+    'oacute':   0x00f3, # latin small letter o with acute, U+00F3 ISOlat1
+    'ocirc':    0x00f4, # latin small letter o with circumflex, U+00F4 ISOlat1
+    'oelig':    0x0153, # latin small ligature oe, U+0153 ISOlat2
+    'ograve':   0x00f2, # latin small letter o with grave, U+00F2 ISOlat1
+    'oline':    0x203e, # overline = spacing overscore, U+203E NEW
+    'omega':    0x03c9, # greek small letter omega, U+03C9 ISOgrk3
+    'omicron':  0x03bf, # greek small letter omicron, U+03BF NEW
+    'oplus':    0x2295, # circled plus = direct sum, U+2295 ISOamsb
+    'or':       0x2228, # logical or = vee, U+2228 ISOtech
+    'ordf':     0x00aa, # feminine ordinal indicator, U+00AA ISOnum
+    'ordm':     0x00ba, # masculine ordinal indicator, U+00BA ISOnum
+    'oslash':   0x00f8, # latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
+    'otilde':   0x00f5, # latin small letter o with tilde, U+00F5 ISOlat1
+    'otimes':   0x2297, # circled times = vector product, U+2297 ISOamsb
+    'ouml':     0x00f6, # latin small letter o with diaeresis, U+00F6 ISOlat1
+    'para':     0x00b6, # pilcrow sign = paragraph sign, U+00B6 ISOnum
+    'part':     0x2202, # partial differential, U+2202 ISOtech
+    'permil':   0x2030, # per mille sign, U+2030 ISOtech
+    'perp':     0x22a5, # up tack = orthogonal to = perpendicular, U+22A5 ISOtech
+    'phi':      0x03c6, # greek small letter phi, U+03C6 ISOgrk3
+    'pi':       0x03c0, # greek small letter pi, U+03C0 ISOgrk3
+    'piv':      0x03d6, # greek pi symbol, U+03D6 ISOgrk3
+    'plusmn':   0x00b1, # plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
+    'pound':    0x00a3, # pound sign, U+00A3 ISOnum
+    'prime':    0x2032, # prime = minutes = feet, U+2032 ISOtech
+    'prod':     0x220f, # n-ary product = product sign, U+220F ISOamsb
+    'prop':     0x221d, # proportional to, U+221D ISOtech
+    'psi':      0x03c8, # greek small letter psi, U+03C8 ISOgrk3
+    'quot':     0x0022, # quotation mark = APL quote, U+0022 ISOnum
+    'rArr':     0x21d2, # rightwards double arrow, U+21D2 ISOtech
+    'radic':    0x221a, # square root = radical sign, U+221A ISOtech
+    'rang':     0x232a, # right-pointing angle bracket = ket, U+232A ISOtech
+    'raquo':    0x00bb, # right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
+    'rarr':     0x2192, # rightwards arrow, U+2192 ISOnum
+    'rceil':    0x2309, # right ceiling, U+2309 ISOamsc
+    'rdquo':    0x201d, # right double quotation mark, U+201D ISOnum
+    'real':     0x211c, # blackletter capital R = real part symbol, U+211C ISOamso
+    'reg':      0x00ae, # registered sign = registered trade mark sign, U+00AE ISOnum
+    'rfloor':   0x230b, # right floor, U+230B ISOamsc
+    'rho':      0x03c1, # greek small letter rho, U+03C1 ISOgrk3
+    'rlm':      0x200f, # right-to-left mark, U+200F NEW RFC 2070
+    'rsaquo':   0x203a, # single right-pointing angle quotation mark, U+203A ISO proposed
+    'rsquo':    0x2019, # right single quotation mark, U+2019 ISOnum
+    'sbquo':    0x201a, # single low-9 quotation mark, U+201A NEW
+    'scaron':   0x0161, # latin small letter s with caron, U+0161 ISOlat2
+    'sdot':     0x22c5, # dot operator, U+22C5 ISOamsb
+    'sect':     0x00a7, # section sign, U+00A7 ISOnum
+    'shy':      0x00ad, # soft hyphen = discretionary hyphen, U+00AD ISOnum
+    'sigma':    0x03c3, # greek small letter sigma, U+03C3 ISOgrk3
+    'sigmaf':   0x03c2, # greek small letter final sigma, U+03C2 ISOgrk3
+    'sim':      0x223c, # tilde operator = varies with = similar to, U+223C ISOtech
+    'spades':   0x2660, # black spade suit, U+2660 ISOpub
+    'sub':      0x2282, # subset of, U+2282 ISOtech
+    'sube':     0x2286, # subset of or equal to, U+2286 ISOtech
+    'sum':      0x2211, # n-ary summation, U+2211 ISOamsb
+    'sup':      0x2283, # superset of, U+2283 ISOtech
+    'sup1':     0x00b9, # superscript one = superscript digit one, U+00B9 ISOnum
+    'sup2':     0x00b2, # superscript two = superscript digit two = squared, U+00B2 ISOnum
+    'sup3':     0x00b3, # superscript three = superscript digit three = cubed, U+00B3 ISOnum
+    'supe':     0x2287, # superset of or equal to, U+2287 ISOtech
+    'szlig':    0x00df, # latin small letter sharp s = ess-zed, U+00DF ISOlat1
+    'tau':      0x03c4, # greek small letter tau, U+03C4 ISOgrk3
+    'there4':   0x2234, # therefore, U+2234 ISOtech
+    'theta':    0x03b8, # greek small letter theta, U+03B8 ISOgrk3
+    'thetasym': 0x03d1, # greek small letter theta symbol, U+03D1 NEW
+    'thinsp':   0x2009, # thin space, U+2009 ISOpub
+    'thorn':    0x00fe, # latin small letter thorn with, U+00FE ISOlat1
+    'tilde':    0x02dc, # small tilde, U+02DC ISOdia
+    'times':    0x00d7, # multiplication sign, U+00D7 ISOnum
+    'trade':    0x2122, # trade mark sign, U+2122 ISOnum
+    'uArr':     0x21d1, # upwards double arrow, U+21D1 ISOamsa
+    'uacute':   0x00fa, # latin small letter u with acute, U+00FA ISOlat1
+    'uarr':     0x2191, # upwards arrow, U+2191 ISOnum
+    'ucirc':    0x00fb, # latin small letter u with circumflex, U+00FB ISOlat1
+    'ugrave':   0x00f9, # latin small letter u with grave, U+00F9 ISOlat1
+    'uml':      0x00a8, # diaeresis = spacing diaeresis, U+00A8 ISOdia
+    'upsih':    0x03d2, # greek upsilon with hook symbol, U+03D2 NEW
+    'upsilon':  0x03c5, # greek small letter upsilon, U+03C5 ISOgrk3
+    'uuml':     0x00fc, # latin small letter u with diaeresis, U+00FC ISOlat1
+    'weierp':   0x2118, # script capital P = power set = Weierstrass p, U+2118 ISOamso
+    'xi':       0x03be, # greek small letter xi, U+03BE ISOgrk3
+    'yacute':   0x00fd, # latin small letter y with acute, U+00FD ISOlat1
+    'yen':      0x00a5, # yen sign = yuan sign, U+00A5 ISOnum
+    'yuml':     0x00ff, # latin small letter y with diaeresis, U+00FF ISOlat1
+    'zeta':     0x03b6, # greek small letter zeta, U+03B6 ISOgrk3
+    'zwj':      0x200d, # zero width joiner, U+200D NEW RFC 2070
+    'zwnj':     0x200c, # zero width non-joiner, U+200C NEW RFC 2070
+}
+
+
+# maps the HTML5 named character references to the equivalent Unicode character(s)
+html5 = {
+    'Aacute': '\xc1',
+    'aacute': '\xe1',
+    'Aacute;': '\xc1',
+    'aacute;': '\xe1',
+    'Abreve;': '\u0102',
+    'abreve;': '\u0103',
+    'ac;': '\u223e',
+    'acd;': '\u223f',
+    'acE;': '\u223e\u0333',
+    'Acirc': '\xc2',
+    'acirc': '\xe2',
+    'Acirc;': '\xc2',
+    'acirc;': '\xe2',
+    'acute': '\xb4',
+    'acute;': '\xb4',
+    'Acy;': '\u0410',
+    'acy;': '\u0430',
+    'AElig': '\xc6',
+    'aelig': '\xe6',
+    'AElig;': '\xc6',
+    'aelig;': '\xe6',
+    'af;': '\u2061',
+    'Afr;': '\U0001d504',
+    'afr;': '\U0001d51e',
+    'Agrave': '\xc0',
+    'agrave': '\xe0',
+    'Agrave;': '\xc0',
+    'agrave;': '\xe0',
+    'alefsym;': '\u2135',
+    'aleph;': '\u2135',
+    'Alpha;': '\u0391',
+    'alpha;': '\u03b1',
+    'Amacr;': '\u0100',
+    'amacr;': '\u0101',
+    'amalg;': '\u2a3f',
+    'AMP': '&',
+    'amp': '&',
+    'AMP;': '&',
+    'amp;': '&',
+    'And;': '\u2a53',
+    'and;': '\u2227',
+    'andand;': '\u2a55',
+    'andd;': '\u2a5c',
+    'andslope;': '\u2a58',
+    'andv;': '\u2a5a',
+    'ang;': '\u2220',
+    'ange;': '\u29a4',
+    'angle;': '\u2220',
+    'angmsd;': '\u2221',
+    'angmsdaa;': '\u29a8',
+    'angmsdab;': '\u29a9',
+    'angmsdac;': '\u29aa',
+    'angmsdad;': '\u29ab',
+    'angmsdae;': '\u29ac',
+    'angmsdaf;': '\u29ad',
+    'angmsdag;': '\u29ae',
+    'angmsdah;': '\u29af',
+    'angrt;': '\u221f',
+    'angrtvb;': '\u22be',
+    'angrtvbd;': '\u299d',
+    'angsph;': '\u2222',
+    'angst;': '\xc5',
+    'angzarr;': '\u237c',
+    'Aogon;': '\u0104',
+    'aogon;': '\u0105',
+    'Aopf;': '\U0001d538',
+    'aopf;': '\U0001d552',
+    'ap;': '\u2248',
+    'apacir;': '\u2a6f',
+    'apE;': '\u2a70',
+    'ape;': '\u224a',
+    'apid;': '\u224b',
+    'apos;': "'",
+    'ApplyFunction;': '\u2061',
+    'approx;': '\u2248',
+    'approxeq;': '\u224a',
+    'Aring': '\xc5',
+    'aring': '\xe5',
+    'Aring;': '\xc5',
+    'aring;': '\xe5',
+    'Ascr;': '\U0001d49c',
+    'ascr;': '\U0001d4b6',
+    'Assign;': '\u2254',
+    'ast;': '*',
+    'asymp;': '\u2248',
+    'asympeq;': '\u224d',
+    'Atilde': '\xc3',
+    'atilde': '\xe3',
+    'Atilde;': '\xc3',
+    'atilde;': '\xe3',
+    'Auml': '\xc4',
+    'auml': '\xe4',
+    'Auml;': '\xc4',
+    'auml;': '\xe4',
+    'awconint;': '\u2233',
+    'awint;': '\u2a11',
+    'backcong;': '\u224c',
+    'backepsilon;': '\u03f6',
+    'backprime;': '\u2035',
+    'backsim;': '\u223d',
+    'backsimeq;': '\u22cd',
+    'Backslash;': '\u2216',
+    'Barv;': '\u2ae7',
+    'barvee;': '\u22bd',
+    'Barwed;': '\u2306',
+    'barwed;': '\u2305',
+    'barwedge;': '\u2305',
+    'bbrk;': '\u23b5',
+    'bbrktbrk;': '\u23b6',
+    'bcong;': '\u224c',
+    'Bcy;': '\u0411',
+    'bcy;': '\u0431',
+    'bdquo;': '\u201e',
+    'becaus;': '\u2235',
+    'Because;': '\u2235',
+    'because;': '\u2235',
+    'bemptyv;': '\u29b0',
+    'bepsi;': '\u03f6',
+    'bernou;': '\u212c',
+    'Bernoullis;': '\u212c',
+    'Beta;': '\u0392',
+    'beta;': '\u03b2',
+    'beth;': '\u2136',
+    'between;': '\u226c',
+    'Bfr;': '\U0001d505',
+    'bfr;': '\U0001d51f',
+    'bigcap;': '\u22c2',
+    'bigcirc;': '\u25ef',
+    'bigcup;': '\u22c3',
+    'bigodot;': '\u2a00',
+    'bigoplus;': '\u2a01',
+    'bigotimes;': '\u2a02',
+    'bigsqcup;': '\u2a06',
+    'bigstar;': '\u2605',
+    'bigtriangledown;': '\u25bd',
+    'bigtriangleup;': '\u25b3',
+    'biguplus;': '\u2a04',
+    'bigvee;': '\u22c1',
+    'bigwedge;': '\u22c0',
+    'bkarow;': '\u290d',
+    'blacklozenge;': '\u29eb',
+    'blacksquare;': '\u25aa',
+    'blacktriangle;': '\u25b4',
+    'blacktriangledown;': '\u25be',
+    'blacktriangleleft;': '\u25c2',
+    'blacktriangleright;': '\u25b8',
+    'blank;': '\u2423',
+    'blk12;': '\u2592',
+    'blk14;': '\u2591',
+    'blk34;': '\u2593',
+    'block;': '\u2588',
+    'bne;': '=\u20e5',
+    'bnequiv;': '\u2261\u20e5',
+    'bNot;': '\u2aed',
+    'bnot;': '\u2310',
+    'Bopf;': '\U0001d539',
+    'bopf;': '\U0001d553',
+    'bot;': '\u22a5',
+    'bottom;': '\u22a5',
+    'bowtie;': '\u22c8',
+    'boxbox;': '\u29c9',
+    'boxDL;': '\u2557',
+    'boxDl;': '\u2556',
+    'boxdL;': '\u2555',
+    'boxdl;': '\u2510',
+    'boxDR;': '\u2554',
+    'boxDr;': '\u2553',
+    'boxdR;': '\u2552',
+    'boxdr;': '\u250c',
+    'boxH;': '\u2550',
+    'boxh;': '\u2500',
+    'boxHD;': '\u2566',
+    'boxHd;': '\u2564',
+    'boxhD;': '\u2565',
+    'boxhd;': '\u252c',
+    'boxHU;': '\u2569',
+    'boxHu;': '\u2567',
+    'boxhU;': '\u2568',
+    'boxhu;': '\u2534',
+    'boxminus;': '\u229f',
+    'boxplus;': '\u229e',
+    'boxtimes;': '\u22a0',
+    'boxUL;': '\u255d',
+    'boxUl;': '\u255c',
+    'boxuL;': '\u255b',
+    'boxul;': '\u2518',
+    'boxUR;': '\u255a',
+    'boxUr;': '\u2559',
+    'boxuR;': '\u2558',
+    'boxur;': '\u2514',
+    'boxV;': '\u2551',
+    'boxv;': '\u2502',
+    'boxVH;': '\u256c',
+    'boxVh;': '\u256b',
+    'boxvH;': '\u256a',
+    'boxvh;': '\u253c',
+    'boxVL;': '\u2563',
+    'boxVl;': '\u2562',
+    'boxvL;': '\u2561',
+    'boxvl;': '\u2524',
+    'boxVR;': '\u2560',
+    'boxVr;': '\u255f',
+    'boxvR;': '\u255e',
+    'boxvr;': '\u251c',
+    'bprime;': '\u2035',
+    'Breve;': '\u02d8',
+    'breve;': '\u02d8',
+    'brvbar': '\xa6',
+    'brvbar;': '\xa6',
+    'Bscr;': '\u212c',
+    'bscr;': '\U0001d4b7',
+    'bsemi;': '\u204f',
+    'bsim;': '\u223d',
+    'bsime;': '\u22cd',
+    'bsol;': '\\',
+    'bsolb;': '\u29c5',
+    'bsolhsub;': '\u27c8',
+    'bull;': '\u2022',
+    'bullet;': '\u2022',
+    'bump;': '\u224e',
+    'bumpE;': '\u2aae',
+    'bumpe;': '\u224f',
+    'Bumpeq;': '\u224e',
+    'bumpeq;': '\u224f',
+    'Cacute;': '\u0106',
+    'cacute;': '\u0107',
+    'Cap;': '\u22d2',
+    'cap;': '\u2229',
+    'capand;': '\u2a44',
+    'capbrcup;': '\u2a49',
+    'capcap;': '\u2a4b',
+    'capcup;': '\u2a47',
+    'capdot;': '\u2a40',
+    'CapitalDifferentialD;': '\u2145',
+    'caps;': '\u2229\ufe00',
+    'caret;': '\u2041',
+    'caron;': '\u02c7',
+    'Cayleys;': '\u212d',
+    'ccaps;': '\u2a4d',
+    'Ccaron;': '\u010c',
+    'ccaron;': '\u010d',
+    'Ccedil': '\xc7',
+    'ccedil': '\xe7',
+    'Ccedil;': '\xc7',
+    'ccedil;': '\xe7',
+    'Ccirc;': '\u0108',
+    'ccirc;': '\u0109',
+    'Cconint;': '\u2230',
+    'ccups;': '\u2a4c',
+    'ccupssm;': '\u2a50',
+    'Cdot;': '\u010a',
+    'cdot;': '\u010b',
+    'cedil': '\xb8',
+    'cedil;': '\xb8',
+    'Cedilla;': '\xb8',
+    'cemptyv;': '\u29b2',
+    'cent': '\xa2',
+    'cent;': '\xa2',
+    'CenterDot;': '\xb7',
+    'centerdot;': '\xb7',
+    'Cfr;': '\u212d',
+    'cfr;': '\U0001d520',
+    'CHcy;': '\u0427',
+    'chcy;': '\u0447',
+    'check;': '\u2713',
+    'checkmark;': '\u2713',
+    'Chi;': '\u03a7',
+    'chi;': '\u03c7',
+    'cir;': '\u25cb',
+    'circ;': '\u02c6',
+    'circeq;': '\u2257',
+    'circlearrowleft;': '\u21ba',
+    'circlearrowright;': '\u21bb',
+    'circledast;': '\u229b',
+    'circledcirc;': '\u229a',
+    'circleddash;': '\u229d',
+    'CircleDot;': '\u2299',
+    'circledR;': '\xae',
+    'circledS;': '\u24c8',
+    'CircleMinus;': '\u2296',
+    'CirclePlus;': '\u2295',
+    'CircleTimes;': '\u2297',
+    'cirE;': '\u29c3',
+    'cire;': '\u2257',
+    'cirfnint;': '\u2a10',
+    'cirmid;': '\u2aef',
+    'cirscir;': '\u29c2',
+    'ClockwiseContourIntegral;': '\u2232',
+    'CloseCurlyDoubleQuote;': '\u201d',
+    'CloseCurlyQuote;': '\u2019',
+    'clubs;': '\u2663',
+    'clubsuit;': '\u2663',
+    'Colon;': '\u2237',
+    'colon;': ':',
+    'Colone;': '\u2a74',
+    'colone;': '\u2254',
+    'coloneq;': '\u2254',
+    'comma;': ',',
+    'commat;': '@',
+    'comp;': '\u2201',
+    'compfn;': '\u2218',
+    'complement;': '\u2201',
+    'complexes;': '\u2102',
+    'cong;': '\u2245',
+    'congdot;': '\u2a6d',
+    'Congruent;': '\u2261',
+    'Conint;': '\u222f',
+    'conint;': '\u222e',
+    'ContourIntegral;': '\u222e',
+    'Copf;': '\u2102',
+    'copf;': '\U0001d554',
+    'coprod;': '\u2210',
+    'Coproduct;': '\u2210',
+    'COPY': '\xa9',
+    'copy': '\xa9',
+    'COPY;': '\xa9',
+    'copy;': '\xa9',
+    'copysr;': '\u2117',
+    'CounterClockwiseContourIntegral;': '\u2233',
+    'crarr;': '\u21b5',
+    'Cross;': '\u2a2f',
+    'cross;': '\u2717',
+    'Cscr;': '\U0001d49e',
+    'cscr;': '\U0001d4b8',
+    'csub;': '\u2acf',
+    'csube;': '\u2ad1',
+    'csup;': '\u2ad0',
+    'csupe;': '\u2ad2',
+    'ctdot;': '\u22ef',
+    'cudarrl;': '\u2938',
+    'cudarrr;': '\u2935',
+    'cuepr;': '\u22de',
+    'cuesc;': '\u22df',
+    'cularr;': '\u21b6',
+    'cularrp;': '\u293d',
+    'Cup;': '\u22d3',
+    'cup;': '\u222a',
+    'cupbrcap;': '\u2a48',
+    'CupCap;': '\u224d',
+    'cupcap;': '\u2a46',
+    'cupcup;': '\u2a4a',
+    'cupdot;': '\u228d',
+    'cupor;': '\u2a45',
+    'cups;': '\u222a\ufe00',
+    'curarr;': '\u21b7',
+    'curarrm;': '\u293c',
+    'curlyeqprec;': '\u22de',
+    'curlyeqsucc;': '\u22df',
+    'curlyvee;': '\u22ce',
+    'curlywedge;': '\u22cf',
+    'curren': '\xa4',
+    'curren;': '\xa4',
+    'curvearrowleft;': '\u21b6',
+    'curvearrowright;': '\u21b7',
+    'cuvee;': '\u22ce',
+    'cuwed;': '\u22cf',
+    'cwconint;': '\u2232',
+    'cwint;': '\u2231',
+    'cylcty;': '\u232d',
+    'Dagger;': '\u2021',
+    'dagger;': '\u2020',
+    'daleth;': '\u2138',
+    'Darr;': '\u21a1',
+    'dArr;': '\u21d3',
+    'darr;': '\u2193',
+    'dash;': '\u2010',
+    'Dashv;': '\u2ae4',
+    'dashv;': '\u22a3',
+    'dbkarow;': '\u290f',
+    'dblac;': '\u02dd',
+    'Dcaron;': '\u010e',
+    'dcaron;': '\u010f',
+    'Dcy;': '\u0414',
+    'dcy;': '\u0434',
+    'DD;': '\u2145',
+    'dd;': '\u2146',
+    'ddagger;': '\u2021',
+    'ddarr;': '\u21ca',
+    'DDotrahd;': '\u2911',
+    'ddotseq;': '\u2a77',
+    'deg': '\xb0',
+    'deg;': '\xb0',
+    'Del;': '\u2207',
+    'Delta;': '\u0394',
+    'delta;': '\u03b4',
+    'demptyv;': '\u29b1',
+    'dfisht;': '\u297f',
+    'Dfr;': '\U0001d507',
+    'dfr;': '\U0001d521',
+    'dHar;': '\u2965',
+    'dharl;': '\u21c3',
+    'dharr;': '\u21c2',
+    'DiacriticalAcute;': '\xb4',
+    'DiacriticalDot;': '\u02d9',
+    'DiacriticalDoubleAcute;': '\u02dd',
+    'DiacriticalGrave;': '`',
+    'DiacriticalTilde;': '\u02dc',
+    'diam;': '\u22c4',
+    'Diamond;': '\u22c4',
+    'diamond;': '\u22c4',
+    'diamondsuit;': '\u2666',
+    'diams;': '\u2666',
+    'die;': '\xa8',
+    'DifferentialD;': '\u2146',
+    'digamma;': '\u03dd',
+    'disin;': '\u22f2',
+    'div;': '\xf7',
+    'divide': '\xf7',
+    'divide;': '\xf7',
+    'divideontimes;': '\u22c7',
+    'divonx;': '\u22c7',
+    'DJcy;': '\u0402',
+    'djcy;': '\u0452',
+    'dlcorn;': '\u231e',
+    'dlcrop;': '\u230d',
+    'dollar;': '$',
+    'Dopf;': '\U0001d53b',
+    'dopf;': '\U0001d555',
+    'Dot;': '\xa8',
+    'dot;': '\u02d9',
+    'DotDot;': '\u20dc',
+    'doteq;': '\u2250',
+    'doteqdot;': '\u2251',
+    'DotEqual;': '\u2250',
+    'dotminus;': '\u2238',
+    'dotplus;': '\u2214',
+    'dotsquare;': '\u22a1',
+    'doublebarwedge;': '\u2306',
+    'DoubleContourIntegral;': '\u222f',
+    'DoubleDot;': '\xa8',
+    'DoubleDownArrow;': '\u21d3',
+    'DoubleLeftArrow;': '\u21d0',
+    'DoubleLeftRightArrow;': '\u21d4',
+    'DoubleLeftTee;': '\u2ae4',
+    'DoubleLongLeftArrow;': '\u27f8',
+    'DoubleLongLeftRightArrow;': '\u27fa',
+    'DoubleLongRightArrow;': '\u27f9',
+    'DoubleRightArrow;': '\u21d2',
+    'DoubleRightTee;': '\u22a8',
+    'DoubleUpArrow;': '\u21d1',
+    'DoubleUpDownArrow;': '\u21d5',
+    'DoubleVerticalBar;': '\u2225',
+    'DownArrow;': '\u2193',
+    'Downarrow;': '\u21d3',
+    'downarrow;': '\u2193',
+    'DownArrowBar;': '\u2913',
+    'DownArrowUpArrow;': '\u21f5',
+    'DownBreve;': '\u0311',
+    'downdownarrows;': '\u21ca',
+    'downharpoonleft;': '\u21c3',
+    'downharpoonright;': '\u21c2',
+    'DownLeftRightVector;': '\u2950',
+    'DownLeftTeeVector;': '\u295e',
+    'DownLeftVector;': '\u21bd',
+    'DownLeftVectorBar;': '\u2956',
+    'DownRightTeeVector;': '\u295f',
+    'DownRightVector;': '\u21c1',
+    'DownRightVectorBar;': '\u2957',
+    'DownTee;': '\u22a4',
+    'DownTeeArrow;': '\u21a7',
+    'drbkarow;': '\u2910',
+    'drcorn;': '\u231f',
+    'drcrop;': '\u230c',
+    'Dscr;': '\U0001d49f',
+    'dscr;': '\U0001d4b9',
+    'DScy;': '\u0405',
+    'dscy;': '\u0455',
+    'dsol;': '\u29f6',
+    'Dstrok;': '\u0110',
+    'dstrok;': '\u0111',
+    'dtdot;': '\u22f1',
+    'dtri;': '\u25bf',
+    'dtrif;': '\u25be',
+    'duarr;': '\u21f5',
+    'duhar;': '\u296f',
+    'dwangle;': '\u29a6',
+    'DZcy;': '\u040f',
+    'dzcy;': '\u045f',
+    'dzigrarr;': '\u27ff',
+    'Eacute': '\xc9',
+    'eacute': '\xe9',
+    'Eacute;': '\xc9',
+    'eacute;': '\xe9',
+    'easter;': '\u2a6e',
+    'Ecaron;': '\u011a',
+    'ecaron;': '\u011b',
+    'ecir;': '\u2256',
+    'Ecirc': '\xca',
+    'ecirc': '\xea',
+    'Ecirc;': '\xca',
+    'ecirc;': '\xea',
+    'ecolon;': '\u2255',
+    'Ecy;': '\u042d',
+    'ecy;': '\u044d',
+    'eDDot;': '\u2a77',
+    'Edot;': '\u0116',
+    'eDot;': '\u2251',
+    'edot;': '\u0117',
+    'ee;': '\u2147',
+    'efDot;': '\u2252',
+    'Efr;': '\U0001d508',
+    'efr;': '\U0001d522',
+    'eg;': '\u2a9a',
+    'Egrave': '\xc8',
+    'egrave': '\xe8',
+    'Egrave;': '\xc8',
+    'egrave;': '\xe8',
+    'egs;': '\u2a96',
+    'egsdot;': '\u2a98',
+    'el;': '\u2a99',
+    'Element;': '\u2208',
+    'elinters;': '\u23e7',
+    'ell;': '\u2113',
+    'els;': '\u2a95',
+    'elsdot;': '\u2a97',
+    'Emacr;': '\u0112',
+    'emacr;': '\u0113',
+    'empty;': '\u2205',
+    'emptyset;': '\u2205',
+    'EmptySmallSquare;': '\u25fb',
+    'emptyv;': '\u2205',
+    'EmptyVerySmallSquare;': '\u25ab',
+    'emsp13;': '\u2004',
+    'emsp14;': '\u2005',
+    'emsp;': '\u2003',
+    'ENG;': '\u014a',
+    'eng;': '\u014b',
+    'ensp;': '\u2002',
+    'Eogon;': '\u0118',
+    'eogon;': '\u0119',
+    'Eopf;': '\U0001d53c',
+    'eopf;': '\U0001d556',
+    'epar;': '\u22d5',
+    'eparsl;': '\u29e3',
+    'eplus;': '\u2a71',
+    'epsi;': '\u03b5',
+    'Epsilon;': '\u0395',
+    'epsilon;': '\u03b5',
+    'epsiv;': '\u03f5',
+    'eqcirc;': '\u2256',
+    'eqcolon;': '\u2255',
+    'eqsim;': '\u2242',
+    'eqslantgtr;': '\u2a96',
+    'eqslantless;': '\u2a95',
+    'Equal;': '\u2a75',
+    'equals;': '=',
+    'EqualTilde;': '\u2242',
+    'equest;': '\u225f',
+    'Equilibrium;': '\u21cc',
+    'equiv;': '\u2261',
+    'equivDD;': '\u2a78',
+    'eqvparsl;': '\u29e5',
+    'erarr;': '\u2971',
+    'erDot;': '\u2253',
+    'Escr;': '\u2130',
+    'escr;': '\u212f',
+    'esdot;': '\u2250',
+    'Esim;': '\u2a73',
+    'esim;': '\u2242',
+    'Eta;': '\u0397',
+    'eta;': '\u03b7',
+    'ETH': '\xd0',
+    'eth': '\xf0',
+    'ETH;': '\xd0',
+    'eth;': '\xf0',
+    'Euml': '\xcb',
+    'euml': '\xeb',
+    'Euml;': '\xcb',
+    'euml;': '\xeb',
+    'euro;': '\u20ac',
+    'excl;': '!',
+    'exist;': '\u2203',
+    'Exists;': '\u2203',
+    'expectation;': '\u2130',
+    'ExponentialE;': '\u2147',
+    'exponentiale;': '\u2147',
+    'fallingdotseq;': '\u2252',
+    'Fcy;': '\u0424',
+    'fcy;': '\u0444',
+    'female;': '\u2640',
+    'ffilig;': '\ufb03',
+    'fflig;': '\ufb00',
+    'ffllig;': '\ufb04',
+    'Ffr;': '\U0001d509',
+    'ffr;': '\U0001d523',
+    'filig;': '\ufb01',
+    'FilledSmallSquare;': '\u25fc',
+    'FilledVerySmallSquare;': '\u25aa',
+    'fjlig;': 'fj',
+    'flat;': '\u266d',
+    'fllig;': '\ufb02',
+    'fltns;': '\u25b1',
+    'fnof;': '\u0192',
+    'Fopf;': '\U0001d53d',
+    'fopf;': '\U0001d557',
+    'ForAll;': '\u2200',
+    'forall;': '\u2200',
+    'fork;': '\u22d4',
+    'forkv;': '\u2ad9',
+    'Fouriertrf;': '\u2131',
+    'fpartint;': '\u2a0d',
+    'frac12': '\xbd',
+    'frac12;': '\xbd',
+    'frac13;': '\u2153',
+    'frac14': '\xbc',
+    'frac14;': '\xbc',
+    'frac15;': '\u2155',
+    'frac16;': '\u2159',
+    'frac18;': '\u215b',
+    'frac23;': '\u2154',
+    'frac25;': '\u2156',
+    'frac34': '\xbe',
+    'frac34;': '\xbe',
+    'frac35;': '\u2157',
+    'frac38;': '\u215c',
+    'frac45;': '\u2158',
+    'frac56;': '\u215a',
+    'frac58;': '\u215d',
+    'frac78;': '\u215e',
+    'frasl;': '\u2044',
+    'frown;': '\u2322',
+    'Fscr;': '\u2131',
+    'fscr;': '\U0001d4bb',
+    'gacute;': '\u01f5',
+    'Gamma;': '\u0393',
+    'gamma;': '\u03b3',
+    'Gammad;': '\u03dc',
+    'gammad;': '\u03dd',
+    'gap;': '\u2a86',
+    'Gbreve;': '\u011e',
+    'gbreve;': '\u011f',
+    'Gcedil;': '\u0122',
+    'Gcirc;': '\u011c',
+    'gcirc;': '\u011d',
+    'Gcy;': '\u0413',
+    'gcy;': '\u0433',
+    'Gdot;': '\u0120',
+    'gdot;': '\u0121',
+    'gE;': '\u2267',
+    'ge;': '\u2265',
+    'gEl;': '\u2a8c',
+    'gel;': '\u22db',
+    'geq;': '\u2265',
+    'geqq;': '\u2267',
+    'geqslant;': '\u2a7e',
+    'ges;': '\u2a7e',
+    'gescc;': '\u2aa9',
+    'gesdot;': '\u2a80',
+    'gesdoto;': '\u2a82',
+    'gesdotol;': '\u2a84',
+    'gesl;': '\u22db\ufe00',
+    'gesles;': '\u2a94',
+    'Gfr;': '\U0001d50a',
+    'gfr;': '\U0001d524',
+    'Gg;': '\u22d9',
+    'gg;': '\u226b',
+    'ggg;': '\u22d9',
+    'gimel;': '\u2137',
+    'GJcy;': '\u0403',
+    'gjcy;': '\u0453',
+    'gl;': '\u2277',
+    'gla;': '\u2aa5',
+    'glE;': '\u2a92',
+    'glj;': '\u2aa4',
+    'gnap;': '\u2a8a',
+    'gnapprox;': '\u2a8a',
+    'gnE;': '\u2269',
+    'gne;': '\u2a88',
+    'gneq;': '\u2a88',
+    'gneqq;': '\u2269',
+    'gnsim;': '\u22e7',
+    'Gopf;': '\U0001d53e',
+    'gopf;': '\U0001d558',
+    'grave;': '`',
+    'GreaterEqual;': '\u2265',
+    'GreaterEqualLess;': '\u22db',
+    'GreaterFullEqual;': '\u2267',
+    'GreaterGreater;': '\u2aa2',
+    'GreaterLess;': '\u2277',
+    'GreaterSlantEqual;': '\u2a7e',
+    'GreaterTilde;': '\u2273',
+    'Gscr;': '\U0001d4a2',
+    'gscr;': '\u210a',
+    'gsim;': '\u2273',
+    'gsime;': '\u2a8e',
+    'gsiml;': '\u2a90',
+    'GT': '>',
+    'gt': '>',
+    'GT;': '>',
+    'Gt;': '\u226b',
+    'gt;': '>',
+    'gtcc;': '\u2aa7',
+    'gtcir;': '\u2a7a',
+    'gtdot;': '\u22d7',
+    'gtlPar;': '\u2995',
+    'gtquest;': '\u2a7c',
+    'gtrapprox;': '\u2a86',
+    'gtrarr;': '\u2978',
+    'gtrdot;': '\u22d7',
+    'gtreqless;': '\u22db',
+    'gtreqqless;': '\u2a8c',
+    'gtrless;': '\u2277',
+    'gtrsim;': '\u2273',
+    'gvertneqq;': '\u2269\ufe00',
+    'gvnE;': '\u2269\ufe00',
+    'Hacek;': '\u02c7',
+    'hairsp;': '\u200a',
+    'half;': '\xbd',
+    'hamilt;': '\u210b',
+    'HARDcy;': '\u042a',
+    'hardcy;': '\u044a',
+    'hArr;': '\u21d4',
+    'harr;': '\u2194',
+    'harrcir;': '\u2948',
+    'harrw;': '\u21ad',
+    'Hat;': '^',
+    'hbar;': '\u210f',
+    'Hcirc;': '\u0124',
+    'hcirc;': '\u0125',
+    'hearts;': '\u2665',
+    'heartsuit;': '\u2665',
+    'hellip;': '\u2026',
+    'hercon;': '\u22b9',
+    'Hfr;': '\u210c',
+    'hfr;': '\U0001d525',
+    'HilbertSpace;': '\u210b',
+    'hksearow;': '\u2925',
+    'hkswarow;': '\u2926',
+    'hoarr;': '\u21ff',
+    'homtht;': '\u223b',
+    'hookleftarrow;': '\u21a9',
+    'hookrightarrow;': '\u21aa',
+    'Hopf;': '\u210d',
+    'hopf;': '\U0001d559',
+    'horbar;': '\u2015',
+    'HorizontalLine;': '\u2500',
+    'Hscr;': '\u210b',
+    'hscr;': '\U0001d4bd',
+    'hslash;': '\u210f',
+    'Hstrok;': '\u0126',
+    'hstrok;': '\u0127',
+    'HumpDownHump;': '\u224e',
+    'HumpEqual;': '\u224f',
+    'hybull;': '\u2043',
+    'hyphen;': '\u2010',
+    'Iacute': '\xcd',
+    'iacute': '\xed',
+    'Iacute;': '\xcd',
+    'iacute;': '\xed',
+    'ic;': '\u2063',
+    'Icirc': '\xce',
+    'icirc': '\xee',
+    'Icirc;': '\xce',
+    'icirc;': '\xee',
+    'Icy;': '\u0418',
+    'icy;': '\u0438',
+    'Idot;': '\u0130',
+    'IEcy;': '\u0415',
+    'iecy;': '\u0435',
+    'iexcl': '\xa1',
+    'iexcl;': '\xa1',
+    'iff;': '\u21d4',
+    'Ifr;': '\u2111',
+    'ifr;': '\U0001d526',
+    'Igrave': '\xcc',
+    'igrave': '\xec',
+    'Igrave;': '\xcc',
+    'igrave;': '\xec',
+    'ii;': '\u2148',
+    'iiiint;': '\u2a0c',
+    'iiint;': '\u222d',
+    'iinfin;': '\u29dc',
+    'iiota;': '\u2129',
+    'IJlig;': '\u0132',
+    'ijlig;': '\u0133',
+    'Im;': '\u2111',
+    'Imacr;': '\u012a',
+    'imacr;': '\u012b',
+    'image;': '\u2111',
+    'ImaginaryI;': '\u2148',
+    'imagline;': '\u2110',
+    'imagpart;': '\u2111',
+    'imath;': '\u0131',
+    'imof;': '\u22b7',
+    'imped;': '\u01b5',
+    'Implies;': '\u21d2',
+    'in;': '\u2208',
+    'incare;': '\u2105',
+    'infin;': '\u221e',
+    'infintie;': '\u29dd',
+    'inodot;': '\u0131',
+    'Int;': '\u222c',
+    'int;': '\u222b',
+    'intcal;': '\u22ba',
+    'integers;': '\u2124',
+    'Integral;': '\u222b',
+    'intercal;': '\u22ba',
+    'Intersection;': '\u22c2',
+    'intlarhk;': '\u2a17',
+    'intprod;': '\u2a3c',
+    'InvisibleComma;': '\u2063',
+    'InvisibleTimes;': '\u2062',
+    'IOcy;': '\u0401',
+    'iocy;': '\u0451',
+    'Iogon;': '\u012e',
+    'iogon;': '\u012f',
+    'Iopf;': '\U0001d540',
+    'iopf;': '\U0001d55a',
+    'Iota;': '\u0399',
+    'iota;': '\u03b9',
+    'iprod;': '\u2a3c',
+    'iquest': '\xbf',
+    'iquest;': '\xbf',
+    'Iscr;': '\u2110',
+    'iscr;': '\U0001d4be',
+    'isin;': '\u2208',
+    'isindot;': '\u22f5',
+    'isinE;': '\u22f9',
+    'isins;': '\u22f4',
+    'isinsv;': '\u22f3',
+    'isinv;': '\u2208',
+    'it;': '\u2062',
+    'Itilde;': '\u0128',
+    'itilde;': '\u0129',
+    'Iukcy;': '\u0406',
+    'iukcy;': '\u0456',
+    'Iuml': '\xcf',
+    'iuml': '\xef',
+    'Iuml;': '\xcf',
+    'iuml;': '\xef',
+    'Jcirc;': '\u0134',
+    'jcirc;': '\u0135',
+    'Jcy;': '\u0419',
+    'jcy;': '\u0439',
+    'Jfr;': '\U0001d50d',
+    'jfr;': '\U0001d527',
+    'jmath;': '\u0237',
+    'Jopf;': '\U0001d541',
+    'jopf;': '\U0001d55b',
+    'Jscr;': '\U0001d4a5',
+    'jscr;': '\U0001d4bf',
+    'Jsercy;': '\u0408',
+    'jsercy;': '\u0458',
+    'Jukcy;': '\u0404',
+    'jukcy;': '\u0454',
+    'Kappa;': '\u039a',
+    'kappa;': '\u03ba',
+    'kappav;': '\u03f0',
+    'Kcedil;': '\u0136',
+    'kcedil;': '\u0137',
+    'Kcy;': '\u041a',
+    'kcy;': '\u043a',
+    'Kfr;': '\U0001d50e',
+    'kfr;': '\U0001d528',
+    'kgreen;': '\u0138',
+    'KHcy;': '\u0425',
+    'khcy;': '\u0445',
+    'KJcy;': '\u040c',
+    'kjcy;': '\u045c',
+    'Kopf;': '\U0001d542',
+    'kopf;': '\U0001d55c',
+    'Kscr;': '\U0001d4a6',
+    'kscr;': '\U0001d4c0',
+    'lAarr;': '\u21da',
+    'Lacute;': '\u0139',
+    'lacute;': '\u013a',
+    'laemptyv;': '\u29b4',
+    'lagran;': '\u2112',
+    'Lambda;': '\u039b',
+    'lambda;': '\u03bb',
+    'Lang;': '\u27ea',
+    'lang;': '\u27e8',
+    'langd;': '\u2991',
+    'langle;': '\u27e8',
+    'lap;': '\u2a85',
+    'Laplacetrf;': '\u2112',
+    'laquo': '\xab',
+    'laquo;': '\xab',
+    'Larr;': '\u219e',
+    'lArr;': '\u21d0',
+    'larr;': '\u2190',
+    'larrb;': '\u21e4',
+    'larrbfs;': '\u291f',
+    'larrfs;': '\u291d',
+    'larrhk;': '\u21a9',
+    'larrlp;': '\u21ab',
+    'larrpl;': '\u2939',
+    'larrsim;': '\u2973',
+    'larrtl;': '\u21a2',
+    'lat;': '\u2aab',
+    'lAtail;': '\u291b',
+    'latail;': '\u2919',
+    'late;': '\u2aad',
+    'lates;': '\u2aad\ufe00',
+    'lBarr;': '\u290e',
+    'lbarr;': '\u290c',
+    'lbbrk;': '\u2772',
+    'lbrace;': '{',
+    'lbrack;': '[',
+    'lbrke;': '\u298b',
+    'lbrksld;': '\u298f',
+    'lbrkslu;': '\u298d',
+    'Lcaron;': '\u013d',
+    'lcaron;': '\u013e',
+    'Lcedil;': '\u013b',
+    'lcedil;': '\u013c',
+    'lceil;': '\u2308',
+    'lcub;': '{',
+    'Lcy;': '\u041b',
+    'lcy;': '\u043b',
+    'ldca;': '\u2936',
+    'ldquo;': '\u201c',
+    'ldquor;': '\u201e',
+    'ldrdhar;': '\u2967',
+    'ldrushar;': '\u294b',
+    'ldsh;': '\u21b2',
+    'lE;': '\u2266',
+    'le;': '\u2264',
+    'LeftAngleBracket;': '\u27e8',
+    'LeftArrow;': '\u2190',
+    'Leftarrow;': '\u21d0',
+    'leftarrow;': '\u2190',
+    'LeftArrowBar;': '\u21e4',
+    'LeftArrowRightArrow;': '\u21c6',
+    'leftarrowtail;': '\u21a2',
+    'LeftCeiling;': '\u2308',
+    'LeftDoubleBracket;': '\u27e6',
+    'LeftDownTeeVector;': '\u2961',
+    'LeftDownVector;': '\u21c3',
+    'LeftDownVectorBar;': '\u2959',
+    'LeftFloor;': '\u230a',
+    'leftharpoondown;': '\u21bd',
+    'leftharpoonup;': '\u21bc',
+    'leftleftarrows;': '\u21c7',
+    'LeftRightArrow;': '\u2194',
+    'Leftrightarrow;': '\u21d4',
+    'leftrightarrow;': '\u2194',
+    'leftrightarrows;': '\u21c6',
+    'leftrightharpoons;': '\u21cb',
+    'leftrightsquigarrow;': '\u21ad',
+    'LeftRightVector;': '\u294e',
+    'LeftTee;': '\u22a3',
+    'LeftTeeArrow;': '\u21a4',
+    'LeftTeeVector;': '\u295a',
+    'leftthreetimes;': '\u22cb',
+    'LeftTriangle;': '\u22b2',
+    'LeftTriangleBar;': '\u29cf',
+    'LeftTriangleEqual;': '\u22b4',
+    'LeftUpDownVector;': '\u2951',
+    'LeftUpTeeVector;': '\u2960',
+    'LeftUpVector;': '\u21bf',
+    'LeftUpVectorBar;': '\u2958',
+    'LeftVector;': '\u21bc',
+    'LeftVectorBar;': '\u2952',
+    'lEg;': '\u2a8b',
+    'leg;': '\u22da',
+    'leq;': '\u2264',
+    'leqq;': '\u2266',
+    'leqslant;': '\u2a7d',
+    'les;': '\u2a7d',
+    'lescc;': '\u2aa8',
+    'lesdot;': '\u2a7f',
+    'lesdoto;': '\u2a81',
+    'lesdotor;': '\u2a83',
+    'lesg;': '\u22da\ufe00',
+    'lesges;': '\u2a93',
+    'lessapprox;': '\u2a85',
+    'lessdot;': '\u22d6',
+    'lesseqgtr;': '\u22da',
+    'lesseqqgtr;': '\u2a8b',
+    'LessEqualGreater;': '\u22da',
+    'LessFullEqual;': '\u2266',
+    'LessGreater;': '\u2276',
+    'lessgtr;': '\u2276',
+    'LessLess;': '\u2aa1',
+    'lesssim;': '\u2272',
+    'LessSlantEqual;': '\u2a7d',
+    'LessTilde;': '\u2272',
+    'lfisht;': '\u297c',
+    'lfloor;': '\u230a',
+    'Lfr;': '\U0001d50f',
+    'lfr;': '\U0001d529',
+    'lg;': '\u2276',
+    'lgE;': '\u2a91',
+    'lHar;': '\u2962',
+    'lhard;': '\u21bd',
+    'lharu;': '\u21bc',
+    'lharul;': '\u296a',
+    'lhblk;': '\u2584',
+    'LJcy;': '\u0409',
+    'ljcy;': '\u0459',
+    'Ll;': '\u22d8',
+    'll;': '\u226a',
+    'llarr;': '\u21c7',
+    'llcorner;': '\u231e',
+    'Lleftarrow;': '\u21da',
+    'llhard;': '\u296b',
+    'lltri;': '\u25fa',
+    'Lmidot;': '\u013f',
+    'lmidot;': '\u0140',
+    'lmoust;': '\u23b0',
+    'lmoustache;': '\u23b0',
+    'lnap;': '\u2a89',
+    'lnapprox;': '\u2a89',
+    'lnE;': '\u2268',
+    'lne;': '\u2a87',
+    'lneq;': '\u2a87',
+    'lneqq;': '\u2268',
+    'lnsim;': '\u22e6',
+    'loang;': '\u27ec',
+    'loarr;': '\u21fd',
+    'lobrk;': '\u27e6',
+    'LongLeftArrow;': '\u27f5',
+    'Longleftarrow;': '\u27f8',
+    'longleftarrow;': '\u27f5',
+    'LongLeftRightArrow;': '\u27f7',
+    'Longleftrightarrow;': '\u27fa',
+    'longleftrightarrow;': '\u27f7',
+    'longmapsto;': '\u27fc',
+    'LongRightArrow;': '\u27f6',
+    'Longrightarrow;': '\u27f9',
+    'longrightarrow;': '\u27f6',
+    'looparrowleft;': '\u21ab',
+    'looparrowright;': '\u21ac',
+    'lopar;': '\u2985',
+    'Lopf;': '\U0001d543',
+    'lopf;': '\U0001d55d',
+    'loplus;': '\u2a2d',
+    'lotimes;': '\u2a34',
+    'lowast;': '\u2217',
+    'lowbar;': '_',
+    'LowerLeftArrow;': '\u2199',
+    'LowerRightArrow;': '\u2198',
+    'loz;': '\u25ca',
+    'lozenge;': '\u25ca',
+    'lozf;': '\u29eb',
+    'lpar;': '(',
+    'lparlt;': '\u2993',
+    'lrarr;': '\u21c6',
+    'lrcorner;': '\u231f',
+    'lrhar;': '\u21cb',
+    'lrhard;': '\u296d',
+    'lrm;': '\u200e',
+    'lrtri;': '\u22bf',
+    'lsaquo;': '\u2039',
+    'Lscr;': '\u2112',
+    'lscr;': '\U0001d4c1',
+    'Lsh;': '\u21b0',
+    'lsh;': '\u21b0',
+    'lsim;': '\u2272',
+    'lsime;': '\u2a8d',
+    'lsimg;': '\u2a8f',
+    'lsqb;': '[',
+    'lsquo;': '\u2018',
+    'lsquor;': '\u201a',
+    'Lstrok;': '\u0141',
+    'lstrok;': '\u0142',
+    'LT': '<',
+    'lt': '<',
+    'LT;': '<',
+    'Lt;': '\u226a',
+    'lt;': '<',
+    'ltcc;': '\u2aa6',
+    'ltcir;': '\u2a79',
+    'ltdot;': '\u22d6',
+    'lthree;': '\u22cb',
+    'ltimes;': '\u22c9',
+    'ltlarr;': '\u2976',
+    'ltquest;': '\u2a7b',
+    'ltri;': '\u25c3',
+    'ltrie;': '\u22b4',
+    'ltrif;': '\u25c2',
+    'ltrPar;': '\u2996',
+    'lurdshar;': '\u294a',
+    'luruhar;': '\u2966',
+    'lvertneqq;': '\u2268\ufe00',
+    'lvnE;': '\u2268\ufe00',
+    'macr': '\xaf',
+    'macr;': '\xaf',
+    'male;': '\u2642',
+    'malt;': '\u2720',
+    'maltese;': '\u2720',
+    'Map;': '\u2905',
+    'map;': '\u21a6',
+    'mapsto;': '\u21a6',
+    'mapstodown;': '\u21a7',
+    'mapstoleft;': '\u21a4',
+    'mapstoup;': '\u21a5',
+    'marker;': '\u25ae',
+    'mcomma;': '\u2a29',
+    'Mcy;': '\u041c',
+    'mcy;': '\u043c',
+    'mdash;': '\u2014',
+    'mDDot;': '\u223a',
+    'measuredangle;': '\u2221',
+    'MediumSpace;': '\u205f',
+    'Mellintrf;': '\u2133',
+    'Mfr;': '\U0001d510',
+    'mfr;': '\U0001d52a',
+    'mho;': '\u2127',
+    'micro': '\xb5',
+    'micro;': '\xb5',
+    'mid;': '\u2223',
+    'midast;': '*',
+    'midcir;': '\u2af0',
+    'middot': '\xb7',
+    'middot;': '\xb7',
+    'minus;': '\u2212',
+    'minusb;': '\u229f',
+    'minusd;': '\u2238',
+    'minusdu;': '\u2a2a',
+    'MinusPlus;': '\u2213',
+    'mlcp;': '\u2adb',
+    'mldr;': '\u2026',
+    'mnplus;': '\u2213',
+    'models;': '\u22a7',
+    'Mopf;': '\U0001d544',
+    'mopf;': '\U0001d55e',
+    'mp;': '\u2213',
+    'Mscr;': '\u2133',
+    'mscr;': '\U0001d4c2',
+    'mstpos;': '\u223e',
+    'Mu;': '\u039c',
+    'mu;': '\u03bc',
+    'multimap;': '\u22b8',
+    'mumap;': '\u22b8',
+    'nabla;': '\u2207',
+    'Nacute;': '\u0143',
+    'nacute;': '\u0144',
+    'nang;': '\u2220\u20d2',
+    'nap;': '\u2249',
+    'napE;': '\u2a70\u0338',
+    'napid;': '\u224b\u0338',
+    'napos;': '\u0149',
+    'napprox;': '\u2249',
+    'natur;': '\u266e',
+    'natural;': '\u266e',
+    'naturals;': '\u2115',
+    'nbsp': '\xa0',
+    'nbsp;': '\xa0',
+    'nbump;': '\u224e\u0338',
+    'nbumpe;': '\u224f\u0338',
+    'ncap;': '\u2a43',
+    'Ncaron;': '\u0147',
+    'ncaron;': '\u0148',
+    'Ncedil;': '\u0145',
+    'ncedil;': '\u0146',
+    'ncong;': '\u2247',
+    'ncongdot;': '\u2a6d\u0338',
+    'ncup;': '\u2a42',
+    'Ncy;': '\u041d',
+    'ncy;': '\u043d',
+    'ndash;': '\u2013',
+    'ne;': '\u2260',
+    'nearhk;': '\u2924',
+    'neArr;': '\u21d7',
+    'nearr;': '\u2197',
+    'nearrow;': '\u2197',
+    'nedot;': '\u2250\u0338',
+    'NegativeMediumSpace;': '\u200b',
+    'NegativeThickSpace;': '\u200b',
+    'NegativeThinSpace;': '\u200b',
+    'NegativeVeryThinSpace;': '\u200b',
+    'nequiv;': '\u2262',
+    'nesear;': '\u2928',
+    'nesim;': '\u2242\u0338',
+    'NestedGreaterGreater;': '\u226b',
+    'NestedLessLess;': '\u226a',
+    'NewLine;': '\n',
+    'nexist;': '\u2204',
+    'nexists;': '\u2204',
+    'Nfr;': '\U0001d511',
+    'nfr;': '\U0001d52b',
+    'ngE;': '\u2267\u0338',
+    'nge;': '\u2271',
+    'ngeq;': '\u2271',
+    'ngeqq;': '\u2267\u0338',
+    'ngeqslant;': '\u2a7e\u0338',
+    'nges;': '\u2a7e\u0338',
+    'nGg;': '\u22d9\u0338',
+    'ngsim;': '\u2275',
+    'nGt;': '\u226b\u20d2',
+    'ngt;': '\u226f',
+    'ngtr;': '\u226f',
+    'nGtv;': '\u226b\u0338',
+    'nhArr;': '\u21ce',
+    'nharr;': '\u21ae',
+    'nhpar;': '\u2af2',
+    'ni;': '\u220b',
+    'nis;': '\u22fc',
+    'nisd;': '\u22fa',
+    'niv;': '\u220b',
+    'NJcy;': '\u040a',
+    'njcy;': '\u045a',
+    'nlArr;': '\u21cd',
+    'nlarr;': '\u219a',
+    'nldr;': '\u2025',
+    'nlE;': '\u2266\u0338',
+    'nle;': '\u2270',
+    'nLeftarrow;': '\u21cd',
+    'nleftarrow;': '\u219a',
+    'nLeftrightarrow;': '\u21ce',
+    'nleftrightarrow;': '\u21ae',
+    'nleq;': '\u2270',
+    'nleqq;': '\u2266\u0338',
+    'nleqslant;': '\u2a7d\u0338',
+    'nles;': '\u2a7d\u0338',
+    'nless;': '\u226e',
+    'nLl;': '\u22d8\u0338',
+    'nlsim;': '\u2274',
+    'nLt;': '\u226a\u20d2',
+    'nlt;': '\u226e',
+    'nltri;': '\u22ea',
+    'nltrie;': '\u22ec',
+    'nLtv;': '\u226a\u0338',
+    'nmid;': '\u2224',
+    'NoBreak;': '\u2060',
+    'NonBreakingSpace;': '\xa0',
+    'Nopf;': '\u2115',
+    'nopf;': '\U0001d55f',
+    'not': '\xac',
+    'Not;': '\u2aec',
+    'not;': '\xac',
+    'NotCongruent;': '\u2262',
+    'NotCupCap;': '\u226d',
+    'NotDoubleVerticalBar;': '\u2226',
+    'NotElement;': '\u2209',
+    'NotEqual;': '\u2260',
+    'NotEqualTilde;': '\u2242\u0338',
+    'NotExists;': '\u2204',
+    'NotGreater;': '\u226f',
+    'NotGreaterEqual;': '\u2271',
+    'NotGreaterFullEqual;': '\u2267\u0338',
+    'NotGreaterGreater;': '\u226b\u0338',
+    'NotGreaterLess;': '\u2279',
+    'NotGreaterSlantEqual;': '\u2a7e\u0338',
+    'NotGreaterTilde;': '\u2275',
+    'NotHumpDownHump;': '\u224e\u0338',
+    'NotHumpEqual;': '\u224f\u0338',
+    'notin;': '\u2209',
+    'notindot;': '\u22f5\u0338',
+    'notinE;': '\u22f9\u0338',
+    'notinva;': '\u2209',
+    'notinvb;': '\u22f7',
+    'notinvc;': '\u22f6',
+    'NotLeftTriangle;': '\u22ea',
+    'NotLeftTriangleBar;': '\u29cf\u0338',
+    'NotLeftTriangleEqual;': '\u22ec',
+    'NotLess;': '\u226e',
+    'NotLessEqual;': '\u2270',
+    'NotLessGreater;': '\u2278',
+    'NotLessLess;': '\u226a\u0338',
+    'NotLessSlantEqual;': '\u2a7d\u0338',
+    'NotLessTilde;': '\u2274',
+    'NotNestedGreaterGreater;': '\u2aa2\u0338',
+    'NotNestedLessLess;': '\u2aa1\u0338',
+    'notni;': '\u220c',
+    'notniva;': '\u220c',
+    'notnivb;': '\u22fe',
+    'notnivc;': '\u22fd',
+    'NotPrecedes;': '\u2280',
+    'NotPrecedesEqual;': '\u2aaf\u0338',
+    'NotPrecedesSlantEqual;': '\u22e0',
+    'NotReverseElement;': '\u220c',
+    'NotRightTriangle;': '\u22eb',
+    'NotRightTriangleBar;': '\u29d0\u0338',
+    'NotRightTriangleEqual;': '\u22ed',
+    'NotSquareSubset;': '\u228f\u0338',
+    'NotSquareSubsetEqual;': '\u22e2',
+    'NotSquareSuperset;': '\u2290\u0338',
+    'NotSquareSupersetEqual;': '\u22e3',
+    'NotSubset;': '\u2282\u20d2',
+    'NotSubsetEqual;': '\u2288',
+    'NotSucceeds;': '\u2281',
+    'NotSucceedsEqual;': '\u2ab0\u0338',
+    'NotSucceedsSlantEqual;': '\u22e1',
+    'NotSucceedsTilde;': '\u227f\u0338',
+    'NotSuperset;': '\u2283\u20d2',
+    'NotSupersetEqual;': '\u2289',
+    'NotTilde;': '\u2241',
+    'NotTildeEqual;': '\u2244',
+    'NotTildeFullEqual;': '\u2247',
+    'NotTildeTilde;': '\u2249',
+    'NotVerticalBar;': '\u2224',
+    'npar;': '\u2226',
+    'nparallel;': '\u2226',
+    'nparsl;': '\u2afd\u20e5',
+    'npart;': '\u2202\u0338',
+    'npolint;': '\u2a14',
+    'npr;': '\u2280',
+    'nprcue;': '\u22e0',
+    'npre;': '\u2aaf\u0338',
+    'nprec;': '\u2280',
+    'npreceq;': '\u2aaf\u0338',
+    'nrArr;': '\u21cf',
+    'nrarr;': '\u219b',
+    'nrarrc;': '\u2933\u0338',
+    'nrarrw;': '\u219d\u0338',
+    'nRightarrow;': '\u21cf',
+    'nrightarrow;': '\u219b',
+    'nrtri;': '\u22eb',
+    'nrtrie;': '\u22ed',
+    'nsc;': '\u2281',
+    'nsccue;': '\u22e1',
+    'nsce;': '\u2ab0\u0338',
+    'Nscr;': '\U0001d4a9',
+    'nscr;': '\U0001d4c3',
+    'nshortmid;': '\u2224',
+    'nshortparallel;': '\u2226',
+    'nsim;': '\u2241',
+    'nsime;': '\u2244',
+    'nsimeq;': '\u2244',
+    'nsmid;': '\u2224',
+    'nspar;': '\u2226',
+    'nsqsube;': '\u22e2',
+    'nsqsupe;': '\u22e3',
+    'nsub;': '\u2284',
+    'nsubE;': '\u2ac5\u0338',
+    'nsube;': '\u2288',
+    'nsubset;': '\u2282\u20d2',
+    'nsubseteq;': '\u2288',
+    'nsubseteqq;': '\u2ac5\u0338',
+    'nsucc;': '\u2281',
+    'nsucceq;': '\u2ab0\u0338',
+    'nsup;': '\u2285',
+    'nsupE;': '\u2ac6\u0338',
+    'nsupe;': '\u2289',
+    'nsupset;': '\u2283\u20d2',
+    'nsupseteq;': '\u2289',
+    'nsupseteqq;': '\u2ac6\u0338',
+    'ntgl;': '\u2279',
+    'Ntilde': '\xd1',
+    'ntilde': '\xf1',
+    'Ntilde;': '\xd1',
+    'ntilde;': '\xf1',
+    'ntlg;': '\u2278',
+    'ntriangleleft;': '\u22ea',
+    'ntrianglelefteq;': '\u22ec',
+    'ntriangleright;': '\u22eb',
+    'ntrianglerighteq;': '\u22ed',
+    'Nu;': '\u039d',
+    'nu;': '\u03bd',
+    'num;': '#',
+    'numero;': '\u2116',
+    'numsp;': '\u2007',
+    'nvap;': '\u224d\u20d2',
+    'nVDash;': '\u22af',
+    'nVdash;': '\u22ae',
+    'nvDash;': '\u22ad',
+    'nvdash;': '\u22ac',
+    'nvge;': '\u2265\u20d2',
+    'nvgt;': '>\u20d2',
+    'nvHarr;': '\u2904',
+    'nvinfin;': '\u29de',
+    'nvlArr;': '\u2902',
+    'nvle;': '\u2264\u20d2',
+    'nvlt;': '<\u20d2',
+    'nvltrie;': '\u22b4\u20d2',
+    'nvrArr;': '\u2903',
+    'nvrtrie;': '\u22b5\u20d2',
+    'nvsim;': '\u223c\u20d2',
+    'nwarhk;': '\u2923',
+    'nwArr;': '\u21d6',
+    'nwarr;': '\u2196',
+    'nwarrow;': '\u2196',
+    'nwnear;': '\u2927',
+    'Oacute': '\xd3',
+    'oacute': '\xf3',
+    'Oacute;': '\xd3',
+    'oacute;': '\xf3',
+    'oast;': '\u229b',
+    'ocir;': '\u229a',
+    'Ocirc': '\xd4',
+    'ocirc': '\xf4',
+    'Ocirc;': '\xd4',
+    'ocirc;': '\xf4',
+    'Ocy;': '\u041e',
+    'ocy;': '\u043e',
+    'odash;': '\u229d',
+    'Odblac;': '\u0150',
+    'odblac;': '\u0151',
+    'odiv;': '\u2a38',
+    'odot;': '\u2299',
+    'odsold;': '\u29bc',
+    'OElig;': '\u0152',
+    'oelig;': '\u0153',
+    'ofcir;': '\u29bf',
+    'Ofr;': '\U0001d512',
+    'ofr;': '\U0001d52c',
+    'ogon;': '\u02db',
+    'Ograve': '\xd2',
+    'ograve': '\xf2',
+    'Ograve;': '\xd2',
+    'ograve;': '\xf2',
+    'ogt;': '\u29c1',
+    'ohbar;': '\u29b5',
+    'ohm;': '\u03a9',
+    'oint;': '\u222e',
+    'olarr;': '\u21ba',
+    'olcir;': '\u29be',
+    'olcross;': '\u29bb',
+    'oline;': '\u203e',
+    'olt;': '\u29c0',
+    'Omacr;': '\u014c',
+    'omacr;': '\u014d',
+    'Omega;': '\u03a9',
+    'omega;': '\u03c9',
+    'Omicron;': '\u039f',
+    'omicron;': '\u03bf',
+    'omid;': '\u29b6',
+    'ominus;': '\u2296',
+    'Oopf;': '\U0001d546',
+    'oopf;': '\U0001d560',
+    'opar;': '\u29b7',
+    'OpenCurlyDoubleQuote;': '\u201c',
+    'OpenCurlyQuote;': '\u2018',
+    'operp;': '\u29b9',
+    'oplus;': '\u2295',
+    'Or;': '\u2a54',
+    'or;': '\u2228',
+    'orarr;': '\u21bb',
+    'ord;': '\u2a5d',
+    'order;': '\u2134',
+    'orderof;': '\u2134',
+    'ordf': '\xaa',
+    'ordf;': '\xaa',
+    'ordm': '\xba',
+    'ordm;': '\xba',
+    'origof;': '\u22b6',
+    'oror;': '\u2a56',
+    'orslope;': '\u2a57',
+    'orv;': '\u2a5b',
+    'oS;': '\u24c8',
+    'Oscr;': '\U0001d4aa',
+    'oscr;': '\u2134',
+    'Oslash': '\xd8',
+    'oslash': '\xf8',
+    'Oslash;': '\xd8',
+    'oslash;': '\xf8',
+    'osol;': '\u2298',
+    'Otilde': '\xd5',
+    'otilde': '\xf5',
+    'Otilde;': '\xd5',
+    'otilde;': '\xf5',
+    'Otimes;': '\u2a37',
+    'otimes;': '\u2297',
+    'otimesas;': '\u2a36',
+    'Ouml': '\xd6',
+    'ouml': '\xf6',
+    'Ouml;': '\xd6',
+    'ouml;': '\xf6',
+    'ovbar;': '\u233d',
+    'OverBar;': '\u203e',
+    'OverBrace;': '\u23de',
+    'OverBracket;': '\u23b4',
+    'OverParenthesis;': '\u23dc',
+    'par;': '\u2225',
+    'para': '\xb6',
+    'para;': '\xb6',
+    'parallel;': '\u2225',
+    'parsim;': '\u2af3',
+    'parsl;': '\u2afd',
+    'part;': '\u2202',
+    'PartialD;': '\u2202',
+    'Pcy;': '\u041f',
+    'pcy;': '\u043f',
+    'percnt;': '%',
+    'period;': '.',
+    'permil;': '\u2030',
+    'perp;': '\u22a5',
+    'pertenk;': '\u2031',
+    'Pfr;': '\U0001d513',
+    'pfr;': '\U0001d52d',
+    'Phi;': '\u03a6',
+    'phi;': '\u03c6',
+    'phiv;': '\u03d5',
+    'phmmat;': '\u2133',
+    'phone;': '\u260e',
+    'Pi;': '\u03a0',
+    'pi;': '\u03c0',
+    'pitchfork;': '\u22d4',
+    'piv;': '\u03d6',
+    'planck;': '\u210f',
+    'planckh;': '\u210e',
+    'plankv;': '\u210f',
+    'plus;': '+',
+    'plusacir;': '\u2a23',
+    'plusb;': '\u229e',
+    'pluscir;': '\u2a22',
+    'plusdo;': '\u2214',
+    'plusdu;': '\u2a25',
+    'pluse;': '\u2a72',
+    'PlusMinus;': '\xb1',
+    'plusmn': '\xb1',
+    'plusmn;': '\xb1',
+    'plussim;': '\u2a26',
+    'plustwo;': '\u2a27',
+    'pm;': '\xb1',
+    'Poincareplane;': '\u210c',
+    'pointint;': '\u2a15',
+    'Popf;': '\u2119',
+    'popf;': '\U0001d561',
+    'pound': '\xa3',
+    'pound;': '\xa3',
+    'Pr;': '\u2abb',
+    'pr;': '\u227a',
+    'prap;': '\u2ab7',
+    'prcue;': '\u227c',
+    'prE;': '\u2ab3',
+    'pre;': '\u2aaf',
+    'prec;': '\u227a',
+    'precapprox;': '\u2ab7',
+    'preccurlyeq;': '\u227c',
+    'Precedes;': '\u227a',
+    'PrecedesEqual;': '\u2aaf',
+    'PrecedesSlantEqual;': '\u227c',
+    'PrecedesTilde;': '\u227e',
+    'preceq;': '\u2aaf',
+    'precnapprox;': '\u2ab9',
+    'precneqq;': '\u2ab5',
+    'precnsim;': '\u22e8',
+    'precsim;': '\u227e',
+    'Prime;': '\u2033',
+    'prime;': '\u2032',
+    'primes;': '\u2119',
+    'prnap;': '\u2ab9',
+    'prnE;': '\u2ab5',
+    'prnsim;': '\u22e8',
+    'prod;': '\u220f',
+    'Product;': '\u220f',
+    'profalar;': '\u232e',
+    'profline;': '\u2312',
+    'profsurf;': '\u2313',
+    'prop;': '\u221d',
+    'Proportion;': '\u2237',
+    'Proportional;': '\u221d',
+    'propto;': '\u221d',
+    'prsim;': '\u227e',
+    'prurel;': '\u22b0',
+    'Pscr;': '\U0001d4ab',
+    'pscr;': '\U0001d4c5',
+    'Psi;': '\u03a8',
+    'psi;': '\u03c8',
+    'puncsp;': '\u2008',
+    'Qfr;': '\U0001d514',
+    'qfr;': '\U0001d52e',
+    'qint;': '\u2a0c',
+    'Qopf;': '\u211a',
+    'qopf;': '\U0001d562',
+    'qprime;': '\u2057',
+    'Qscr;': '\U0001d4ac',
+    'qscr;': '\U0001d4c6',
+    'quaternions;': '\u210d',
+    'quatint;': '\u2a16',
+    'quest;': '?',
+    'questeq;': '\u225f',
+    'QUOT': '"',
+    'quot': '"',
+    'QUOT;': '"',
+    'quot;': '"',
+    'rAarr;': '\u21db',
+    'race;': '\u223d\u0331',
+    'Racute;': '\u0154',
+    'racute;': '\u0155',
+    'radic;': '\u221a',
+    'raemptyv;': '\u29b3',
+    'Rang;': '\u27eb',
+    'rang;': '\u27e9',
+    'rangd;': '\u2992',
+    'range;': '\u29a5',
+    'rangle;': '\u27e9',
+    'raquo': '\xbb',
+    'raquo;': '\xbb',
+    'Rarr;': '\u21a0',
+    'rArr;': '\u21d2',
+    'rarr;': '\u2192',
+    'rarrap;': '\u2975',
+    'rarrb;': '\u21e5',
+    'rarrbfs;': '\u2920',
+    'rarrc;': '\u2933',
+    'rarrfs;': '\u291e',
+    'rarrhk;': '\u21aa',
+    'rarrlp;': '\u21ac',
+    'rarrpl;': '\u2945',
+    'rarrsim;': '\u2974',
+    'Rarrtl;': '\u2916',
+    'rarrtl;': '\u21a3',
+    'rarrw;': '\u219d',
+    'rAtail;': '\u291c',
+    'ratail;': '\u291a',
+    'ratio;': '\u2236',
+    'rationals;': '\u211a',
+    'RBarr;': '\u2910',
+    'rBarr;': '\u290f',
+    'rbarr;': '\u290d',
+    'rbbrk;': '\u2773',
+    'rbrace;': '}',
+    'rbrack;': ']',
+    'rbrke;': '\u298c',
+    'rbrksld;': '\u298e',
+    'rbrkslu;': '\u2990',
+    'Rcaron;': '\u0158',
+    'rcaron;': '\u0159',
+    'Rcedil;': '\u0156',
+    'rcedil;': '\u0157',
+    'rceil;': '\u2309',
+    'rcub;': '}',
+    'Rcy;': '\u0420',
+    'rcy;': '\u0440',
+    'rdca;': '\u2937',
+    'rdldhar;': '\u2969',
+    'rdquo;': '\u201d',
+    'rdquor;': '\u201d',
+    'rdsh;': '\u21b3',
+    'Re;': '\u211c',
+    'real;': '\u211c',
+    'realine;': '\u211b',
+    'realpart;': '\u211c',
+    'reals;': '\u211d',
+    'rect;': '\u25ad',
+    'REG': '\xae',
+    'reg': '\xae',
+    'REG;': '\xae',
+    'reg;': '\xae',
+    'ReverseElement;': '\u220b',
+    'ReverseEquilibrium;': '\u21cb',
+    'ReverseUpEquilibrium;': '\u296f',
+    'rfisht;': '\u297d',
+    'rfloor;': '\u230b',
+    'Rfr;': '\u211c',
+    'rfr;': '\U0001d52f',
+    'rHar;': '\u2964',
+    'rhard;': '\u21c1',
+    'rharu;': '\u21c0',
+    'rharul;': '\u296c',
+    'Rho;': '\u03a1',
+    'rho;': '\u03c1',
+    'rhov;': '\u03f1',
+    'RightAngleBracket;': '\u27e9',
+    'RightArrow;': '\u2192',
+    'Rightarrow;': '\u21d2',
+    'rightarrow;': '\u2192',
+    'RightArrowBar;': '\u21e5',
+    'RightArrowLeftArrow;': '\u21c4',
+    'rightarrowtail;': '\u21a3',
+    'RightCeiling;': '\u2309',
+    'RightDoubleBracket;': '\u27e7',
+    'RightDownTeeVector;': '\u295d',
+    'RightDownVector;': '\u21c2',
+    'RightDownVectorBar;': '\u2955',
+    'RightFloor;': '\u230b',
+    'rightharpoondown;': '\u21c1',
+    'rightharpoonup;': '\u21c0',
+    'rightleftarrows;': '\u21c4',
+    'rightleftharpoons;': '\u21cc',
+    'rightrightarrows;': '\u21c9',
+    'rightsquigarrow;': '\u219d',
+    'RightTee;': '\u22a2',
+    'RightTeeArrow;': '\u21a6',
+    'RightTeeVector;': '\u295b',
+    'rightthreetimes;': '\u22cc',
+    'RightTriangle;': '\u22b3',
+    'RightTriangleBar;': '\u29d0',
+    'RightTriangleEqual;': '\u22b5',
+    'RightUpDownVector;': '\u294f',
+    'RightUpTeeVector;': '\u295c',
+    'RightUpVector;': '\u21be',
+    'RightUpVectorBar;': '\u2954',
+    'RightVector;': '\u21c0',
+    'RightVectorBar;': '\u2953',
+    'ring;': '\u02da',
+    'risingdotseq;': '\u2253',
+    'rlarr;': '\u21c4',
+    'rlhar;': '\u21cc',
+    'rlm;': '\u200f',
+    'rmoust;': '\u23b1',
+    'rmoustache;': '\u23b1',
+    'rnmid;': '\u2aee',
+    'roang;': '\u27ed',
+    'roarr;': '\u21fe',
+    'robrk;': '\u27e7',
+    'ropar;': '\u2986',
+    'Ropf;': '\u211d',
+    'ropf;': '\U0001d563',
+    'roplus;': '\u2a2e',
+    'rotimes;': '\u2a35',
+    'RoundImplies;': '\u2970',
+    'rpar;': ')',
+    'rpargt;': '\u2994',
+    'rppolint;': '\u2a12',
+    'rrarr;': '\u21c9',
+    'Rrightarrow;': '\u21db',
+    'rsaquo;': '\u203a',
+    'Rscr;': '\u211b',
+    'rscr;': '\U0001d4c7',
+    'Rsh;': '\u21b1',
+    'rsh;': '\u21b1',
+    'rsqb;': ']',
+    'rsquo;': '\u2019',
+    'rsquor;': '\u2019',
+    'rthree;': '\u22cc',
+    'rtimes;': '\u22ca',
+    'rtri;': '\u25b9',
+    'rtrie;': '\u22b5',
+    'rtrif;': '\u25b8',
+    'rtriltri;': '\u29ce',
+    'RuleDelayed;': '\u29f4',
+    'ruluhar;': '\u2968',
+    'rx;': '\u211e',
+    'Sacute;': '\u015a',
+    'sacute;': '\u015b',
+    'sbquo;': '\u201a',
+    'Sc;': '\u2abc',
+    'sc;': '\u227b',
+    'scap;': '\u2ab8',
+    'Scaron;': '\u0160',
+    'scaron;': '\u0161',
+    'sccue;': '\u227d',
+    'scE;': '\u2ab4',
+    'sce;': '\u2ab0',
+    'Scedil;': '\u015e',
+    'scedil;': '\u015f',
+    'Scirc;': '\u015c',
+    'scirc;': '\u015d',
+    'scnap;': '\u2aba',
+    'scnE;': '\u2ab6',
+    'scnsim;': '\u22e9',
+    'scpolint;': '\u2a13',
+    'scsim;': '\u227f',
+    'Scy;': '\u0421',
+    'scy;': '\u0441',
+    'sdot;': '\u22c5',
+    'sdotb;': '\u22a1',
+    'sdote;': '\u2a66',
+    'searhk;': '\u2925',
+    'seArr;': '\u21d8',
+    'searr;': '\u2198',
+    'searrow;': '\u2198',
+    'sect': '\xa7',
+    'sect;': '\xa7',
+    'semi;': ';',
+    'seswar;': '\u2929',
+    'setminus;': '\u2216',
+    'setmn;': '\u2216',
+    'sext;': '\u2736',
+    'Sfr;': '\U0001d516',
+    'sfr;': '\U0001d530',
+    'sfrown;': '\u2322',
+    'sharp;': '\u266f',
+    'SHCHcy;': '\u0429',
+    'shchcy;': '\u0449',
+    'SHcy;': '\u0428',
+    'shcy;': '\u0448',
+    'ShortDownArrow;': '\u2193',
+    'ShortLeftArrow;': '\u2190',
+    'shortmid;': '\u2223',
+    'shortparallel;': '\u2225',
+    'ShortRightArrow;': '\u2192',
+    'ShortUpArrow;': '\u2191',
+    'shy': '\xad',
+    'shy;': '\xad',
+    'Sigma;': '\u03a3',
+    'sigma;': '\u03c3',
+    'sigmaf;': '\u03c2',
+    'sigmav;': '\u03c2',
+    'sim;': '\u223c',
+    'simdot;': '\u2a6a',
+    'sime;': '\u2243',
+    'simeq;': '\u2243',
+    'simg;': '\u2a9e',
+    'simgE;': '\u2aa0',
+    'siml;': '\u2a9d',
+    'simlE;': '\u2a9f',
+    'simne;': '\u2246',
+    'simplus;': '\u2a24',
+    'simrarr;': '\u2972',
+    'slarr;': '\u2190',
+    'SmallCircle;': '\u2218',
+    'smallsetminus;': '\u2216',
+    'smashp;': '\u2a33',
+    'smeparsl;': '\u29e4',
+    'smid;': '\u2223',
+    'smile;': '\u2323',
+    'smt;': '\u2aaa',
+    'smte;': '\u2aac',
+    'smtes;': '\u2aac\ufe00',
+    'SOFTcy;': '\u042c',
+    'softcy;': '\u044c',
+    'sol;': '/',
+    'solb;': '\u29c4',
+    'solbar;': '\u233f',
+    'Sopf;': '\U0001d54a',
+    'sopf;': '\U0001d564',
+    'spades;': '\u2660',
+    'spadesuit;': '\u2660',
+    'spar;': '\u2225',
+    'sqcap;': '\u2293',
+    'sqcaps;': '\u2293\ufe00',
+    'sqcup;': '\u2294',
+    'sqcups;': '\u2294\ufe00',
+    'Sqrt;': '\u221a',
+    'sqsub;': '\u228f',
+    'sqsube;': '\u2291',
+    'sqsubset;': '\u228f',
+    'sqsubseteq;': '\u2291',
+    'sqsup;': '\u2290',
+    'sqsupe;': '\u2292',
+    'sqsupset;': '\u2290',
+    'sqsupseteq;': '\u2292',
+    'squ;': '\u25a1',
+    'Square;': '\u25a1',
+    'square;': '\u25a1',
+    'SquareIntersection;': '\u2293',
+    'SquareSubset;': '\u228f',
+    'SquareSubsetEqual;': '\u2291',
+    'SquareSuperset;': '\u2290',
+    'SquareSupersetEqual;': '\u2292',
+    'SquareUnion;': '\u2294',
+    'squarf;': '\u25aa',
+    'squf;': '\u25aa',
+    'srarr;': '\u2192',
+    'Sscr;': '\U0001d4ae',
+    'sscr;': '\U0001d4c8',
+    'ssetmn;': '\u2216',
+    'ssmile;': '\u2323',
+    'sstarf;': '\u22c6',
+    'Star;': '\u22c6',
+    'star;': '\u2606',
+    'starf;': '\u2605',
+    'straightepsilon;': '\u03f5',
+    'straightphi;': '\u03d5',
+    'strns;': '\xaf',
+    'Sub;': '\u22d0',
+    'sub;': '\u2282',
+    'subdot;': '\u2abd',
+    'subE;': '\u2ac5',
+    'sube;': '\u2286',
+    'subedot;': '\u2ac3',
+    'submult;': '\u2ac1',
+    'subnE;': '\u2acb',
+    'subne;': '\u228a',
+    'subplus;': '\u2abf',
+    'subrarr;': '\u2979',
+    'Subset;': '\u22d0',
+    'subset;': '\u2282',
+    'subseteq;': '\u2286',
+    'subseteqq;': '\u2ac5',
+    'SubsetEqual;': '\u2286',
+    'subsetneq;': '\u228a',
+    'subsetneqq;': '\u2acb',
+    'subsim;': '\u2ac7',
+    'subsub;': '\u2ad5',
+    'subsup;': '\u2ad3',
+    'succ;': '\u227b',
+    'succapprox;': '\u2ab8',
+    'succcurlyeq;': '\u227d',
+    'Succeeds;': '\u227b',
+    'SucceedsEqual;': '\u2ab0',
+    'SucceedsSlantEqual;': '\u227d',
+    'SucceedsTilde;': '\u227f',
+    'succeq;': '\u2ab0',
+    'succnapprox;': '\u2aba',
+    'succneqq;': '\u2ab6',
+    'succnsim;': '\u22e9',
+    'succsim;': '\u227f',
+    'SuchThat;': '\u220b',
+    'Sum;': '\u2211',
+    'sum;': '\u2211',
+    'sung;': '\u266a',
+    'sup1': '\xb9',
+    'sup1;': '\xb9',
+    'sup2': '\xb2',
+    'sup2;': '\xb2',
+    'sup3': '\xb3',
+    'sup3;': '\xb3',
+    'Sup;': '\u22d1',
+    'sup;': '\u2283',
+    'supdot;': '\u2abe',
+    'supdsub;': '\u2ad8',
+    'supE;': '\u2ac6',
+    'supe;': '\u2287',
+    'supedot;': '\u2ac4',
+    'Superset;': '\u2283',
+    'SupersetEqual;': '\u2287',
+    'suphsol;': '\u27c9',
+    'suphsub;': '\u2ad7',
+    'suplarr;': '\u297b',
+    'supmult;': '\u2ac2',
+    'supnE;': '\u2acc',
+    'supne;': '\u228b',
+    'supplus;': '\u2ac0',
+    'Supset;': '\u22d1',
+    'supset;': '\u2283',
+    'supseteq;': '\u2287',
+    'supseteqq;': '\u2ac6',
+    'supsetneq;': '\u228b',
+    'supsetneqq;': '\u2acc',
+    'supsim;': '\u2ac8',
+    'supsub;': '\u2ad4',
+    'supsup;': '\u2ad6',
+    'swarhk;': '\u2926',
+    'swArr;': '\u21d9',
+    'swarr;': '\u2199',
+    'swarrow;': '\u2199',
+    'swnwar;': '\u292a',
+    'szlig': '\xdf',
+    'szlig;': '\xdf',
+    'Tab;': '\t',
+    'target;': '\u2316',
+    'Tau;': '\u03a4',
+    'tau;': '\u03c4',
+    'tbrk;': '\u23b4',
+    'Tcaron;': '\u0164',
+    'tcaron;': '\u0165',
+    'Tcedil;': '\u0162',
+    'tcedil;': '\u0163',
+    'Tcy;': '\u0422',
+    'tcy;': '\u0442',
+    'tdot;': '\u20db',
+    'telrec;': '\u2315',
+    'Tfr;': '\U0001d517',
+    'tfr;': '\U0001d531',
+    'there4;': '\u2234',
+    'Therefore;': '\u2234',
+    'therefore;': '\u2234',
+    'Theta;': '\u0398',
+    'theta;': '\u03b8',
+    'thetasym;': '\u03d1',
+    'thetav;': '\u03d1',
+    'thickapprox;': '\u2248',
+    'thicksim;': '\u223c',
+    'ThickSpace;': '\u205f\u200a',
+    'thinsp;': '\u2009',
+    'ThinSpace;': '\u2009',
+    'thkap;': '\u2248',
+    'thksim;': '\u223c',
+    'THORN': '\xde',
+    'thorn': '\xfe',
+    'THORN;': '\xde',
+    'thorn;': '\xfe',
+    'Tilde;': '\u223c',
+    'tilde;': '\u02dc',
+    'TildeEqual;': '\u2243',
+    'TildeFullEqual;': '\u2245',
+    'TildeTilde;': '\u2248',
+    'times': '\xd7',
+    'times;': '\xd7',
+    'timesb;': '\u22a0',
+    'timesbar;': '\u2a31',
+    'timesd;': '\u2a30',
+    'tint;': '\u222d',
+    'toea;': '\u2928',
+    'top;': '\u22a4',
+    'topbot;': '\u2336',
+    'topcir;': '\u2af1',
+    'Topf;': '\U0001d54b',
+    'topf;': '\U0001d565',
+    'topfork;': '\u2ada',
+    'tosa;': '\u2929',
+    'tprime;': '\u2034',
+    'TRADE;': '\u2122',
+    'trade;': '\u2122',
+    'triangle;': '\u25b5',
+    'triangledown;': '\u25bf',
+    'triangleleft;': '\u25c3',
+    'trianglelefteq;': '\u22b4',
+    'triangleq;': '\u225c',
+    'triangleright;': '\u25b9',
+    'trianglerighteq;': '\u22b5',
+    'tridot;': '\u25ec',
+    'trie;': '\u225c',
+    'triminus;': '\u2a3a',
+    'TripleDot;': '\u20db',
+    'triplus;': '\u2a39',
+    'trisb;': '\u29cd',
+    'tritime;': '\u2a3b',
+    'trpezium;': '\u23e2',
+    'Tscr;': '\U0001d4af',
+    'tscr;': '\U0001d4c9',
+    'TScy;': '\u0426',
+    'tscy;': '\u0446',
+    'TSHcy;': '\u040b',
+    'tshcy;': '\u045b',
+    'Tstrok;': '\u0166',
+    'tstrok;': '\u0167',
+    'twixt;': '\u226c',
+    'twoheadleftarrow;': '\u219e',
+    'twoheadrightarrow;': '\u21a0',
+    'Uacute': '\xda',
+    'uacute': '\xfa',
+    'Uacute;': '\xda',
+    'uacute;': '\xfa',
+    'Uarr;': '\u219f',
+    'uArr;': '\u21d1',
+    'uarr;': '\u2191',
+    'Uarrocir;': '\u2949',
+    'Ubrcy;': '\u040e',
+    'ubrcy;': '\u045e',
+    'Ubreve;': '\u016c',
+    'ubreve;': '\u016d',
+    'Ucirc': '\xdb',
+    'ucirc': '\xfb',
+    'Ucirc;': '\xdb',
+    'ucirc;': '\xfb',
+    'Ucy;': '\u0423',
+    'ucy;': '\u0443',
+    'udarr;': '\u21c5',
+    'Udblac;': '\u0170',
+    'udblac;': '\u0171',
+    'udhar;': '\u296e',
+    'ufisht;': '\u297e',
+    'Ufr;': '\U0001d518',
+    'ufr;': '\U0001d532',
+    'Ugrave': '\xd9',
+    'ugrave': '\xf9',
+    'Ugrave;': '\xd9',
+    'ugrave;': '\xf9',
+    'uHar;': '\u2963',
+    'uharl;': '\u21bf',
+    'uharr;': '\u21be',
+    'uhblk;': '\u2580',
+    'ulcorn;': '\u231c',
+    'ulcorner;': '\u231c',
+    'ulcrop;': '\u230f',
+    'ultri;': '\u25f8',
+    'Umacr;': '\u016a',
+    'umacr;': '\u016b',
+    'uml': '\xa8',
+    'uml;': '\xa8',
+    'UnderBar;': '_',
+    'UnderBrace;': '\u23df',
+    'UnderBracket;': '\u23b5',
+    'UnderParenthesis;': '\u23dd',
+    'Union;': '\u22c3',
+    'UnionPlus;': '\u228e',
+    'Uogon;': '\u0172',
+    'uogon;': '\u0173',
+    'Uopf;': '\U0001d54c',
+    'uopf;': '\U0001d566',
+    'UpArrow;': '\u2191',
+    'Uparrow;': '\u21d1',
+    'uparrow;': '\u2191',
+    'UpArrowBar;': '\u2912',
+    'UpArrowDownArrow;': '\u21c5',
+    'UpDownArrow;': '\u2195',
+    'Updownarrow;': '\u21d5',
+    'updownarrow;': '\u2195',
+    'UpEquilibrium;': '\u296e',
+    'upharpoonleft;': '\u21bf',
+    'upharpoonright;': '\u21be',
+    'uplus;': '\u228e',
+    'UpperLeftArrow;': '\u2196',
+    'UpperRightArrow;': '\u2197',
+    'Upsi;': '\u03d2',
+    'upsi;': '\u03c5',
+    'upsih;': '\u03d2',
+    'Upsilon;': '\u03a5',
+    'upsilon;': '\u03c5',
+    'UpTee;': '\u22a5',
+    'UpTeeArrow;': '\u21a5',
+    'upuparrows;': '\u21c8',
+    'urcorn;': '\u231d',
+    'urcorner;': '\u231d',
+    'urcrop;': '\u230e',
+    'Uring;': '\u016e',
+    'uring;': '\u016f',
+    'urtri;': '\u25f9',
+    'Uscr;': '\U0001d4b0',
+    'uscr;': '\U0001d4ca',
+    'utdot;': '\u22f0',
+    'Utilde;': '\u0168',
+    'utilde;': '\u0169',
+    'utri;': '\u25b5',
+    'utrif;': '\u25b4',
+    'uuarr;': '\u21c8',
+    'Uuml': '\xdc',
+    'uuml': '\xfc',
+    'Uuml;': '\xdc',
+    'uuml;': '\xfc',
+    'uwangle;': '\u29a7',
+    'vangrt;': '\u299c',
+    'varepsilon;': '\u03f5',
+    'varkappa;': '\u03f0',
+    'varnothing;': '\u2205',
+    'varphi;': '\u03d5',
+    'varpi;': '\u03d6',
+    'varpropto;': '\u221d',
+    'vArr;': '\u21d5',
+    'varr;': '\u2195',
+    'varrho;': '\u03f1',
+    'varsigma;': '\u03c2',
+    'varsubsetneq;': '\u228a\ufe00',
+    'varsubsetneqq;': '\u2acb\ufe00',
+    'varsupsetneq;': '\u228b\ufe00',
+    'varsupsetneqq;': '\u2acc\ufe00',
+    'vartheta;': '\u03d1',
+    'vartriangleleft;': '\u22b2',
+    'vartriangleright;': '\u22b3',
+    'Vbar;': '\u2aeb',
+    'vBar;': '\u2ae8',
+    'vBarv;': '\u2ae9',
+    'Vcy;': '\u0412',
+    'vcy;': '\u0432',
+    'VDash;': '\u22ab',
+    'Vdash;': '\u22a9',
+    'vDash;': '\u22a8',
+    'vdash;': '\u22a2',
+    'Vdashl;': '\u2ae6',
+    'Vee;': '\u22c1',
+    'vee;': '\u2228',
+    'veebar;': '\u22bb',
+    'veeeq;': '\u225a',
+    'vellip;': '\u22ee',
+    'Verbar;': '\u2016',
+    'verbar;': '|',
+    'Vert;': '\u2016',
+    'vert;': '|',
+    'VerticalBar;': '\u2223',
+    'VerticalLine;': '|',
+    'VerticalSeparator;': '\u2758',
+    'VerticalTilde;': '\u2240',
+    'VeryThinSpace;': '\u200a',
+    'Vfr;': '\U0001d519',
+    'vfr;': '\U0001d533',
+    'vltri;': '\u22b2',
+    'vnsub;': '\u2282\u20d2',
+    'vnsup;': '\u2283\u20d2',
+    'Vopf;': '\U0001d54d',
+    'vopf;': '\U0001d567',
+    'vprop;': '\u221d',
+    'vrtri;': '\u22b3',
+    'Vscr;': '\U0001d4b1',
+    'vscr;': '\U0001d4cb',
+    'vsubnE;': '\u2acb\ufe00',
+    'vsubne;': '\u228a\ufe00',
+    'vsupnE;': '\u2acc\ufe00',
+    'vsupne;': '\u228b\ufe00',
+    'Vvdash;': '\u22aa',
+    'vzigzag;': '\u299a',
+    'Wcirc;': '\u0174',
+    'wcirc;': '\u0175',
+    'wedbar;': '\u2a5f',
+    'Wedge;': '\u22c0',
+    'wedge;': '\u2227',
+    'wedgeq;': '\u2259',
+    'weierp;': '\u2118',
+    'Wfr;': '\U0001d51a',
+    'wfr;': '\U0001d534',
+    'Wopf;': '\U0001d54e',
+    'wopf;': '\U0001d568',
+    'wp;': '\u2118',
+    'wr;': '\u2240',
+    'wreath;': '\u2240',
+    'Wscr;': '\U0001d4b2',
+    'wscr;': '\U0001d4cc',
+    'xcap;': '\u22c2',
+    'xcirc;': '\u25ef',
+    'xcup;': '\u22c3',
+    'xdtri;': '\u25bd',
+    'Xfr;': '\U0001d51b',
+    'xfr;': '\U0001d535',
+    'xhArr;': '\u27fa',
+    'xharr;': '\u27f7',
+    'Xi;': '\u039e',
+    'xi;': '\u03be',
+    'xlArr;': '\u27f8',
+    'xlarr;': '\u27f5',
+    'xmap;': '\u27fc',
+    'xnis;': '\u22fb',
+    'xodot;': '\u2a00',
+    'Xopf;': '\U0001d54f',
+    'xopf;': '\U0001d569',
+    'xoplus;': '\u2a01',
+    'xotime;': '\u2a02',
+    'xrArr;': '\u27f9',
+    'xrarr;': '\u27f6',
+    'Xscr;': '\U0001d4b3',
+    'xscr;': '\U0001d4cd',
+    'xsqcup;': '\u2a06',
+    'xuplus;': '\u2a04',
+    'xutri;': '\u25b3',
+    'xvee;': '\u22c1',
+    'xwedge;': '\u22c0',
+    'Yacute': '\xdd',
+    'yacute': '\xfd',
+    'Yacute;': '\xdd',
+    'yacute;': '\xfd',
+    'YAcy;': '\u042f',
+    'yacy;': '\u044f',
+    'Ycirc;': '\u0176',
+    'ycirc;': '\u0177',
+    'Ycy;': '\u042b',
+    'ycy;': '\u044b',
+    'yen': '\xa5',
+    'yen;': '\xa5',
+    'Yfr;': '\U0001d51c',
+    'yfr;': '\U0001d536',
+    'YIcy;': '\u0407',
+    'yicy;': '\u0457',
+    'Yopf;': '\U0001d550',
+    'yopf;': '\U0001d56a',
+    'Yscr;': '\U0001d4b4',
+    'yscr;': '\U0001d4ce',
+    'YUcy;': '\u042e',
+    'yucy;': '\u044e',
+    'yuml': '\xff',
+    'Yuml;': '\u0178',
+    'yuml;': '\xff',
+    'Zacute;': '\u0179',
+    'zacute;': '\u017a',
+    'Zcaron;': '\u017d',
+    'zcaron;': '\u017e',
+    'Zcy;': '\u0417',
+    'zcy;': '\u0437',
+    'Zdot;': '\u017b',
+    'zdot;': '\u017c',
+    'zeetrf;': '\u2128',
+    'ZeroWidthSpace;': '\u200b',
+    'Zeta;': '\u0396',
+    'zeta;': '\u03b6',
+    'Zfr;': '\u2128',
+    'zfr;': '\U0001d537',
+    'ZHcy;': '\u0416',
+    'zhcy;': '\u0436',
+    'zigrarr;': '\u21dd',
+    'Zopf;': '\u2124',
+    'zopf;': '\U0001d56b',
+    'Zscr;': '\U0001d4b5',
+    'zscr;': '\U0001d4cf',
+    'zwj;': '\u200d',
+    'zwnj;': '\u200c',
+}
+
+# maps the Unicode code point to the HTML entity name
+codepoint2name = {}
+
+# maps the HTML entity name to the character
+# (or a character reference if the character is outside the Latin-1 range)
+entitydefs = {}
+
+for (name, codepoint) in name2codepoint.items():
+    codepoint2name[codepoint] = name
+    entitydefs[name] = chr(codepoint)
+
+del name, codepoint
diff --git a/modules/language/python/module/html/parser.py b/modules/language/python/module/html/parser.py
new file mode 100644
index 0000000..c46eda8
--- /dev/null
+++ b/modules/language/python/module/html/parser.py
@@ -0,0 +1,472 @@
+module(html,parser)
+
+"""A parser for HTML and XHTML."""
+
+# This file is based on sgmllib.py, but the API is slightly different.
+
+# XXX There should be a way to distinguish between PCDATA (parsed
+# character data -- the normal case), RCDATA (replaceable character
+# data -- only char and entity references and end tags are special)
+# and CDATA (character data -- only end tags are special).
+
+
+import re
+import warnings
+import _markupbase
+
+from html import unescape
+
+
+__all__ = ['HTMLParser']
+
+# Regular expressions used for parsing
+
+interesting_normal = re.compile('[&<]')
+incomplete = re.compile('&[a-zA-Z#]')
+
+entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
+charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
+
+starttagopen = re.compile('<[a-zA-Z]')
+piclose = re.compile('>')
+commentclose = re.compile(r'--\s*>')
+# Note:
+#  1) if you change tagfind/attrfind remember to update locatestarttagend too;
+#  2) if you change tagfind/attrfind and/or locatestarttagend the parser will
+#     explode, so don't do it.
+# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
+# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
+tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*')
+attrfind_tolerant = re.compile(
+    r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
+    r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
+locatestarttagend_tolerant = re.compile(r"""
+  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
+  (?:[\s/]*                          # optional whitespace before attribute name
+    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
+      (?:\s*=+\s*                    # value indicator
+        (?:'[^']*'                   # LITA-enclosed value
+          |"[^"]*"                   # LIT-enclosed value
+          |(?!['"])[^>\s]*           # bare value
+         )
+         (?:\s*,)*                   # possibly followed by a comma
+       )?(?:\s|/(?!>))*
+     )*
+   )?
+  \s*                                # trailing whitespace
+""", re.VERBOSE)
+endendtag = re.compile('>')
+# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
+# </ and the tag name, so maybe this should be fixed
+endtagfind = re.compile(r'</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
+
+
+
+class HTMLParser(_markupbase.ParserBase):
+    """Find tags and other markup and call handler functions.
+
+    Usage:
+        p = HTMLParser()
+        p.feed(data)
+        ...
+        p.close()
+
+    Start tags are handled by calling self.handle_starttag() or
+    self.handle_startendtag(); end tags by self.handle_endtag().  The
+    data between tags is passed from the parser to the derived class
+    by calling self.handle_data() with the data as argument (the data
+    may be split up in arbitrary chunks).  If convert_charrefs is
+    True the character references are converted automatically to the
+    corresponding Unicode character (and self.handle_data() is no
+    longer split in chunks), otherwise they are passed by calling
+    self.handle_entityref() or self.handle_charref() with the string
+    containing respectively the named or numeric reference as the
+    argument.
+    """
+
+    CDATA_CONTENT_ELEMENTS = ("script", "style")
+
+    def __init__(self, *, convert_charrefs=True):
+        """Initialize and reset this instance.
+
+        If convert_charrefs is True (the default), all character references
+        are automatically converted to the corresponding Unicode characters.
+        """
+        self.convert_charrefs = convert_charrefs
+        self.reset()
+
+    def reset(self):
+        """Reset this instance.  Loses all unprocessed data."""
+        self.rawdata = ''
+        self.lasttag = '???'
+        self.interesting = interesting_normal
+        self.cdata_elem = None
+        _markupbase.ParserBase.reset(self)
+
+    def feed(self, data):
+        r"""Feed data to the parser.
+
+        Call this as often as you want, with as little or as much text
+        as you want (may include '\n').
+        """
+        self.rawdata = self.rawdata + data
+        self.goahead(0)
+
+    def close(self):
+        """Handle any buffered data."""
+        self.goahead(1)
+
+    __starttag_text = None
+
+    def get_starttag_text(self):
+        """Return full source of start tag: '<...>'."""
+        return self.__starttag_text
+
+    def set_cdata_mode(self, elem):
+        self.cdata_elem = elem.lower()
+        self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
+
+    def clear_cdata_mode(self):
+        self.interesting = interesting_normal
+        self.cdata_elem = None
+
+    # Internal -- handle data as far as reasonable.  May leave state
+    # and data to be processed by a subsequent call.  If 'end' is
+    # true, force handling all data as if followed by EOF marker.
+    def goahead(self, end):
+        rawdata = self.rawdata
+        i = 0
+        n = len(rawdata)
+        while i < n:
+            if self.convert_charrefs and not self.cdata_elem:
+                j = rawdata.find('<', i)
+                if j < 0:
+                    # if we can't find the next <, either we are at the end
+                    # or there's more text incoming.  If the latter is True,
+                    # we can't pass the text to handle_data in case we have
+                    # a charref cut in half at end.  Try to determine if
+                    # this is the case before proceeding by looking for an
+                    # & near the end and see if it's followed by a space or ;.
+                    amppos = rawdata.rfind('&', max(i, n-34))
+                    if (amppos >= 0 and
+                        not re.compile(r'[\s;]').search(rawdata, amppos)):
+                        break  # wait till we get all the text
+                    j = n
+            else:
+                match = self.interesting.search(rawdata, i)  # < or &
+                if match:
+                    j = match.start()
+                else:
+                    if self.cdata_elem:
+                        break
+                    j = n
+            if i < j:
+                if self.convert_charrefs and not self.cdata_elem:
+                    self.handle_data(unescape(rawdata[i:j]))
+                else:
+                    self.handle_data(rawdata[i:j])
+            i = self.updatepos(i, j)
+            if i == n: break
+            startswith = rawdata.startswith
+            if startswith('<', i):
+                if starttagopen.match(rawdata, i): # < + letter
+                    k = self.parse_starttag(i)
+                elif startswith("</", i):
+                    k = self.parse_endtag(i)
+                elif startswith("<!--", i):
+                    k = self.parse_comment(i)
+                elif startswith("<?", i):
+                    k = self.parse_pi(i)
+                elif startswith("<!", i):
+                    k = self.parse_html_declaration(i)
+                elif (i + 1) < n:
+                    self.handle_data("<")
+                    k = i + 1
+                else:
+                    break
+                if k < 0:
+                    if not end:
+                        break
+                    k = rawdata.find('>', i + 1)
+                    if k < 0:
+                        k = rawdata.find('<', i + 1)
+                        if k < 0:
+                            k = i + 1
+                    else:
+                        k += 1
+                    if self.convert_charrefs and not self.cdata_elem:
+                        self.handle_data(unescape(rawdata[i:k]))
+                    else:
+                        self.handle_data(rawdata[i:k])
+                i = self.updatepos(i, k)
+            elif startswith("&#", i):
+                match = charref.match(rawdata, i)
+                if match:
+                    name = match.group()[2:-1]
+                    self.handle_charref(name)
+                    k = match.end()
+                    if not startswith(';', k-1):
+                        k = k - 1
+                    i = self.updatepos(i, k)
+                    continue
+                else:
+                    if ";" in rawdata[i:]:  # bail by consuming &#
+                        self.handle_data(rawdata[i:i+2])
+                        i = self.updatepos(i, i+2)
+                    break
+            elif startswith('&', i):
+                match = entityref.match(rawdata, i)
+                if match:
+                    name = match.group(1)
+                    self.handle_entityref(name)
+                    k = match.end()
+                    if not startswith(';', k-1):
+                        k = k - 1
+                    i = self.updatepos(i, k)
+                    continue
+                match = incomplete.match(rawdata, i)
+                if match:
+                    # match.group() will contain at least 2 chars
+                    if end and match.group() == rawdata[i:]:
+                        k = match.end()
+                        if k <= i:
+                            k = n
+                        i = self.updatepos(i, i + 1)
+                    # incomplete
+                    break
+                elif (i + 1) < n:
+                    # not the end of the buffer, and can't be confused
+                    # with some other construct
+                    self.handle_data("&")
+                    i = self.updatepos(i, i + 1)
+                else:
+                    break
+            else:
+                assert 0, "interesting.search() lied"
+        # end while
+        if end and i < n and not self.cdata_elem:
+            if self.convert_charrefs and not self.cdata_elem:
+                self.handle_data(unescape(rawdata[i:n]))
+            else:
+                self.handle_data(rawdata[i:n])
+            i = self.updatepos(i, n)
+        self.rawdata = rawdata[i:]
+
+    # Internal -- parse html declarations, return length or -1 if not terminated
+    # See w3.org/TR/html5/tokenization.html#markup-declaration-open-state
+    # See also parse_declaration in _markupbase
+    def parse_html_declaration(self, i):
+        rawdata = self.rawdata
+        assert rawdata[i:i+2] == '<!', ('unexpected call to '
+                                        'parse_html_declaration()')
+        if rawdata[i:i+4] == '<!--':
+            # this case is actually already handled in goahead()
+            return self.parse_comment(i)
+        elif rawdata[i:i+3] == '<![':
+            return self.parse_marked_section(i)
+        elif rawdata[i:i+9].lower() == '<!doctype':
+            # find the closing >
+            gtpos = rawdata.find('>', i+9)
+            if gtpos == -1:
+                return -1
+            self.handle_decl(rawdata[i+2:gtpos])
+            return gtpos+1
+        else:
+            return self.parse_bogus_comment(i)
+
+    # Internal -- parse bogus comment, return length or -1 if not terminated
+    # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
+    def parse_bogus_comment(self, i, report=1):
+        rawdata = self.rawdata
+        assert rawdata[i:i+2] in ('<!', '</'), ('unexpected call to '
+                                                'parse_comment()')
+        pos = rawdata.find('>', i+2)
+        if pos == -1:
+            return -1
+        if report:
+            self.handle_comment(rawdata[i+2:pos])
+        return pos + 1
+
+    # Internal -- parse processing instr, return end or -1 if not terminated
+    def parse_pi(self, i):
+        rawdata = self.rawdata
+        assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
+        match = piclose.search(rawdata, i+2) # >
+        if not match:
+            return -1
+        j = match.start()
+        self.handle_pi(rawdata[i+2: j])
+        j = match.end()
+        return j
+
+    # Internal -- handle starttag, return end or -1 if not terminated
+    def parse_starttag(self, i):
+        self.__starttag_text = None
+        endpos = self.check_for_whole_start_tag(i)
+        if endpos < 0:
+            return endpos
+        rawdata = self.rawdata
+        self.__starttag_text = rawdata[i:endpos]
+
+        # Now parse the data between i+1 and j into a tag and attrs
+        attrs = []
+        match = tagfind_tolerant.match(rawdata, i+1)
+        assert match, 'unexpected call to parse_starttag()'
+        k = match.end()
+        self.lasttag = tag = match.group(1).lower()
+        while k < endpos:
+            m = attrfind_tolerant.match(rawdata, k)
+            if not m:
+                break
+            attrname, rest, attrvalue = m.group(1, 2, 3)
+            if not rest:
+                attrvalue = None
+            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
+                 attrvalue[:1] == '"' == attrvalue[-1:]:
+                attrvalue = attrvalue[1:-1]
+            if attrvalue:
+                attrvalue = unescape(attrvalue)
+            attrs.append((attrname.lower(), attrvalue))
+            k = m.end()
+
+        end = rawdata[k:endpos].strip()
+        if end not in (">", "/>"):
+            lineno, offset = self.getpos()
+            if "\n" in self.__starttag_text:
+                lineno = lineno + self.__starttag_text.count("\n")
+                offset = len(self.__starttag_text) \
+                         - self.__starttag_text.rfind("\n")
+            else:
+                offset = offset + len(self.__starttag_text)
+            self.handle_data(rawdata[i:endpos])
+            return endpos
+        if end.endswith('/>'):
+            # XHTML-style empty tag: <span attr="value" />
+            self.handle_startendtag(tag, attrs)
+        else:
+            self.handle_starttag(tag, attrs)
+            if tag in self.CDATA_CONTENT_ELEMENTS:
+                self.set_cdata_mode(tag)
+        return endpos
+
+    # Internal -- check to see if we have a complete starttag; return end
+    # or -1 if incomplete.
+    def check_for_whole_start_tag(self, i):
+        rawdata = self.rawdata
+        m = locatestarttagend_tolerant.match(rawdata, i)
+        if m:
+            j = m.end()
+            next = rawdata[j:j+1]
+            if next == ">":
+                return j + 1
+            if next == "/":
+                if rawdata.startswith("/>", j):
+                    return j + 2
+                if rawdata.startswith("/", j):
+                    # buffer boundary
+                    return -1
+                # else bogus input
+                if j > i:
+                    return j
+                else:
+                    return i + 1
+            if next == "":
+                # end of input
+                return -1
+            if next in ("abcdefghijklmnopqrstuvwxyz=/"
+                        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
+                # end of input in or before attribute value, or we have the
+                # '/' from a '/>' ending
+                return -1
+            if j > i:
+                return j
+            else:
+                return i + 1
+        raise AssertionError("we should not get here!")
+
+    # Internal -- parse endtag, return end or -1 if incomplete
+    def parse_endtag(self, i):
+        rawdata = self.rawdata
+        assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
+        match = endendtag.search(rawdata, i+1) # >
+        if not match:
+            return -1
+        gtpos = match.end()
+        match = endtagfind.match(rawdata, i) # </ + tag + >
+        if not match:
+            if self.cdata_elem is not None:
+                self.handle_data(rawdata[i:gtpos])
+                return gtpos
+            # find the name: w3.org/TR/html5/tokenization.html#tag-name-state
+            namematch = tagfind_tolerant.match(rawdata, i+2)
+            if not namematch:
+                # w3.org/TR/html5/tokenization.html#end-tag-open-state
+                if rawdata[i:i+3] == '</>':
+                    return i+3
+                else:
+                    return self.parse_bogus_comment(i)
+            tagname = namematch.group(1).lower()
+            # consume and ignore other stuff between the name and the >
+            # Note: this is not 100% correct, since we might have things like
+            # </tag attr=">">, but looking for > after tha name should cover
+            # most of the cases and is much simpler
+            gtpos = rawdata.find('>', namematch.end())
+            self.handle_endtag(tagname)
+            return gtpos+1
+
+        elem = match.group(1).lower() # script or style
+        if self.cdata_elem is not None:
+            if elem != self.cdata_elem:
+                self.handle_data(rawdata[i:gtpos])
+                return gtpos
+
+        self.handle_endtag(elem.lower())
+        self.clear_cdata_mode()
+        return gtpos
+
+    # Overridable -- finish processing of start+end tag: <tag.../>
+    def handle_startendtag(self, tag, attrs):
+        self.handle_starttag(tag, attrs)
+        self.handle_endtag(tag)
+
+    # Overridable -- handle start tag
+    def handle_starttag(self, tag, attrs):
+        pass
+
+    # Overridable -- handle end tag
+    def handle_endtag(self, tag):
+        pass
+
+    # Overridable -- handle character reference
+    def handle_charref(self, name):
+        pass
+
+    # Overridable -- handle entity reference
+    def handle_entityref(self, name):
+        pass
+
+    # Overridable -- handle data
+    def handle_data(self, data):
+        pass
+
+    # Overridable -- handle comment
+    def handle_comment(self, data):
+        pass
+
+    # Overridable -- handle declaration
+    def handle_decl(self, decl):
+        pass
+
+    # Overridable -- handle processing instruction
+    def handle_pi(self, data):
+        pass
+
+    def unknown_decl(self, data):
+        pass
+
+    # Internal -- helper to remove special character quoting
+    def unescape(self, s):
+        warnings.warn('The unescape method is deprecated and will be removed '
+                      'in 3.5, use html.unescape() instead.',
+                      DeprecationWarning, stacklevel=2)
+        return unescape(s)
diff --git a/modules/language/python/module/http.py b/modules/language/python/module/http.py
new file mode 100644
index 0000000..bd9a971
--- /dev/null
+++ b/modules/language/python/module/http.py
@@ -0,0 +1,136 @@
+module(http)
+
+from enum import IntEnum
+
+__all__ = ['HTTPStatus']
+
+class HTTPStatus(IntEnum):
+    """HTTP status codes and reason phrases
+
+    Status codes from the following RFCs are all observed:
+
+        * RFC 7231: Hypertext Transfer Protocol (HTTP/1.1), obsoletes 2616
+        * RFC 6585: Additional HTTP Status Codes
+        * RFC 3229: Delta encoding in HTTP
+        * RFC 4918: HTTP Extensions for WebDAV, obsoletes 2518
+        * RFC 5842: Binding Extensions to WebDAV
+        * RFC 7238: Permanent Redirect
+        * RFC 2295: Transparent Content Negotiation in HTTP
+        * RFC 2774: An HTTP Extension Framework
+    """
+    def __new__(cls, value, phrase, description=''):
+        obj = int.__newobj__(cls, value)
+        obj._value_ = value
+
+        obj.phrase = phrase
+        obj.description = description
+        return obj
+
+    # informational
+    CONTINUE = 100, 'Continue', 'Request received, please continue'
+    SWITCHING_PROTOCOLS = (101, 'Switching Protocols',
+            'Switching to new protocol; obey Upgrade header')
+    PROCESSING = 102, 'Processing'
+
+    # success
+    OK = 200, 'OK', 'Request fulfilled, document follows'
+    CREATED = 201, 'Created', 'Document created, URL follows'
+    ACCEPTED = (202, 'Accepted',
+        'Request accepted, processing continues off-line')
+    NON_AUTHORITATIVE_INFORMATION = (203,
+        'Non-Authoritative Information', 'Request fulfilled from cache')
+    NO_CONTENT = 204, 'No Content', 'Request fulfilled, nothing follows'
+    RESET_CONTENT = 205, 'Reset Content', 'Clear input form for further input'
+    PARTIAL_CONTENT = 206, 'Partial Content', 'Partial content follows'
+    MULTI_STATUS = 207, 'Multi-Status'
+    ALREADY_REPORTED = 208, 'Already Reported'
+    IM_USED = 226, 'IM Used'
+
+    # redirection
+    MULTIPLE_CHOICES = (300, 'Multiple Choices',
+        'Object has several resources -- see URI list')
+    MOVED_PERMANENTLY = (301, 'Moved Permanently',
+        'Object moved permanently -- see URI list')
+    FOUND = 302, 'Found', 'Object moved temporarily -- see URI list'
+    SEE_OTHER = 303, 'See Other', 'Object moved -- see Method and URL list'
+    NOT_MODIFIED = (304, 'Not Modified',
+        'Document has not changed since given time')
+    USE_PROXY = (305, 'Use Proxy',
+        'You must use proxy specified in Location to access this resource')
+    TEMPORARY_REDIRECT = (307, 'Temporary Redirect',
+        'Object moved temporarily -- see URI list')
+    PERMANENT_REDIRECT = (308, 'Permanent Redirect',
+        'Object moved temporarily -- see URI list')
+
+    # client error
+    BAD_REQUEST = (400, 'Bad Request',
+        'Bad request syntax or unsupported method')
+    UNAUTHORIZED = (401, 'Unauthorized',
+        'No permission -- see authorization schemes')
+    PAYMENT_REQUIRED = (402, 'Payment Required',
+        'No payment -- see charging schemes')
+    FORBIDDEN = (403, 'Forbidden',
+        'Request forbidden -- authorization will not help')
+    NOT_FOUND = (404, 'Not Found',
+        'Nothing matches the given URI')
+    METHOD_NOT_ALLOWED = (405, 'Method Not Allowed',
+        'Specified method is invalid for this resource')
+    NOT_ACCEPTABLE = (406, 'Not Acceptable',
+        'URI not available in preferred format')
+    PROXY_AUTHENTICATION_REQUIRED = (407,
+        'Proxy Authentication Required',
+        'You must authenticate with this proxy before proceeding')
+    REQUEST_TIMEOUT = (408, 'Request Timeout',
+        'Request timed out; try again later')
+    CONFLICT = 409, 'Conflict', 'Request conflict'
+    GONE = (410, 'Gone',
+        'URI no longer exists and has been permanently removed')
+    LENGTH_REQUIRED = (411, 'Length Required',
+        'Client must specify Content-Length')
+    PRECONDITION_FAILED = (412, 'Precondition Failed',
+        'Precondition in headers is false')
+    REQUEST_ENTITY_TOO_LARGE = (413, 'Request Entity Too Large',
+        'Entity is too large')
+    REQUEST_URI_TOO_LONG = (414, 'Request-URI Too Long',
+        'URI is too long')
+    UNSUPPORTED_MEDIA_TYPE = (415, 'Unsupported Media Type',
+        'Entity body in unsupported format')
+    REQUESTED_RANGE_NOT_SATISFIABLE = (416,
+        'Requested Range Not Satisfiable',
+        'Cannot satisfy request range')
+    EXPECTATION_FAILED = (417, 'Expectation Failed',
+        'Expect condition could not be satisfied')
+    UNPROCESSABLE_ENTITY = 422, 'Unprocessable Entity'
+    LOCKED = 423, 'Locked'
+    FAILED_DEPENDENCY = 424, 'Failed Dependency'
+    UPGRADE_REQUIRED = 426, 'Upgrade Required'
+    PRECONDITION_REQUIRED = (428, 'Precondition Required',
+        'The origin server requires the request to be conditional')
+    TOO_MANY_REQUESTS = (429, 'Too Many Requests',
+        'The user has sent too many requests in '
+        'a given amount of time ("rate limiting")')
+    REQUEST_HEADER_FIELDS_TOO_LARGE = (431,
+        'Request Header Fields Too Large',
+        'The server is unwilling to process the request because its header '
+        'fields are too large')
+
+    # server errors
+    INTERNAL_SERVER_ERROR = (500, 'Internal Server Error',
+        'Server got itself in trouble')
+    NOT_IMPLEMENTED = (501, 'Not Implemented',
+        'Server does not support this operation')
+    BAD_GATEWAY = (502, 'Bad Gateway',
+        'Invalid responses from another server/proxy')
+    SERVICE_UNAVAILABLE = (503, 'Service Unavailable',
+        'The server cannot process the request due to a high load')
+    GATEWAY_TIMEOUT = (504, 'Gateway Timeout',
+        'The gateway server did not receive a timely response')
+    HTTP_VERSION_NOT_SUPPORTED = (505, 'HTTP Version Not Supported',
+        'Cannot fulfill request')
+    VARIANT_ALSO_NEGOTIATES = 506, 'Variant Also Negotiates'
+    INSUFFICIENT_STORAGE = 507, 'Insufficient Storage'
+    LOOP_DETECTED = 508, 'Loop Detected'
+    NOT_EXTENDED = 510, 'Not Extended'
+    NETWORK_AUTHENTICATION_REQUIRED = (511,
+        'Network Authentication Required',
+        'The client needs to authenticate to gain network access')
diff --git a/modules/language/python/module/http/client.py b/modules/language/python/module/http/client.py
new file mode 100644
index 0000000..8dd6028
--- /dev/null
+++ b/modules/language/python/module/http/client.py
@@ -0,0 +1,1480 @@
+module(http,client)
+
+r"""HTTP/1.1 client library
+
+<intro stuff goes here>
+<other stuff, too>
+
+HTTPConnection goes through a number of "states", which define when a client
+may legally make another request or fetch the response for a particular
+request. This diagram details these state transitions:
+
+    (null)
+      |
+      | HTTPConnection()
+      v
+    Idle
+      |
+      | putrequest()
+      v
+    Request-started
+      |
+      | ( putheader() )*  endheaders()
+      v
+    Request-sent
+      |\_____________________________
+      |                              | getresponse() raises
+      | response = getresponse()     | ConnectionError
+      v                              v
+    Unread-response                Idle
+    [Response-headers-read]
+      |\____________________
+      |                     |
+      | response.read()     | putrequest()
+      v                     v
+    Idle                  Req-started-unread-response
+                     ______/|
+                   /        |
+   response.read() |        | ( putheader() )*  endheaders()
+                   v        v
+       Request-started    Req-sent-unread-response
+                            |
+                            | response.read()
+                            v
+                          Request-sent
+
+This diagram presents the following rules:
+  -- a second request may not be started until {response-headers-read}
+  -- a response [object] cannot be retrieved until {request-sent}
+  -- there is no differentiation between an unread response body and a
+     partially read response body
+
+Note: this enforcement is applied by the HTTPConnection class. The
+      HTTPResponse class does not enforce this state machine, which
+      implies sophisticated clients may accelerate the request/response
+      pipeline. Caution should be taken, though: accelerating the states
+      beyond the above pattern may imply knowledge of the server's
+      connection-close behavior for certain requests. For example, it
+      is impossible to tell whether the server will close the connection
+      UNTIL the response headers have been read; this means that further
+      requests cannot be placed into the pipeline until it is known that
+      the server will NOT be closing the connection.
+
+Logical State                  __state            __response
+-------------                  -------            ----------
+Idle                           _CS_IDLE           None
+Request-started                _CS_REQ_STARTED    None
+Request-sent                   _CS_REQ_SENT       None
+Unread-response                _CS_IDLE           <response_class>
+Req-started-unread-response    _CS_REQ_STARTED    <response_class>
+Req-sent-unread-response       _CS_REQ_SENT       <response_class>
+"""
+
+import email.parser
+import email.message
+import http
+import io
+import os
+import re
+import socket
+import collections
+from urllib.parse import urlsplit
+
+# HTTPMessage, parse_headers(), and the HTTP status code constants are
+# intentionally omitted for simplicity
+__all__ = ["HTTPResponse", "HTTPConnection",
+           "HTTPException", "NotConnected", "UnknownProtocol",
+           "UnknownTransferEncoding", "UnimplementedFileMode",
+           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
+           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
+           "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
+           "responses"]
+
+HTTP_PORT = 80
+HTTPS_PORT = 443
+
+_UNKNOWN = 'UNKNOWN'
+
+# connection states
+_CS_IDLE = 'Idle'
+_CS_REQ_STARTED = 'Request-started'
+_CS_REQ_SENT = 'Request-sent'
+
+# hack to maintain backwards compatibility
+globals().update(http.HTTPStatus.__members__)
+
+# another hack to maintain backwards compatibility
+# Mapping status codes to official W3C names
+pk(6)
+responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}
+pk(7)
+# maximal amount of data to read at one time in _safe_read
+MAXAMOUNT = 1048576
+
+# maximal line length when calling readline().
+_MAXLINE = 65536
+_MAXHEADERS = 100
+
+# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
+#
+# VCHAR          = %x21-7E
+# obs-text       = %x80-FF
+# header-field   = field-name ":" OWS field-value OWS
+# field-name     = token
+# field-value    = *( field-content / obs-fold )
+# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+# field-vchar    = VCHAR / obs-text
+#
+# obs-fold       = CRLF 1*( SP / HTAB )
+#                ; obsolete line folding
+#                ; see Section 3.2.4
+
+# token          = 1*tchar
+#
+# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+#                / DIGIT / ALPHA
+#                ; any VCHAR, except delimiters
+#
+# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
+
+# the patterns for both name and value are more lenient than RFC
+# definitions to allow for backwards compatibility
+_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
+_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search
+
+# We always set the Content-Length header for these methods because some
+# servers will otherwise respond with a 411
+_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
+
+pk(8)
+def _encode(data, name='data'):
+    """Call data.encode("latin-1") but show a better error message."""
+    try:
+        return data.encode("latin-1")
+    except UnicodeEncodeError as err:
+        raise UnicodeEncodeError(
+            err.encoding,
+            err.object,
+            err.start,
+            err.end,
+            "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') "
+            "if you want to send it encoded in UTF-8." %
+            (name.title(), data[err.start:err.end], name)) from None
+
+
+class HTTPMessage(email.message.Message):
+    # XXX The only usage of this method is in
+    # http.server.CGIHTTPRequestHandler.  Maybe move the code there so
+    # that it doesn't need to be part of the public API.  The API has
+    # never been defined so this could cause backwards compatibility
+    # issues.
+
+    def getallmatchingheaders(self, name):
+        """Find all header lines matching a given header name.
+
+        Look through the list of headers and find all lines matching a given
+        header name (and their continuation lines).  A list of the lines is
+        returned, without interpretation.  If the header does not occur, an
+        empty list is returned.  If the header occurs multiple times, all
+        occurrences are returned.  Case is not important in the header name.
+
+        """
+        name = name.lower() + ':'
+        n = len(name)
+        lst = []
+        hit = 0
+        for line in self.keys():
+            if line[:n].lower() == name:
+                hit = 1
+            elif not line[:1].isspace():
+                hit = 0
+            if hit:
+                lst.append(line)
+        return lst
+
+def parse_headers(fp, _class=HTTPMessage):
+    """Parses only RFC2822 headers from a file pointer.
+
+    email Parser wants to see strings rather than bytes.
+    But a TextIOWrapper around self.rfile would buffer too many bytes
+    from the stream, bytes which we later need to read as bytes.
+    So we read the correct bytes here, as bytes, for email Parser
+    to parse.
+
+    """
+    headers = []
+    while True:
+        line = fp.readline(_MAXLINE + 1)
+        if len(line) > _MAXLINE:
+            raise LineTooLong("header line")
+        headers.append(line)
+        if len(headers) > _MAXHEADERS:
+            raise HTTPException("got more than %d headers" % _MAXHEADERS)
+        if line in (b'\r\n', b'\n', b''):
+            break
+    hstring = b''.join(headers).decode('iso-8859-1')
+    return email.parser.Parser(_class=_class).parsestr(hstring)
+
+
+class HTTPResponse(io.BufferedIOBase):
+
+    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
+
+    # The bytes from the socket object are iso-8859-1 strings.
+    # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
+    # text following RFC 2047.  The basic status line parsing only
+    # accepts iso-8859-1.
+
+    def __init__(self, sock, debuglevel=0, method=None, url=None):
+        # If the response includes a content-length header, we need to
+        # make sure that the client doesn't read more than the
+        # specified number of bytes.  If it does, it will block until
+        # the server times out and closes the connection.  This will
+        # happen if a self.fp.read() is done (without a size) whether
+        # self.fp is buffered or not.  So, no self.fp.read() by
+        # clients unless they know what they are doing.
+        self.fp = sock.makefile("rb")
+        self.debuglevel = debuglevel
+        self._method = method
+
+        # The HTTPResponse object is returned via urllib.  The clients
+        # of http and urllib expect different attributes for the
+        # headers.  headers is used here and supports urllib.  msg is
+        # provided as a backwards compatibility layer for http
+        # clients.
+
+        self.headers = self.msg = None
+
+        # from the Status-Line of the response
+        self.version = _UNKNOWN # HTTP-Version
+        self.status = _UNKNOWN  # Status-Code
+        self.reason = _UNKNOWN  # Reason-Phrase
+
+        self.chunked = _UNKNOWN         # is "chunked" being used?
+        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
+        self.length = _UNKNOWN          # number of bytes left in response
+        self.will_close = _UNKNOWN      # conn will close at end of response
+
+    def _read_status(self):
+        line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
+        if len(line) > _MAXLINE:
+            raise LineTooLong("status line")
+        if self.debuglevel > 0:
+            print("reply:", repr(line))
+        if not line:
+            # Presumably, the server closed the connection before
+            # sending a valid response.
+            raise RemoteDisconnected("Remote end closed connection without"
+                                     " response")
+        try:
+            version, status, reason = line.split(None, 2)
+        except ValueError:
+            try:
+                version, status = line.split(None, 1)
+                reason = ""
+            except ValueError:
+                # empty version will cause next test to fail.
+                version = ""
+        if not version.startswith("HTTP/"):
+            self._close_conn()
+            raise BadStatusLine(line)
+
+        # The status code is a three-digit number
+        try:
+            status = int(status)
+            if status < 100 or status > 999:
+                raise BadStatusLine(line)
+        except ValueError:
+            raise BadStatusLine(line)
+        return version, status, reason
+
+    def begin(self):
+        if self.headers is not None:
+            # we've already started reading the response
+            return
+
+        # read until we get a non-100 response
+        while True:
+            version, status, reason = self._read_status()
+            if status != CONTINUE:
+                break
+            # skip the header from the 100 response
+            while True:
+                skip = self.fp.readline(_MAXLINE + 1)
+                if len(skip) > _MAXLINE:
+                    raise LineTooLong("header line")
+                skip = skip.strip()
+                if not skip:
+                    break
+                if self.debuglevel > 0:
+                    print("header:", skip)
+
+        self.code = self.status = status
+        self.reason = reason.strip()
+        if version in ("HTTP/1.0", "HTTP/0.9"):
+            # Some servers might still return "0.9", treat it as 1.0 anyway
+            self.version = 10
+        elif version.startswith("HTTP/1."):
+            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
+        else:
+            raise UnknownProtocol(version)
+
+        self.headers = self.msg = parse_headers(self.fp)
+
+        if self.debuglevel > 0:
+            for hdr in self.headers:
+                print("header:", hdr, end=" ")
+
+        # are we using the chunked-style of transfer encoding?
+        tr_enc = self.headers.get("transfer-encoding")
+        if tr_enc and tr_enc.lower() == "chunked":
+            self.chunked = True
+            self.chunk_left = None
+        else:
+            self.chunked = False
+
+        # will the connection close at the end of the response?
+        self.will_close = self._check_close()
+
+        # do we have a Content-Length?
+        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
+        self.length = None
+        length = self.headers.get("content-length")
+
+         # are we using the chunked-style of transfer encoding?
+        tr_enc = self.headers.get("transfer-encoding")
+        if length and not self.chunked:
+            try:
+                self.length = int(length)
+            except ValueError:
+                self.length = None
+            else:
+                if self.length < 0:  # ignore nonsensical negative lengths
+                    self.length = None
+        else:
+            self.length = None
+
+        # does the body have a fixed length? (of zero)
+        if (status == NO_CONTENT or status == NOT_MODIFIED or
+            100 <= status < 200 or      # 1xx codes
+            self._method == "HEAD"):
+            self.length = 0
+
+        # if the connection remains open, and we aren't using chunked, and
+        # a content-length was not provided, then assume that the connection
+        # WILL close.
+        if (not self.will_close and
+            not self.chunked and
+            self.length is None):
+            self.will_close = True
+
+    def _check_close(self):
+        conn = self.headers.get("connection")
+        if self.version == 11:
+            # An HTTP/1.1 proxy is assumed to stay open unless
+            # explicitly closed.
+            conn = self.headers.get("connection")
+            if conn and "close" in conn.lower():
+                return True
+            return False
+
+        # Some HTTP/1.0 implementations have support for persistent
+        # connections, using rules different than HTTP/1.1.
+
+        # For older HTTP, Keep-Alive indicates persistent connection.
+        if self.headers.get("keep-alive"):
+            return False
+
+        # At least Akamai returns a "Connection: Keep-Alive" header,
+        # which was supposed to be sent by the client.
+        if conn and "keep-alive" in conn.lower():
+            return False
+
+        # Proxy-Connection is a netscape hack.
+        pconn = self.headers.get("proxy-connection")
+        if pconn and "keep-alive" in pconn.lower():
+            return False
+
+        # otherwise, assume it will close
+        return True
+
+    def _close_conn(self):
+        fp = self.fp
+        self.fp = None
+        fp.close()
+
+    def close(self):
+        try:
+            super().close() # set "closed" flag
+        finally:
+            if self.fp:
+                self._close_conn()
+
+    # These implementations are for the benefit of io.BufferedReader.
+
+    # XXX This class should probably be revised to act more like
+    # the "raw stream" that BufferedReader expects.
+
+    def flush(self):
+        super().flush()
+        if self.fp:
+            self.fp.flush()
+
+    def readable(self):
+        """Always returns True"""
+        return True
+
+    # End of "raw stream" methods
+
+    def isclosed(self):
+        """True if the connection is closed."""
+        # NOTE: it is possible that we will not ever call self.close(). This
+        #       case occurs when will_close is TRUE, length is None, and we
+        #       read up to the last byte, but NOT past it.
+        #
+        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
+        #          called, meaning self.isclosed() is meaningful.
+        return self.fp is None
+
+    def read(self, amt=None):
+        if self.fp is None:
+            return b""
+
+        if self._method == "HEAD":
+            self._close_conn()
+            return b""
+
+        if amt is not None:
+            # Amount is given, implement using readinto
+            b = bytearray(amt)
+            n = self.readinto(b)
+            return memoryview(b)[:n].tobytes()
+        else:
+            # Amount is not given (unbounded read) so we must check self.length
+            # and self.chunked
+
+            if self.chunked:
+                return self._readall_chunked()
+
+            if self.length is None:
+                s = self.fp.read()
+            else:
+                try:
+                    s = self._safe_read(self.length)
+                except IncompleteRead:
+                    self._close_conn()
+                    raise
+                self.length = 0
+            self._close_conn()        # we read everything
+            return s
+
+    def readinto(self, b):
+        """Read up to len(b) bytes into bytearray b and return the number
+        of bytes read.
+        """
+
+        if self.fp is None:
+            return 0
+
+        if self._method == "HEAD":
+            self._close_conn()
+            return 0
+
+        if self.chunked:
+            return self._readinto_chunked(b)
+
+        if self.length is not None:
+            if len(b) > self.length:
+                # clip the read to the "end of response"
+                b = memoryview(b)[0:self.length]
+
+        # we do not use _safe_read() here because this may be a .will_close
+        # connection, and the user is reading more bytes than will be provided
+        # (for example, reading in 1k chunks)
+        n = self.fp.readinto(b)
+        if not n and b:
+            # Ideally, we would raise IncompleteRead if the content-length
+            # wasn't satisfied, but it might break compatibility.
+            self._close_conn()
+        elif self.length is not None:
+            self.length -= n
+            if not self.length:
+                self._close_conn()
+        return n
+
+    def _read_next_chunk_size(self):
+        # Read the next chunk size from the file
+        line = self.fp.readline(_MAXLINE + 1)
+        if len(line) > _MAXLINE:
+            raise LineTooLong("chunk size")
+        i = line.find(b";")
+        if i >= 0:
+            line = line[:i] # strip chunk-extensions
+        try:
+            return int(line, 16)
+        except ValueError:
+            # close the connection as protocol synchronisation is
+            # probably lost
+            self._close_conn()
+            raise
+
+    def _read_and_discard_trailer(self):
+        # read and discard trailer up to the CRLF terminator
+        ### note: we shouldn't have any trailers!
+        while True:
+            line = self.fp.readline(_MAXLINE + 1)
+            if len(line) > _MAXLINE:
+                raise LineTooLong("trailer line")
+            if not line:
+                # a vanishingly small number of sites EOF without
+                # sending the trailer
+                break
+            if line in (b'\r\n', b'\n', b''):
+                break
+
+    def _get_chunk_left(self):
+        # return self.chunk_left, reading a new chunk if necessary.
+        # chunk_left == 0: at the end of the current chunk, need to close it
+        # chunk_left == None: No current chunk, should read next.
+        # This function returns non-zero or None if the last chunk has
+        # been read.
+        chunk_left = self.chunk_left
+        if not chunk_left: # Can be 0 or None
+            if chunk_left is not None:
+                # We are at the end of chunk, discard chunk end
+                self._safe_read(2)  # toss the CRLF at the end of the chunk
+            try:
+                chunk_left = self._read_next_chunk_size()
+            except ValueError:
+                raise IncompleteRead(b'')
+            if chunk_left == 0:
+                # last chunk: 1*("0") [ chunk-extension ] CRLF
+                self._read_and_discard_trailer()
+                # we read everything; close the "file"
+                self._close_conn()
+                chunk_left = None
+            self.chunk_left = chunk_left
+        return chunk_left
+
+    def _readall_chunked(self):
+        assert self.chunked != _UNKNOWN
+        value = []
+        try:
+            while True:
+                chunk_left = self._get_chunk_left()
+                if chunk_left is None:
+                    break
+                value.append(self._safe_read(chunk_left))
+                self.chunk_left = 0
+            return b''.join(value)
+        except IncompleteRead:
+            raise IncompleteRead(b''.join(value))
+
+    def _readinto_chunked(self, b):
+        assert self.chunked != _UNKNOWN
+        total_bytes = 0
+        mvb = memoryview(b)
+        try:
+            while True:
+                chunk_left = self._get_chunk_left()
+                if chunk_left is None:
+                    return total_bytes
+
+                if len(mvb) <= chunk_left:
+                    n = self._safe_readinto(mvb)
+                    self.chunk_left = chunk_left - n
+                    return total_bytes + n
+
+                temp_mvb = mvb[:chunk_left]
+                n = self._safe_readinto(temp_mvb)
+                mvb = mvb[n:]
+                total_bytes += n
+                self.chunk_left = 0
+
+        except IncompleteRead:
+            raise IncompleteRead(bytes(b[0:total_bytes]))
+
+    def _safe_read(self, amt):
+        """Read the number of bytes requested, compensating for partial reads.
+
+        Normally, we have a blocking socket, but a read() can be interrupted
+        by a signal (resulting in a partial read).
+
+        Note that we cannot distinguish between EOF and an interrupt when zero
+        bytes have been read. IncompleteRead() will be raised in this
+        situation.
+
+        This function should be used when <amt> bytes "should" be present for
+        reading. If the bytes are truly not available (due to EOF), then the
+        IncompleteRead exception can be used to detect the problem.
+        """
+        s = []
+        while amt > 0:
+            chunk = self.fp.read(min(amt, MAXAMOUNT))
+            if not chunk:
+                raise IncompleteRead(b''.join(s), amt)
+            s.append(chunk)
+            amt -= len(chunk)
+        return b"".join(s)
+
+    def _safe_readinto(self, b):
+        """Same as _safe_read, but for reading into a buffer."""
+        total_bytes = 0
+        mvb = memoryview(b)
+        while total_bytes < len(b):
+            if MAXAMOUNT < len(mvb):
+                temp_mvb = mvb[0:MAXAMOUNT]
+                n = self.fp.readinto(temp_mvb)
+            else:
+                n = self.fp.readinto(mvb)
+            if not n:
+                raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b))
+            mvb = mvb[n:]
+            total_bytes += n
+        return total_bytes
+
+    def read1(self, n=-1):
+        """Read with at most one underlying system call.  If at least one
+        byte is buffered, return that instead.
+        """
+        if self.fp is None or self._method == "HEAD":
+            return b""
+        if self.chunked:
+            return self._read1_chunked(n)
+        if self.length is not None and (n < 0 or n > self.length):
+            n = self.length
+        try:
+            result = self.fp.read1(n)
+        except ValueError:
+            if n >= 0:
+                raise
+            # some implementations, like BufferedReader, don't support -1
+            # Read an arbitrarily selected largeish chunk.
+            result = self.fp.read1(16*1024)
+        if not result and n:
+            self._close_conn()
+        elif self.length is not None:
+            self.length -= len(result)
+        return result
+
+    def peek(self, n=-1):
+        # Having this enables IOBase.readline() to read more than one
+        # byte at a time
+        if self.fp is None or self._method == "HEAD":
+            return b""
+        if self.chunked:
+            return self._peek_chunked(n)
+        return self.fp.peek(n)
+
+    def readline(self, limit=-1):
+        if self.fp is None or self._method == "HEAD":
+            return b""
+        if self.chunked:
+            # Fallback to IOBase readline which uses peek() and read()
+            return super().readline(limit)
+        if self.length is not None and (limit < 0 or limit > self.length):
+            limit = self.length
+        result = self.fp.readline(limit)
+        if not result and limit:
+            self._close_conn()
+        elif self.length is not None:
+            self.length -= len(result)
+        return result
+
+    def _read1_chunked(self, n):
+        # Strictly speaking, _get_chunk_left() may cause more than one read,
+        # but that is ok, since that is to satisfy the chunked protocol.
+        chunk_left = self._get_chunk_left()
+        if chunk_left is None or n == 0:
+            return b''
+        if not (0 <= n <= chunk_left):
+            n = chunk_left # if n is negative or larger than chunk_left
+        read = self.fp.read1(n)
+        self.chunk_left -= len(read)
+        if not read:
+            raise IncompleteRead(b"")
+        return read
+
+    def _peek_chunked(self, n):
+        # Strictly speaking, _get_chunk_left() may cause more than one read,
+        # but that is ok, since that is to satisfy the chunked protocol.
+        try:
+            chunk_left = self._get_chunk_left()
+        except IncompleteRead:
+            return b'' # peek doesn't worry about protocol
+        if chunk_left is None:
+            return b'' # eof
+        # peek is allowed to return more than requested.  Just request the
+        # entire chunk, and truncate what we get.
+        return self.fp.peek(chunk_left)[:chunk_left]
+
+    def fileno(self):
+        return self.fp.fileno()
+
+    def getheader(self, name, default=None):
+        '''Returns the value of the header matching *name*.
+
+        If there are multiple matching headers, the values are
+        combined into a single string separated by commas and spaces.
+
+        If no matching header is found, returns *default* or None if
+        the *default* is not specified.
+
+        If the headers are unknown, raises http.client.ResponseNotReady.
+
+        '''
+        if self.headers is None:
+            raise ResponseNotReady()
+        headers = self.headers.get_all(name) or default
+        if isinstance(headers, str) or not hasattr(headers, '__iter__'):
+            return headers
+        else:
+            return ', '.join(headers)
+
+    def getheaders(self):
+        """Return list of (header, value) tuples."""
+        if self.headers is None:
+            raise ResponseNotReady()
+        return list(self.headers.items())
+
+    # We override IOBase.__iter__ so that it doesn't check for closed-ness
+
+    def __iter__(self):
+        return self
+
+    # For compatibility with old-style urllib responses.
+
+    def info(self):
+        '''Returns an instance of the class mimetools.Message containing
+        meta-information associated with the URL.
+
+        When the method is HTTP, these headers are those returned by
+        the server at the head of the retrieved HTML page (including
+        Content-Length and Content-Type).
+
+        When the method is FTP, a Content-Length header will be
+        present if (as is now usual) the server passed back a file
+        length in response to the FTP retrieval request. A
+        Content-Type header will be present if the MIME type can be
+        guessed.
+
+        When the method is local-file, returned headers will include
+        a Date representing the file's last-modified time, a
+        Content-Length giving file size, and a Content-Type
+        containing a guess at the file's type. See also the
+        description of the mimetools module.
+
+        '''
+        return self.headers
+
+    def geturl(self):
+        '''Return the real URL of the page.
+
+        In some cases, the HTTP server redirects a client to another
+        URL. The urlopen() function handles this transparently, but in
+        some cases the caller needs to know which URL the client was
+        redirected to. The geturl() method can be used to get at this
+        redirected URL.
+
+        '''
+        return self.url
+
+    def getcode(self):
+        '''Return the HTTP status code that was sent with the response,
+        or None if the URL is not an HTTP URL.
+
+        '''
+        return self.status
+
+class HTTPConnection:
+
+    _http_vsn = 11
+    _http_vsn_str = 'HTTP/1.1'
+
+    response_class = HTTPResponse
+    default_port = HTTP_PORT
+    auto_open = 1
+    debuglevel = 0
+
+    @staticmethod
+    def _is_textIO(stream):
+        """Test whether a file-like object is a text or a binary stream.
+        """
+        return isinstance(stream, io.TextIOBase)
+
+    @staticmethod
+    def _get_content_length(body, method):
+        """Get the content-length based on the body.
+
+        If the body is None, we set Content-Length: 0 for methods that expect
+        a body (RFC 7230, Section 3.3.2). We also set the Content-Length for
+        any method if the body is a str or bytes-like object and not a file.
+        """
+        if body is None:
+            # do an explicit check for not None here to distinguish
+            # between unset and set but empty
+            if method.upper() in _METHODS_EXPECTING_BODY:
+                return 0
+            else:
+                return None
+
+        if hasattr(body, 'read'):
+            # file-like object.
+            return None
+
+        try:
+            # does it implement the buffer protocol (bytes, bytearray, array)?
+            mv = memoryview(body)
+            return mv.nbytes
+        except TypeError:
+            pass
+
+        if isinstance(body, str):
+            return len(body)
+
+        return None
+
+    def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+                 source_address=None):
+        self.timeout = timeout
+        self.source_address = source_address
+        self.sock = None
+        self._buffer = []
+        self.__response = None
+        self.__state = _CS_IDLE
+        self._method = None
+        self._tunnel_host = None
+        self._tunnel_port = None
+        self._tunnel_headers = {}
+
+        (self.host, self.port) = self._get_hostport(host, port)
+
+        # This is stored as an instance variable to allow unit
+        # tests to replace it with a suitable mockup
+        self._create_connection = socket.create_connection
+
+    def set_tunnel(self, host, port=None, headers=None):
+        """Set up host and port for HTTP CONNECT tunnelling.
+
+        In a connection that uses HTTP CONNECT tunneling, the host passed to the
+        constructor is used as a proxy server that relays all communication to
+        the endpoint passed to `set_tunnel`. This done by sending an HTTP
+        CONNECT request to the proxy server when the connection is established.
+
+        This method must be called before the HTML connection has been
+        established.
+
+        The headers argument should be a mapping of extra HTTP headers to send
+        with the CONNECT request.
+        """
+
+        if self.sock:
+            raise RuntimeError("Can't set up tunnel for established connection")
+
+        self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
+        if headers:
+            self._tunnel_headers = headers
+        else:
+            self._tunnel_headers.clear()
+
+    def _get_hostport(self, host, port):
+        if port is None:
+            i = host.rfind(':')
+            j = host.rfind(']')         # ipv6 addresses have [...]
+            if i > j:
+                try:
+                    port = int(host[i+1:])
+                except ValueError:
+                    if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
+                        port = self.default_port
+                    else:
+                        raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
+                host = host[:i]
+            else:
+                port = self.default_port
+            if host and host[0] == '[' and host[-1] == ']':
+                host = host[1:-1]
+
+        return (host, port)
+
+    def set_debuglevel(self, level):
+        self.debuglevel = level
+
+    def _tunnel(self):
+        connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host,
+            self._tunnel_port)
+        connect_bytes = connect_str.encode("ascii")
+        self.send(connect_bytes)
+        for header, value in self._tunnel_headers.items():
+            header_str = "%s: %s\r\n" % (header, value)
+            header_bytes = header_str.encode("latin-1")
+            self.send(header_bytes)
+        self.send(b'\r\n')
+
+        response = self.response_class(self.sock, method=self._method)
+        (version, code, message) = response._read_status()
+
+        if code != http.HTTPStatus.OK:
+            self.close()
+            raise OSError("Tunnel connection failed: %d %s" % (code,
+                                                               message.strip()))
+        while True:
+            line = response.fp.readline(_MAXLINE + 1)
+            if len(line) > _MAXLINE:
+                raise LineTooLong("header line")
+            if not line:
+                # for sites which EOF without sending a trailer
+                break
+            if line in (b'\r\n', b'\n', b''):
+                break
+
+            if self.debuglevel > 0:
+                print('header:', line.decode())
+
+    def connect(self):
+        """Connect to the host and port specified in __init__."""
+        self.sock = self._create_connection(
+            (self.host,self.port), self.timeout, self.source_address)
+        self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+
+        if self._tunnel_host:
+            self._tunnel()
+
+    def close(self):
+        """Close the connection to the HTTP server."""
+        self.__state = _CS_IDLE
+        try:
+            sock = self.sock
+            if sock:
+                self.sock = None
+                sock.close()   # close it manually... there may be other refs
+        finally:
+            response = self.__response
+            if response:
+                self.__response = None
+                response.close()
+
+    def send(self, data):
+        """Send `data' to the server.
+        ``data`` can be a string object, a bytes object, an array object, a
+        file-like object that supports a .read() method, or an iterable object.
+        """
+
+        if self.sock is None:
+            if self.auto_open:
+                self.connect()
+            else:
+                raise NotConnected()
+
+        if self.debuglevel > 0:
+            print("send:", repr(data))
+        blocksize = 8192
+        if hasattr(data, "read") :
+            if self.debuglevel > 0:
+                print("sendIng a read()able")
+            encode = self._is_textIO(data)
+            if encode and self.debuglevel > 0:
+                print("encoding file using iso-8859-1")
+            while 1:
+                datablock = data.read(blocksize)
+                if not datablock:
+                    break
+                if encode:
+                    datablock = datablock.encode("iso-8859-1")
+                self.sock.sendall(datablock)
+            return
+        try:
+            self.sock.sendall(data)
+        except TypeError:
+            if isinstance(data, collections.Iterable):
+                for d in data:
+                    self.sock.sendall(d)
+            else:
+                raise TypeError("data should be a bytes-like object "
+                                "or an iterable, got %r" % type(data))
+
+    def _output(self, s):
+        """Add a line of output to the current request buffer.
+
+        Assumes that the line does *not* end with \\r\\n.
+        """
+        self._buffer.append(s)
+
+    def _read_readable(self, readable):
+        blocksize = 8192
+        if self.debuglevel > 0:
+            print("sendIng a read()able")
+        encode = self._is_textIO(readable)
+        if encode and self.debuglevel > 0:
+            print("encoding file using iso-8859-1")
+        while True:
+            datablock = readable.read(blocksize)
+            if not datablock:
+                break
+            if encode:
+                datablock = datablock.encode("iso-8859-1")
+            yield datablock
+
+    def _send_output(self, message_body=None, encode_chunked=False):
+        """Send the currently buffered request and clear the buffer.
+
+        Appends an extra \\r\\n to the buffer.
+        A message_body may be specified, to be appended to the request.
+        """
+        self._buffer.extend((b"", b""))
+        msg = b"\r\n".join(self._buffer)
+        del self._buffer[:]
+        self.send(msg)
+
+        if message_body is not None:
+
+            # create a consistent interface to message_body
+            if hasattr(message_body, 'read'):
+                # Let file-like take precedence over byte-like.  This
+                # is needed to allow the current position of mmap'ed
+                # files to be taken into account.
+                chunks = self._read_readable(message_body)
+            else:
+                try:
+                    # this is solely to check to see if message_body
+                    # implements the buffer API.  it /would/ be easier
+                    # to capture if PyObject_CheckBuffer was exposed
+                    # to Python.
+                    memoryview(message_body)
+                except TypeError:
+                    try:
+                        chunks = iter(message_body)
+                    except TypeError:
+                        raise TypeError("message_body should be a bytes-like "
+                                        "object or an iterable, got %r"
+                                        % type(message_body))
+                else:
+                    # the object implements the buffer interface and
+                    # can be passed directly into socket methods
+                    chunks = (message_body,)
+
+            for chunk in chunks:
+                if not chunk:
+                    if self.debuglevel > 0:
+                        print('Zero length chunk ignored')
+                    continue
+
+                if encode_chunked and self._http_vsn == 11:
+                    # chunked encoding
+                    chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
+                        + b'\r\n'
+                self.send(chunk)
+
+            if encode_chunked and self._http_vsn == 11:
+                # end chunked transfer
+                self.send(b'0\r\n\r\n')
+
+    def putrequest(self, method, url, skip_host=False,
+                   skip_accept_encoding=False):
+        """Send a request to the server.
+
+        `method' specifies an HTTP request method, e.g. 'GET'.
+        `url' specifies the object being requested, e.g. '/index.html'.
+        `skip_host' if True does not add automatically a 'Host:' header
+        `skip_accept_encoding' if True does not add automatically an
+           'Accept-Encoding:' header
+        """
+
+        # if a prior response has been completed, then forget about it.
+        if self.__response and self.__response.isclosed():
+            self.__response = None
+
+
+        # in certain cases, we cannot issue another request on this connection.
+        # this occurs when:
+        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
+        #   2) a response to a previous request has signalled that it is going
+        #      to close the connection upon completion.
+        #   3) the headers for the previous response have not been read, thus
+        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
+        #
+        # if there is no prior response, then we can request at will.
+        #
+        # if point (2) is true, then we will have passed the socket to the
+        # response (effectively meaning, "there is no prior response"), and
+        # will open a new one when a new request is made.
+        #
+        # Note: if a prior response exists, then we *can* start a new request.
+        #       We are not allowed to begin fetching the response to this new
+        #       request, however, until that prior response is complete.
+        #
+        if self.__state == _CS_IDLE:
+            self.__state = _CS_REQ_STARTED
+        else:
+            raise CannotSendRequest(self.__state)
+
+        # Save the method we use, we need it later in the response phase
+        self._method = method
+        if not url:
+            url = '/'
+        request = '%s %s %s' % (method, url, self._http_vsn_str)
+
+        # Non-ASCII characters should have been eliminated earlier
+        self._output(request.encode('ascii'))
+
+        if self._http_vsn == 11:
+            # Issue some standard headers for better HTTP/1.1 compliance
+
+            if not skip_host:
+                # this header is issued *only* for HTTP/1.1
+                # connections. more specifically, this means it is
+                # only issued when the client uses the new
+                # HTTPConnection() class. backwards-compat clients
+                # will be using HTTP/1.0 and those clients may be
+                # issuing this header themselves. we should NOT issue
+                # it twice; some web servers (such as Apache) barf
+                # when they see two Host: headers
+
+                # If we need a non-standard port,include it in the
+                # header.  If the request is going through a proxy,
+                # but the host of the actual URL, not the host of the
+                # proxy.
+
+                netloc = ''
+                if url.startswith('http'):
+                    nil, netloc, nil, nil, nil = urlsplit(url)
+
+                if netloc:
+                    try:
+                        netloc_enc = netloc.encode("ascii")
+                    except UnicodeEncodeError:
+                        netloc_enc = netloc.encode("idna")
+                    self.putheader('Host', netloc_enc)
+                else:
+                    if self._tunnel_host:
+                        host = self._tunnel_host
+                        port = self._tunnel_port
+                    else:
+                        host = self.host
+                        port = self.port
+
+                    try:
+                        host_enc = host.encode("ascii")
+                    except UnicodeEncodeError:
+                        host_enc = host.encode("idna")
+
+                    # As per RFC 273, IPv6 address should be wrapped with []
+                    # when used as Host header
+
+                    if host.find(':') >= 0:
+                        host_enc = b'[' + host_enc + b']'
+
+                    if port == self.default_port:
+                        self.putheader('Host', host_enc)
+                    else:
+                        host_enc = host_enc.decode("ascii")
+                        self.putheader('Host', "%s:%s" % (host_enc, port))
+
+            # note: we are assuming that clients will not attempt to set these
+            #       headers since *this* library must deal with the
+            #       consequences. this also means that when the supporting
+            #       libraries are updated to recognize other forms, then this
+            #       code should be changed (removed or updated).
+
+            # we only want a Content-Encoding of "identity" since we don't
+            # support encodings such as x-gzip or x-deflate.
+            if not skip_accept_encoding:
+                self.putheader('Accept-Encoding', 'identity')
+
+            # we can accept "chunked" Transfer-Encodings, but no others
+            # NOTE: no TE header implies *only* "chunked"
+            #self.putheader('TE', 'chunked')
+
+            # if TE is supplied in the header, then it must appear in a
+            # Connection header.
+            #self.putheader('Connection', 'TE')
+
+        else:
+            # For HTTP/1.0, the server will assume "not chunked"
+            pass
+
+    def putheader(self, header, *values):
+        """Send a request header line to the server.
+
+        For example: h.putheader('Accept', 'text/html')
+        """
+        if self.__state != _CS_REQ_STARTED:
+            raise CannotSendHeader()
+
+        if hasattr(header, 'encode'):
+            header = header.encode('ascii')
+
+        if not _is_legal_header_name(header):
+            raise ValueError('Invalid header name %r' % (header,))
+
+        values = list(values)
+        for i, one_value in enumerate(values):
+            if hasattr(one_value, 'encode'):
+                values[i] = one_value.encode('latin-1')
+            elif isinstance(one_value, int):
+                values[i] = str(one_value).encode('ascii')
+
+            if _is_illegal_header_value(values[i]):
+                raise ValueError('Invalid header value %r' % (values[i],))
+
+        value = b'\r\n\t'.join(values)
+        header = header + b': ' + value
+        self._output(header)
+
+    def endheaders(self, message_body=None, *, encode_chunked=False):
+        """Indicate that the last header line has been sent to the server.
+
+        This method sends the request to the server.  The optional message_body
+        argument can be used to pass a message body associated with the
+        request.
+        """
+        if self.__state == _CS_REQ_STARTED:
+            self.__state = _CS_REQ_SENT
+        else:
+            raise CannotSendHeader()
+        self._send_output(message_body, encode_chunked=encode_chunked)
+
+    def request(self, method, url, body=None, headers={}, *,
+                encode_chunked=False):
+        """Send a complete request to the server."""
+        self._send_request(method, url, body, headers, encode_chunked)
+
+    def _send_request(self, method, url, body, headers, encode_chunked):
+        # Honor explicitly requested Host: and Accept-Encoding: headers.
+        header_names = frozenset(k.lower() for k in headers)
+        skips = {}
+        if 'host' in header_names:
+            skips['skip_host'] = 1
+        if 'accept-encoding' in header_names:
+            skips['skip_accept_encoding'] = 1
+
+        self.putrequest(method, url, **skips)
+
+        # chunked encoding will happen if HTTP/1.1 is used and either
+        # the caller passes encode_chunked=True or the following
+        # conditions hold:
+        # 1. content-length has not been explicitly set
+        # 2. the body is a file or iterable, but not a str or bytes-like
+        # 3. Transfer-Encoding has NOT been explicitly set by the caller
+
+        if 'content-length' not in header_names:
+            # only chunk body if not explicitly set for backwards
+            # compatibility, assuming the client code is already handling the
+            # chunking
+            if 'transfer-encoding' not in header_names:
+                # if content-length cannot be automatically determined, fall
+                # back to chunked encoding
+                encode_chunked = False
+                content_length = self._get_content_length(body, method)
+                if content_length is None:
+                    if body is not None:
+                        if self.debuglevel > 0:
+                            print('Unable to determine size of %r' % body)
+                        encode_chunked = True
+                        self.putheader('Transfer-Encoding', 'chunked')
+                else:
+                    self.putheader('Content-Length', str(content_length))
+        else:
+            encode_chunked = False
+
+        for hdr, value in headers.items():
+            self.putheader(hdr, value)
+        if isinstance(body, str):
+            # RFC 2616 Section 3.7.1 says that text default has a
+            # default charset of iso-8859-1.
+            body = _encode(body, 'body')
+        self.endheaders(body, encode_chunked=encode_chunked)
+
+    def getresponse(self):
+        """Get the response from the server.
+
+        If the HTTPConnection is in the correct state, returns an
+        instance of HTTPResponse or of whatever object is returned by
+        the response_class variable.
+
+        If a request has not been sent or if a previous response has
+        not be handled, ResponseNotReady is raised.  If the HTTP
+        response indicates that the connection should be closed, then
+        it will be closed before the response is returned.  When the
+        connection is closed, the underlying socket is closed.
+        """
+
+        # if a prior response has been completed, then forget about it.
+        if self.__response and self.__response.isclosed():
+            self.__response = None
+
+        # if a prior response exists, then it must be completed (otherwise, we
+        # cannot read this response's header to determine the connection-close
+        # behavior)
+        #
+        # note: if a prior response existed, but was connection-close, then the
+        # socket and response were made independent of this HTTPConnection
+        # object since a new request requires that we open a whole new
+        # connection
+        #
+        # this means the prior response had one of two states:
+        #   1) will_close: this connection was reset and the prior socket and
+        #                  response operate independently
+        #   2) persistent: the response was retained and we await its
+        #                  isclosed() status to become true.
+        #
+        if self.__state != _CS_REQ_SENT or self.__response:
+            raise ResponseNotReady(self.__state)
+
+        if self.debuglevel > 0:
+            response = self.response_class(self.sock, self.debuglevel,
+                                           method=self._method)
+        else:
+            response = self.response_class(self.sock, method=self._method)
+
+        try:
+            try:
+                response.begin()
+            except ConnectionError:
+                self.close()
+                raise
+            assert response.will_close != _UNKNOWN
+            self.__state = _CS_IDLE
+
+            if response.will_close:
+                # this effectively passes the connection to the response
+                self.close()
+            else:
+                # remember this, so we can tell when it is complete
+                self.__response = response
+
+            return response
+        except:
+            response.close()
+            raise
+
+try:
+    import ssl
+except ImportError:
+    pass
+else:
+    class HTTPSConnection(HTTPConnection):
+        "This class allows communication via SSL."
+
+        default_port = HTTPS_PORT
+
+        # XXX Should key_file and cert_file be deprecated in favour of context?
+
+        def __init__(self, host, port=None, key_file=None, cert_file=None,
+                     timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+                     source_address=None, *, context=None,
+                     check_hostname=None):
+            super(HTTPSConnection, self).__init__(host, port, timeout,
+                                                  source_address)
+            if (key_file is not None or cert_file is not None or
+                        check_hostname is not None):
+                import warnings
+                warnings.warn("key_file, cert_file and check_hostname are "
+                              "deprecated, use a custom context instead.",
+                              DeprecationWarning, 2)
+            self.key_file = key_file
+            self.cert_file = cert_file
+            if context is None:
+                context = ssl._create_default_https_context()
+            will_verify = context.verify_mode != ssl.CERT_NONE
+            if check_hostname is None:
+                check_hostname = context.check_hostname
+            if check_hostname and not will_verify:
+                raise ValueError("check_hostname needs a SSL context with "
+                                 "either CERT_OPTIONAL or CERT_REQUIRED")
+            if key_file or cert_file:
+                context.load_cert_chain(cert_file, key_file)
+            self._context = context
+            self._check_hostname = check_hostname
+
+        def connect(self):
+            "Connect to a host on a given (SSL) port."
+
+            super().connect()
+
+            if self._tunnel_host:
+                server_hostname = self._tunnel_host
+            else:
+                server_hostname = self.host
+
+            self.sock = self._context.wrap_socket(self.sock,
+                                                  server_hostname=server_hostname)
+            if not self._context.check_hostname and self._check_hostname:
+                try:
+                    ssl.match_hostname(self.sock.getpeercert(), server_hostname)
+                except Exception:
+                    self.sock.shutdown(socket.SHUT_RDWR)
+                    self.sock.close()
+                    raise
+
+    __all__.append("HTTPSConnection")
+
+class HTTPException(Exception):
+    # Subclasses that define an __init__ must call Exception.__init__
+    # or define self.args.  Otherwise, str() will fail.
+    pass
+
+class NotConnected(HTTPException):
+    pass
+
+class InvalidURL(HTTPException):
+    pass
+
+class UnknownProtocol(HTTPException):
+    def __init__(self, version):
+        self.args = version,
+        self.version = version
+
+class UnknownTransferEncoding(HTTPException):
+    pass
+
+class UnimplementedFileMode(HTTPException):
+    pass
+
+class IncompleteRead(HTTPException):
+    def __init__(self, partial, expected=None):
+        self.args = partial,
+        self.partial = partial
+        self.expected = expected
+    def __repr__(self):
+        if self.expected is not None:
+            e = ', %i more expected' % self.expected
+        else:
+            e = ''
+        return '%s(%i bytes read%s)' % (self.__class__.__name__,
+                                        len(self.partial), e)
+    def __str__(self):
+        return repr(self)
+
+class ImproperConnectionState(HTTPException):
+    pass
+
+class CannotSendRequest(ImproperConnectionState):
+    pass
+
+class CannotSendHeader(ImproperConnectionState):
+    pass
+
+class ResponseNotReady(ImproperConnectionState):
+    pass
+
+class BadStatusLine(HTTPException):
+    def __init__(self, line):
+        if not line:
+            line = repr(line)
+        self.args = line,
+        self.line = line
+
+class LineTooLong(HTTPException):
+    def __init__(self, line_type):
+        HTTPException.__init__(self, "got more than %d bytes when reading %s"
+                                     % (_MAXLINE, line_type))
+
+class RemoteDisconnected(ConnectionResetError, BadStatusLine):
+    def __init__(self, *pos, **kw):
+        BadStatusLine.__init__(self, "")
+        ConnectionResetError.__init__(self, *pos, **kw)
+
+# for backwards compatibility
+error = HTTPException
diff --git a/modules/language/python/module/python.scm b/modules/language/python/module/python.scm
index 5c962e6..6f78f8f 100644
--- a/modules/language/python/module/python.scm
+++ b/modules/language/python/module/python.scm
@@ -2,6 +2,7 @@
   #:use-module (language python module _python)
   #:use-module (language python compile       )
   #:use-module (language python module        )
+  #:use-module ((language python module string) #:select ())
   #:use-module (language python memoryview    )
   #:use-module ((oop pf-objects) #:select (define-python-class))
   #:use-module ((language python format2) #:select ())
diff --git a/modules/language/python/module/re/parser.scm b/modules/language/python/module/re/parser.scm
index 40469e2..765500d 100644
--- a/modules/language/python/module/re/parser.scm
+++ b/modules/language/python/module/re/parser.scm
@@ -14,7 +14,7 @@
 (mk q+? #:+?   "+?")
 
 (define subexpr (f-list #:sub
-                        (f-seq (f-tag "(")  (Ds ee) (f-tag ")"))))
+                        (f-seq (f-tag "(")  (Ds ee-) (f-tag ")"))))
 
 (define f-back
   (f-or (f-list #:class (mk-token (f-reg! "[AZbBdDsSwntr]")))
@@ -58,6 +58,7 @@
                         ")"))
 (define (bch f) (f-or! (f-seq (f-or! (f-tag "\\n") f-nl)      
                               (f-out (list->string (list #\newline))))
+                       (f-seq (f-char #\\) f-back)
                        f))
 (define bbody (f-cons (f-or!
 		       (f-list #:range (bch (mk-token (f-reg! ".")))
@@ -83,7 +84,7 @@
 
 
 (define f-bar  (f-tag "|"))
-(define qq     (ch (f-reg "[][?+|*.$^()\\]")))
+(define qq     (ch (f-reg "[[?+|*.$^()\\]")))
 (define atom   (f-or qq f-. flags2 choice subexpr anongroup namegroup incant coment
 		     lookh lookh! rev rev! f-^ f-$ flags))
 (define spec   (f-list #:op atom (f-or! q+? q?? q*? q* q? q+ repn? repnm? repn repnm)))
@@ -93,7 +94,9 @@
                        (f* (f-or! f-com f-nl (f-reg "[ \t\r]")))
                        f-true)))
 (define line   (f-cons* #:seq ws aatom ws (ff* (f-seq ws aatom ws) )))
+(define line-   (f-cons* #:seq aatom (ff* (f-seq aatom) )))
 (define ee     (f-cons* #:or line (ff* (f-seq f-bar line))))
+(define ee-     (f-cons* #:or line- (ff* (f-seq f-bar line-))))
 (define pretty (make-fluid #f))
 (define (parse-reg str)
   (with-fluids ((*whitespace* ws))
diff --git a/modules/language/python/module/socketserver.py b/modules/language/python/module/socketserver.py
new file mode 100644
index 0000000..516f718
--- /dev/null
+++ b/modules/language/python/module/socketserver.py
@@ -0,0 +1,795 @@
+module(socketserver)
+
+"""Generic socket server classes.
+
+This module tries to capture the various aspects of defining a server:
+
+For socket-based servers:
+
+- address family:
+        - AF_INET{,6}: IP (Internet Protocol) sockets (default)
+        - AF_UNIX: Unix domain sockets
+        - others, e.g. AF_DECNET are conceivable (see <socket.h>
+- socket type:
+        - SOCK_STREAM (reliable stream, e.g. TCP)
+        - SOCK_DGRAM (datagrams, e.g. UDP)
+
+For request-based servers (including socket-based):
+
+- client address verification before further looking at the request
+        (This is actually a hook for any processing that needs to look
+         at the request before anything else, e.g. logging)
+- how to handle multiple requests:
+        - synchronous (one request is handled at a time)
+        - forking (each request is handled by a new process)
+        - threading (each request is handled by a new thread)
+
+The classes in this module favor the server type that is simplest to
+write: a synchronous TCP/IP server.  This is bad class design, but
+save some typing.  (There's also the issue that a deep class hierarchy
+slows down method lookups.)
+
+There are five classes in an inheritance diagram, four of which represent
+synchronous servers of four types:
+
+        +------------+
+        | BaseServer |
+        +------------+
+              |
+              v
+        +-----------+        +------------------+
+        | TCPServer |------->| UnixStreamServer |
+        +-----------+        +------------------+
+              |
+              v
+        +-----------+        +--------------------+
+        | UDPServer |------->| UnixDatagramServer |
+        +-----------+        +--------------------+
+
+Note that UnixDatagramServer derives from UDPServer, not from
+UnixStreamServer -- the only difference between an IP and a Unix
+stream server is the address family, which is simply repeated in both
+unix server classes.
+
+Forking and threading versions of each type of server can be created
+using the ForkingMixIn and ThreadingMixIn mix-in classes.  For
+instance, a threading UDP server class is created as follows:
+
+        class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass
+
+The Mix-in class must come first, since it overrides a method defined
+in UDPServer! Setting the various member variables also changes
+the behavior of the underlying server mechanism.
+
+To implement a service, you must derive a class from
+BaseRequestHandler and redefine its handle() method.  You can then run
+various versions of the service by combining one of the server classes
+with your request handler class.
+
+The request handler class must be different for datagram or stream
+services.  This can be hidden by using the request handler
+subclasses StreamRequestHandler or DatagramRequestHandler.
+
+Of course, you still have to use your head!
+
+For instance, it makes no sense to use a forking server if the service
+contains state in memory that can be modified by requests (since the
+modifications in the child process would never reach the initial state
+kept in the parent process and passed to each child).  In this case,
+you can use a threading server, but you will probably have to use
+locks to avoid two requests that come in nearly simultaneous to apply
+conflicting changes to the server state.
+
+On the other hand, if you are building e.g. an HTTP server, where all
+data is stored externally (e.g. in the file system), a synchronous
+class will essentially render the service "deaf" while one request is
+being handled -- which may be for a very long time if a client is slow
+to read all the data it has requested.  Here a threading or forking
+server is appropriate.
+
+In some cases, it may be appropriate to process part of a request
+synchronously, but to finish processing in a forked child depending on
+the request data.  This can be implemented by using a synchronous
+server and doing an explicit fork in the request handler class
+handle() method.
+
+Another approach to handling multiple simultaneous requests in an
+environment that supports neither threads nor fork (or where these are
+too expensive or inappropriate for the service) is to maintain an
+explicit table of partially finished requests and to use a selector to
+decide which request to work on next (or whether to handle a new
+incoming request).  This is particularly important for stream services
+where each client can potentially be connected for a long time (if
+threads or subprocesses cannot be used).
+
+Future work:
+- Standard classes for Sun RPC (which uses either UDP or TCP)
+- Standard mix-in classes to implement various authentication
+  and encryption schemes
+
+XXX Open problems:
+- What to do with out-of-band data?
+
+BaseServer:
+- split generic "request" functionality out into BaseServer class.
+  Copyright (C) 2000  Luke Kenneth Casson Leighton <lkcl@samba.org>
+
+  example: read entries from a SQL database (requires overriding
+  get_request() to return a table entry from the database).
+  entry is processed by a RequestHandlerClass.
+
+"""
+
+# Author of the BaseServer patch: Luke Kenneth Casson Leighton
+
+__version__ = "0.4"
+
+
+import socket
+import selectors
+import os
+import errno
+import sys
+try:
+    import threading
+except ImportError:
+    import dummy_threading as threading
+from io import BufferedIOBase
+from time import monotonic as time
+
+__all__ = ["BaseServer", "TCPServer", "UDPServer",
+           "ThreadingUDPServer", "ThreadingTCPServer",
+           "BaseRequestHandler", "StreamRequestHandler",
+           "DatagramRequestHandler", "ThreadingMixIn"]
+if hasattr(os, "fork"):
+    __all__.extend(["ForkingUDPServer","ForkingTCPServer", "ForkingMixIn"])
+if hasattr(socket, "AF_UNIX"):
+    __all__.extend(["UnixStreamServer","UnixDatagramServer",
+                    "ThreadingUnixStreamServer",
+                    "ThreadingUnixDatagramServer"])
+
+# poll/select have the advantage of not requiring any extra file descriptor,
+# contrarily to epoll/kqueue (also, they require a single syscall).
+if hasattr(selectors, 'PollSelector'):
+    _ServerSelector = selectors.PollSelector
+else:
+    _ServerSelector = selectors.SelectSelector
+
+
+class BaseServer:
+
+    """Base class for server classes.
+
+    Methods for the caller:
+
+    - __init__(server_address, RequestHandlerClass)
+    - serve_forever(poll_interval=0.5)
+    - shutdown()
+    - handle_request()  # if you do not use serve_forever()
+    - fileno() -> int   # for selector
+
+    Methods that may be overridden:
+
+    - server_bind()
+    - server_activate()
+    - get_request() -> request, client_address
+    - handle_timeout()
+    - verify_request(request, client_address)
+    - server_close()
+    - process_request(request, client_address)
+    - shutdown_request(request)
+    - close_request(request)
+    - service_actions()
+    - handle_error()
+
+    Methods for derived classes:
+
+    - finish_request(request, client_address)
+
+    Class variables that may be overridden by derived classes or
+    instances:
+
+    - timeout
+    - address_family
+    - socket_type
+    - allow_reuse_address
+
+    Instance variables:
+
+    - RequestHandlerClass
+    - socket
+
+    """
+
+    timeout = None
+
+    def __init__(self, server_address, RequestHandlerClass):
+        """Constructor.  May be extended, do not override."""
+        self.server_address = server_address
+        self.RequestHandlerClass = RequestHandlerClass
+        self.__is_shut_down = threading.Event()
+        self.__shutdown_request = False
+
+    def server_activate(self):
+        """Called by constructor to activate the server.
+
+        May be overridden.
+
+        """
+        pass
+
+    def serve_forever(self, poll_interval=0.5):
+        """Handle one request at a time until shutdown.
+
+        Polls for shutdown every poll_interval seconds. Ignores
+        self.timeout. If you need to do periodic tasks, do them in
+        another thread.
+        """
+        self.__is_shut_down.clear()
+        try:
+            # XXX: Consider using another file descriptor or connecting to the
+            # socket to wake this up instead of polling. Polling reduces our
+            # responsiveness to a shutdown request and wastes cpu at all other
+            # times.
+            with _ServerSelector() as selector:
+                selector.register(self, selectors.EVENT_READ)
+
+                while not self.__shutdown_request:
+                    ready = selector.select(poll_interval)
+                    if ready:
+                        self._handle_request_noblock()
+
+                    self.service_actions()
+        finally:
+            self.__shutdown_request = False
+            self.__is_shut_down.set()
+
+    def shutdown(self):
+        """Stops the serve_forever loop.
+
+        Blocks until the loop has finished. This must be called while
+        serve_forever() is running in another thread, or it will
+        deadlock.
+        """
+        self.__shutdown_request = True
+        self.__is_shut_down.wait()
+
+    def service_actions(self):
+        """Called by the serve_forever() loop.
+
+        May be overridden by a subclass / Mixin to implement any code that
+        needs to be run during the loop.
+        """
+        pass
+
+    # The distinction between handling, getting, processing and finishing a
+    # request is fairly arbitrary.  Remember:
+    #
+    # - handle_request() is the top-level call.  It calls selector.select(),
+    #   get_request(), verify_request() and process_request()
+    # - get_request() is different for stream or datagram sockets
+    # - process_request() is the place that may fork a new process or create a
+    #   new thread to finish the request
+    # - finish_request() instantiates the request handler class; this
+    #   constructor will handle the request all by itself
+
+    def handle_request(self):
+        """Handle one request, possibly blocking.
+
+        Respects self.timeout.
+        """
+        # Support people who used socket.settimeout() to escape
+        # handle_request before self.timeout was available.
+        timeout = self.socket.gettimeout()
+        if timeout is None:
+            timeout = self.timeout
+        elif self.timeout is not None:
+            timeout = min(timeout, self.timeout)
+        if timeout is not None:
+            deadline = time() + timeout
+
+        # Wait until a request arrives or the timeout expires - the loop is
+        # necessary to accommodate early wakeups due to EINTR.
+        with _ServerSelector() as selector:
+            selector.register(self, selectors.EVENT_READ)
+
+            while True:
+                ready = selector.select(timeout)
+                if ready:
+                    return self._handle_request_noblock()
+                else:
+                    if timeout is not None:
+                        timeout = deadline - time()
+                        if timeout < 0:
+                            return self.handle_timeout()
+
+    def _handle_request_noblock(self):
+        """Handle one request, without blocking.
+
+        I assume that selector.select() has returned that the socket is
+        readable before this function was called, so there should be no risk of
+        blocking in get_request().
+        """
+        try:
+            request, client_address = self.get_request()
+        except OSError:
+            return
+        if self.verify_request(request, client_address):
+            try:
+                self.process_request(request, client_address)
+            except Exception:
+                self.handle_error(request, client_address)
+                self.shutdown_request(request)
+            except:
+                self.shutdown_request(request)
+                raise
+        else:
+            self.shutdown_request(request)
+
+    def handle_timeout(self):
+        """Called if no new request arrives within self.timeout.
+
+        Overridden by ForkingMixIn.
+        """
+        pass
+
+    def verify_request(self, request, client_address):
+        """Verify the request.  May be overridden.
+
+        Return True if we should proceed with this request.
+
+        """
+        return True
+
+    def process_request(self, request, client_address):
+        """Call finish_request.
+
+        Overridden by ForkingMixIn and ThreadingMixIn.
+
+        """
+        self.finish_request(request, client_address)
+        self.shutdown_request(request)
+
+    def server_close(self):
+        """Called to clean-up the server.
+
+        May be overridden.
+
+        """
+        pass
+
+    def finish_request(self, request, client_address):
+        """Finish one request by instantiating RequestHandlerClass."""
+        self.RequestHandlerClass(request, client_address, self)
+
+    def shutdown_request(self, request):
+        """Called to shutdown and close an individual request."""
+        self.close_request(request)
+
+    def close_request(self, request):
+        """Called to clean up an individual request."""
+        pass
+
+    def handle_error(self, request, client_address):
+        """Handle an error gracefully.  May be overridden.
+
+        The default is to print a traceback and continue.
+
+        """
+        print('-'*40, file=sys.stderr)
+        print('Exception happened during processing of request from',
+            client_address, file=sys.stderr)
+        import traceback
+        traceback.print_exc()
+        print('-'*40, file=sys.stderr)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
+        self.server_close()
+
+
+class TCPServer(BaseServer):
+
+    """Base class for various socket-based server classes.
+
+    Defaults to synchronous IP stream (i.e., TCP).
+
+    Methods for the caller:
+
+    - __init__(server_address, RequestHandlerClass, bind_and_activate=True)
+    - serve_forever(poll_interval=0.5)
+    - shutdown()
+    - handle_request()  # if you don't use serve_forever()
+    - fileno() -> int   # for selector
+
+    Methods that may be overridden:
+
+    - server_bind()
+    - server_activate()
+    - get_request() -> request, client_address
+    - handle_timeout()
+    - verify_request(request, client_address)
+    - process_request(request, client_address)
+    - shutdown_request(request)
+    - close_request(request)
+    - handle_error()
+
+    Methods for derived classes:
+
+    - finish_request(request, client_address)
+
+    Class variables that may be overridden by derived classes or
+    instances:
+
+    - timeout
+    - address_family
+    - socket_type
+    - request_queue_size (only for stream sockets)
+    - allow_reuse_address
+
+    Instance variables:
+
+    - server_address
+    - RequestHandlerClass
+    - socket
+
+    """
+
+    address_family = socket.AF_INET
+
+    socket_type = socket.SOCK_STREAM
+
+    request_queue_size = 5
+
+    allow_reuse_address = False
+
+    def __init__(self, server_address, RequestHandlerClass, bind_and_activate=True):
+        """Constructor.  May be extended, do not override."""
+        BaseServer.__init__(self, server_address, RequestHandlerClass)
+        self.socket = socket.socket(self.address_family,
+                                    self.socket_type)
+        if bind_and_activate:
+            try:
+                self.server_bind()
+                self.server_activate()
+            except:
+                self.server_close()
+                raise
+
+    def server_bind(self):
+        """Called by constructor to bind the socket.
+
+        May be overridden.
+
+        """
+        if self.allow_reuse_address:
+            self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        self.socket.bind(self.server_address)
+        self.server_address = self.socket.getsockname()
+
+    def server_activate(self):
+        """Called by constructor to activate the server.
+
+        May be overridden.
+
+        """
+        self.socket.listen(self.request_queue_size)
+
+    def server_close(self):
+        """Called to clean-up the server.
+
+        May be overridden.
+
+        """
+        self.socket.close()
+
+    def fileno(self):
+        """Return socket file number.
+
+        Interface required by selector.
+
+        """
+        return self.socket.fileno()
+
+    def get_request(self):
+        """Get the request and client address from the socket.
+
+        May be overridden.
+
+        """
+        return self.socket.accept()
+
+    def shutdown_request(self, request):
+        """Called to shutdown and close an individual request."""
+        try:
+            #explicitly shutdown.  socket.close() merely releases
+            #the socket and waits for GC to perform the actual close.
+            request.shutdown(socket.SHUT_WR)
+        except OSError:
+            pass #some platforms may raise ENOTCONN here
+        self.close_request(request)
+
+    def close_request(self, request):
+        """Called to clean up an individual request."""
+        request.close()
+
+
+class UDPServer(TCPServer):
+
+    """UDP server class."""
+
+    allow_reuse_address = False
+
+    socket_type = socket.SOCK_DGRAM
+
+    max_packet_size = 8192
+
+    def get_request(self):
+        data, client_addr = self.socket.recvfrom(self.max_packet_size)
+        return (data, self.socket), client_addr
+
+    def server_activate(self):
+        # No need to call listen() for UDP.
+        pass
+
+    def shutdown_request(self, request):
+        # No need to shutdown anything.
+        self.close_request(request)
+
+    def close_request(self, request):
+        # No need to close anything.
+        pass
+
+if hasattr(os, "fork"):
+    class ForkingMixIn:
+        """Mix-in class to handle each request in a new process."""
+
+        timeout = 300
+        active_children = None
+        max_children = 40
+
+        def collect_children(self):
+            """Internal routine to wait for children that have exited."""
+            if self.active_children is None:
+                return
+
+            # If we're above the max number of children, wait and reap them until
+            # we go back below threshold. Note that we use waitpid(-1) below to be
+            # able to collect children in size(<defunct children>) syscalls instead
+            # of size(<children>): the downside is that this might reap children
+            # which we didn't spawn, which is why we only resort to this when we're
+            # above max_children.
+            while len(self.active_children) >= self.max_children:
+                try:
+                    pid, _ = os.waitpid(-1, 0)
+                    self.active_children.discard(pid)
+                except ChildProcessError:
+                    # we don't have any children, we're done
+                    self.active_children.clear()
+                except OSError:
+                    break
+
+            # Now reap all defunct children.
+            for pid in self.active_children.copy():
+                try:
+                    pid, _ = os.waitpid(pid, os.WNOHANG)
+                    # if the child hasn't exited yet, pid will be 0 and ignored by
+                    # discard() below
+                    self.active_children.discard(pid)
+                except ChildProcessError:
+                    # someone else reaped it
+                    self.active_children.discard(pid)
+                except OSError:
+                    pass
+
+        def handle_timeout(self):
+            """Wait for zombies after self.timeout seconds of inactivity.
+
+            May be extended, do not override.
+            """
+            self.collect_children()
+
+        def service_actions(self):
+            """Collect the zombie child processes regularly in the ForkingMixIn.
+
+            service_actions is called in the BaseServer's serve_forver loop.
+            """
+            self.collect_children()
+
+        def process_request(self, request, client_address):
+            """Fork a new subprocess to process the request."""
+            pid = os.fork()
+            if pid:
+                # Parent process
+                if self.active_children is None:
+                    self.active_children = set()
+                self.active_children.add(pid)
+                self.close_request(request)
+                return
+            else:
+                # Child process.
+                # This must never return, hence os._exit()!
+                status = 1
+                try:
+                    self.finish_request(request, client_address)
+                    status = 0
+                except Exception:
+                    self.handle_error(request, client_address)
+                finally:
+                    try:
+                        self.shutdown_request(request)
+                    finally:
+                        os._exit(status)
+
+
+class ThreadingMixIn:
+    """Mix-in class to handle each request in a new thread."""
+
+    # Decides how threads will act upon termination of the
+    # main process
+    daemon_threads = False
+
+    def process_request_thread(self, request, client_address):
+        """Same as in BaseServer but as a thread.
+
+        In addition, exception handling is done here.
+
+        """
+        try:
+            self.finish_request(request, client_address)
+        except Exception:
+            self.handle_error(request, client_address)
+        finally:
+            self.shutdown_request(request)
+
+    def process_request(self, request, client_address):
+        """Start a new thread to process the request."""
+        t = threading.Thread(target = self.process_request_thread,
+                             args = (request, client_address))
+        t.daemon = self.daemon_threads
+        t.start()
+
+
+if hasattr(os, "fork"):
+    class ForkingUDPServer(ForkingMixIn, UDPServer): pass
+    class ForkingTCPServer(ForkingMixIn, TCPServer): pass
+
+class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass
+class ThreadingTCPServer(ThreadingMixIn, TCPServer): pass
+
+if hasattr(socket, 'AF_UNIX'):
+
+    class UnixStreamServer(TCPServer):
+        address_family = socket.AF_UNIX
+
+    class UnixDatagramServer(UDPServer):
+        address_family = socket.AF_UNIX
+
+    class ThreadingUnixStreamServer(ThreadingMixIn, UnixStreamServer): pass
+
+    class ThreadingUnixDatagramServer(ThreadingMixIn, UnixDatagramServer): pass
+
+class BaseRequestHandler:
+
+    """Base class for request handler classes.
+
+    This class is instantiated for each request to be handled.  The
+    constructor sets the instance variables request, client_address
+    and server, and then calls the handle() method.  To implement a
+    specific service, all you need to do is to derive a class which
+    defines a handle() method.
+
+    The handle() method can find the request as self.request, the
+    client address as self.client_address, and the server (in case it
+    needs access to per-server information) as self.server.  Since a
+    separate instance is created for each request, the handle() method
+    can define other arbitrary instance variables.
+
+    """
+
+    def __init__(self, request, client_address, server):
+        self.request = request
+        self.client_address = client_address
+        self.server = server
+        self.setup()
+        try:
+            self.handle()
+        finally:
+            self.finish()
+
+    def setup(self):
+        pass
+
+    def handle(self):
+        pass
+
+    def finish(self):
+        pass
+
+
+# The following two classes make it possible to use the same service
+# class for stream or datagram servers.
+# Each class sets up these instance variables:
+# - rfile: a file object from which receives the request is read
+# - wfile: a file object to which the reply is written
+# When the handle() method returns, wfile is flushed properly
+
+
+class StreamRequestHandler(BaseRequestHandler):
+
+    """Define self.rfile and self.wfile for stream sockets."""
+
+    # Default buffer sizes for rfile, wfile.
+    # We default rfile to buffered because otherwise it could be
+    # really slow for large data (a getc() call per byte); we make
+    # wfile unbuffered because (a) often after a write() we want to
+    # read and we need to flush the line; (b) big writes to unbuffered
+    # files are typically optimized by stdio even when big reads
+    # aren't.
+    rbufsize = -1
+    wbufsize = 0
+
+    # A timeout to apply to the request socket, if not None.
+    timeout = None
+
+    # Disable nagle algorithm for this socket, if True.
+    # Use only when wbufsize != 0, to avoid small packets.
+    disable_nagle_algorithm = False
+
+    def setup(self):
+        self.connection = self.request
+        if self.timeout is not None:
+            self.connection.settimeout(self.timeout)
+        if self.disable_nagle_algorithm:
+            self.connection.setsockopt(socket.IPPROTO_TCP,
+                                       socket.TCP_NODELAY, True)
+        self.rfile = self.connection.makefile('rb', self.rbufsize)
+        if self.wbufsize == 0:
+            self.wfile = _SocketWriter(self.connection)
+        else:
+            self.wfile = self.connection.makefile('wb', self.wbufsize)
+
+    def finish(self):
+        if not self.wfile.closed:
+            try:
+                self.wfile.flush()
+            except socket.error:
+                # A final socket error may have occurred here, such as
+                # the local error ECONNABORTED.
+                pass
+        self.wfile.close()
+        self.rfile.close()
+
+class _SocketWriter(BufferedIOBase):
+    """Simple writable BufferedIOBase implementation for a socket
+
+    Does not hold data in a buffer, avoiding any need to call flush()."""
+
+    def __init__(self, sock):
+        self._sock = sock
+
+    def writable(self):
+        return True
+
+    def write(self, b):
+        self._sock.sendall(b)
+        with memoryview(b) as view:
+            return view.nbytes
+
+    def fileno(self):
+        return self._sock.fileno()
+
+class DatagramRequestHandler(BaseRequestHandler):
+
+    """Define self.rfile and self.wfile for datagram sockets."""
+
+    def setup(self):
+        from io import BytesIO
+        self.packet, self.socket = self.request
+        self.rfile = BytesIO(self.packet)
+        self.wfile = BytesIO()
+
+    def finish(self):
+        self.socket.sendto(self.wfile.getvalue(), self.client_address)
diff --git a/modules/language/python/module/string.scm b/modules/language/python/module/string.scm
index 9c0d818..63adfcd 100644
--- a/modules/language/python/module/string.scm
+++ b/modules/language/python/module/string.scm
@@ -338,7 +338,7 @@
     (lambda (self format_string args kwargs)
       (set self '_args '())
       (for ((s fn fo co : ((ref self 'parse) format_string))) ((ss '("")))
-           (vformat self s fn fo co ss args kwargs)
+           (vformat1 self s fn fo co ss args kwargs)
            #:final
            (begin
              ((ref self 'check_unused_args) (ref self '_args) args kwargs)
diff --git a/modules/language/python/module/threading.scm b/modules/language/python/module/threading.scm
index 7a7d669..1f1f83b 100644
--- a/modules/language/python/module/threading.scm
+++ b/modules/language/python/module/threading.scm
@@ -2,8 +2,9 @@
   #:use-module (ice-9 threads)
   #:use-module (oop pf-objects)
   #:use-module (language python def)
+  #:use-module (language python exceptions)
   #:use-module (language python list)
-  #:export (RLock start_new_thread allocate_lock))
+  #:export (RLock start_new_thread allocate_lock Thread))
 
 (define-python-class RLock ()
   (define __init__
@@ -40,3 +41,17 @@
   (call-with-new-thread
    (lambda ()
      (apply fkn (to-list args)))))
+
+(define-python-class Thread ()
+  (define __init__
+    (lam (self (= target None) (= args '()))
+         (set self 'target target)
+         (set self 'args   args)))
+
+  (define start
+    (lambda (self)
+      (call-with-new-thread
+       (lambda ()
+         (apply (ref self 'target)
+                (to-list (ref self 'args))))))))
+  
diff --git a/modules/language/python/module/urllib.py b/modules/language/python/module/urllib.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/modules/language/python/module/urllib.py
diff --git a/modules/language/python/module/urllib/error.py b/modules/language/python/module/urllib/error.py
new file mode 100644
index 0000000..b6b1211
--- /dev/null
+++ b/modules/language/python/module/urllib/error.py
@@ -0,0 +1,78 @@
+module(urllib,error)
+
+"""Exception classes raised by urllib.
+
+The base exception class is URLError, which inherits from OSError.  It
+doesn't define any behavior of its own, but is the base class for all
+exceptions defined in this package.
+
+HTTPError is an exception class that is also a valid HTTP response
+instance.  It behaves this way because HTTP protocol errors are valid
+responses, with a status code, headers, and a body.  In some contexts,
+an application may want to handle an exception like a regular
+response.
+"""
+
+import urllib.response
+
+__all__ = ['URLError', 'HTTPError', 'ContentTooShortError']
+
+
+class URLError(OSError):
+    # URLError is a sub-type of OSError, but it doesn't share any of
+    # the implementation.  need to override __init__ and __str__.
+    # It sets self.args for compatibility with other EnvironmentError
+    # subclasses, but args doesn't have the typical format with errno in
+    # slot 0 and strerror in slot 1.  This may be better than nothing.
+    def __init__(self, reason, filename=None):
+        self.args = reason,
+        self.reason = reason
+        if filename is not None:
+            self.filename = filename
+
+    def __str__(self):
+        return '<urlopen error %s>' % self.reason
+
+
+class HTTPError(URLError, urllib.response.addinfourl):
+    """Raised when HTTP error occurs, but also acts like non-error return"""
+    __super_init = urllib.response.addinfourl.__init__
+
+    def __init__(self, url, code, msg, hdrs, fp):
+        self.code = code
+        self.msg = msg
+        self.hdrs = hdrs
+        self.fp = fp
+        self.filename = url
+        # The addinfourl classes depend on fp being a valid file
+        # object.  In some cases, the HTTPError may not have a valid
+        # file object.  If this happens, the simplest workaround is to
+        # not initialize the base classes.
+        if fp is not None:
+            self.__super_init(fp, hdrs, url, code)
+
+    def __str__(self):
+        return 'HTTP Error %s: %s' % (self.code, self.msg)
+
+    def __repr__(self):
+        return '<HTTPError %s: %r>' % (self.code, self.msg)
+
+    # since URLError specifies a .reason attribute, HTTPError should also
+    #  provide this attribute. See issue13211 for discussion.
+    @property
+    def reason(self):
+        return self.msg
+
+    @property
+    def headers(self):
+        return self.hdrs
+
+    @headers.setter
+    def headers(self, headers):
+        self.hdrs = headers
+
+class ContentTooShortError(URLError):
+    """Exception raised when downloaded size does not match content-length."""
+    def __init__(self, message, content):
+        URLError.__init__(self, message)
+        self.content = content
diff --git a/modules/language/python/number.scm b/modules/language/python/number.scm
index 3d95ded..6ede96c 100644
--- a/modules/language/python/number.scm
+++ b/modules/language/python/number.scm
@@ -402,6 +402,12 @@
 			   (let ((mask (ash 1 (- i 1))))
 			     (- s mask))
 			   s))))))))
+  (define __newobj__
+    (lambda (cls n)
+      (let ((obj ((rawref object '__new__) cls)))
+        (slot-set! obj 'x (__new__ cls n))
+        obj)))
+       
   (define __new__
     (letrec ((__new__
               (case-lambda
diff --git a/modules/language/python/string.scm b/modules/language/python/string.scm
index ad80a87..864a7a1 100644
--- a/modules/language/python/string.scm
+++ b/modules/language/python/string.scm
@@ -409,10 +409,8 @@
 (define-py (py-split split s . l)
   (define N  1000000000000)
 
-  (define ws (list (list (char->integer #\space))
-                   (list (char->integer #\newline))
-                   (list (char->integer #\tab))))
-
+  (define ws? #f)
+  (define ws  (list #\space #\newline #\tab #\return))
   (define (to-ch x) (string-ref (scm-str x) 0))
   
   (define (mksep sep)
@@ -426,38 +424,61 @@
            (else
             (to-ch x))) l)                      
          #:final
-         (list (reverse l))))
+         (reverse l)))
 
   (call-with-values
       (lambda ()
         (match l
-          (()      (values (list ws  ) N))
-          ((sep)   (values (mksep sep) N))
-          ((sep n) (values (mksep sep) n))))
+          (()
+           (set! ws? #t)
+           (values '() N))
+          
+          ((sep)
+           (values (mksep sep) N))
+          
+          ((sep n)
+           (values (mksep sep) n))))
+    
     (lambda (sep n)
-      (let lp ((l (to-list s)) (i 0) (v '()) (r '()))
+      (let lp ((l (string->list (scm-str s))) (i 0) (v '()) (r '()))
         (if (= i n)
-            (reverse r)
+            (reverse (cons (list->string l) r))
             (if (pair? l)
-                (let ((ch (to-ch (car l))))
-                  (let lp2 ((ss sep))
-                    (if (pair? ss)
-                        (let lp3 ((sl (car ss)) (l3 l))                      
-                          (if (pair? sl)
-                              (if (pair? l3)
-                                  (let ((s (car sl)))
-                                    (if (eqv? s ch)
-                                        (lp3 (cdr sl) (cdr l3))
-                                        (lp2 (cdr ss))))
-                                  (lp2 (cdr ss)))
-                              (lp l3 (+ i 1)
-                                  '()
-                                  (cons
-                                   (list->string (reverse v))
-                                   r))))
-                        (lp (cdr l) i (cons ch v) r))))
+                (let ((ch (car l)))
+                  (if ws?
+                      (if (member ch ws)
+                          (let lp2 ((l (cdr l)))
+                            (if (pair? l)
+                                (let ((ch (car l)))
+                                  (if (member ch ws)
+                                      (lp2 (cdr l))
+                                      (lp l (+ i 1) '()
+                                          (cons
+                                           (list->string (reverse v))
+                                           r))))
+                                (lp l (+ i 1) '()
+                                    (cons
+                                     (list->string (reverse v))
+                                     r))))
+                          (lp (cdr l) i (cons ch v) r))
+                      (if (eq? ch (car sep))
+                          (let lp2 ((ll (cdr l)) (s (cdr sep)))
+                            (if (pair? s)
+                                (if (pair? ll)
+                                    (let ((ch2 (car ll)))
+                                      (if (eq? ch2 (car s))
+                                          (lp2 (cdr ll) (cdr sep))
+                                          (lp (cdr l) i (cons ch v) r)))
+                                    (lp (cdr l) i (cons ch v) r))
+                                (lp ll (+ i 1) '()
+                                    (cons
+                                     (list->string (reverse v))
+                                     r))))
+                          (lp (cdr l) i (cons ch v) r))))
                 (reverse (cons (list->string (reverse v)) r))))))))
 
+                      
+                                               
                           
               
               
@@ -598,7 +619,7 @@
 
 (define b?       #f)
 (define b-decode #f)
-(define-python-class string (<py-string>)
+(define-python-class string (<py> <py-string>)
   (define __init__
     (case-lambda
       ((self)
@@ -620,7 +641,13 @@
   (define __new__
     (lambda x
       (apply __init__ x)))
-
+  
+  (define __newobj__
+    (lambda (cls value)
+      (let ((obj ((rawref object '__new__) cls)))
+        (slot-set! obj 'str (__new__ cls value))
+        obj)))
+  
   (define __repr__
     (lambda (self)
       (slot-ref self 'str))))
author	Stefan Israelsson Tampe <stefan.itampe@gmail.com>	2018-09-06 23:40:42 +0200
committer	Stefan Israelsson Tampe <stefan.itampe@gmail.com>	2018-09-06 23:40:42 +0200
commit	8733038b4de44a8cfb669420621597457eeef88a (patch)
tree	9f7345b4b407ea0919320e17955b058cb2f2a5bc
parent	b950c0d70a1d2c95da8d60aca7af02bc50542a2b (diff)