summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefan Israelsson Tampe <stefan.itampe@gmail.com>2018-09-04 23:14:09 +0200
committerStefan Israelsson Tampe <stefan.itampe@gmail.com>2018-09-04 23:14:09 +0200
commit82ad02775f51f07be36a602b0e7b617001a2becb (patch)
tree7b5a07dd7c557d2f905e5181a6dfa5d18214edda
parent880b3bf82a99a94e5e9bf8b0d90327ff54bed339 (diff)
more email stuff
-rw-r--r--modules/language/python/compile.scm105
-rw-r--r--modules/language/python/eval.scm1
-rw-r--r--modules/language/python/module/collections.scm12
-rw-r--r--modules/language/python/module/email/_parseaddr.py3
-rw-r--r--modules/language/python/module/email/_policybase.py375
-rw-r--r--modules/language/python/module/email/feedparser.py537
-rw-r--r--modules/language/python/module/email/parser.py130
-rw-r--r--modules/language/python/module/email/utils.py389
-rw-r--r--modules/language/python/module/selectors.py8
9 files changed, 1530 insertions, 30 deletions
diff --git a/modules/language/python/compile.scm b/modules/language/python/compile.scm
index e013130..28d1490 100644
--- a/modules/language/python/compile.scm
+++ b/modules/language/python/compile.scm
@@ -272,6 +272,14 @@
(gensym "_")
x))
+(define (is-special? vs x) ; x: list of forms; returns #f unless at least one is a (#:starexpr ...) form
+ (if (or-map (lambda (x) (match x ((#:starexpr . _) #t) (_ #f))) x) ; any starred element?
+ (let lp ((l (map (g vs exp) x))) ; apply (g vs exp) to every element (presumably compiles each one -- confirm)
+ (if (pair? l)
+ `((@ (guile) cons) ,(car l) ,(lp (cdr l))) ; build a nested (cons e1 (cons e2 ...)) form
+ `((@ (guile) quote) ()))) ; ... terminated by (quote ())
+ #f))
+
(define (gen-sel vs e item)
(match e
(#f item)
@@ -394,6 +402,12 @@
(flat (exp '() v2)))
s)
(union (flat (exp '() v1)) s)))
+
+ ((#:starexpr #:power _ v1 . _)
+ (union
+ (flat (exp '() v1))
+ s))
+
(_ s)))
'()
l)))
@@ -1448,8 +1462,9 @@
(#:try
((_ x (or #f ()) #f . fin)
(if fin
- `(,(T 'try) (lambda () ,(exp vs x)) #:finally (lambda () ,(exp vs fin)))
- `(,(T 'try) (lambda () ,(exp vs x)))))
+ `(,(T 'try) (,(G 'lambda) () ,(exp vs x))
+ #:finally (,(G 'lambda) () ,(exp vs fin)))
+ `(,(T 'try) (,(G 'lambda) () ,(exp vs x)))))
((_ x exc else . fin)
`(,(T 'try) (lambda () ,(exp vs x))
@@ -1465,14 +1480,15 @@
(let ((l (gensym "l")))
(lp exc
(cons
- `(#:except ,(exp vs test) => (lambda (,(exp vs as) . ,l)
- ,(exp vs code)))
+ `(#:except ,(exp vs test) => (,(G 'lambda)
+ (,(exp vs as) . ,l)
+ ,(exp vs code)))
r))))
- (()
+ (()
(reverse r))))
,@(if else `((#:except #t ,(exp vs else))) '())
- ,@(if fin `(#:finally (lambda () ,(exp vs fin))) '()))))
+ ,@(if fin `(#:finally (,(G 'lambda) () ,(exp vs fin))) '()))))
(#:subexpr
((_ . l)
@@ -1606,6 +1622,10 @@
(#:global
((_ . _)
`(,cvalues)))
+
+ (#:starexpr
+ ((_ _ _ id . _)
+ `(#:star ,(exp vs id))))
(#:list
((_ x (and e (#:cfor . _)))
@@ -1692,21 +1712,37 @@
((and (= (length u) 1) (not op))
(let ((vars (map (lambda (x) (gensym "v")) l))
+ (spec (gensym "special"))
(q (gensym "q"))
- (f (gensym "f")))
- `(,(G 'begin)
- (call-with-values (lambda () ,(exp vs (car u)))
- (,(G 'letrec) ((,f
- (case-lambda
- ((,q)
- (,(G 'if) (pair? ,q)
+ (f (gensym "f"))
+ (a? (is-special? vs l)))
+ (if a?
+ `(,(G 'begin)
+ (call-with-values (lambda () ,(exp vs (car u)))
+ (,(G 'letrec) ((,f
+ (case-lambda
+ ((,q)
+ (,(G 'if) (pair? ,q)
(,(G 'apply) ,f ,q)
(,(G 'apply) ,f (,(L 'to-list) ,q))))
- (,vars
- ,@(map (lambda (l v) (make-set vs op l v))
- l vars)))))
- ,f))
- (,cvalues))))
+ (,spec
+ (,(C 'qset!) ,a? ,spec)))))
+ ,f))
+ (,cvalues))
+
+ `(,(G 'begin)
+ (call-with-values (lambda () ,(exp vs (car u)))
+ (,(G 'letrec) ((,f
+ (case-lambda
+ ((,q)
+ (,(G 'if) (pair? ,q)
+ (,(G 'apply) ,f ,q)
+ (,(G 'apply) ,f (,(L 'to-list) ,q))))
+ (,vars
+ ,@(map (lambda (l v) (make-set vs op l v))
+ l vars)))))
+ ,f))
+ (,cvalues)))))
((and (= (length l) 1) (not op))
`(,(G 'begin)
@@ -2752,9 +2788,34 @@
(pk (syntax->datum x))
x)
+(define (get-q-n x) ; count the elements of a nested (cons a (cons b ... (q ()))) syntax form
+ (syntax-case x () ; no literals: `cons' and `q' below are pattern variables matching any head
+ ((cons a b)
+ (+ 1 (get-q-n #'b))) ; one element plus the length of the tail
+ ((q ())
+ 0))) ; (<anything> ()) is the empty terminator; other shapes raise a syntax error
+
+(define (take-n n v) ; split list v into (head . tail), where tail holds the last n elements in order
+ (let lp ((i 0) (v (reverse v)) (r '())) ; walk v from its end, moving elements onto r
+ (if (< i n)
+ (if (pair? v)
+ (lp (+ i 1) (cdr v) (cons (car v) r))
+ (raise (ValueError "wrong number of values in values"))) ; fewer than n elements available
+ (cons
+ (reverse v) ; what is left, restored to original order: the part bound to the starred target
+ r))))
+
(define-syntax qset!
(lambda (x)
(syntax-case x (@@ @)
+ ((_ (cons (#:star x) y) v)
+ (let ((n (get-q-n #'y)))
+ #`(let* ((h.r (take-n #,n v))
+ (h (car h.r))
+ (r (cdr h.r)))
+ (qset! x h)
+ (qset0! y r))))
+
((_ (cons x y) v)
(equal? (syntax->datum #'cons) '(@ (guile) cons))
#'(let ((w (to-list v)))
@@ -2781,6 +2842,14 @@
(define-syntax qset0!
(lambda (x)
(syntax-case x (@@ @)
+ ((_ (cons (#:star x) y) v) ; starred target x followed by the remaining targets y
+ (let ((n (get-q-n #'y))) ; number of targets that come after the starred one
+ #`(let* ((h.r (take-n #,n v)) ; take-n takes the count first, then the list (same order as in qset!)
+ (h (car h.r)) ; everything except the last n values goes to the starred target
+ (r (cdr h.r))) ; the last n values go to the remaining targets
+ (qset! x h)
+ (qset0! y r))))
+
((_ (cons x y) v)
(equal? (syntax->datum #'cons) '(@ (guile) cons))
#'(let ((w v))
diff --git a/modules/language/python/eval.scm b/modules/language/python/eval.scm
index 73056fd..1cd92ad 100644
--- a/modules/language/python/eval.scm
+++ b/modules/language/python/eval.scm
@@ -75,7 +75,6 @@
(define __contains__
(lambda (self key)
- (pk key)
(if (string? key) (set! key (string->symbol key)))
(for ((k v : (__iter__ self))) ()
(if (eq? k key)
diff --git a/modules/language/python/module/collections.scm b/modules/language/python/module/collections.scm
index 39a4d45..c4d87e4 100644
--- a/modules/language/python/module/collections.scm
+++ b/modules/language/python/module/collections.scm
@@ -611,11 +611,13 @@
(string-split field_names #\,)))
(set! field_names
- (let lp ((fs field_names))
- (if (pair? fs)
- (append (string-split (car fs) #\space)
- (lp (cdr fs)))
- '())))
+ (for ((fs : field_names)) ((r '()))
+ (if (string? fs)
+ (append (string-split fs #\space)
+ r)
+ (cons fs r))
+ #:final
+ r))
(set! field_names (py-list (py-map scm-str field_names)))
(set! typename (scm-str typename))
diff --git a/modules/language/python/module/email/_parseaddr.py b/modules/language/python/module/email/_parseaddr.py
index 0f53dab..c9460a1 100644
--- a/modules/language/python/module/email/_parseaddr.py
+++ b/modules/language/python/module/email/_parseaddr.py
@@ -11,7 +11,10 @@ __all__ = [
'mktime_tz',
'parsedate',
'parsedate_tz',
+ '_parsedate_tz',
'quote',
+ 'AddressList',
+
]
import time, calendar
diff --git a/modules/language/python/module/email/_policybase.py b/modules/language/python/module/email/_policybase.py
new file mode 100644
index 0000000..a8633e2
--- /dev/null
+++ b/modules/language/python/module/email/_policybase.py
@@ -0,0 +1,375 @@
+module(email,_policybase)
+"""Policy framework for the email package.
+
+Allows fine grained feature control of how the package parses and emits data.
+"""
+
+import abc
+import email.header as header
+import email.charset as _charset
+from email.utils import _has_surrogates
+
+__all__ = [
+ 'Policy',
+ 'Compat32',
+ 'compat32',
+ ]
+
+
+class _PolicyBase:  # NOTE(review): the docstring says instances are callable, but no __call__ is defined in this class; clone() performs the described copy-with-overrides
+
+ """Policy Object basic framework.
+
+ This class is useless unless subclassed. A subclass should define
+ class attributes with defaults for any values that are to be
+ managed by the Policy object. The constructor will then allow
+ non-default values to be set for these attributes at instance
+ creation time. The instance will be callable, taking these same
+ attributes keyword arguments, and returning a new instance
+ identical to the called instance except for those values changed
+ by the keyword arguments. Instances may be added, yielding new
+ instances with any non-default values from the right hand
+ operand overriding those in the left hand operand. That is,
+
+ A + B == A(<non-default values of B>)
+
+ The repr of an instance can be used to reconstruct the object
+ if and only if the repr of the values can be used to reconstruct
+ those values.
+
+ """
+
+ def __init__(self, **kw):
+ """Create new Policy, possibly overriding some defaults.
+
+ See class docstring for a list of overridable attributes.
+
+ """
+ for name, value in kw.items():
+ if hasattr(self, name):  # only names that already exist as attributes are accepted
+ super(_PolicyBase,self).__setattr__(name, value)  # go through the parent __setattr__; this class's own always raises
+ else:
+ raise TypeError(
+ "{!r} is an invalid keyword argument for {}".format(
+ name, self.__class__.__name__))
+
+ def __repr__(self):  # "ClassName(name=value, ...)" built from the instance __dict__ only
+ args = [ "{}={!r}".format(name, value)
+ for name, value in self.__dict__.items() ]
+ return "{}({})".format(self.__class__.__name__, ', '.join(args))
+
+ def clone(self, **kw):
+ """Return a new instance with specified attributes changed.
+
+ The new instance has the same attribute values as the current object,
+ except for the changes passed in as keyword arguments.
+
+ """
+ newpolicy = self.__class__.__new__(self.__class__)  # create the copy without running __init__
+ for attr, value in self.__dict__.items():
+ object.__setattr__(newpolicy, attr, value)  # object.__setattr__ sidesteps the raising __setattr__ below
+ for attr, value in kw.items():
+ if not hasattr(self, attr):  # unknown names are rejected, as in __init__
+ raise TypeError(
+ "{!r} is an invalid keyword argument for {}".format(
+ attr, self.__class__.__name__))
+ object.__setattr__(newpolicy, attr, value)
+ return newpolicy
+
+ def __setattr__(self, name, value):  # always raises AttributeError; only the message differs
+ if hasattr(self, name):
+ msg = "{!r} object attribute {!r} is read-only"
+ else:
+ msg = "{!r} object has no attribute {!r}"
+ raise AttributeError(msg.format(self.__class__.__name__, name))
+
+ def __add__(self, other):
+ """Non-default values from right operand override those from left.
+
+ The object returned is a new instance of the subclass.
+
+ """
+ return self.clone(**other.__dict__)  # other's instance-level values are applied on top of a copy of self
+
+
+def _append_doc(doc, added_doc):  # combine a base docstring with an extension docstring
+ doc = doc.rsplit('\n', 1)[0]  # drop whatever follows the last newline of the base text
+ added_doc = added_doc.split('\n', 1)[1]  # drop the first line of the extension (the '+' marker line when called from _extend_docstrings)
+ return doc + '\n' + added_doc
+
+def _extend_docstrings(cls):  # class decorator: expand docstrings that start with '+' using inherited text; returns cls
+ if cls.__doc__ and cls.__doc__.startswith('+'):
+ cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, cls.__doc__)  # class docstring extends the first base's docstring
+ for name, attr in cls.__dict__.items():
+ if attr.__doc__ and attr.__doc__.startswith('+'):
+ for c in (c for base in cls.__bases__ for c in base.mro()):  # search every base's MRO, in order
+ doc = getattr(getattr(c, name), '__doc__')
+ if doc:
+ attr.__doc__ = _append_doc(doc, attr.__doc__)
+ break  # the first ancestor with a non-empty docstring wins
+ return cls
+
+
+class Policy(_PolicyBase, metaclass=abc.ABCMeta):
+
+ r"""Controls for how messages are interpreted and formatted.
+
+ Most of the classes and many of the methods in the email package accept
+ Policy objects as parameters. A Policy object contains a set of values and
+ functions that control how input is interpreted and how output is rendered.
+ For example, the parameter 'raise_on_defect' controls whether or not an RFC
+ violation results in an error being raised or not, while 'max_line_length'
+ controls the maximum length of output lines when a Message is serialized.
+
+ Any valid attribute may be overridden when a Policy is created by passing
+ it as a keyword argument to the constructor. Policy objects are immutable,
+ but a new Policy object can be created with only certain values changed by
+ calling the Policy instance with keyword arguments. Policy objects can
+ also be added, producing a new Policy object in which the non-default
+ attributes set in the right hand operand overwrite those specified in the
+ left operand.
+
+ Settable attributes:
+
+ raise_on_defect -- If true, then defects should be raised as errors.
+ Default: False.
+
+ linesep -- string containing the value to use as separation
+ between output lines. Default '\n'.
+
+ cte_type -- Type of allowed content transfer encodings
+
+ 7bit -- ASCII only
+ 8bit -- Content-Transfer-Encoding: 8bit is allowed
+
+ Default: 8bit. Also controls the disposition of
+ (RFC invalid) binary data in headers; see the
+ documentation of the binary_fold method.
+
+ max_line_length -- maximum length of lines, excluding 'linesep',
+ during serialization. None or 0 means no line
+ wrapping is done. Default is 78.
+
+ mangle_from_ -- a flag that, when True escapes From_ lines in the
+ body of the message by putting a `>' in front of
+ them. This is used when the message is being
+ serialized by a generator. Default: False.
+
+ message_factory -- the class to use to create new message objects.
+ If the value is None, the default is Message.
+
+ """
+
+ raise_on_defect = False
+ linesep = '\n'
+ cte_type = '8bit'
+ max_line_length = 78
+ mangle_from_ = False  # the Compat32 subclass overrides this to True
+ message_factory = None
+
+ def handle_defect(self, obj, defect):
+ """Based on policy, either raise defect or call register_defect.
+
+ handle_defect(obj, defect)
+
+ defect should be a Defect subclass, but in any case must be an
+ Exception subclass. obj is the object on which the defect should be
+ registered if it is not raised. If the raise_on_defect is True, the
+ defect is raised as an error, otherwise the object and the defect are
+ passed to register_defect.
+
+ This method is intended to be called by parsers that discover defects.
+ The email package parsers always call it with Defect instances.
+
+ """
+ if self.raise_on_defect:
+ raise defect
+ self.register_defect(obj, defect)  # reached only when raise_on_defect is false
+
+ def register_defect(self, obj, defect):
+ """Record 'defect' on 'obj'.
+
+ Called by handle_defect if raise_on_defect is False. This method is
+ part of the Policy API so that Policy subclasses can implement custom
+ defect handling. The default implementation calls the append method of
+ the defects attribute of obj. The objects used by the email package by
+ default that get passed to this method will always have a defects
+ attribute with an append method.
+
+ """
+ obj.defects.append(defect)
+
+ def header_max_count(self, name):
+ """Return the maximum allowed number of headers named 'name'.
+
+ Called when a header is added to a Message object. If the returned
+ value is not 0 or None, and there are already a number of headers with
+ the name 'name' equal to the value returned, a ValueError is raised.
+
+ Because the default behavior of Message's __setitem__ is to append the
+ value to the list of headers, it is easy to create duplicate headers
+ without realizing it. This method allows certain headers to be limited
+ in the number of instances of that header that may be added to a
+ Message programmatically. (The limit is not observed by the parser,
+ which will faithfully produce as many headers as exist in the message
+ being parsed.)
+
+ The default implementation returns None for all header names.
+ """
+ return None
+
+ @abc.abstractmethod
+ def header_source_parse(self, sourcelines):
+ """Given a list of linesep terminated strings constituting the lines of
+ a single header, return the (name, value) tuple that should be stored
+ in the model. The input lines should retain their terminating linesep
+ characters. The lines passed in by the email package may contain
+ surrogateescaped binary data.
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def header_store_parse(self, name, value):
+ """Given the header name and the value provided by the application
+ program, return the (name, value) that should be stored in the model.
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def header_fetch_parse(self, name, value):
+ """Given the header name and the value from the model, return the value
+ to be returned to the application program that is requesting that
+ header. The value passed in by the email package may contain
+ surrogateescaped binary data if the lines were parsed by a BytesParser.
+ The returned value should not contain any surrogateescaped data.
+
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def fold(self, name, value):
+ """Given the header name and the value from the model, return a string
+ containing linesep characters that implement the folding of the header
+ according to the policy controls. The value passed in by the email
+ package may contain surrogateescaped binary data if the lines were
+ parsed by a BytesParser. The returned value should not contain any
+ surrogateescaped data.
+
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def fold_binary(self, name, value):
+ """Given the header name and the value from the model, return binary
+ data containing linesep characters that implement the folding of the
+ header according to the policy controls. The value passed in by the
+ email package may contain surrogateescaped binary data.
+
+ """
+ raise NotImplementedError
+
+
+#@_extend_docstrings
+class Compat32(Policy):  # NOTE: the _extend_docstrings decorator is commented out just above, so the '+' docstrings below are left unexpanded
+
+ """+
+ This particular policy is the backward compatibility Policy. It
+ replicates the behavior of the email package version 5.1.
+ """
+
+ mangle_from_ = True  # overrides the False default set on Policy
+
+ def _sanitize_header(self, name, value):
+ # If the header value contains surrogates, return a Header using
+ # the unknown-8bit charset to encode the bytes as encoded words.
+ if not isinstance(value, str):
+ # Assume it is already a header object
+ return value
+ if _has_surrogates(value):
+ return header.Header(value, charset=_charset.UNKNOWN8BIT,
+ header_name=name)
+ else:
+ return value
+
+ def header_source_parse(self, sourcelines):
+ """+
+ The name is parsed as everything up to the ':' and returned unmodified.
+ The value is determined by stripping leading whitespace off the
+ remainder of the first line, joining all subsequent lines together, and
+ stripping any trailing carriage return or linefeed characters.
+
+ """
+ name, value = sourcelines[0].split(':', 1)  # split at the first ':' only; raises ValueError if the first line has none
+ value = value.lstrip(' \t') + ''.join(sourcelines[1:])
+ return (name, value.rstrip('\r\n'))
+
+ def header_store_parse(self, name, value):
+ """+
+ The name and value are returned unmodified.
+ """
+ return (name, value)
+
+ def header_fetch_parse(self, name, value):
+ """+
+ If the value contains binary data, it is converted into a Header object
+ using the unknown-8bit charset. Otherwise it is returned unmodified.
+ """
+ return self._sanitize_header(name, value)
+
+ def fold(self, name, value):
+ """+
+ Headers are folded using the Header folding algorithm, which preserves
+ existing line breaks in the value, and wraps each resulting line to the
+ max_line_length. Non-ASCII binary data are CTE encoded using the
+ unknown-8bit charset.
+
+ """
+ return self._fold(name, value, sanitize=True)
+
+ def fold_binary(self, name, value):
+ """+
+ Headers are folded using the Header folding algorithm, which preserves
+ existing line breaks in the value, and wraps each resulting line to the
+ max_line_length. If cte_type is 7bit, non-ascii binary data is CTE
+ encoded using the unknown-8bit charset. Otherwise the original source
+ header is used, with its existing line breaks and/or binary data.
+
+ """
+ folded = self._fold(name, value, sanitize=self.cte_type=='7bit')  # sanitize only when the policy is restricted to 7bit
+ return folded.encode('ascii', 'surrogateescape')
+
+ def _fold(self, name, value, sanitize):  # shared by fold() and fold_binary(); returns "name: value" + linesep as str
+ parts = []
+ parts.append('%s: ' % name)
+ if isinstance(value, str):
+ if _has_surrogates(value):
+ if sanitize:
+ h = header.Header(value,
+ charset=_charset.UNKNOWN8BIT,
+ header_name=name)
+ else:
+ # If we have raw 8bit data in a byte string, we have no idea
+ # what the encoding is. There is no safe way to split this
+ # string. If it's ascii-subset, then we could do a normal
+ # ascii split, but if it's multibyte then we could break the
+ # string. There's no way to know so the least harm seems to
+ # be to not split the string and risk it being too long.
+ parts.append(value)
+ h = None  # nothing left to encode; the raw value was appended as is
+ else:
+ h = header.Header(value, header_name=name)
+ else:
+ # Assume it is a Header-like object.
+ h = value
+ if h is not None:
+ # The Header class interprets a value of None for maxlinelen as the
+ # default value of 78, as recommended by RFC 2822.
+ maxlinelen = 0  # so a max_line_length of None is passed on as 0 rather than None
+ if self.max_line_length is not None:
+ maxlinelen = self.max_line_length
+ parts.append(h.encode(linesep=self.linesep, maxlinelen=maxlinelen))
+ parts.append(self.linesep)
+ return ''.join(parts)
+
+
+compat32 = Compat32()
diff --git a/modules/language/python/module/email/feedparser.py b/modules/language/python/module/email/feedparser.py
new file mode 100644
index 0000000..f07448a
--- /dev/null
+++ b/modules/language/python/module/email/feedparser.py
@@ -0,0 +1,537 @@
+module(email,feedparser)
+# Copyright (C) 2004-2006 Python Software Foundation
+# Authors: Baxter, Wouters and Warsaw
+# Contact: email-sig@python.org
+
+"""FeedParser - An email feed parser.
+
+The feed parser implements an interface for incrementally parsing an email
+message, line by line. This has advantages for certain applications, such as
+those reading email messages off a socket.
+
+FeedParser.feed() is the primary interface for pushing new data into the
+parser. It returns when there's nothing more it can do with the available
+data. When you have no more data to push into the parser, call .close().
+This completes the parsing and returns the root message object.
+
+The other advantage of this parser is that it will never raise a parsing
+exception. Instead, when it finds something unexpected, it adds a 'defect' to
+the current message. Defects are just instances that live on the message
+object's .defects attribute.
+"""
+
+__all__ = ['FeedParser', 'BytesFeedParser']
+
+import re
+
+import email.errors as errors
+from email._policybase import compat32
+from collections import deque
+from io import StringIO
+
+NLCRE = re.compile(r'\r\n|\r|\n')  # any single line terminator
+NLCRE_bol = re.compile(r'(\r\n|\r|\n)')  # same alternation, captured as a group
+NLCRE_eol = re.compile(r'(\r\n|\r|\n)\Z')  # a line terminator at the very end of the string
+NLCRE_crack = re.compile(r'(\r\n|\r|\n)')  # identical pattern to NLCRE_bol
+# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character
+# except controls, SP, and ":".
+headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])')  # "From " line, "name:" header start, or a continuation line
+EMPTYSTRING = ''
+NL = '\n'
+
+NeedMoreData = object()  # unique sentinel; BufferedSubFile.readline() returns it when no line is buffered and the file is not closed
+
+
+
+class BufferedSubFile(object):
+ """A file-ish object that can have new data loaded into it.
+
+ You can also push and pop line-matching predicates onto a stack. When the
+ current predicate matches the current line, a false EOF response
+ (i.e. empty string) is returned instead. This lets the parser adhere to a
+ simple abstraction -- it parses until EOF closes the current message.
+ """
+ def __init__(self):
+ # Text stream of the last partial line pushed into this object.
+ # See issue 22233 for why this is a text stream and not a list.
+ self._partial = StringIO(newline='')
+ # A deque of full, pushed lines
+ self._lines = deque()
+ # The stack of false-EOF checking predicates.
+ self._eofstack = []
+ # A flag indicating whether the file has been closed or not.
+ self._closed = False
+
+ def push_eof_matcher(self, pred):  # pred: callable taking a line; a truthy result means "false EOF here"
+ self._eofstack.append(pred)
+
+ def pop_eof_matcher(self):  # remove and return the most recently pushed predicate
+ return self._eofstack.pop()
+
+ def close(self):
+ # Don't forget any trailing partial line.
+ self._partial.seek(0)
+ self.pushlines(self._partial.readlines())
+ self._partial.seek(0)
+ self._partial.truncate()
+ self._closed = True  # from now on readline() returns '' instead of NeedMoreData when empty
+
+ def readline(self):  # returns a line, '' at real or false EOF, or the NeedMoreData sentinel
+ if not self._lines:
+ if self._closed:
+ return ''
+ return NeedMoreData
+ # Pop the line off the stack and see if it matches the current
+ # false-EOF predicate.
+ line = self._lines.popleft()
+ # RFC 2046, section 5.1.2 requires us to recognize outer level
+ # boundaries at any level of inner nesting. Do this, but be sure it's
+ # in the order of most to least nested.
+ for ateof in reversed(self._eofstack):
+ if ateof(line):
+ # We're at the false EOF. But push the last line back first.
+ self._lines.appendleft(line)
+ return ''
+ return line
+
+ def unreadline(self, line):
+ # Let the consumer push a line back into the buffer.
+ assert line is not NeedMoreData
+ self._lines.appendleft(line)
+
+ def push(self, data):
+ """Push some new data into this object."""
+ self._partial.write(data)
+ if '\n' not in data and '\r' not in data:
+ # No new complete lines, wait for more.
+ return
+
+ # Crack into lines, preserving the linesep characters.
+ self._partial.seek(0)
+ parts = self._partial.readlines()
+ self._partial.seek(0)
+ self._partial.truncate()
+
+ # If the last element of the list does not end in a newline, then treat
+ # it as a partial line. We only check for '\n' here because a line
+ # ending with '\r' might be a line that was split in the middle of a
+ # '\r\n' sequence (see bugs 1555570 and 1721862).
+ if not parts[-1].endswith('\n'):
+ self._partial.write(parts.pop())  # keep the unfinished tail buffered for the next push()
+ self.pushlines(parts)
+
+ def pushlines(self, lines):  # append already-complete lines to the right end of the queue
+ self._lines.extend(lines)
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ line = self.readline()
+ if line == '':  # only '' ends iteration; the NeedMoreData sentinel is handed to the caller
+ raise StopIteration
+ return line
+
+
+
+class FeedParser:
+ """A feed-style parser of email."""
+
+ def __init__(self, _factory=None, *, policy=compat32):
+ """_factory is called with no arguments to create a new message obj
+
+ The policy keyword specifies a policy object that controls a number of
+ aspects of the parser's operation. The default policy maintains
+ backward compatibility.
+
+ """
+ self.policy = policy
+ self._old_style_factory = False
+ if _factory is None:
+ if policy.message_factory is None:
+ from email.message import Message
+ self._factory = Message
+ else:
+ self._factory = policy.message_factory
+ else:
+ self._factory = _factory
+ try:
+ _factory(policy=self.policy)
+ except TypeError:
+ # Assume this is an old-style factory
+ self._old_style_factory = True
+ self._input = BufferedSubFile()
+ self._msgstack = []
+ self._parse = self._parsegen().__next__
+ self._cur = None
+ self._last = None
+ self._headersonly = False
+
+ # Non-public interface for supporting Parser's headersonly flag
+ def _set_headersonly(self):
+ self._headersonly = True
+
+ def feed(self, data):
+ """Push more data into the parser."""
+ self._input.push(data)
+ self._call_parse()
+
+ def _call_parse(self):
+ try:
+ self._parse()
+ except StopIteration:
+ pass
+
+ def close(self):
+ """Parse all remaining data and return the root message object."""
+ self._input.close()
+ self._call_parse()
+ root = self._pop_message()
+ assert not self._msgstack
+ # Look for final set of defects
+ if root.get_content_maintype() == 'multipart' \
+ and not root.is_multipart():
+ defect = errors.MultipartInvariantViolationDefect()
+ self.policy.handle_defect(root, defect)
+ return root
+
+ def _new_message(self):
+ if self._old_style_factory:
+ msg = self._factory()
+ else:
+ msg = self._factory(policy=self.policy)
+ if self._cur and self._cur.get_content_type() == 'multipart/digest':
+ msg.set_default_type('message/rfc822')
+ if self._msgstack:
+ self._msgstack[-1].attach(msg)
+ self._msgstack.append(msg)
+ self._cur = msg
+ self._last = msg
+
+ def _pop_message(self):
+ retval = self._msgstack.pop()
+ if self._msgstack:
+ self._cur = self._msgstack[-1]
+ else:
+ self._cur = None
+ return retval
+
+ def _parsegen(self):
+ # Create a new message and start by parsing headers.
+ self._new_message()
+ headers = []
+ # Collect the headers, searching for a line that doesn't match the RFC
+ # 2822 header or continuation pattern (including an empty line).
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ if not headerRE.match(line):
+ # If we saw the RFC defined header/body separator
+ # (i.e. newline), just throw it away. Otherwise the line is
+ # part of the body so push it back.
+ if not NLCRE.match(line):
+ defect = errors.MissingHeaderBodySeparatorDefect()
+ self.policy.handle_defect(self._cur, defect)
+ self._input.unreadline(line)
+ break
+ headers.append(line)
+ # Done with the headers, so parse them and figure out what we're
+ # supposed to see in the body of the message.
+ self._parse_headers(headers)
+ # Headers-only parsing is a backwards compatibility hack, which was
+ # necessary in the older parser, which could raise errors. All
+ # remaining lines in the input are thrown into the message body.
+ if self._headersonly:
+ lines = []
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ if line == '':
+ break
+ lines.append(line)
+ self._cur.set_payload(EMPTYSTRING.join(lines))
+ return
+ if self._cur.get_content_type() == 'message/delivery-status':
+ # message/delivery-status contains blocks of headers separated by
+ # a blank line. We'll represent each header block as a separate
+ # nested message object, but the processing is a bit different
+ # than standard message/* types because there is no body for the
+ # nested messages. A blank line separates the subparts.
+ while True:
+ self._input.push_eof_matcher(NLCRE.match)
+ for retval in self._parsegen():
+ if retval is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ msg = self._pop_message()
+ # We need to pop the EOF matcher in order to tell if we're at
+ # the end of the current file, not the end of the last block
+ # of message headers.
+ self._input.pop_eof_matcher()
+ # The input stream must be sitting at the newline or at the
+ # EOF. We want to see if we're at the end of this subpart, so
+ # first consume the blank line, then test the next line to see
+ # if we're at this subpart's EOF.
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ if line == '':
+ break
+ # Not at EOF so this is a line we're going to need.
+ self._input.unreadline(line)
+ return
+ if self._cur.get_content_maintype() == 'message':
+ # The message claims to be a message/* type, then what follows is
+ # another RFC 2822 message.
+ for retval in self._parsegen():
+ if retval is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ self._pop_message()
+ return
+ if self._cur.get_content_maintype() == 'multipart':
+ boundary = self._cur.get_boundary()
+ if boundary is None:
+ # The message /claims/ to be a multipart but it has not
+ # defined a boundary. That's a problem which we'll handle by
+ # reading everything until the EOF and marking the message as
+ # defective.
+ defect = errors.NoBoundaryInMultipartDefect()
+ self.policy.handle_defect(self._cur, defect)
+ lines = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ lines.append(line)
+ self._cur.set_payload(EMPTYSTRING.join(lines))
+ return
+ # Make sure a valid content type was specified per RFC 2045:6.4.
+ if (self._cur.get('content-transfer-encoding', '8bit').lower()
+ not in ('7bit', '8bit', 'binary')):
+ defect = errors.InvalidMultipartContentTransferEncodingDefect()
+ self.policy.handle_defect(self._cur, defect)
+ # Create a line match predicate which matches the inter-part
+ # boundary as well as the end-of-multipart boundary. Don't push
+ # this onto the input stream until we've scanned past the
+ # preamble.
+ separator = '--' + boundary
+ boundaryre = re.compile(
+ '(?P<sep>' + re.escape(separator) +
+ r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
+ capturing_preamble = True
+ preamble = []
+ linesep = False
+ close_boundary_seen = False
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ if line == '':
+ break
+ mo = boundaryre.match(line)
+ if mo:
+ # If we're looking at the end boundary, we're done with
+ # this multipart. If there was a newline at the end of
+ # the closing boundary, then we need to initialize the
+ # epilogue with the empty string (see below).
+ if mo.group('end'):
+ close_boundary_seen = True
+ linesep = mo.group('linesep')
+ break
+ # We saw an inter-part boundary. Were we in the preamble?
+ if capturing_preamble:
+ if preamble:
+ # According to RFC 2046, the last newline belongs
+ # to the boundary.
+ lastline = preamble[-1]
+ eolmo = NLCRE_eol.search(lastline)
+ if eolmo:
+ preamble[-1] = lastline[:-len(eolmo.group(0))]
+ self._cur.preamble = EMPTYSTRING.join(preamble)
+ capturing_preamble = False
+ self._input.unreadline(line)
+ continue
+ # We saw a boundary separating two parts. Consume any
+ # multiple boundary lines that may be following. Our
+ # interpretation of RFC 2046 BNF grammar does not produce
+ # body parts within such double boundaries.
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ mo = boundaryre.match(line)
+ if not mo:
+ self._input.unreadline(line)
+ break
+ # Recurse to parse this subpart; the input stream points
+ # at the subpart's first line.
+ self._input.push_eof_matcher(boundaryre.match)
+ for retval in self._parsegen():
+ if retval is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ # Because of RFC 2046, the newline preceding the boundary
+ # separator actually belongs to the boundary, not the
+ # previous subpart's payload (or epilogue if the previous
+ # part is a multipart).
+ if self._last.get_content_maintype() == 'multipart':
+ epilogue = self._last.epilogue
+ if epilogue == '':
+ self._last.epilogue = None
+ elif epilogue is not None:
+ mo = NLCRE_eol.search(epilogue)
+ if mo:
+ end = len(mo.group(0))
+ self._last.epilogue = epilogue[:-end]
+ else:
+ payload = self._last._payload
+ if isinstance(payload, str):
+ mo = NLCRE_eol.search(payload)
+ if mo:
+ payload = payload[:-len(mo.group(0))]
+ self._last._payload = payload
+ self._input.pop_eof_matcher()
+ self._pop_message()
+ # Set the multipart up for newline cleansing, which will
+ # happen if we're in a nested multipart.
+ self._last = self._cur
+ else:
+ # I think we must be in the preamble
+ assert capturing_preamble
+ preamble.append(line)
+ # We've seen either the EOF or the end boundary. If we're still
+ # capturing the preamble, we never saw the start boundary. Note
+ # that as a defect and store the captured text as the payload.
+ if capturing_preamble:
+ defect = errors.StartBoundaryNotFoundDefect()
+ self.policy.handle_defect(self._cur, defect)
+ self._cur.set_payload(EMPTYSTRING.join(preamble))
+ epilogue = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ self._cur.epilogue = EMPTYSTRING.join(epilogue)
+ return
+ # If we're not processing the preamble, then we might have seen
+ # EOF without seeing that end boundary...that is also a defect.
+ if not close_boundary_seen:
+ defect = errors.CloseBoundaryNotFoundDefect()
+ self.policy.handle_defect(self._cur, defect)
+ return
+ # Everything from here to the EOF is epilogue. If the end boundary
+ # ended in a newline, we'll need to make sure the epilogue isn't
+ # None
+ if linesep:
+ epilogue = ['']
+ else:
+ epilogue = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ epilogue.append(line)
+ # Any CRLF at the front of the epilogue is not technically part of
+ # the epilogue. Also, watch out for an empty string epilogue,
+ # which means a single newline.
+ if epilogue:
+ firstline = epilogue[0]
+ bolmo = NLCRE_bol.match(firstline)
+ if bolmo:
+ epilogue[0] = firstline[len(bolmo.group(0)):]
+ self._cur.epilogue = EMPTYSTRING.join(epilogue)
+ return
+ # Otherwise, it's some non-multipart type, so the entire rest of the
+ # file contents becomes the payload.
+ lines = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ lines.append(line)
+ self._cur.set_payload(EMPTYSTRING.join(lines))
+
+    def _parse_headers(self, lines):
+        """Store the header lines of the current message on self._cur.
+
+        lines is the list of source lines that make up the header block.
+        Each header, together with its continuation lines, is run through
+        self.policy.header_source_parse() and stored with set_raw().  A
+        'From ' line in first position is recorded with set_unixfrom(); one
+        in last position is pushed back onto self._input and parsing stops.
+        Malformed lines are noted as defects and skipped.
+        """
+        pending_name = ''
+        pending_lines = []
+        final_index = len(lines) - 1
+        for index, line in enumerate(lines):
+            if line[0] in ' \t':
+                # Continuation line: it extends the header collected so far.
+                if pending_name:
+                    pending_lines.append(line)
+                else:
+                    # Nothing to continue.  This is illegal, so report the
+                    # defect and leave the line out of the headers.
+                    defect = errors.FirstHeaderLineIsContinuationDefect(line)
+                    self.policy.handle_defect(self._cur, defect)
+                continue
+            # A new logical line begins, so flush the header gathered so far.
+            if pending_name:
+                self._cur.set_raw(
+                    *self.policy.header_source_parse(pending_lines))
+                pending_name, pending_lines = '', []
+            if line.startswith('From '):
+                # Envelope (unix-from) line; its treatment depends on where
+                # it sits in the header block.
+                if index == 0:
+                    eol = NLCRE_eol.search(line)
+                    if eol:
+                        # Drop the trailing newline before storing it.
+                        line = line[:-len(eol.group(0))]
+                    self._cur.set_unixfrom(line)
+                elif index == final_index:
+                    # Probably the first line of the body: push it back and
+                    # stop parsing headers.
+                    self._input.unreadline(line)
+                    return
+                else:
+                    # Oddly placed unix-from line: note the defect, ignore it.
+                    defect = errors.MisplacedEnvelopeHeaderDefect(line)
+                    self._cur.defects.append(defect)
+                continue
+            # Split on the colon between field name and value.  The caller is
+            # expected to hand over only lines that contain one (see assert).
+            colon = line.find(':')
+            if colon == 0:
+                # No field name at all.  The header is malformed, but the
+                # rest of the message may still be usable, so keep going.
+                defect = errors.InvalidHeaderDefect("Missing header name.")
+                self._cur.defects.append(defect)
+                continue
+            assert colon>0, "_parse_headers fed line with no : and no leading WS"
+            pending_name = line[:colon]
+            pending_lines = [line]
+        # All lines consumed; store whatever header is still pending.
+        if pending_name:
+            self._cur.set_raw(*self.policy.header_source_parse(pending_lines))
+
+
+class BytesFeedParser(FeedParser):
+    """FeedParser variant whose feed() takes bytes instead of str."""
+
+    def feed(self, data):
+        """Decode data and pass the resulting text to FeedParser.feed().
+
+        The bytes are decoded as ASCII with the 'surrogateescape' handler.
+        """
+        text = data.decode('ascii', 'surrogateescape')
+        super().feed(text)
diff --git a/modules/language/python/module/email/parser.py b/modules/language/python/module/email/parser.py
new file mode 100644
index 0000000..5ec164e
--- /dev/null
+++ b/modules/language/python/module/email/parser.py
@@ -0,0 +1,130 @@
+module(email,parser)
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
+# Contact: email-sig@python.org
+
+"""A parser of RFC 2822 and MIME email messages."""
+
+__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
+ 'FeedParser', 'BytesFeedParser']
+
+from io import StringIO, TextIOWrapper
+
+from email.feedparser import FeedParser, BytesFeedParser
+from email._policybase import compat32
+
+
+class Parser:
+    """Parser of RFC 2822 and MIME email messages held as text."""
+
+    def __init__(self, _class=None,*, policy=compat32):
+        """Remember the message class and policy used for parsing.
+
+        The input handed to parse()/parsestr() must be a block of RFC 2822
+        headers and header continuation lines, optionally preceded by a
+        `Unix-from' header, terminated either by the end of the input or by
+        a blank line.
+
+        _class is passed on to FeedParser as the class to instantiate for
+        new message objects; the default of None is forwarded as is.
+
+        The policy keyword specifies a policy object that is likewise passed
+        on to FeedParser.  It defaults to compat32.
+        """
+        self._class = _class
+        self.policy = policy
+
+    def parse(self, fp, headersonly=False):
+        """Build a message structure from the data in a file object.
+
+        fp is read in chunks of 8192 until read() returns something false;
+        every chunk is fed to a fresh FeedParser and the root object
+        returned by its close() is the result.  If headersonly is true the
+        feed parser is put into headers-only mode first.
+        """
+        feedparser = FeedParser(self._class, policy=self.policy)
+        if headersonly:
+            feedparser._set_headersonly()
+        chunk = fp.read(8192)
+        while chunk:
+            feedparser.feed(chunk)
+            chunk = fp.read(8192)
+        return feedparser.close()
+
+    def parsestr(self, text, headersonly=False):
+        """Build a message structure from a string.
+
+        The text is wrapped in a StringIO and handed to parse(); headersonly
+        has the same meaning as there.
+        """
+        stream = StringIO(text)
+        return self.parse(stream, headersonly=headersonly)
+
+
+class HeaderParser(Parser):
+    """Parser that always runs in headers-only mode."""
+
+    def parse(self, fp, headersonly=True):
+        """Parse fp with Parser.parse(); the headersonly argument is ignored
+        and True is always passed on."""
+        return Parser.parse(self, fp, headersonly=True)
+
+    def parsestr(self, text, headersonly=True):
+        """Parse text with Parser.parsestr(); the headersonly argument is
+        ignored and True is always passed on."""
+        return Parser.parsestr(self, text, headersonly=True)
+
+class BytesParser:
+    """Parser of binary RFC 2822 and MIME email messages.
+
+    The work is delegated to a text Parser after the input has been decoded
+    as ASCII with the 'surrogateescape' error handler.
+    """
+
+    def __init__(self, *args, **kw):
+        """Create the wrapped Parser.
+
+        All positional and keyword arguments are forwarded unchanged to
+        Parser(); the resulting object is kept as self.parser.
+        """
+        self.parser = Parser(*args, **kw)
+
+    def parse(self, fp, headersonly=False):
+        """Build a message structure from the data in a binary file.
+
+        fp is wrapped in a TextIOWrapper and handed to the wrapped parser's
+        parse() together with headersonly.  The wrapper is detached from fp
+        again afterwards, whether or not parsing succeeded.
+        """
+        wrapper = TextIOWrapper(fp, encoding='ascii',
+                                errors='surrogateescape')
+        try:
+            return self.parser.parse(wrapper, headersonly)
+        finally:
+            wrapper.detach()
+
+    def parsebytes(self, text, headersonly=False):
+        """Build a message structure from a byte string.
+
+        text is decoded and handed to the wrapped parser's parsestr()
+        together with headersonly.
+        """
+        decoded = text.decode('ASCII', errors='surrogateescape')
+        return self.parser.parsestr(decoded, headersonly)
+
+
+class BytesHeaderParser(BytesParser):
+    """BytesParser that always runs in headers-only mode."""
+
+    def parse(self, fp, headersonly=True):
+        """Parse fp with BytesParser.parse(); the headersonly argument is
+        ignored and True is always passed on."""
+        return BytesParser.parse(self, fp, True)
+
+    def parsebytes(self, text, headersonly=True):
+        """Parse text with BytesParser.parsebytes(); the headersonly argument
+        is ignored and True is always passed on."""
+        return BytesParser.parsebytes(self, text, True)
diff --git a/modules/language/python/module/email/utils.py b/modules/language/python/module/email/utils.py
new file mode 100644
index 0000000..abe3895
--- /dev/null
+++ b/modules/language/python/module/email/utils.py
@@ -0,0 +1,389 @@
+module(email,utils)
+# Copyright (C) 2001-2010 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Miscellaneous utilities."""
+
+__all__ = [
+ 'collapse_rfc2231_value',
+ 'decode_params',
+ 'decode_rfc2231',
+ 'encode_rfc2231',
+ 'formataddr',
+ 'formatdate',
+ 'format_datetime',
+ 'getaddresses',
+ 'make_msgid',
+ 'mktime_tz',
+ 'parseaddr',
+ 'parsedate',
+ 'parsedate_tz',
+ 'parsedate_to_datetime',
+ 'unquote',
+ '_has_surrogates'
+ ]
+
+import os
+import re
+import time
+import random
+import socket
+import datetime
+import urllib.parse
+from email._parseaddr import quote
+from email._parseaddr import AddressList as _AddressList
+from email._parseaddr import mktime_tz
+from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz
+
+# Intrapackage imports
+from email.charset import Charset
+
+# String constants shared by the helpers in this module.
+COMMASPACE = ', '
+EMPTYSTRING = ''
+UEMPTYSTRING = ''
+CRLF = '\r\n'
+TICK = "'"
+
+# specialsre finds the characters that make formataddr() wrap a real name in
+# double quotes; escapesre finds the backslash and double-quote characters
+# that formataddr() prefixes with a backslash.
+specialsre = re.compile(r'[][\\()<>@,:;".]')
+escapesre = re.compile(r'[\\"]')
+
+def _has_surrogates(s):
+    """Return True if s contains surrogate-escaped binary data."""
+    # Unless there are surrogates, utf8 (Python's default encoding) can
+    # encode any string, so a failed encode() is the signal we look for.
+    # This is the fastest way to check, see issue 11454 for timings.
+    try:
+        s.encode()
+    except UnicodeEncodeError:
+        return True
+    return False
+
+# How to deal with a string containing bytes before handing it to the
+# application through the 'normal' interface.
+def _sanitize(string):
+    """Return string with surrogate-escaped bytes made presentable.
+
+    The escaped bytes are turned back into raw bytes and the result is
+    decoded as utf-8, with anything undecodable replaced by the unicode
+    'unknown' character.  Escaped bytes that happen to form valid utf-8 are
+    therefore decoded rather than replaced.
+    """
+    return string.encode('utf-8', 'surrogateescape').decode('utf-8', 'replace')
+
+
+
+# Helpers
+
+def formataddr(pair, charset='utf-8'):
+    """Format a (realname, email_address) 2-tuple for an address header.
+
+    This is the inverse of parseaddr(): the result is suitable for an
+    RFC 2822 From, To or Cc header.  If the first element of pair is false,
+    the second element is returned unmodified.
+
+    charset is used to encode realname when it is not ASCII safe.  It may be
+    a str, which is turned into a Charset, or any object with a
+    header_encode method.  Default is 'utf-8'.
+
+    A non-ASCII address raises UnicodeError.
+    """
+    name, address = pair
+    # The address MUST (per RFC) be ascii; encode() raises if it is not.
+    address.encode('ascii')
+    if not name:
+        return address
+    try:
+        name.encode('ascii')
+    except UnicodeEncodeError:
+        if isinstance(charset, str):
+            charset = Charset(charset)
+        return "%s <%s>" % (charset.header_encode(name), address)
+    # ASCII name: quote it when it contains specials, and always
+    # backslash-escape backslashes and double quotes.
+    quotes = '"' if specialsre.search(name) else ''
+    name = escapesre.sub(r'\\\g<0>', name)
+    return '%s%s%s <%s>' % (quotes, name, quotes, address)
+
+
+
+def getaddresses(fieldvalues):
+    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
+    # Join all field values into one comma-separated string and let the
+    # address-list parser split it up again.
+    joined = COMMASPACE.join(fieldvalues)
+    return _AddressList(joined).addresslist
+
+
+
+# Pattern for a token of the form '=?charset?encoding?atom?=' (the shape of
+# an RFC 2047 encoded-word).  It exposes the named groups 'charset',
+# 'encoding' and 'atom'; the encoding letter is matched case-insensitively.
+ecre = re.compile(r'''
+ =\? # literal =?
+ (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
+ \? # literal ?
+ (?P<encoding>[qb]) # either a "q" or a "b", case insensitive
+ \? # literal ?
+ (?P<atom>.*?) # non-greedy up to the next ?= is the atom
+ \?= # literal ?=
+ ''', re.VERBOSE | re.IGNORECASE)
+
+
+def _format_timetuple_and_zone(timetuple, zone):
+    """Render timetuple and zone as 'Fri, 09 Nov 2001 01:08:47 <zone>'.
+
+    timetuple is indexed like a time tuple: year, month, day, hour, minute,
+    second and weekday at positions 0 to 6.  Day and month names are always
+    the English abbreviations.
+    """
+    day_name = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]]
+    month_name = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1]
+    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
+        day_name, timetuple[2], month_name, timetuple[0],
+        timetuple[3], timetuple[4], timetuple[5], zone)
+
+def formatdate(timeval=None, localtime=False, usegmt=False):
+    """Return a date string as specified by RFC 2822, e.g.:
+
+    Fri, 09 Nov 2001 01:08:47 -0000
+
+    timeval, if given, is a floating point time value as accepted by
+    gmtime() and localtime(); otherwise the current time is used.
+
+    localtime is a flag that when True makes the result relative to the
+    local timezone instead of UTC.
+
+    usegmt means that the timezone is written out as an ascii string, not a
+    numeric one (so "GMT" instead of "+0000").  This is needed for HTTP, and
+    is only used when localtime is False.
+    """
+    # Note: we cannot use strftime() because that honors the locale and RFC
+    # 2822 requires that day and month names be the English abbreviations.
+    if timeval is None:
+        timeval = time.time()
+    utc = datetime.timezone.utc
+    if localtime:
+        # Aware UTC time converted to the local zone; usegmt does not apply.
+        dt = datetime.datetime.fromtimestamp(timeval, utc).astimezone()
+        usegmt = False
+    elif usegmt:
+        dt = datetime.datetime.fromtimestamp(timeval, utc)
+    else:
+        # Naive UTC time.
+        dt = datetime.datetime.utcfromtimestamp(timeval)
+    return format_datetime(dt, usegmt)
+
+def format_datetime(dt, usegmt=False):
+    """Turn a datetime into a date string as specified in RFC 2822.
+
+    If usegmt is True, dt must be an aware datetime whose tzinfo equals
+    datetime.timezone.utc; 'GMT' is then rendered instead of a numeric
+    offset, otherwise ValueError is raised.  This is to support HTTP headers
+    involving date stamps.  A naive datetime gets the zone '-0000'; any
+    other aware datetime gets its strftime("%z") offset.
+    """
+    timetuple = dt.timetuple()
+    tzinfo = dt.tzinfo
+    if usegmt:
+        if tzinfo is None or tzinfo != datetime.timezone.utc:
+            raise ValueError("usegmt option requires a UTC datetime")
+        zone = 'GMT'
+    elif tzinfo is None:
+        zone = '-0000'
+    else:
+        zone = dt.strftime("%z")
+    return _format_timetuple_and_zone(timetuple, zone)
+
+
+def make_msgid(idstring=None, domain=None):
+    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:
+
+    <142480216486.20800.16526388040877946887@nightshade.la.mastaler.com>
+
+    The id is built from the current time in hundredths of a second, the
+    process id and 64 random bits.  idstring, if given, is appended after a
+    '.' to strengthen uniqueness.  domain, if given, is the part after the
+    '@'; it defaults to socket.getfqdn().
+    """
+    timeval = int(time.time()*100)
+    pid = os.getpid()
+    randint = random.getrandbits(64)
+    suffix = '' if idstring is None else '.' + idstring
+    if domain is None:
+        domain = socket.getfqdn()
+    return '<%d.%d.%d%s@%s>' % (timeval, pid, randint, suffix, domain)
+
+
+def parsedate_to_datetime(data):
+    """Parse an RFC 2822 style date string into a datetime.
+
+    data is handed to _parsedate_tz().  When the parsed zone offset is None
+    the result is a naive datetime; otherwise it is an aware datetime whose
+    tzinfo is a fixed-offset timezone of that many seconds.
+
+    Raises ValueError if data cannot be parsed as a date.
+    """
+    parsed = _parsedate_tz(data)
+    if parsed is None:
+        # _parsedate_tz() signals failure with None; unpacking that would
+        # only yield an opaque TypeError, so report the real problem.
+        raise ValueError('Invalid date value or format "%s"' % str(data))
+    *dtuple, tz = parsed
+    if tz is None:
+        return datetime.datetime(*dtuple[:6])
+    return datetime.datetime(*dtuple[:6],
+            tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
+
+
+def parseaddr(addr):
+    """
+    Split addr into its realname and email address parts.
+
+    Return the first (realname, email_address) pair found by the
+    address-list parser, or the 2-tuple ('', '') when it finds none.
+    """
+    addrs = _AddressList(addr).addresslist
+    return addrs[0] if addrs else ('', '')
+
+
+# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
+def unquote(str):
+    """Remove quotes from a string.
+
+    A string wrapped in double quotes loses them and has backslash-escaped
+    backslashes and double quotes unescaped; a string wrapped in angle
+    brackets just loses the brackets.  Anything else, including strings
+    shorter than two characters, is returned unchanged.
+    """
+    if len(str) <= 1:
+        return str
+    inner = str[1:-1]
+    if str.startswith('"') and str.endswith('"'):
+        return inner.replace('\\\\', '\\').replace('\\"', '"')
+    if str.startswith('<') and str.endswith('>'):
+        return inner
+    return str
+
+
+
+# RFC2231-related functions - parameter encoding and decoding
+def decode_rfc2231(s):
+    """Decode string according to RFC 2231.
+
+    s is split on the first two tick characters.  If that yields three
+    parts they are returned as a list; otherwise the result is the tuple
+    (None, None, s).
+    """
+    parts = s.split(TICK, 2)
+    return parts if len(parts) > 2 else (None, None, s)
+
+
+def encode_rfc2231(s, charset=None, language=None):
+    """Encode string according to RFC 2231.
+
+    s is always percent-quoted (no characters are treated as safe), using
+    charset, or 'ascii' when charset is not given.  If neither charset nor
+    language is given, the quoted string is returned on its own.  Otherwise
+    the result has the form charset'language'quoted, with the empty string
+    used for a missing language.
+    """
+    quoted = urllib.parse.quote(s, safe='', encoding=charset or 'ascii')
+    if charset is None and language is None:
+        return quoted
+    lang = '' if language is None else language
+    return "%s'%s'%s" % (charset, lang, quoted)
+
+
+# Matches parameter names of the form 'name*', 'name*N' or 'name*N*', where
+# N is a run of digits.  The groups 'name' and 'num' are read by
+# decode_params(); 'num' is None when no digits are present.
+rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$',
+ re.ASCII)
+
+def decode_params(params):
+    """Decode parameters list according to RFC 2231.
+
+    params is a list of 2-tuples containing (param name, string value).
+    The first pair is copied to the result untouched.  Every other plain
+    parameter is unquoted and re-quoted; parameters whose name marks them
+    as RFC 2231 continuations or extended values are merged into a single
+    entry per name.  Extended values are returned as a
+    (charset, language, value) 3-tuple.
+    """
+    # Work on a copy so we don't mess with the original.
+    remaining = params[:]
+    decoded = []
+    # Map parameter's name to a list of continuations.  The values are a
+    # 3-tuple of the continuation number, the string value, and a flag
+    # specifying whether a particular segment is %-encoded.
+    segments_by_name = {}
+    first_name, first_value = remaining.pop(0)
+    decoded.append((first_name, first_value))
+    for name, value in remaining:
+        encoded = name.endswith('*')
+        value = unquote(value)
+        mo = rfc2231_continuation.match(name)
+        if mo:
+            name, num = mo.group('name', 'num')
+            if num is not None:
+                num = int(num)
+            segments_by_name.setdefault(name, []).append(
+                (num, value, encoded))
+        else:
+            decoded.append((name, '"%s"' % quote(value)))
+    for name, continuations in segments_by_name.items():
+        pieces = []
+        extended = False
+        # Sort by number
+        continuations.sort()
+        # Append all values in numerical order, converting %-encodings for
+        # the encoded segments.  If any of the continuation names ends in a
+        # *, then the entire string, after decoding segments and
+        # concatenating, must have the charset and language specifiers at
+        # the beginning of the string.
+        for num, s, encoded in continuations:
+            if encoded:
+                # Decode as "latin-1", so the characters in s directly
+                # represent the percent-encoded octet values.
+                # collapse_rfc2231_value treats this as an octet sequence.
+                s = urllib.parse.unquote(s, encoding="latin-1")
+                extended = True
+            pieces.append(s)
+        value = quote(EMPTYSTRING.join(pieces))
+        if extended:
+            charset, language, value = decode_rfc2231(value)
+            decoded.append((name, (charset, language, '"%s"' % value)))
+        else:
+            decoded.append((name, '"%s"' % value))
+    return decoded
+
+def collapse_rfc2231_value(value, errors='replace',
+                           fallback_charset='us-ascii'):
+    """Collapse a decode_params() style value into a plain string.
+
+    Anything that is not a 3-tuple is simply unquoted.  A
+    (charset, language, text) 3-tuple has its text reinterpreted as raw
+    bytes and decoded with charset, using the errors handler given.
+    fallback_charset is used when charset is None.  If charset is not a
+    known codec the unquoted text is returned instead.
+    """
+    if not (isinstance(value, tuple) and len(value) == 3):
+        return unquote(value)
+    charset, _language, text = value
+    if charset is None:
+        # Issue 17369: if charset/lang is None, decode_rfc2231 couldn't parse
+        # the value, so use the fallback_charset.
+        charset = fallback_charset
+    # The text arrives as a unicode string but stands for a sequence of
+    # octets; go through raw-unicode-escape rather than utf-8 so that each
+    # character is taken as the byte it represents.
+    octets = bytes(text, 'raw-unicode-escape')
+    try:
+        return str(octets, charset, errors)
+    except LookupError:
+        # charset is not a known codec.
+        return unquote(text)
+
+
+#
+# datetime doesn't provide a localtime function yet, so provide one. Code
+# adapted from the patch in issue 9527. This may not be perfect, but it is
+# better than not having it.
+#
+
+def localtime(dt=None, isdst=-1):
+    """Return local time as an aware datetime object.
+
+    Without arguments the current time is returned.  Otherwise dt should be
+    a datetime instance.  An aware dt is converted to the local time zone.
+    A naive dt (dt.tzinfo is None) is assumed to be in local time already
+    and gets a fixed-offset tzinfo attached; in that case isdst is passed to
+    the system mktime as the daylight-saving hint: positive or zero to
+    presume that summer time is or is not in effect, negative to let the
+    system work it out.
+
+    """
+    if dt is None:
+        return datetime.datetime.now(datetime.timezone.utc).astimezone()
+    if dt.tzinfo is not None:
+        return dt.astimezone()
+    # Naive datetime: build a (localtime) timetuple carrying the isdst hint
+    # and let the system mktime turn it into seconds since the epoch.
+    hinted = dt.timetuple()[:-1] + (isdst,)
+    seconds = time.mktime(hinted)
+    local = time.localtime(seconds)
+    try:
+        offset = datetime.timedelta(seconds=local.tm_gmtoff)
+        zone = datetime.timezone(offset, local.tm_zone)
+    except AttributeError:
+        # No tm_gmtoff/tm_zone available.  Compute the UTC offset and compare
+        # it with the value implied by tm_isdst; if they match, use the zone
+        # name implied by tm_isdst as well.
+        offset = dt - datetime.datetime(*time.gmtime(seconds)[:6])
+        dst = time.daylight and local.tm_isdst > 0
+        gmtoff = -(time.altzone if dst else time.timezone)
+        if offset == datetime.timedelta(seconds=gmtoff):
+            zone = datetime.timezone(offset, time.tzname[dst])
+        else:
+            zone = datetime.timezone(offset)
+    return dt.replace(tzinfo=zone)
+
diff --git a/modules/language/python/module/selectors.py b/modules/language/python/module/selectors.py
index 6afb52e..4ca8638 100644
--- a/modules/language/python/module/selectors.py
+++ b/modules/language/python/module/selectors.py
@@ -5,18 +5,15 @@ This module allows high-level and efficient I/O multiplexing, built upon the
`select` module primitives.
"""
-
from abc import ABCMeta, abstractmethod
from collections import namedtuple, Mapping
import math
import select
import sys
-
# generic events, that must be mapped to implementation-specific ones
EVENT_READ = (1 << 0)
EVENT_WRITE = (1 << 1)
-
def _fileobj_to_fd(fileobj):
"""Return a file descriptor from a file object.
@@ -41,9 +38,7 @@ def _fileobj_to_fd(fileobj):
raise ValueError("Invalid file descriptor: {}".format(fd))
return fd
-
SelectorKey = namedtuple('SelectorKey', ['fileobj', 'fd', 'events', 'data'])
-
SelectorKey.__doc__ = """SelectorKey(fileobj, fd, events, data)
Object used to associate a file object to its backing
@@ -69,7 +64,6 @@ class _SelectorMapping(Mapping):
def __iter__(self):
return iter(self._selector._fd_to_key)
-
class BaseSelector(metaclass=ABCMeta):
"""Selector abstract base class.
@@ -595,6 +589,7 @@ if hasattr(select, 'kqueue'):
# select() also can't accept a FD > FD_SETSIZE (usually around 1024)
#if 'KqueueSelector' in globals():
# DefaultSelector = KqueueSelector
+
if 'EpollSelector' in globals():
DefaultSelector = EpollSelector
elif 'DevpollSelector' in globals():
@@ -603,3 +598,4 @@ elif 'PollSelector' in globals():
DefaultSelector = PollSelector
else:
DefaultSelector = SelectSelector
+