diff options
author | Stefan Israelsson Tampe <stefan.itampe@gmail.com> | 2018-09-04 21:23:20 +0200 |
---|---|---|
committer | Stefan Israelsson Tampe <stefan.itampe@gmail.com> | 2018-09-04 21:23:20 +0200 |
commit | 880b3bf82a99a94e5e9bf8b0d90327ff54bed339 (patch) | |
tree | 35a230cc04a1d910dc0abe14f28bf98366a258d8 | |
parent | e37613527a05fc56f8ee5886a868c948b3ee4cfc (diff) |
email stuff
22 files changed, 5693 insertions, 51 deletions
diff --git a/modules/language/python/compile.scm b/modules/language/python/compile.scm index a83a781..e013130 100644 --- a/modules/language/python/compile.scm +++ b/modules/language/python/compile.scm @@ -1886,7 +1886,8 @@ #:pure #:use-module ((guile) #:select (@ @@ pk let* lambda call-with-values case-lambda - set! = * + - < <= > >= / pair? + set! = * + - < <= > >= / pair? fluid-set! + fluid-ref syntax-rules let-syntax abort-to-prompt)) #:use-module (language python module python) #:use-module ((language python compile) #:select (pks)) @@ -2220,7 +2221,7 @@ ((_ v) (begin (dont-warn (syntax->datum #'v)) - #'(if (and #f (module-defined? (current-module) 'v)) + #'(if (module-defined? (current-module) 'v) (values) (define! 'v void))))))) @@ -2753,13 +2754,38 @@ (define-syntax qset! (lambda (x) - (pkkk x) + (syntax-case x (@@ @) + ((_ (cons x y) v) + (equal? (syntax->datum #'cons) '(@ (guile) cons)) + #'(let ((w (to-list v))) + (qset! x (car w)) + (qset0! y (cdr w)))) + + ((_ ((@ (guile) q) ()) v) + (equal? (syntax->datum #'q) 'quote) + #'(if (not (null? v)) + (raise (ValueError "too many values to unpack")) + (values))) + + ((_ ((@@ u li) x) v) + (equal? (syntax->datum #'li) 'to-pylist) + #'(let ((w (to-list v))) + (qset! x w))) + + ((_ (ref v a ...) w) + #'(set-x v (a ...) w)) + + ((_ x v) + #'(set! x v))))) + +(define-syntax qset0! + (lambda (x) (syntax-case x (@@ @) ((_ (cons x y) v) (equal? (syntax->datum #'cons) '(@ (guile) cons)) #'(let ((w v)) - (qset! x (car w)) - (qset! y (cdr w)))) + (qset! x (car w)) + (qset0! y (cdr w)))) ((_ ((@ (guile) q) ()) v) (equal? (syntax->datum #'q) 'quote) diff --git a/modules/language/python/format2.scm b/modules/language/python/format2.scm index 65ee545..df30be5 100644 --- a/modules/language/python/format2.scm +++ b/modules/language/python/format2.scm @@ -3,13 +3,15 @@ #:use-module (parser stis-parser) #:use-module (oop pf-objects) #:use-module (oop goops) - #:use-module ((language python module re) #:select (splitm splitmm)) #:use-module (language python exceptions) #:use-module (language python number) #:use-module (language python dict) #:use-module (language python list) #:export (format fnm)) +(define splitm #f) +(define splitmm #f) + (define-syntax-rule (aif it p x y) (let ((it p)) (if it x y))) (define scm-format (@ (guile) format)) diff --git a/modules/language/python/module.scm b/modules/language/python/module.scm index adc87fd..eff20f0 100644 --- a/modules/language/python/module.scm +++ b/modules/language/python/module.scm @@ -11,7 +11,7 @@ #:use-module (language python dict) #:export (Module private public import __import__ modules)) -(define-syntax-rule (aif it p x y) (let ((it p)) (if it x y))) +(define-syntax-rule (aif it p . x) (let ((it p)) (if it . x))) (define-syntax-rule (in-scheme x) (let ((lan (current-language))) @@ -58,7 +58,14 @@ (begin (rawset self id (Module pre l nm)) (_make self pre nm skip-error?))) - (_make self pre nm skip-error?)))) + (aif it (and (module-defined? (current-module) (car nm)) + (module-ref (current-module) (car nm))) + (if (module? it) + (begin + ((rawref it '__init__) pre l nm) + it) + (_make self pre nm skip-error?)) + (_make self pre nm skip-error?))))) (define _contupdate (lambda (self id pre l nm) @@ -151,14 +158,21 @@ (raise (AttributeError "getattr in Module"))) (let ((k (_k k))) (cond - ((memq k '(__iter__ __repr__)) + ((memq k '(__iter__ __repr__ __dir__)) (lambda () ((rawref self k) self))) (else - (let ((x (module-ref (rawref self '_export) k e))) + (let ((x (aif it (rawref self '_export) + (module-ref it k e) + e))) (if (eq? e x) - (let ((x (module-ref (_m self) k e))) + (let ((x (aif it (_m self) + (module-ref it k e) + e))) (if (eq? e x) - (fail) + (let ((x (rawref self k e))) + (if (eq? e x) + (fail) + x)) x)) x))))))) @@ -237,8 +251,9 @@ (if (not (in "-" (symbol->string k))) (set! l (cons (symbol->string k) l)))))) (hash-for-each add h) - (module-for-each add m) - (module-for-each add (rawref self '_export)) + (if m (module-for-each add m)) + (aif it (rawref self '_export) (module-for-each add it)) + (hash-for-each add (slot-ref self 'h)) (py-list l)))) diff --git a/modules/language/python/module/_python.scm b/modules/language/python/module/_python.scm index b8c5118..3998b19 100644 --- a/modules/language/python/module/_python.scm +++ b/modules/language/python/module/_python.scm @@ -50,8 +50,7 @@ divmod enumerate getattr hasattr setattr hex isinstance issubclass iter sum id input oct ord pow super - sorted zip vars - ClassMethod StaticMethod Funcobj)) + sorted zip vars slice)) (define-syntax-rule (aif it p x y) (let ((it p)) (if it x y))) @@ -353,11 +352,6 @@ (begin (yield (reverse r)) (lp)))))))))) - -(define-python-class ClassMethod ()) -(define-python-class StaticMethod ()) -(define-python-class Funcobj ()) - (define print (lam ((= file #f) (* l)) @@ -375,6 +369,7 @@ (l ((@ (guile) display) l ) (print))) l))))) + (define-syntax-rule (mk cl cls ? tp) (begin (set! (@@ (oop pf-objects) cl) cls) @@ -386,3 +381,11 @@ (mk bytes-cls bytes bytes? bytes) (mk list-cls list list? list) (mk float-cls float float? float) + +(define-python-class slice () + (define __init__ + (lam (self x (= y None) (= z None)) + (pf-set self 'x x) + (pf-set self 'y y) + (pf-set self 'z z)))) + diff --git a/modules/language/python/module/calendar.py b/modules/language/python/module/calendar.py new file mode 100644 index 0000000..7e9c0d4 --- /dev/null +++ b/modules/language/python/module/calendar.py @@ -0,0 +1,715 @@ +module(calendar) +"""Calendar printing functions + +Note when comparing these calendars to the ones printed by cal(1): By +default, these calendars have Monday as the first day of the week, and +Sunday as the last (the European convention). Use setfirstweekday() to +set the first day of the week (0=Monday, 6=Sunday).""" + +import sys +import datetime +import locale as _locale +from itertools import repeat + +__all__ = ["IllegalMonthError", "IllegalWeekdayError", "setfirstweekday", + "firstweekday", "isleap", "leapdays", "weekday", "monthrange", + "monthcalendar", "prmonth", "month", "prcal", "calendar", + "timegm", "month_name", "month_abbr", "day_name", "day_abbr", + "Calendar", "TextCalendar", "HTMLCalendar", "LocaleTextCalendar", + "LocaleHTMLCalendar", "weekheader"] + +# Exception raised for bad input (with string parameter for details) +error = ValueError + +# Exceptions raised for bad input +class IllegalMonthError(ValueError): + def __init__(self, month): + self.month = month + def __str__(self): + return "bad month number %r; must be 1-12" % self.month + + +class IllegalWeekdayError(ValueError): + def __init__(self, weekday): + self.weekday = weekday + def __str__(self): + return "bad weekday number %r; must be 0 (Monday) to 6 (Sunday)" % self.weekday + + +# Constants for months referenced later +January = 1 +February = 2 + +# Number of days per month (except for February in leap years) +mdays = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + +# This module used to have hard-coded lists of day and month names, as +# English strings. The classes following emulate a read-only version of +# that, but supply localized names. Note that the values are computed +# fresh on each call, in case the user changes locale between calls. + +class _localized_month: + + _months = [datetime.date(2001, i+1, 1).strftime for i in range(12)] + _months.insert(0, lambda x: "") + + def __init__(self, format): + self.format = format + + def __getitem__(self, i): + funcs = self._months[i] + if isinstance(i, slice): + return [f(self.format) for f in funcs] + else: + return funcs(self.format) + + def __len__(self): + return 13 + + +class _localized_day: + + # January 1, 2001, was a Monday. + _days = [datetime.date(2001, 1, i+1).strftime for i in range(7)] + + def __init__(self, format): + self.format = format + + def __getitem__(self, i): + funcs = self._days[i] + if isinstance(i, slice): + return [f(self.format) for f in funcs] + else: + return funcs(self.format) + + def __len__(self): + return 7 + + +# Full and abbreviated names of weekdays +day_name = _localized_day('%A') +day_abbr = _localized_day('%a') + +# Full and abbreviated names of months (1-based arrays!!!) +month_name = _localized_month('%B') +month_abbr = _localized_month('%b') + +# Constants for weekdays +(MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY) = range(7) + + +def isleap(year): + """Return True for leap years, False for non-leap years.""" + return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0) + + +def leapdays(y1, y2): + """Return number of leap years in range [y1, y2). + Assume y1 <= y2.""" + y1 -= 1 + y2 -= 1 + return (y2//4 - y1//4) - (y2//100 - y1//100) + (y2//400 - y1//400) + + +def weekday(year, month, day): + """Return weekday (0-6 ~ Mon-Sun) for year (1970-...), month (1-12), + day (1-31).""" + return datetime.date(year, month, day).weekday() + + +def monthrange(year, month): + """Return weekday (0-6 ~ Mon-Sun) and number of days (28-31) for + year, month.""" + if not 1 <= month <= 12: + raise IllegalMonthError(month) + day1 = weekday(year, month, 1) + ndays = mdays[month] + (month == February and isleap(year)) + return day1, ndays + + +class Calendar(object): + """ + Base calendar class. This class doesn't do any formatting. It simply + provides data to subclasses. + """ + + def __init__(self, firstweekday=0): + self.firstweekday = firstweekday # 0 = Monday, 6 = Sunday + + def getfirstweekday(self): + return self._firstweekday % 7 + + def setfirstweekday(self, firstweekday): + self._firstweekday = firstweekday + + firstweekday = property(getfirstweekday, setfirstweekday) + + def iterweekdays(self): + """ + Return an iterator for one week of weekday numbers starting with the + configured first one. + """ + for i in range(self.firstweekday, self.firstweekday + 7): + yield i%7 + + def itermonthdates(self, year, month): + """ + Return an iterator for one month. The iterator will yield datetime.date + values and will always iterate through complete weeks, so it will yield + dates outside the specified month. + """ + date = datetime.date(year, month, 1) + # Go back to the beginning of the week + days = (date.weekday() - self.firstweekday) % 7 + date -= datetime.timedelta(days=days) + oneday = datetime.timedelta(days=1) + while True: + yield date + try: + date += oneday + except OverflowError: + # Adding one day could fail after datetime.MAXYEAR + break + if date.month != month and date.weekday() == self.firstweekday: + break + + def itermonthdays2(self, year, month): + """ + Like itermonthdates(), but will yield (day number, weekday number) + tuples. For days outside the specified month the day number is 0. + """ + for i, d in enumerate(self.itermonthdays(year, month), self.firstweekday): + yield d, i % 7 + + def itermonthdays(self, year, month): + """ + Like itermonthdates(), but will yield day numbers. For days outside + the specified month the day number is 0. + """ + day1, ndays = monthrange(year, month) + days_before = (day1 - self.firstweekday) % 7 + yield from repeat(0, days_before) + yield from range(1, ndays + 1) + days_after = (self.firstweekday - day1 - ndays) % 7 + yield from repeat(0, days_after) + + def monthdatescalendar(self, year, month): + """ + Return a matrix (list of lists) representing a month's calendar. + Each row represents a week; week entries are datetime.date values. + """ + dates = list(self.itermonthdates(year, month)) + return [ dates[i:i+7] for i in range(0, len(dates), 7) ] + + def monthdays2calendar(self, year, month): + """ + Return a matrix representing a month's calendar. + Each row represents a week; week entries are + (day number, weekday number) tuples. Day numbers outside this month + are zero. + """ + days = list(self.itermonthdays2(year, month)) + return [ days[i:i+7] for i in range(0, len(days), 7) ] + + def monthdayscalendar(self, year, month): + """ + Return a matrix representing a month's calendar. + Each row represents a week; days outside this month are zero. + """ + days = list(self.itermonthdays(year, month)) + return [ days[i:i+7] for i in range(0, len(days), 7) ] + + def yeardatescalendar(self, year, width=3): + """ + Return the data for the specified year ready for formatting. The return + value is a list of month rows. Each month row contains up to width months. + Each month contains between 4 and 6 weeks and each week contains 1-7 + days. Days are datetime.date objects. + """ + months = [ + self.monthdatescalendar(year, i) + for i in range(January, January+12) + ] + return [months[i:i+width] for i in range(0, len(months), width) ] + + def yeardays2calendar(self, year, width=3): + """ + Return the data for the specified year ready for formatting (similar to + yeardatescalendar()). Entries in the week lists are + (day number, weekday number) tuples. Day numbers outside this month are + zero. + """ + months = [ + self.monthdays2calendar(year, i) + for i in range(January, January+12) + ] + return [months[i:i+width] for i in range(0, len(months), width) ] + + def yeardayscalendar(self, year, width=3): + """ + Return the data for the specified year ready for formatting (similar to + yeardatescalendar()). Entries in the week lists are day numbers. + Day numbers outside this month are zero. + """ + months = [ + self.monthdayscalendar(year, i) + for i in range(January, January+12) + ] + return [months[i:i+width] for i in range(0, len(months), width) ] + + +class TextCalendar(Calendar): + """ + Subclass of Calendar that outputs a calendar as a simple plain text + similar to the UNIX program cal. + """ + + def prweek(self, theweek, width): + """ + Print a single week (no newline). + """ + print(self.formatweek(theweek, width), end=' ') + + def formatday(self, day, weekday, width): + """ + Returns a formatted day. + """ + if day == 0: + s = '' + else: + s = '%2i' % day # right-align single-digit days + return s.center(width) + + def formatweek(self, theweek, width): + """ + Returns a single week in a string (no newline). + """ + return ' '.join(self.formatday(d, wd, width) for (d, wd) in theweek) + + def formatweekday(self, day, width): + """ + Returns a formatted week day name. + """ + if width >= 9: + names = day_name + else: + names = day_abbr + return names[day][:width].center(width) + + def formatweekheader(self, width): + """ + Return a header for a week. + """ + return ' '.join(self.formatweekday(i, width) for i in self.iterweekdays()) + + def formatmonthname(self, theyear, themonth, width, withyear=True): + """ + Return a formatted month name. + """ + s = month_name[themonth] + if withyear: + s = "%s %r" % (s, theyear) + return s.center(width) + + def prmonth(self, theyear, themonth, w=0, l=0): + """ + Print a month's calendar. + """ + print(self.formatmonth(theyear, themonth, w, l), end='') + + def formatmonth(self, theyear, themonth, w=0, l=0): + """ + Return a month's calendar string (multi-line). + """ + w = max(2, w) + l = max(1, l) + s = self.formatmonthname(theyear, themonth, 7 * (w + 1) - 1) + s = s.rstrip() + s += '\n' * l + s += self.formatweekheader(w).rstrip() + s += '\n' * l + for week in self.monthdays2calendar(theyear, themonth): + s += self.formatweek(week, w).rstrip() + s += '\n' * l + return s + + def formatyear(self, theyear, w=2, l=1, c=6, m=3): + """ + Returns a year's calendar as a multi-line string. + """ + w = max(2, w) + l = max(1, l) + c = max(2, c) + colwidth = (w + 1) * 7 - 1 + v = [] + a = v.append + a(repr(theyear).center(colwidth*m+c*(m-1)).rstrip()) + a('\n'*l) + header = self.formatweekheader(w) + for (i, row) in enumerate(self.yeardays2calendar(theyear, m)): + # months in this row + months = range(m*i+1, min(m*(i+1)+1, 13)) + a('\n'*l) + names = (self.formatmonthname(theyear, k, colwidth, False) + for k in months) + a(formatstring(names, colwidth, c).rstrip()) + a('\n'*l) + headers = (header for k in months) + a(formatstring(headers, colwidth, c).rstrip()) + a('\n'*l) + # max number of weeks for this row + height = max(len(cal) for cal in row) + for j in range(height): + weeks = [] + for cal in row: + if j >= len(cal): + weeks.append('') + else: + weeks.append(self.formatweek(cal[j], w)) + a(formatstring(weeks, colwidth, c).rstrip()) + a('\n' * l) + return ''.join(v) + + def pryear(self, theyear, w=0, l=0, c=6, m=3): + """Print a year's calendar.""" + print(self.formatyear(theyear, w, l, c, m)) + + +class HTMLCalendar(Calendar): + """ + This calendar returns complete HTML pages. + """ + + # CSS classes for the day <td>s + cssclasses = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"] + + def formatday(self, day, weekday): + """ + Return a day as a table cell. + """ + if day == 0: + return '<td class="noday"> </td>' # day outside month + else: + return '<td class="%s">%d</td>' % (self.cssclasses[weekday], day) + + def formatweek(self, theweek): + """ + Return a complete week as a table row. + """ + s = ''.join(self.formatday(d, wd) for (d, wd) in theweek) + return '<tr>%s</tr>' % s + + def formatweekday(self, day): + """ + Return a weekday name as a table header. + """ + return '<th class="%s">%s</th>' % (self.cssclasses[day], day_abbr[day]) + + def formatweekheader(self): + """ + Return a header for a week as a table row. + """ + s = ''.join(self.formatweekday(i) for i in self.iterweekdays()) + return '<tr>%s</tr>' % s + + def formatmonthname(self, theyear, themonth, withyear=True): + """ + Return a month name as a table row. + """ + if withyear: + s = '%s %s' % (month_name[themonth], theyear) + else: + s = '%s' % month_name[themonth] + return '<tr><th colspan="7" class="month">%s</th></tr>' % s + + def formatmonth(self, theyear, themonth, withyear=True): + """ + Return a formatted month as a table. + """ + v = [] + a = v.append + a('<table border="0" cellpadding="0" cellspacing="0" class="month">') + a('\n') + a(self.formatmonthname(theyear, themonth, withyear=withyear)) + a('\n') + a(self.formatweekheader()) + a('\n') + for week in self.monthdays2calendar(theyear, themonth): + a(self.formatweek(week)) + a('\n') + a('</table>') + a('\n') + return ''.join(v) + + def formatyear(self, theyear, width=3): + """ + Return a formatted year as a table of tables. + """ + v = [] + a = v.append + width = max(width, 1) + a('<table border="0" cellpadding="0" cellspacing="0" class="year">') + a('\n') + a('<tr><th colspan="%d" class="year">%s</th></tr>' % (width, theyear)) + for i in range(January, January+12, width): + # months in this row + months = range(i, min(i+width, 13)) + a('<tr>') + for m in months: + a('<td>') + a(self.formatmonth(theyear, m, withyear=False)) + a('</td>') + a('</tr>') + a('</table>') + return ''.join(v) + + def formatyearpage(self, theyear, width=3, css='calendar.css', encoding=None): + """ + Return a formatted year as a complete HTML page. + """ + if encoding is None: + encoding = sys.getdefaultencoding() + v = [] + a = v.append + a('<?xml version="1.0" encoding="%s"?>\n' % encoding) + a('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n') + a('<html>\n') + a('<head>\n') + a('<meta http-equiv="Content-Type" content="text/html; charset=%s" />\n' % encoding) + if css is not None: + a('<link rel="stylesheet" type="text/css" href="%s" />\n' % css) + a('<title>Calendar for %d</title>\n' % theyear) + a('</head>\n') + a('<body>\n') + a(self.formatyear(theyear, width)) + a('</body>\n') + a('</html>\n') + return ''.join(v).encode(encoding, "xmlcharrefreplace") + + +class different_locale: + def __init__(self, locale): + self.locale = locale + + def __enter__(self): + self.oldlocale = _locale.getlocale(_locale.LC_TIME) + _locale.setlocale(_locale.LC_TIME, self.locale) + + def __exit__(self, *args): + _locale.setlocale(_locale.LC_TIME, self.oldlocale) + + +class LocaleTextCalendar(TextCalendar): + """ + This class can be passed a locale name in the constructor and will return + month and weekday names in the specified locale. If this locale includes + an encoding all strings containing month and weekday names will be returned + as unicode. + """ + + def __init__(self, firstweekday=0, locale=None): + TextCalendar.__init__(self, firstweekday) + if locale is None: + locale = _locale.getdefaultlocale() + self.locale = locale + + def formatweekday(self, day, width): + with different_locale(self.locale): + if width >= 9: + names = day_name + else: + names = day_abbr + name = names[day] + return name[:width].center(width) + + def formatmonthname(self, theyear, themonth, width, withyear=True): + with different_locale(self.locale): + s = month_name[themonth] + if withyear: + s = "%s %r" % (s, theyear) + return s.center(width) + + +class LocaleHTMLCalendar(HTMLCalendar): + """ + This class can be passed a locale name in the constructor and will return + month and weekday names in the specified locale. If this locale includes + an encoding all strings containing month and weekday names will be returned + as unicode. + """ + def __init__(self, firstweekday=0, locale=None): + HTMLCalendar.__init__(self, firstweekday) + if locale is None: + locale = _locale.getdefaultlocale() + self.locale = locale + + def formatweekday(self, day): + with different_locale(self.locale): + s = day_abbr[day] + return '<th class="%s">%s</th>' % (self.cssclasses[day], s) + + def formatmonthname(self, theyear, themonth, withyear=True): + with different_locale(self.locale): + s = month_name[themonth] + if withyear: + s = '%s %s' % (s, theyear) + return '<tr><th colspan="7" class="month">%s</th></tr>' % s + + +# Support for old module level interface + +c = TextCalendar() + +firstweekday = c.getfirstweekday + +def setfirstweekday(firstweekday): + if not MONDAY <= firstweekday <= SUNDAY: + raise IllegalWeekdayError(firstweekday) + c.firstweekday = firstweekday + +monthcalendar = c.monthdayscalendar +prweek = c.prweek +week = c.formatweek +weekheader = c.formatweekheader +prmonth = c.prmonth +month = c.formatmonth +calendar = c.formatyear +prcal = c.pryear + + +# Spacing of month columns for multi-column year calendar +_colwidth = 7*3 - 1 # Amount printed by prweek() +_spacing = 6 # Number of spaces between columns + + +def format(cols, colwidth=_colwidth, spacing=_spacing): + """Prints multi-column formatting for year calendars""" + print(formatstring(cols, colwidth, spacing)) + + +def formatstring(cols, colwidth=_colwidth, spacing=_spacing): + """Returns a string formatted from n strings, centered within n columns.""" + spacing *= ' ' + return spacing.join(c.center(colwidth) for c in cols) + + +EPOCH = 1970 +_EPOCH_ORD = datetime.date(EPOCH, 1, 1).toordinal() + + +def timegm(tuple): + """Unrelated but handy function to calculate Unix timestamp from GMT.""" + year, month, day, hour, minute, second = tuple[:6] + days = datetime.date(year, month, 1).toordinal() - _EPOCH_ORD + day - 1 + hours = days*24 + hour + minutes = hours*60 + minute + seconds = minutes*60 + second + return seconds + + +def main(args): + import argparse + parser = argparse.ArgumentParser() + textgroup = parser.add_argument_group('text only arguments') + htmlgroup = parser.add_argument_group('html only arguments') + textgroup.add_argument( + "-w", "--width", + type=int, default=2, + help="width of date column (default 2)" + ) + textgroup.add_argument( + "-l", "--lines", + type=int, default=1, + help="number of lines for each week (default 1)" + ) + textgroup.add_argument( + "-s", "--spacing", + type=int, default=6, + help="spacing between months (default 6)" + ) + textgroup.add_argument( + "-m", "--months", + type=int, default=3, + help="months per row (default 3)" + ) + htmlgroup.add_argument( + "-c", "--css", + default="calendar.css", + help="CSS to use for page" + ) + parser.add_argument( + "-L", "--locale", + default=None, + help="locale to be used from month and weekday names" + ) + parser.add_argument( + "-e", "--encoding", + default=None, + help="encoding to use for output" + ) + parser.add_argument( + "-t", "--type", + default="text", + choices=("text", "html"), + help="output type (text or html)" + ) + parser.add_argument( + "year", + nargs='?', type=int, + help="year number (1-9999)" + ) + parser.add_argument( + "month", + nargs='?', type=int, + help="month number (1-12, text only)" + ) + + options = parser.parse_args(args[1:]) + + if options.locale and not options.encoding: + parser.error("if --locale is specified --encoding is required") + sys.exit(1) + + locale = options.locale, options.encoding + + if options.type == "html": + if options.locale: + cal = LocaleHTMLCalendar(locale=locale) + else: + cal = HTMLCalendar() + encoding = options.encoding + if encoding is None: + encoding = sys.getdefaultencoding() + optdict = dict(encoding=encoding, css=options.css) + write = sys.stdout.buffer.write + if options.year is None: + write(cal.formatyearpage(datetime.date.today().year, **optdict)) + elif options.month is None: + write(cal.formatyearpage(options.year, **optdict)) + else: + parser.error("incorrect number of arguments") + sys.exit(1) + else: + if options.locale: + cal = LocaleTextCalendar(locale=locale) + else: + cal = TextCalendar() + optdict = dict(w=options.width, l=options.lines) + if options.month is None: + optdict["c"] = options.spacing + optdict["m"] = options.months + if options.year is None: + result = cal.formatyear(datetime.date.today().year, **optdict) + elif options.month is None: + result = cal.formatyear(options.year, **optdict) + else: + result = cal.formatmonth(options.year, options.month, **optdict) + write = sys.stdout.write + if options.encoding: + result = result.encode(options.encoding) + write = sys.stdout.buffer.write + write(result) + + +if __name__ == "__main__": + main(sys.argv) diff --git a/modules/language/python/module/collections/abc.scm b/modules/language/python/module/collections/abc.scm index 37815d8..ff8fa07 100644 --- a/modules/language/python/module/collections/abc.scm +++ b/modules/language/python/module/collections/abc.scm @@ -19,6 +19,14 @@ (define-syntax-rule (aif it p x y) (let ((it p)) (if it x y))) +(define (_check_methods x . l) + (let lp ((l l)) + (if (pair? l) + (if (ref x (string->symbol (car l))) + #t + (lp (cdr l))) + #f))) + (define-python-class Container () (define __containes__ (lambda x (error "not implemented")))) diff --git a/modules/language/python/module/contextlib.py b/modules/language/python/module/contextlib.py new file mode 100644 index 0000000..739cc28 --- /dev/null +++ b/modules/language/python/module/contextlib.py @@ -0,0 +1,386 @@ +module(contextlib) + +"""Utilities for with-statement contexts. See PEP 343.""" +import abc +import sys +import collections.abc +from collections import deque +from functools import wraps + +__all__ = ["contextmanager", "closing", "AbstractContextManager", + "ContextDecorator", "ExitStack", "redirect_stdout", + "redirect_stderr", "suppress"] + + +class AbstractContextManager(abc.ABC): + + """An abstract base class for context managers.""" + + def __enter__(self): + """Return `self` upon entering the runtime context.""" + return self + + @abc.abstractmethod + def __exit__(self, exc_type, exc_value, traceback): + """Raise any exception triggered within the runtime context.""" + return None + + @classmethod + def __subclasshook__(cls, C): + if cls is AbstractContextManager: + return collections.abc._check_methods(C, "__enter__", "__exit__") + return NotImplemented + + +class ContextDecorator(object): + "A base class or mixin that enables context managers to work as decorators." + + def _recreate_cm(self): + """Return a recreated instance of self. + + Allows an otherwise one-shot context manager like + _GeneratorContextManager to support use as + a decorator via implicit recreation. + + This is a private interface just for _GeneratorContextManager. + See issue #11647 for details. + """ + return self + + def __call__(self, func): + @wraps(func) + def inner(*args, **kwds): + with self._recreate_cm(): + return func(*args, **kwds) + return inner + + +class _GeneratorContextManager(ContextDecorator, AbstractContextManager): + """Helper for @contextmanager decorator.""" + + def __init__(self, func, args, kwds): + self.gen = func(*args, **kwds) + self.func, self.args, self.kwds = func, args, kwds + # Issue 19330: ensure context manager instances have good docstrings + doc = getattr(func, "__doc__", None) + if doc is None: + doc = type(self).__doc__ + self.__doc__ = doc + # Unfortunately, this still doesn't provide good help output when + # inspecting the created context manager instances, since pydoc + # currently bypasses the instance docstring and shows the docstring + # for the class instead. + # See http://bugs.python.org/issue19404 for more details. + + def _recreate_cm(self): + # _GCM instances are one-shot context managers, so the + # CM must be recreated each time a decorated function is + # called + return self.__class__(self.func, self.args, self.kwds) + + def __enter__(self): + try: + return next(self.gen) + except StopIteration: + raise RuntimeError("generator didn't yield") from None + + def __exit__(self, type, value, traceback): + if type is None: + try: + next(self.gen) + except StopIteration: + return False + else: + raise RuntimeError("generator didn't stop") + else: + if value is None: + # Need to force instantiation so we can reliably + # tell if we get the same exception back + value = type() + try: + self.gen.throw(type, value, traceback) + except StopIteration as exc: + # Suppress StopIteration *unless* it's the same exception that + # was passed to throw(). This prevents a StopIteration + # raised inside the "with" statement from being suppressed. + return exc is not value + except RuntimeError as exc: + # Don't re-raise the passed in exception. (issue27122) + if exc is value: + return False + # Likewise, avoid suppressing if a StopIteration exception + # was passed to throw() and later wrapped into a RuntimeError + # (see PEP 479). + if type is StopIteration and exc.__cause__ is value: + return False + raise + except: + # only re-raise if it's *not* the exception that was + # passed to throw(), because __exit__() must not raise + # an exception unless __exit__() itself failed. But throw() + # has to raise the exception to signal propagation, so this + # fixes the impedance mismatch between the throw() protocol + # and the __exit__() protocol. + # + if sys.exc_info()[1] is value: + return False + raise + raise RuntimeError("generator didn't stop after throw()") + + +def contextmanager(func): + """@contextmanager decorator. + + Typical usage: + + @contextmanager + def some_generator(<arguments>): + <setup> + try: + yield <value> + finally: + <cleanup> + + This makes this: + + with some_generator(<arguments>) as <variable>: + <body> + + equivalent to this: + + <setup> + try: + <variable> = <value> + <body> + finally: + <cleanup> + + """ + @wraps(func) + def helper(*args, **kwds): + return _GeneratorContextManager(func, args, kwds) + return helper + + +class closing(AbstractContextManager): + """Context to automatically close something at the end of a block. + + Code like this: + + with closing(<module>.open(<arguments>)) as f: + <block> + + is equivalent to this: + + f = <module>.open(<arguments>) + try: + <block> + finally: + f.close() + + """ + def __init__(self, thing): + self.thing = thing + def __enter__(self): + return self.thing + def __exit__(self, *exc_info): + self.thing.close() + + +class _RedirectStream(AbstractContextManager): + + _stream = None + + def __init__(self, new_target): + self._new_target = new_target + # We use a list of old targets to make this CM re-entrant + self._old_targets = [] + + def __enter__(self): + self._old_targets.append(getattr(sys, self._stream)) + setattr(sys, self._stream, self._new_target) + return self._new_target + + def __exit__(self, exctype, excinst, exctb): + setattr(sys, self._stream, self._old_targets.pop()) + + +class redirect_stdout(_RedirectStream): + """Context manager for temporarily redirecting stdout to another file. + + # How to send help() to stderr + with redirect_stdout(sys.stderr): + help(dir) + + # How to write help() to a file + with open('help.txt', 'w') as f: + with redirect_stdout(f): + help(pow) + """ + + _stream = "stdout" + + +class redirect_stderr(_RedirectStream): + """Context manager for temporarily redirecting stderr to another file.""" + + _stream = "stderr" + + +class suppress(AbstractContextManager): + """Context manager to suppress specified exceptions + + After the exception is suppressed, execution proceeds with the next + statement following the with statement. + + with suppress(FileNotFoundError): + os.remove(somefile) + # Execution still resumes here if the file was already removed + """ + + def __init__(self, *exceptions): + self._exceptions = exceptions + + def __enter__(self): + pass + + def __exit__(self, exctype, excinst, exctb): + # Unlike isinstance and issubclass, CPython exception handling + # currently only looks at the concrete type hierarchy (ignoring + # the instance and subclass checking hooks). While Guido considers + # that a bug rather than a feature, it's a fairly hard one to fix + # due to various internal implementation details. suppress provides + # the simpler issubclass based semantics, rather than trying to + # exactly reproduce the limitations of the CPython interpreter. + # + # See http://bugs.python.org/issue12029 for more details + return exctype is not None and issubclass(exctype, self._exceptions) + + +# Inspired by discussions on http://bugs.python.org/issue13585 +class ExitStack(AbstractContextManager): + """Context manager for dynamic management of a stack of exit callbacks + + For example: + + with ExitStack() as stack: + files = [stack.enter_context(open(fname)) for fname in filenames] + # All opened files will automatically be closed at the end of + # the with statement, even if attempts to open files later + # in the list raise an exception + + """ + def __init__(self): + self._exit_callbacks = deque() + + def pop_all(self): + """Preserve the context stack by transferring it to a new instance""" + new_stack = type(self)() + new_stack._exit_callbacks = self._exit_callbacks + self._exit_callbacks = deque() + return new_stack + + def _push_cm_exit(self, cm, cm_exit): + """Helper to correctly register callbacks to __exit__ methods""" + def _exit_wrapper(*exc_details): + return cm_exit(cm, *exc_details) + _exit_wrapper.__self__ = cm + self.push(_exit_wrapper) + + def push(self, exit): + """Registers a callback with the standard __exit__ method signature + + Can suppress exceptions the same way __exit__ methods can. + + Also accepts any object with an __exit__ method (registering a call + to the method instead of the object itself) + """ + # We use an unbound method rather than a bound method to follow + # the standard lookup behaviour for special methods + _cb_type = type(exit) + try: + exit_method = _cb_type.__exit__ + except AttributeError: + # Not a context manager, so assume its a callable + self._exit_callbacks.append(exit) + else: + self._push_cm_exit(exit, exit_method) + return exit # Allow use as a decorator + + def callback(self, callback, *args, **kwds): + """Registers an arbitrary callback and arguments. + + Cannot suppress exceptions. + """ + def _exit_wrapper(exc_type, exc, tb): + callback(*args, **kwds) + # We changed the signature, so using @wraps is not appropriate, but + # setting __wrapped__ may still help with introspection + _exit_wrapper.__wrapped__ = callback + self.push(_exit_wrapper) + return callback # Allow use as a decorator + + def enter_context(self, cm): + """Enters the supplied context manager + + If successful, also pushes its __exit__ method as a callback and + returns the result of the __enter__ method. + """ + # We look up the special methods on the type to match the with statement + _cm_type = type(cm) + _exit = _cm_type.__exit__ + result = _cm_type.__enter__(cm) + self._push_cm_exit(cm, _exit) + return result + + def close(self): + """Immediately unwind the context stack""" + self.__exit__(None, None, None) + + def __exit__(self, *exc_details): + received_exc = exc_details[0] is not None + + # We manipulate the exception state so it behaves as though + # we were actually nesting multiple with statements + frame_exc = sys.exc_info()[1] + def _fix_exception_context(new_exc, old_exc): + # Context may not be correct, so find the end of the chain + while 1: + exc_context = new_exc.__context__ + if exc_context is old_exc: + # Context is already set correctly (see issue 20317) + return + if exc_context is None or exc_context is frame_exc: + break + new_exc = exc_context + # Change the end of the chain to point to the exception + # we expect it to reference + new_exc.__context__ = old_exc + + # Callbacks are invoked in LIFO order to match the behaviour of + # nested context managers + suppressed_exc = False + pending_raise = False + while self._exit_callbacks: + cb = self._exit_callbacks.pop() + try: + if cb(*exc_details): + suppressed_exc = True + pending_raise = False + exc_details = (None, None, None) + except: + new_exc_details = sys.exc_info() + # simulate the stack of exceptions by setting the context + _fix_exception_context(new_exc_details[1], exc_details[1]) + pending_raise = True + exc_details = new_exc_details + if pending_raise: + try: + # bare "raise exc_details[1]" replaces our carefully + # set-up context + fixed_ctx = exc_details[1].__context__ + raise exc_details[1] + except BaseException: + exc_details[1].__context__ = fixed_ctx + raise + return received_exc and suppressed_exc diff --git a/modules/language/python/module/datetime.py b/modules/language/python/module/datetime.py new file mode 100644 index 0000000..a5a63e9 --- /dev/null +++ b/modules/language/python/module/datetime.py @@ -0,0 +1,2281 @@ +module(datetime) +"""Concrete date/time and related types. + +See http://www.iana.org/time-zones/repository/tz-link.html for +time zone and DST data sources. +""" + +import time as _time +import math as _math + +def _cmp(x, y): + return 0 if x == y else 1 if x > y else -1 + +MINYEAR = 1 +MAXYEAR = 9999 +_MAXORDINAL = 3652059 # date.max.toordinal() + +# Utility functions, adapted from Python's Demo/classes/Dates.py, which +# also assumes the current Gregorian calendar indefinitely extended in +# both directions. Difference: Dates.py calls January 1 of year 0 day +# number 1. The code here calls January 1 of year 1 day number 1. This is +# to match the definition of the "proleptic Gregorian" calendar in Dershowitz +# and Reingold's "Calendrical Calculations", where it's the base calendar +# for all computations. See the book for algorithms for converting between +# proleptic Gregorian ordinals and many other calendar systems. + +# -1 is a placeholder for indexing purposes. +_DAYS_IN_MONTH = [-1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + +_DAYS_BEFORE_MONTH = [-1] # -1 is a placeholder for indexing purposes. +dbm = 0 +for dim in _DAYS_IN_MONTH[1:]: + _DAYS_BEFORE_MONTH.append(dbm) + dbm += dim +del dbm, dim + +def _is_leap(year): + "year -> 1 if leap year, else 0." + return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0) + +def _days_before_year(year): + "year -> number of days before January 1st of year." + y = year - 1 + return y*365 + y//4 - y//100 + y//400 + +def _days_in_month(year, month): + "year, month -> number of days in that month in that year." + assert 1 <= month <= 12, month + if month == 2 and _is_leap(year): + return 29 + return _DAYS_IN_MONTH[month] + +def _days_before_month(year, month): + "year, month -> number of days in year preceding first day of month." + assert 1 <= month <= 12, 'month must be in 1..12' + return _DAYS_BEFORE_MONTH[month] + (month > 2 and _is_leap(year)) + +def _ymd2ord(year, month, day): + "year, month, day -> ordinal, considering 01-Jan-0001 as day 1." + assert 1 <= month <= 12, 'month must be in 1..12' + dim = _days_in_month(year, month) + assert 1 <= day <= dim, ('day must be in 1..%d' % dim) + return (_days_before_year(year) + + _days_before_month(year, month) + + day) + +_DI400Y = _days_before_year(401) # number of days in 400 years +_DI100Y = _days_before_year(101) # " " " " 100 " +_DI4Y = _days_before_year(5) # " " " " 4 " + +# A 4-year cycle has an extra leap day over what we'd get from pasting +# together 4 single years. +assert _DI4Y == 4 * 365 + 1 + +# Similarly, a 400-year cycle has an extra leap day over what we'd get from +# pasting together 4 100-year cycles. +assert _DI400Y == 4 * _DI100Y + 1 + +# OTOH, a 100-year cycle has one fewer leap day than we'd get from +# pasting together 25 4-year cycles. +assert _DI100Y == 25 * _DI4Y - 1 + +def _ord2ymd(n): + "ordinal -> (year, month, day), considering 01-Jan-0001 as day 1." + + # n is a 1-based index, starting at 1-Jan-1. The pattern of leap years + # repeats exactly every 400 years. The basic strategy is to find the + # closest 400-year boundary at or before n, then work with the offset + # from that boundary to n. Life is much clearer if we subtract 1 from + # n first -- then the values of n at 400-year boundaries are exactly + # those divisible by _DI400Y: + # + # D M Y n n-1 + # -- --- ---- ---------- ---------------- + # 31 Dec -400 -_DI400Y -_DI400Y -1 + # 1 Jan -399 -_DI400Y +1 -_DI400Y 400-year boundary + # ... + # 30 Dec 000 -1 -2 + # 31 Dec 000 0 -1 + # 1 Jan 001 1 0 400-year boundary + # 2 Jan 001 2 1 + # 3 Jan 001 3 2 + # ... + # 31 Dec 400 _DI400Y _DI400Y -1 + # 1 Jan 401 _DI400Y +1 _DI400Y 400-year boundary + n -= 1 + n400, n = divmod(n, _DI400Y) + year = n400 * 400 + 1 # ..., -399, 1, 401, ... + + # Now n is the (non-negative) offset, in days, from January 1 of year, to + # the desired date. Now compute how many 100-year cycles precede n. + # Note that it's possible for n100 to equal 4! In that case 4 full + # 100-year cycles precede the desired day, which implies the desired + # day is December 31 at the end of a 400-year cycle. + n100, n = divmod(n, _DI100Y) + + # Now compute how many 4-year cycles precede it. + n4, n = divmod(n, _DI4Y) + + # And now how many single years. Again n1 can be 4, and again meaning + # that the desired day is December 31 at the end of the 4-year cycle. + n1, n = divmod(n, 365) + + year += n100 * 100 + n4 * 4 + n1 + if n1 == 4 or n100 == 4: + assert n == 0 + return year-1, 12, 31 + + # Now the year is correct, and n is the offset from January 1. We find + # the month via an estimate that's either exact or one too large. + leapyear = n1 == 3 and (n4 != 24 or n100 == 3) + assert leapyear == _is_leap(year) + month = (n + 50) >> 5 + preceding = _DAYS_BEFORE_MONTH[month] + (month > 2 and leapyear) + if preceding > n: # estimate is too large + month -= 1 + preceding -= _DAYS_IN_MONTH[month] + (month == 2 and leapyear) + n -= preceding + assert 0 <= n < _days_in_month(year, month) + + # Now the year and month are correct, and n is the offset from the + # start of that month: we're done! + return year, month, n+1 + +# Month and day names. For localized versions, see the calendar module. +_MONTHNAMES = [None, "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] +_DAYNAMES = [None, "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] + + +def _build_struct_time(y, m, d, hh, mm, ss, dstflag): + wday = (_ymd2ord(y, m, d) + 6) % 7 + dnum = _days_before_month(y, m) + d + return _time.struct_time((y, m, d, hh, mm, ss, wday, dnum, dstflag)) + +def _format_time(hh, mm, ss, us, timespec='auto'): + specs = { + 'hours': '{:02d}', + 'minutes': '{:02d}:{:02d}', + 'seconds': '{:02d}:{:02d}:{:02d}', + 'milliseconds': '{:02d}:{:02d}:{:02d}.{:03d}', + 'microseconds': '{:02d}:{:02d}:{:02d}.{:06d}' + } + + if timespec == 'auto': + # Skip trailing microseconds when us==0. + timespec = 'microseconds' if us else 'seconds' + elif timespec == 'milliseconds': + us //= 1000 + try: + fmt = specs[timespec] + except KeyError: + raise ValueError('Unknown timespec value') + else: + return fmt.format(hh, mm, ss, us) + +# Correctly substitute for %z and %Z escapes in strftime formats. +def _wrap_strftime(object, format, timetuple): + # Don't call utcoffset() or tzname() unless actually needed. + freplace = None # the string to use for %f + zreplace = None # the string to use for %z + Zreplace = None # the string to use for %Z + + # Scan format for %z and %Z escapes, replacing as needed. + newformat = [] + push = newformat.append + i, n = 0, len(format) + while i < n: + ch = format[i] + i += 1 + if ch == '%': + if i < n: + ch = format[i] + i += 1 + if ch == 'f': + if freplace is None: + freplace = '%06d' % getattr(object, + 'microsecond', 0) + newformat.append(freplace) + elif ch == 'z': + if zreplace is None: + zreplace = "" + if hasattr(object, "utcoffset"): + offset = object.utcoffset() + if offset is not None: + sign = '+' + if offset.days < 0: + offset = -offset + sign = '-' + h, m = divmod(offset, timedelta(hours=1)) + assert not m % timedelta(minutes=1), "whole minute" + m //= timedelta(minutes=1) + zreplace = '%c%02d%02d' % (sign, h, m) + assert '%' not in zreplace + newformat.append(zreplace) + elif ch == 'Z': + if Zreplace is None: + Zreplace = "" + if hasattr(object, "tzname"): + s = object.tzname() + if s is not None: + # strftime is going to have at this: escape % + Zreplace = s.replace('%', '%%') + newformat.append(Zreplace) + else: + push('%') + push(ch) + else: + push('%') + else: + push(ch) + newformat = "".join(newformat) + return _time.strftime(newformat, timetuple) + +# Just raise TypeError if the arg isn't None or a string. +def _check_tzname(name): + if name is not None and not isinstance(name, str): + raise TypeError("tzinfo.tzname() must return None or string, " + "not '%s'" % type(name)) + +# name is the offset-producing method, "utcoffset" or "dst". +# offset is what it returned. +# If offset isn't None or timedelta, raises TypeError. +# If offset is None, returns None. +# Else offset is checked for being in range, and a whole # of minutes. +# If it is, its integer value is returned. Else ValueError is raised. +def _check_utc_offset(name, offset): + assert name in ("utcoffset", "dst") + if offset is None: + return + if not isinstance(offset, timedelta): + raise TypeError("tzinfo.%s() must return None " + "or timedelta, not '%s'" % (name, type(offset))) + if offset.microseconds: + raise ValueError("tzinfo.%s() must return a whole number " + "of seconds, got %s" % (name, offset)) + if not -timedelta(1) < offset < timedelta(1): + raise ValueError("%s()=%s, must be strictly between " + "-timedelta(hours=24) and timedelta(hours=24)" % + (name, offset)) + +def _check_int_field(value): + if isinstance(value, int): + return value + if not isinstance(value, float): + try: + value = value.__int__() + except AttributeError: + pass + else: + if isinstance(value, int): + return value + raise TypeError('__int__ returned non-int (type %s)' % + type(value).__name__) + raise TypeError('an integer is required (got type %s)' % + type(value).__name__) + raise TypeError('integer argument expected, got float') + +def _check_date_fields(year, month, day): + year = _check_int_field(year) + month = _check_int_field(month) + day = _check_int_field(day) + if not MINYEAR <= year <= MAXYEAR: + raise ValueError('year must be in %d..%d' % (MINYEAR, MAXYEAR), year) + if not 1 <= month <= 12: + raise ValueError('month must be in 1..12', month) + dim = _days_in_month(year, month) + if not 1 <= day <= dim: + raise ValueError('day must be in 1..%d' % dim, day) + return year, month, day + +def _check_time_fields(hour, minute, second, microsecond, fold): + hour = _check_int_field(hour) + minute = _check_int_field(minute) + second = _check_int_field(second) + microsecond = _check_int_field(microsecond) + if not 0 <= hour <= 23: + raise ValueError('hour must be in 0..23', hour) + if not 0 <= minute <= 59: + raise ValueError('minute must be in 0..59', minute) + if not 0 <= second <= 59: + raise ValueError('second must be in 0..59', second) + if not 0 <= microsecond <= 999999: + raise ValueError('microsecond must be in 0..999999', microsecond) + if fold not in (0, 1): + raise ValueError('fold must be either 0 or 1', fold) + return hour, minute, second, microsecond, fold + +def _check_tzinfo_arg(tz): + if tz is not None and not isinstance(tz, tzinfo): + raise TypeError("tzinfo argument must be None or of a tzinfo subclass") + +def _cmperror(x, y): + raise TypeError("can't compare '%s' to '%s'" % ( + type(x).__name__, type(y).__name__)) + +def _divide_and_round(a, b): + """divide a by b and round result to the nearest integer + + When the ratio is exactly half-way between two integers, + the even integer is returned. + """ + # Based on the reference implementation for divmod_near + # in Objects/longobject.c. + q, r = divmod(a, b) + # round up if either r / b > 0.5, or r / b == 0.5 and q is odd. + # The expression r / b > 0.5 is equivalent to 2 * r > b if b is + # positive, 2 * r < b if b negative. + r *= 2 + greater_than_half = r > b if b > 0 else r < b + if greater_than_half or r == b and q % 2 == 1: + q += 1 + + return q + + +class timedelta: + """Represent the difference between two datetime objects. + + Supported operators: + + - add, subtract timedelta + - unary plus, minus, abs + - compare to timedelta + - multiply, divide by int + + In addition, datetime supports subtraction of two datetime objects + returning a timedelta, and addition or subtraction of a datetime + and a timedelta giving a datetime. + + Representation: (days, seconds, microseconds). Why? Because I + felt like it. + """ + __slots__ = '_days', '_seconds', '_microseconds', '_hashcode' + + def __new__(cls, days=0, seconds=0, microseconds=0, + milliseconds=0, minutes=0, hours=0, weeks=0): + # Doing this efficiently and accurately in C is going to be difficult + # and error-prone, due to ubiquitous overflow possibilities, and that + # C double doesn't have enough bits of precision to represent + # microseconds over 10K years faithfully. The code here tries to make + # explicit where go-fast assumptions can be relied on, in order to + # guide the C implementation; it's way more convoluted than speed- + # ignoring auto-overflow-to-long idiomatic Python could be. + + # XXX Check that all inputs are ints or floats. + + # Final values, all integer. + # s and us fit in 32-bit signed ints; d isn't bounded. + d = s = us = 0 + + # Normalize everything to days, seconds, microseconds. + days += weeks*7 + seconds += minutes*60 + hours*3600 + microseconds += milliseconds*1000 + + # Get rid of all fractions, and normalize s and us. + # Take a deep breath <wink>. + if isinstance(days, float): + dayfrac, days = _math.modf(days) + daysecondsfrac, daysecondswhole = _math.modf(dayfrac * (24.*3600.)) + assert daysecondswhole == int(daysecondswhole) # can't overflow + s = int(daysecondswhole) + assert days == int(days) + d = int(days) + else: + daysecondsfrac = 0.0 + d = days + assert isinstance(daysecondsfrac, float) + assert abs(daysecondsfrac) <= 1.0 + assert isinstance(d, int) + assert abs(s) <= 24 * 3600 + # days isn't referenced again before redefinition + + if isinstance(seconds, float): + secondsfrac, seconds = _math.modf(seconds) + assert seconds == int(seconds) + seconds = int(seconds) + secondsfrac += daysecondsfrac + assert abs(secondsfrac) <= 2.0 + else: + secondsfrac = daysecondsfrac + # daysecondsfrac isn't referenced again + assert isinstance(secondsfrac, float) + assert abs(secondsfrac) <= 2.0 + + assert isinstance(seconds, int) + days, seconds = divmod(seconds, 24*3600) + d += days + s += int(seconds) # can't overflow + assert isinstance(s, int) + assert abs(s) <= 2 * 24 * 3600 + # seconds isn't referenced again before redefinition + + usdouble = secondsfrac * 1e6 + assert abs(usdouble) < 2.1e6 # exact value not critical + # secondsfrac isn't referenced again + + if isinstance(microseconds, float): + microseconds = round(microseconds + usdouble) + seconds, microseconds = divmod(microseconds, 1000000) + days, seconds = divmod(seconds, 24*3600) + d += days + s += seconds + else: + microseconds = int(microseconds) + seconds, microseconds = divmod(microseconds, 1000000) + days, seconds = divmod(seconds, 24*3600) + d += days + s += seconds + microseconds = round(microseconds + usdouble) + assert isinstance(s, int) + assert isinstance(microseconds, int) + assert abs(s) <= 3 * 24 * 3600 + assert abs(microseconds) < 3.1e6 + + # Just a little bit of carrying possible for microseconds and seconds. + seconds, us = divmod(microseconds, 1000000) + s += seconds + days, s = divmod(s, 24*3600) + d += days + + assert isinstance(d, int) + assert isinstance(s, int) and 0 <= s < 24*3600 + assert isinstance(us, int) and 0 <= us < 1000000 + + if abs(d) > 999999999: + raise OverflowError("timedelta # of days is too large: %d" % d) + + self = object.__new__(cls) + self._days = d + self._seconds = s + self._microseconds = us + self._hashcode = -1 + return self + + def __repr__(self): + if self._microseconds: + return "%s.%s(%d, %d, %d)" % (self.__class__.__module__, + self.__class__.__qualname__, + self._days, + self._seconds, + self._microseconds) + if self._seconds: + return "%s.%s(%d, %d)" % (self.__class__.__module__, + self.__class__.__qualname__, + self._days, + self._seconds) + return "%s.%s(%d)" % (self.__class__.__module__, + self.__class__.__qualname__, + self._days) + + def __str__(self): + mm, ss = divmod(self._seconds, 60) + hh, mm = divmod(mm, 60) + s = "%d:%02d:%02d" % (hh, mm, ss) + if self._days: + def plural(n): + return n, abs(n) != 1 and "s" or "" + s = ("%d day%s, " % plural(self._days)) + s + if self._microseconds: + s = s + ".%06d" % self._microseconds + return s + + def total_seconds(self): + """Total seconds in the duration.""" + return ((self.days * 86400 + self.seconds) * 10**6 + + self.microseconds) / 10**6 + + # Read-only field accessors + @property + def days(self): + """days""" + return self._days + + @property + def seconds(self): + """seconds""" + return self._seconds + + @property + def microseconds(self): + """microseconds""" + return self._microseconds + + def __add__(self, other): + if isinstance(other, timedelta): + # for CPython compatibility, we cannot use + # our __class__ here, but need a real timedelta + return timedelta(self._days + other._days, + self._seconds + other._seconds, + self._microseconds + other._microseconds) + return NotImplemented + + __radd__ = __add__ + + def __sub__(self, other): + if isinstance(other, timedelta): + # for CPython compatibility, we cannot use + # our __class__ here, but need a real timedelta + return timedelta(self._days - other._days, + self._seconds - other._seconds, + self._microseconds - other._microseconds) + return NotImplemented + + def __rsub__(self, other): + if isinstance(other, timedelta): + return -self + other + return NotImplemented + + def __neg__(self): + # for CPython compatibility, we cannot use + # our __class__ here, but need a real timedelta + return timedelta(-self._days, + -self._seconds, + -self._microseconds) + + def __pos__(self): + return self + + def __abs__(self): + if self._days < 0: + return -self + else: + return self + + def __mul__(self, other): + if isinstance(other, int): + # for CPython compatibility, we cannot use + # our __class__ here, but need a real timedelta + return timedelta(self._days * other, + self._seconds * other, + self._microseconds * other) + if isinstance(other, float): + usec = self._to_microseconds() + a, b = other.as_integer_ratio() + return timedelta(0, 0, _divide_and_round(usec * a, b)) + return NotImplemented + + __rmul__ = __mul__ + + def _to_microseconds(self): + return ((self._days * (24*3600) + self._seconds) * 1000000 + + self._microseconds) + + def __floordiv__(self, other): + if not isinstance(other, (int, timedelta)): + return NotImplemented + usec = self._to_microseconds() + if isinstance(other, timedelta): + return usec // other._to_microseconds() + if isinstance(other, int): + return timedelta(0, 0, usec // other) + + def __truediv__(self, other): + if not isinstance(other, (int, float, timedelta)): + return NotImplemented + usec = self._to_microseconds() + if isinstance(other, timedelta): + return usec / other._to_microseconds() + if isinstance(other, int): + return timedelta(0, 0, _divide_and_round(usec, other)) + if isinstance(other, float): + a, b = other.as_integer_ratio() + return timedelta(0, 0, _divide_and_round(b * usec, a)) + + def __mod__(self, other): + if isinstance(other, timedelta): + r = self._to_microseconds() % other._to_microseconds() + return timedelta(0, 0, r) + return NotImplemented + + def __divmod__(self, other): + if isinstance(other, timedelta): + q, r = divmod(self._to_microseconds(), + other._to_microseconds()) + return q, timedelta(0, 0, r) + return NotImplemented + + # Comparisons of timedelta objects with other. + + def __eq__(self, other): + if isinstance(other, timedelta): + return self._cmp(other) == 0 + else: + return False + + def __le__(self, other): + if isinstance(other, timedelta): + return self._cmp(other) <= 0 + else: + _cmperror(self, other) + + def __lt__(self, other): + if isinstance(other, timedelta): + return self._cmp(other) < 0 + else: + _cmperror(self, other) + + def __ge__(self, other): + if isinstance(other, timedelta): + return self._cmp(other) >= 0 + else: + _cmperror(self, other) + + def __gt__(self, other): + if isinstance(other, timedelta): + return self._cmp(other) > 0 + else: + _cmperror(self, other) + + def _cmp(self, other): + assert isinstance(other, timedelta) + return _cmp(self._getstate(), other._getstate()) + + def __hash__(self): + if self._hashcode == -1: + self._hashcode = hash(self._getstate()) + return self._hashcode + + def __bool__(self): + return (self._days != 0 or + self._seconds != 0 or + self._microseconds != 0) + + # Pickle support. + + def _getstate(self): + return (self._days, self._seconds, self._microseconds) + + def __reduce__(self): + return (self.__class__, self._getstate()) + +timedelta.min = timedelta(-999999999) +timedelta.max = timedelta(days=999999999, hours=23, minutes=59, seconds=59, + microseconds=999999) +timedelta.resolution = timedelta(microseconds=1) + +class date: + """Concrete date type. + + Constructors: + + __new__() + fromtimestamp() + today() + fromordinal() + + Operators: + + __repr__, __str__ + __eq__, __le__, __lt__, __ge__, __gt__, __hash__ + __add__, __radd__, __sub__ (add/radd only with timedelta arg) + + Methods: + + timetuple() + toordinal() + weekday() + isoweekday(), isocalendar(), isoformat() + ctime() + strftime() + + Properties (readonly): + year, month, day + """ + __slots__ = '_year', '_month', '_day', '_hashcode' + + def __new__(cls, year, month=None, day=None): + """Constructor. + + Arguments: + + year, month, day (required, base 1) + """ + if month is None and isinstance(year, bytes) and len(year) == 4 and \ + 1 <= year[2] <= 12: + # Pickle support + self = object.__new__(cls) + self.__setstate(year) + self._hashcode = -1 + return self + year, month, day = _check_date_fields(year, month, day) + self = object.__new__(cls) + self._year = year + self._month = month + self._day = day + self._hashcode = -1 + return self + + # Additional constructors + + @classmethod + def fromtimestamp(cls, t): + "Construct a date from a POSIX timestamp (like time.time())." + y, m, d, hh, mm, ss, weekday, jday, dst = _time.localtime(t) + return cls(y, m, d) + + @classmethod + def today(cls): + "Construct a date from time.time()." + t = _time.time() + return cls.fromtimestamp(t) + + @classmethod + def fromordinal(cls, n): + """Construct a date from a proleptic Gregorian ordinal. + + January 1 of year 1 is day 1. Only the year, month and day are + non-zero in the result. + """ + y, m, d = _ord2ymd(n) + return cls(y, m, d) + + # Conversions to string + + def __repr__(self): + """Convert to formal string, for repr(). + + >>> dt = datetime(2010, 1, 1) + >>> repr(dt) + 'datetime.datetime(2010, 1, 1, 0, 0)' + + >>> dt = datetime(2010, 1, 1, tzinfo=timezone.utc) + >>> repr(dt) + 'datetime.datetime(2010, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)' + """ + return "%s.%s(%d, %d, %d)" % (self.__class__.__module__, + self.__class__.__qualname__, + self._year, + self._month, + self._day) + # XXX These shouldn't depend on time.localtime(), because that + # clips the usable dates to [1970 .. 2038). At least ctime() is + # easily done without using strftime() -- that's better too because + # strftime("%c", ...) is locale specific. + + + def ctime(self): + "Return ctime() style string." + weekday = self.toordinal() % 7 or 7 + return "%s %s %2d 00:00:00 %04d" % ( + _DAYNAMES[weekday], + _MONTHNAMES[self._month], + self._day, self._year) + + def strftime(self, fmt): + "Format using strftime()." + return _wrap_strftime(self, fmt, self.timetuple()) + + def __format__(self, fmt): + if not isinstance(fmt, str): + raise TypeError("must be str, not %s" % type(fmt).__name__) + if len(fmt) != 0: + return self.strftime(fmt) + return str(self) + + def isoformat(self): + """Return the date formatted according to ISO. + + This is 'YYYY-MM-DD'. + + References: + - http://www.w3.org/TR/NOTE-datetime + - http://www.cl.cam.ac.uk/~mgk25/iso-time.html + """ + return "%04d-%02d-%02d" % (self._year, self._month, self._day) + + __str__ = isoformat + + # Read-only field accessors + @property + def year(self): + """year (1-9999)""" + return self._year + + @property + def month(self): + """month (1-12)""" + return self._month + + @property + def day(self): + """day (1-31)""" + return self._day + + # Standard conversions, __eq__, __le__, __lt__, __ge__, __gt__, + # __hash__ (and helpers) + + def timetuple(self): + "Return local time tuple compatible with time.localtime()." + return _build_struct_time(self._year, self._month, self._day, + 0, 0, 0, -1) + + def toordinal(self): + """Return proleptic Gregorian ordinal for the year, month and day. + + January 1 of year 1 is day 1. Only the year, month and day values + contribute to the result. + """ + return _ymd2ord(self._year, self._month, self._day) + + def replace(self, year=None, month=None, day=None): + """Return a new date with new values for the specified fields.""" + if year is None: + year = self._year + if month is None: + month = self._month + if day is None: + day = self._day + return type(self)(year, month, day) + + # Comparisons of date objects with other. + + def __eq__(self, other): + if isinstance(other, date): + return self._cmp(other) == 0 + return NotImplemented + + def __le__(self, other): + if isinstance(other, date): + return self._cmp(other) <= 0 + return NotImplemented + + def __lt__(self, other): + if isinstance(other, date): + return self._cmp(other) < 0 + return NotImplemented + + def __ge__(self, other): + if isinstance(other, date): + return self._cmp(other) >= 0 + return NotImplemented + + def __gt__(self, other): + if isinstance(other, date): + return self._cmp(other) > 0 + return NotImplemented + + def _cmp(self, other): + assert isinstance(other, date) + y, m, d = self._year, self._month, self._day + y2, m2, d2 = other._year, other._month, other._day + return _cmp((y, m, d), (y2, m2, d2)) + + def __hash__(self): + "Hash." + if self._hashcode == -1: + self._hashcode = hash(self._getstate()) + return self._hashcode + + # Computations + + def __add__(self, other): + "Add a date to a timedelta." + if isinstance(other, timedelta): + o = self.toordinal() + other.days + if 0 < o <= _MAXORDINAL: + return date.fromordinal(o) + raise OverflowError("result out of range") + return NotImplemented + + __radd__ = __add__ + + def __sub__(self, other): + """Subtract two dates, or a date and a timedelta.""" + if isinstance(other, timedelta): + return self + timedelta(-other.days) + if isinstance(other, date): + days1 = self.toordinal() + days2 = other.toordinal() + return timedelta(days1 - days2) + return NotImplemented + + def weekday(self): + "Return day of the week, where Monday == 0 ... Sunday == 6." + return (self.toordinal() + 6) % 7 + + # Day-of-the-week and week-of-the-year, according to ISO + + def isoweekday(self): + "Return day of the week, where Monday == 1 ... Sunday == 7." + # 1-Jan-0001 is a Monday + return self.toordinal() % 7 or 7 + + def isocalendar(self): + """Return a 3-tuple containing ISO year, week number, and weekday. + + The first ISO week of the year is the (Mon-Sun) week + containing the year's first Thursday; everything else derives + from that. + + The first week is 1; Monday is 1 ... Sunday is 7. + + ISO calendar algorithm taken from + http://www.phys.uu.nl/~vgent/calendar/isocalendar.htm + (used with permission) + """ + year = self._year + week1monday = _isoweek1monday(year) + today = _ymd2ord(self._year, self._month, self._day) + # Internally, week and day have origin 0 + week, day = divmod(today - week1monday, 7) + if week < 0: + year -= 1 + week1monday = _isoweek1monday(year) + week, day = divmod(today - week1monday, 7) + elif week >= 52: + if today >= _isoweek1monday(year+1): + year += 1 + week = 0 + return year, week+1, day+1 + + # Pickle support. + + def _getstate(self): + yhi, ylo = divmod(self._year, 256) + return bytes([yhi, ylo, self._month, self._day]), + + def __setstate(self, string): + yhi, ylo, self._month, self._day = string + self._year = yhi * 256 + ylo + + def __reduce__(self): + return (self.__class__, self._getstate()) + +_date_class = date # so functions w/ args named "date" can get at the class + +date.min = date(1, 1, 1) +date.max = date(9999, 12, 31) +date.resolution = timedelta(days=1) + + +class tzinfo: + """Abstract base class for time zone info classes. + + Subclasses must override the name(), utcoffset() and dst() methods. + """ + __slots__ = () + + def tzname(self, dt): + "datetime -> string name of time zone." + raise NotImplementedError("tzinfo subclass must override tzname()") + + def utcoffset(self, dt): + "datetime -> minutes east of UTC (negative for west of UTC)" + raise NotImplementedError("tzinfo subclass must override utcoffset()") + + def dst(self, dt): + """datetime -> DST offset in minutes east of UTC. + + Return 0 if DST not in effect. utcoffset() must include the DST + offset. + """ + raise NotImplementedError("tzinfo subclass must override dst()") + + def fromutc(self, dt): + "datetime in UTC -> datetime in local time." + + if not isinstance(dt, datetime): + raise TypeError("fromutc() requires a datetime argument") + if dt.tzinfo is not self: + raise ValueError("dt.tzinfo is not self") + + dtoff = dt.utcoffset() + if dtoff is None: + raise ValueError("fromutc() requires a non-None utcoffset() " + "result") + + # See the long comment block at the end of this file for an + # explanation of this algorithm. + dtdst = dt.dst() + if dtdst is None: + raise ValueError("fromutc() requires a non-None dst() result") + delta = dtoff - dtdst + if delta: + dt += delta + dtdst = dt.dst() + if dtdst is None: + raise ValueError("fromutc(): dt.dst gave inconsistent " + "results; cannot convert") + return dt + dtdst + + # Pickle support. + + def __reduce__(self): + getinitargs = getattr(self, "__getinitargs__", None) + if getinitargs: + args = getinitargs() + else: + args = () + getstate = getattr(self, "__getstate__", None) + if getstate: + state = getstate() + else: + state = getattr(self, "__dict__", None) or None + if state is None: + return (self.__class__, args) + else: + return (self.__class__, args, state) + +_tzinfo_class = tzinfo + +class time: + """Time with time zone. + + Constructors: + + __new__() + + Operators: + + __repr__, __str__ + __eq__, __le__, __lt__, __ge__, __gt__, __hash__ + + Methods: + + strftime() + isoformat() + utcoffset() + tzname() + dst() + + Properties (readonly): + hour, minute, second, microsecond, tzinfo, fold + """ + __slots__ = '_hour', '_minute', '_second', '_microsecond', '_tzinfo', '_hashcode', '_fold' + + def __new__(cls, hour=0, minute=0, second=0, microsecond=0, tzinfo=None, *, fold=0): + """Constructor. + + Arguments: + + hour, minute (required) + second, microsecond (default to zero) + tzinfo (default to None) + fold (keyword only, default to zero) + """ + if isinstance(hour, bytes) and len(hour) == 6 and hour[0]&0x7F < 24: + # Pickle support + self = object.__new__(cls) + self.__setstate(hour, minute or None) + self._hashcode = -1 + return self + hour, minute, second, microsecond, fold = _check_time_fields( + hour, minute, second, microsecond, fold) + _check_tzinfo_arg(tzinfo) + self = object.__new__(cls) + self._hour = hour + self._minute = minute + self._second = second + self._microsecond = microsecond + self._tzinfo = tzinfo + self._hashcode = -1 + self._fold = fold + return self + + # Read-only field accessors + @property + def hour(self): + """hour (0-23)""" + return self._hour + + @property + def minute(self): + """minute (0-59)""" + return self._minute + + @property + def second(self): + """second (0-59)""" + return self._second + + @property + def microsecond(self): + """microsecond (0-999999)""" + return self._microsecond + + @property + def tzinfo(self): + """timezone info object""" + return self._tzinfo + + @property + def fold(self): + return self._fold + + # Standard conversions, __hash__ (and helpers) + + # Comparisons of time objects with other. + + def __eq__(self, other): + if isinstance(other, time): + return self._cmp(other, allow_mixed=True) == 0 + else: + return False + + def __le__(self, other): + if isinstance(other, time): + return self._cmp(other) <= 0 + else: + _cmperror(self, other) + + def __lt__(self, other): + if isinstance(other, time): + return self._cmp(other) < 0 + else: + _cmperror(self, other) + + def __ge__(self, other): + if isinstance(other, time): + return self._cmp(other) >= 0 + else: + _cmperror(self, other) + + def __gt__(self, other): + if isinstance(other, time): + return self._cmp(other) > 0 + else: + _cmperror(self, other) + + def _cmp(self, other, allow_mixed=False): + assert isinstance(other, time) + mytz = self._tzinfo + ottz = other._tzinfo + myoff = otoff = None + + if mytz is ottz: + base_compare = True + else: + myoff = self.utcoffset() + otoff = other.utcoffset() + base_compare = myoff == otoff + + if base_compare: + return _cmp((self._hour, self._minute, self._second, + self._microsecond), + (other._hour, other._minute, other._second, + other._microsecond)) + if myoff is None or otoff is None: + if allow_mixed: + return 2 # arbitrary non-zero value + else: + raise TypeError("cannot compare naive and aware times") + myhhmm = self._hour * 60 + self._minute - myoff//timedelta(minutes=1) + othhmm = other._hour * 60 + other._minute - otoff//timedelta(minutes=1) + return _cmp((myhhmm, self._second, self._microsecond), + (othhmm, other._second, other._microsecond)) + + def __hash__(self): + """Hash.""" + if self._hashcode == -1: + if self.fold: + t = self.replace(fold=0) + else: + t = self + tzoff = t.utcoffset() + if not tzoff: # zero or None + self._hashcode = hash(t._getstate()[0]) + else: + h, m = divmod(timedelta(hours=self.hour, minutes=self.minute) - tzoff, + timedelta(hours=1)) + assert not m % timedelta(minutes=1), "whole minute" + m //= timedelta(minutes=1) + if 0 <= h < 24: + self._hashcode = hash(time(h, m, self.second, self.microsecond)) + else: + self._hashcode = hash((h, m, self.second, self.microsecond)) + return self._hashcode + + # Conversion to string + + def _tzstr(self, sep=":"): + """Return formatted timezone offset (+xx:xx) or None.""" + off = self.utcoffset() + if off is not None: + if off.days < 0: + sign = "-" + off = -off + else: + sign = "+" + hh, mm = divmod(off, timedelta(hours=1)) + mm, ss = divmod(mm, timedelta(minutes=1)) + assert 0 <= hh < 24 + off = "%s%02d%s%02d" % (sign, hh, sep, mm) + if ss: + off += ':%02d' % ss.seconds + return off + + def __repr__(self): + """Convert to formal string, for repr().""" + if self._microsecond != 0: + s = ", %d, %d" % (self._second, self._microsecond) + elif self._second != 0: + s = ", %d" % self._second + else: + s = "" + s= "%s.%s(%d, %d%s)" % (self.__class__.__module__, + self.__class__.__qualname__, + self._hour, self._minute, s) + if self._tzinfo is not None: + assert s[-1:] == ")" + s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")" + if self._fold: + assert s[-1:] == ")" + s = s[:-1] + ", fold=1)" + return s + + def isoformat(self, timespec='auto'): + """Return the time formatted according to ISO. + + The full format is 'HH:MM:SS.mmmmmm+zz:zz'. By default, the fractional + part is omitted if self.microsecond == 0. + + The optional argument timespec specifies the number of additional + terms of the time to include. + """ + s = _format_time(self._hour, self._minute, self._second, + self._microsecond, timespec) + tz = self._tzstr() + if tz: + s += tz + return s + + __str__ = isoformat + + def strftime(self, fmt): + """Format using strftime(). The date part of the timestamp passed + to underlying strftime should not be used. + """ + # The year must be >= 1000 else Python's strftime implementation + # can raise a bogus exception. + timetuple = (1900, 1, 1, + self._hour, self._minute, self._second, + 0, 1, -1) + return _wrap_strftime(self, fmt, timetuple) + + def __format__(self, fmt): + if not isinstance(fmt, str): + raise TypeError("must be str, not %s" % type(fmt).__name__) + if len(fmt) != 0: + return self.strftime(fmt) + return str(self) + + # Timezone functions + + def utcoffset(self): + """Return the timezone offset in minutes east of UTC (negative west of + UTC).""" + if self._tzinfo is None: + return None + offset = self._tzinfo.utcoffset(None) + _check_utc_offset("utcoffset", offset) + return offset + + def tzname(self): + """Return the timezone name. + + Note that the name is 100% informational -- there's no requirement that + it mean anything in particular. For example, "GMT", "UTC", "-500", + "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies. + """ + if self._tzinfo is None: + return None + name = self._tzinfo.tzname(None) + _check_tzname(name) + return name + + def dst(self): + """Return 0 if DST is not in effect, or the DST offset (in minutes + eastward) if DST is in effect. + + This is purely informational; the DST offset has already been added to + the UTC offset returned by utcoffset() if applicable, so there's no + need to consult dst() unless you're interested in displaying the DST + info. + """ + if self._tzinfo is None: + return None + offset = self._tzinfo.dst(None) + _check_utc_offset("dst", offset) + return offset + + def replace(self, hour=None, minute=None, second=None, microsecond=None, + tzinfo=True, *, fold=None): + """Return a new time with new values for the specified fields.""" + if hour is None: + hour = self.hour + if minute is None: + minute = self.minute + if second is None: + second = self.second + if microsecond is None: + microsecond = self.microsecond + if tzinfo is True: + tzinfo = self.tzinfo + if fold is None: + fold = self._fold + return type(self)(hour, minute, second, microsecond, tzinfo, fold=fold) + + # Pickle support. + + def _getstate(self, protocol=3): + us2, us3 = divmod(self._microsecond, 256) + us1, us2 = divmod(us2, 256) + h = self._hour + if self._fold and protocol > 3: + h += 128 + basestate = bytes([h, self._minute, self._second, + us1, us2, us3]) + if self._tzinfo is None: + return (basestate,) + else: + return (basestate, self._tzinfo) + + def __setstate(self, string, tzinfo): + if tzinfo is not None and not isinstance(tzinfo, _tzinfo_class): + raise TypeError("bad tzinfo state arg") + h, self._minute, self._second, us1, us2, us3 = string + if h > 127: + self._fold = 1 + self._hour = h - 128 + else: + self._fold = 0 + self._hour = h + self._microsecond = (((us1 << 8) | us2) << 8) | us3 + self._tzinfo = tzinfo + + def __reduce_ex__(self, protocol): + return (time, self._getstate(protocol)) + + def __reduce__(self): + return self.__reduce_ex__(2) + +_time_class = time # so functions w/ args named "time" can get at the class + +time.min = time(0, 0, 0) +time.max = time(23, 59, 59, 999999) +time.resolution = timedelta(microseconds=1) + +class datetime(date): + """datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]]) + + The year, month and day arguments are required. tzinfo may be None, or an + instance of a tzinfo subclass. The remaining arguments may be ints. + """ + __slots__ = date.__slots__ + time.__slots__ + + def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0, + microsecond=0, tzinfo=None, *, fold=0): + if isinstance(year, bytes) and len(year) == 10 and 1 <= year[2]&0x7F <= 12: + # Pickle support + self = object.__new__(cls) + self.__setstate(year, month) + self._hashcode = -1 + return self + year, month, day = _check_date_fields(year, month, day) + hour, minute, second, microsecond, fold = _check_time_fields( + hour, minute, second, microsecond, fold) + _check_tzinfo_arg(tzinfo) + self = object.__new__(cls) + self._year = year + self._month = month + self._day = day + self._hour = hour + self._minute = minute + self._second = second + self._microsecond = microsecond + self._tzinfo = tzinfo + self._hashcode = -1 + self._fold = fold + return self + + # Read-only field accessors + @property + def hour(self): + """hour (0-23)""" + return self._hour + + @property + def minute(self): + """minute (0-59)""" + return self._minute + + @property + def second(self): + """second (0-59)""" + return self._second + + @property + def microsecond(self): + """microsecond (0-999999)""" + return self._microsecond + + @property + def tzinfo(self): + """timezone info object""" + return self._tzinfo + + @property + def fold(self): + return self._fold + + @classmethod + def _fromtimestamp(cls, t, utc, tz): + """Construct a datetime from a POSIX timestamp (like time.time()). + + A timezone info object may be passed in as well. + """ + frac, t = _math.modf(t) + us = round(frac * 1e6) + if us >= 1000000: + t += 1 + us -= 1000000 + elif us < 0: + t -= 1 + us += 1000000 + + converter = _time.gmtime if utc else _time.localtime + y, m, d, hh, mm, ss, weekday, jday, dst = converter(t) + ss = min(ss, 59) # clamp out leap seconds if the platform has them + result = cls(y, m, d, hh, mm, ss, us, tz) + if tz is None: + # As of version 2015f max fold in IANA database is + # 23 hours at 1969-09-30 13:00:00 in Kwajalein. + # Let's probe 24 hours in the past to detect a transition: + max_fold_seconds = 24 * 3600 + y, m, d, hh, mm, ss = converter(t - max_fold_seconds)[:6] + probe1 = cls(y, m, d, hh, mm, ss, us, tz) + trans = result - probe1 - timedelta(0, max_fold_seconds) + if trans.days < 0: + y, m, d, hh, mm, ss = converter(t + trans // timedelta(0, 1))[:6] + probe2 = cls(y, m, d, hh, mm, ss, us, tz) + if probe2 == result: + result._fold = 1 + else: + result = tz.fromutc(result) + return result + + @classmethod + def fromtimestamp(cls, t, tz=None): + """Construct a datetime from a POSIX timestamp (like time.time()). + + A timezone info object may be passed in as well. + """ + _check_tzinfo_arg(tz) + + return cls._fromtimestamp(t, tz is not None, tz) + + @classmethod + def utcfromtimestamp(cls, t): + """Construct a naive UTC datetime from a POSIX timestamp.""" + return cls._fromtimestamp(t, True, None) + + @classmethod + def now(cls, tz=None): + "Construct a datetime from time.time() and optional time zone info." + t = _time.time() + return cls.fromtimestamp(t, tz) + + @classmethod + def utcnow(cls): + "Construct a UTC datetime from time.time()." + t = _time.time() + return cls.utcfromtimestamp(t) + + @classmethod + def combine(cls, date, time, tzinfo=True): + "Construct a datetime from a given date and a given time." + if not isinstance(date, _date_class): + raise TypeError("date argument must be a date instance") + if not isinstance(time, _time_class): + raise TypeError("time argument must be a time instance") + if tzinfo is True: + tzinfo = time.tzinfo + return cls(date.year, date.month, date.day, + time.hour, time.minute, time.second, time.microsecond, + tzinfo, fold=time.fold) + + def timetuple(self): + "Return local time tuple compatible with time.localtime()." + dst = self.dst() + if dst is None: + dst = -1 + elif dst: + dst = 1 + else: + dst = 0 + return _build_struct_time(self.year, self.month, self.day, + self.hour, self.minute, self.second, + dst) + + def _mktime(self): + """Return integer POSIX timestamp.""" + epoch = datetime(1970, 1, 1) + max_fold_seconds = 24 * 3600 + t = (self - epoch) // timedelta(0, 1) + def local(u): + y, m, d, hh, mm, ss = _time.localtime(u)[:6] + return (datetime(y, m, d, hh, mm, ss) - epoch) // timedelta(0, 1) + + # Our goal is to solve t = local(u) for u. + a = local(t) - t + u1 = t - a + t1 = local(u1) + if t1 == t: + # We found one solution, but it may not be the one we need. + # Look for an earlier solution (if `fold` is 0), or a + # later one (if `fold` is 1). + u2 = u1 + (-max_fold_seconds, max_fold_seconds)[self.fold] + b = local(u2) - u2 + if a == b: + return u1 + else: + b = t1 - u1 + assert a != b + u2 = t - b + t2 = local(u2) + if t2 == t: + return u2 + if t1 == t: + return u1 + # We have found both offsets a and b, but neither t - a nor t - b is + # a solution. This means t is in the gap. + return (max, min)[self.fold](u1, u2) + + + def timestamp(self): + "Return POSIX timestamp as float" + if self._tzinfo is None: + s = self._mktime() + return s + self.microsecond / 1e6 + else: + return (self - _EPOCH).total_seconds() + + def utctimetuple(self): + "Return UTC time tuple compatible with time.gmtime()." + offset = self.utcoffset() + if offset: + self -= offset + y, m, d = self.year, self.month, self.day + hh, mm, ss = self.hour, self.minute, self.second + return _build_struct_time(y, m, d, hh, mm, ss, 0) + + def date(self): + "Return the date part." + return date(self._year, self._month, self._day) + + def time(self): + "Return the time part, with tzinfo None." + return time(self.hour, self.minute, self.second, self.microsecond, fold=self.fold) + + def timetz(self): + "Return the time part, with same tzinfo." + return time(self.hour, self.minute, self.second, self.microsecond, + self._tzinfo, fold=self.fold) + + def replace(self, year=None, month=None, day=None, hour=None, + minute=None, second=None, microsecond=None, tzinfo=True, + *, fold=None): + """Return a new datetime with new values for the specified fields.""" + if year is None: + year = self.year + if month is None: + month = self.month + if day is None: + day = self.day + if hour is None: + hour = self.hour + if minute is None: + minute = self.minute + if second is None: + second = self.second + if microsecond is None: + microsecond = self.microsecond + if tzinfo is True: + tzinfo = self.tzinfo + if fold is None: + fold = self.fold + return type(self)(year, month, day, hour, minute, second, + microsecond, tzinfo, fold=fold) + + def _local_timezone(self): + if self.tzinfo is None: + ts = self._mktime() + else: + ts = (self - _EPOCH) // timedelta(seconds=1) + localtm = _time.localtime(ts) + local = datetime(*localtm[:6]) + try: + # Extract TZ data if available + gmtoff = localtm.tm_gmtoff + zone = localtm.tm_zone + except AttributeError: + delta = local - datetime(*_time.gmtime(ts)[:6]) + zone = _time.strftime('%Z', localtm) + tz = timezone(delta, zone) + else: + tz = timezone(timedelta(seconds=gmtoff), zone) + return tz + + def astimezone(self, tz=None): + if tz is None: + tz = self._local_timezone() + elif not isinstance(tz, tzinfo): + raise TypeError("tz argument must be an instance of tzinfo") + + mytz = self.tzinfo + if mytz is None: + mytz = self._local_timezone() + + if tz is mytz: + return self + + # Convert self to UTC, and attach the new time zone object. + myoffset = mytz.utcoffset(self) + if myoffset is None: + raise ValueError("astimezone() requires an aware datetime") + utc = (self - myoffset).replace(tzinfo=tz) + + # Convert from UTC to tz's local time. + return tz.fromutc(utc) + + # Ways to produce a string. + + def ctime(self): + "Return ctime() style string." + weekday = self.toordinal() % 7 or 7 + return "%s %s %2d %02d:%02d:%02d %04d" % ( + _DAYNAMES[weekday], + _MONTHNAMES[self._month], + self._day, + self._hour, self._minute, self._second, + self._year) + + def isoformat(self, sep='T', timespec='auto'): + """Return the time formatted according to ISO. + + The full format looks like 'YYYY-MM-DD HH:MM:SS.mmmmmm'. + By default, the fractional part is omitted if self.microsecond == 0. + + If self.tzinfo is not None, the UTC offset is also attached, giving + giving a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'. + + Optional argument sep specifies the separator between date and + time, default 'T'. + + The optional argument timespec specifies the number of additional + terms of the time to include. + """ + s = ("%04d-%02d-%02d%c" % (self._year, self._month, self._day, sep) + + _format_time(self._hour, self._minute, self._second, + self._microsecond, timespec)) + + off = self.utcoffset() + if off is not None: + if off.days < 0: + sign = "-" + off = -off + else: + sign = "+" + hh, mm = divmod(off, timedelta(hours=1)) + mm, ss = divmod(mm, timedelta(minutes=1)) + s += "%s%02d:%02d" % (sign, hh, mm) + if ss: + assert not ss.microseconds + s += ":%02d" % ss.seconds + return s + + def __repr__(self): + """Convert to formal string, for repr().""" + L = [self._year, self._month, self._day, # These are never zero + self._hour, self._minute, self._second, self._microsecond] + if L[-1] == 0: + del L[-1] + if L[-1] == 0: + del L[-1] + s = "%s.%s(%s)" % (self.__class__.__module__, + self.__class__.__qualname__, + ", ".join(map(str, L))) + if self._tzinfo is not None: + assert s[-1:] == ")" + s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")" + if self._fold: + assert s[-1:] == ")" + s = s[:-1] + ", fold=1)" + return s + + def __str__(self): + "Convert to string, for str()." + return self.isoformat(sep=' ') + + @classmethod + def strptime(cls, date_string, format): + 'string, format -> new datetime parsed from a string (like time.strptime()).' + import _strptime + return _strptime._strptime_datetime(cls, date_string, format) + + def utcoffset(self): + """Return the timezone offset in minutes east of UTC (negative west of + UTC).""" + if self._tzinfo is None: + return None + offset = self._tzinfo.utcoffset(self) + _check_utc_offset("utcoffset", offset) + return offset + + def tzname(self): + """Return the timezone name. + + Note that the name is 100% informational -- there's no requirement that + it mean anything in particular. For example, "GMT", "UTC", "-500", + "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies. + """ + if self._tzinfo is None: + return None + name = self._tzinfo.tzname(self) + _check_tzname(name) + return name + + def dst(self): + """Return 0 if DST is not in effect, or the DST offset (in minutes + eastward) if DST is in effect. + + This is purely informational; the DST offset has already been added to + the UTC offset returned by utcoffset() if applicable, so there's no + need to consult dst() unless you're interested in displaying the DST + info. + """ + if self._tzinfo is None: + return None + offset = self._tzinfo.dst(self) + _check_utc_offset("dst", offset) + return offset + + # Comparisons of datetime objects with other. + + def __eq__(self, other): + if isinstance(other, datetime): + return self._cmp(other, allow_mixed=True) == 0 + elif not isinstance(other, date): + return NotImplemented + else: + return False + + def __le__(self, other): + if isinstance(other, datetime): + return self._cmp(other) <= 0 + elif not isinstance(other, date): + return NotImplemented + else: + _cmperror(self, other) + + def __lt__(self, other): + if isinstance(other, datetime): + return self._cmp(other) < 0 + elif not isinstance(other, date): + return NotImplemented + else: + _cmperror(self, other) + + def __ge__(self, other): + if isinstance(other, datetime): + return self._cmp(other) >= 0 + elif not isinstance(other, date): + return NotImplemented + else: + _cmperror(self, other) + + def __gt__(self, other): + if isinstance(other, datetime): + return self._cmp(other) > 0 + elif not isinstance(other, date): + return NotImplemented + else: + _cmperror(self, other) + + def _cmp(self, other, allow_mixed=False): + assert isinstance(other, datetime) + mytz = self._tzinfo + ottz = other._tzinfo + myoff = otoff = None + + if mytz is ottz: + base_compare = True + else: + myoff = self.utcoffset() + otoff = other.utcoffset() + # Assume that allow_mixed means that we are called from __eq__ + if allow_mixed: + if myoff != self.replace(fold=not self.fold).utcoffset(): + return 2 + if otoff != other.replace(fold=not other.fold).utcoffset(): + return 2 + base_compare = myoff == otoff + + if base_compare: + return _cmp((self._year, self._month, self._day, + self._hour, self._minute, self._second, + self._microsecond), + (other._year, other._month, other._day, + other._hour, other._minute, other._second, + other._microsecond)) + if myoff is None or otoff is None: + if allow_mixed: + return 2 # arbitrary non-zero value + else: + raise TypeError("cannot compare naive and aware datetimes") + # XXX What follows could be done more efficiently... + diff = self - other # this will take offsets into account + if diff.days < 0: + return -1 + return diff and 1 or 0 + + def __add__(self, other): + "Add a datetime and a timedelta." + if not isinstance(other, timedelta): + return NotImplemented + delta = timedelta(self.toordinal(), + hours=self._hour, + minutes=self._minute, + seconds=self._second, + microseconds=self._microsecond) + delta += other + hour, rem = divmod(delta.seconds, 3600) + minute, second = divmod(rem, 60) + if 0 < delta.days <= _MAXORDINAL: + return datetime.combine(date.fromordinal(delta.days), + time(hour, minute, second, + delta.microseconds, + tzinfo=self._tzinfo)) + raise OverflowError("result out of range") + + __radd__ = __add__ + + def __sub__(self, other): + "Subtract two datetimes, or a datetime and a timedelta." + if not isinstance(other, datetime): + if isinstance(other, timedelta): + return self + -other + return NotImplemented + + days1 = self.toordinal() + days2 = other.toordinal() + secs1 = self._second + self._minute * 60 + self._hour * 3600 + secs2 = other._second + other._minute * 60 + other._hour * 3600 + base = timedelta(days1 - days2, + secs1 - secs2, + self._microsecond - other._microsecond) + if self._tzinfo is other._tzinfo: + return base + myoff = self.utcoffset() + otoff = other.utcoffset() + if myoff == otoff: + return base + if myoff is None or otoff is None: + raise TypeError("cannot mix naive and timezone-aware time") + return base + otoff - myoff + + def __hash__(self): + if self._hashcode == -1: + if self.fold: + t = self.replace(fold=0) + else: + t = self + tzoff = t.utcoffset() + if tzoff is None: + self._hashcode = hash(t._getstate()[0]) + else: + days = _ymd2ord(self.year, self.month, self.day) + seconds = self.hour * 3600 + self.minute * 60 + self.second + self._hashcode = hash(timedelta(days, seconds, self.microsecond) - tzoff) + return self._hashcode + + # Pickle support. + + def _getstate(self, protocol=3): + yhi, ylo = divmod(self._year, 256) + us2, us3 = divmod(self._microsecond, 256) + us1, us2 = divmod(us2, 256) + m = self._month + if self._fold and protocol > 3: + m += 128 + basestate = bytes([yhi, ylo, m, self._day, + self._hour, self._minute, self._second, + us1, us2, us3]) + if self._tzinfo is None: + return (basestate,) + else: + return (basestate, self._tzinfo) + + def __setstate(self, string, tzinfo): + if tzinfo is not None and not isinstance(tzinfo, _tzinfo_class): + raise TypeError("bad tzinfo state arg") + (yhi, ylo, m, self._day, self._hour, + self._minute, self._second, us1, us2, us3) = string + if m > 127: + self._fold = 1 + self._month = m - 128 + else: + self._fold = 0 + self._month = m + self._year = yhi * 256 + ylo + self._microsecond = (((us1 << 8) | us2) << 8) | us3 + self._tzinfo = tzinfo + + def __reduce_ex__(self, protocol): + return (self.__class__, self._getstate(protocol)) + + def __reduce__(self): + return self.__reduce_ex__(2) + + +datetime.min = datetime(1, 1, 1) +datetime.max = datetime(9999, 12, 31, 23, 59, 59, 999999) +datetime.resolution = timedelta(microseconds=1) + + +def _isoweek1monday(year): + # Helper to calculate the day number of the Monday starting week 1 + # XXX This could be done more efficiently + THURSDAY = 3 + firstday = _ymd2ord(year, 1, 1) + firstweekday = (firstday + 6) % 7 # See weekday() above + week1monday = firstday - firstweekday + if firstweekday > THURSDAY: + week1monday += 7 + return week1monday + +class timezone(tzinfo): + __slots__ = '_offset', '_name' + + # Sentinel value to disallow None + _Omitted = object() + def __new__(cls, offset, name=_Omitted): + if not isinstance(offset, timedelta): + raise TypeError("offset must be a timedelta") + if name is cls._Omitted: + if not offset: + return cls.utc + name = None + elif not isinstance(name, str): + raise TypeError("name must be a string") + if not cls._minoffset <= offset <= cls._maxoffset: + raise ValueError("offset must be a timedelta " + "strictly between -timedelta(hours=24) and " + "timedelta(hours=24).") + if (offset.microseconds != 0 or offset.seconds % 60 != 0): + raise ValueError("offset must be a timedelta " + "representing a whole number of minutes") + return cls._create(offset, name) + + @classmethod + def _create(cls, offset, name=None): + self = tzinfo.__new__(cls) + self._offset = offset + self._name = name + return self + + def __getinitargs__(self): + """pickle support""" + if self._name is None: + return (self._offset,) + return (self._offset, self._name) + + def __eq__(self, other): + if type(other) != timezone: + return False + return self._offset == other._offset + + def __hash__(self): + return hash(self._offset) + + def __repr__(self): + """Convert to formal string, for repr(). + + >>> tz = timezone.utc + >>> repr(tz) + 'datetime.timezone.utc' + >>> tz = timezone(timedelta(hours=-5), 'EST') + >>> repr(tz) + "datetime.timezone(datetime.timedelta(-1, 68400), 'EST')" + """ + if self is self.utc: + return 'datetime.timezone.utc' + if self._name is None: + return "%s.%s(%r)" % (self.__class__.__module__, + self.__class__.__qualname__, + self._offset) + return "%s.%s(%r, %r)" % (self.__class__.__module__, + self.__class__.__qualname__, + self._offset, self._name) + + def __str__(self): + return self.tzname(None) + + def utcoffset(self, dt): + if isinstance(dt, datetime) or dt is None: + return self._offset + raise TypeError("utcoffset() argument must be a datetime instance" + " or None") + + def tzname(self, dt): + if isinstance(dt, datetime) or dt is None: + if self._name is None: + return self._name_from_offset(self._offset) + return self._name + raise TypeError("tzname() argument must be a datetime instance" + " or None") + + def dst(self, dt): + if isinstance(dt, datetime) or dt is None: + return None + raise TypeError("dst() argument must be a datetime instance" + " or None") + + def fromutc(self, dt): + if isinstance(dt, datetime): + if dt.tzinfo is not self: + raise ValueError("fromutc: dt.tzinfo " + "is not self") + return dt + self._offset + raise TypeError("fromutc() argument must be a datetime instance" + " or None") + + _maxoffset = timedelta(hours=23, minutes=59) + _minoffset = -_maxoffset + + @staticmethod + def _name_from_offset(delta): + if not delta: + return 'UTC' + if delta < timedelta(0): + sign = '-' + delta = -delta + else: + sign = '+' + hours, rest = divmod(delta, timedelta(hours=1)) + minutes = rest // timedelta(minutes=1) + return 'UTC{}{:02d}:{:02d}'.format(sign, hours, minutes) + +timezone.utc = timezone._create(timedelta(0)) +timezone.min = timezone._create(timezone._minoffset) +timezone.max = timezone._create(timezone._maxoffset) +_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc) + +# Some time zone algebra. For a datetime x, let +# x.n = x stripped of its timezone -- its naive time. +# x.o = x.utcoffset(), and assuming that doesn't raise an exception or +# return None +# x.d = x.dst(), and assuming that doesn't raise an exception or +# return None +# x.s = x's standard offset, x.o - x.d +# +# Now some derived rules, where k is a duration (timedelta). +# +# 1. x.o = x.s + x.d +# This follows from the definition of x.s. +# +# 2. If x and y have the same tzinfo member, x.s = y.s. +# This is actually a requirement, an assumption we need to make about +# sane tzinfo classes. +# +# 3. The naive UTC time corresponding to x is x.n - x.o. +# This is again a requirement for a sane tzinfo class. +# +# 4. (x+k).s = x.s +# This follows from #2, and that datimetimetz+timedelta preserves tzinfo. +# +# 5. (x+k).n = x.n + k +# Again follows from how arithmetic is defined. +# +# Now we can explain tz.fromutc(x). Let's assume it's an interesting case +# (meaning that the various tzinfo methods exist, and don't blow up or return +# None when called). +# +# The function wants to return a datetime y with timezone tz, equivalent to x. +# x is already in UTC. +# +# By #3, we want +# +# y.n - y.o = x.n [1] +# +# The algorithm starts by attaching tz to x.n, and calling that y. So +# x.n = y.n at the start. Then it wants to add a duration k to y, so that [1] +# becomes true; in effect, we want to solve [2] for k: +# +# (y+k).n - (y+k).o = x.n [2] +# +# By #1, this is the same as +# +# (y+k).n - ((y+k).s + (y+k).d) = x.n [3] +# +# By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start. +# Substituting that into [3], +# +# x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving +# k - (y+k).s - (y+k).d = 0; rearranging, +# k = (y+k).s - (y+k).d; by #4, (y+k).s == y.s, so +# k = y.s - (y+k).d +# +# On the RHS, (y+k).d can't be computed directly, but y.s can be, and we +# approximate k by ignoring the (y+k).d term at first. Note that k can't be +# very large, since all offset-returning methods return a duration of magnitude +# less than 24 hours. For that reason, if y is firmly in std time, (y+k).d must +# be 0, so ignoring it has no consequence then. +# +# In any case, the new value is +# +# z = y + y.s [4] +# +# It's helpful to step back at look at [4] from a higher level: it's simply +# mapping from UTC to tz's standard time. +# +# At this point, if +# +# z.n - z.o = x.n [5] +# +# we have an equivalent time, and are almost done. The insecurity here is +# at the start of daylight time. Picture US Eastern for concreteness. The wall +# time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good +# sense then. The docs ask that an Eastern tzinfo class consider such a time to +# be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST +# on the day DST starts. We want to return the 1:MM EST spelling because that's +# the only spelling that makes sense on the local wall clock. +# +# In fact, if [5] holds at this point, we do have the standard-time spelling, +# but that takes a bit of proof. We first prove a stronger result. What's the +# difference between the LHS and RHS of [5]? Let +# +# diff = x.n - (z.n - z.o) [6] +# +# Now +# z.n = by [4] +# (y + y.s).n = by #5 +# y.n + y.s = since y.n = x.n +# x.n + y.s = since z and y are have the same tzinfo member, +# y.s = z.s by #2 +# x.n + z.s +# +# Plugging that back into [6] gives +# +# diff = +# x.n - ((x.n + z.s) - z.o) = expanding +# x.n - x.n - z.s + z.o = cancelling +# - z.s + z.o = by #2 +# z.d +# +# So diff = z.d. +# +# If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time +# spelling we wanted in the endcase described above. We're done. Contrarily, +# if z.d = 0, then we have a UTC equivalent, and are also done. +# +# If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to +# add to z (in effect, z is in tz's standard time, and we need to shift the +# local clock into tz's daylight time). +# +# Let +# +# z' = z + z.d = z + diff [7] +# +# and we can again ask whether +# +# z'.n - z'.o = x.n [8] +# +# If so, we're done. If not, the tzinfo class is insane, according to the +# assumptions we've made. This also requires a bit of proof. As before, let's +# compute the difference between the LHS and RHS of [8] (and skipping some of +# the justifications for the kinds of substitutions we've done several times +# already): +# +# diff' = x.n - (z'.n - z'.o) = replacing z'.n via [7] +# x.n - (z.n + diff - z'.o) = replacing diff via [6] +# x.n - (z.n + x.n - (z.n - z.o) - z'.o) = +# x.n - z.n - x.n + z.n - z.o + z'.o = cancel x.n +# - z.n + z.n - z.o + z'.o = cancel z.n +# - z.o + z'.o = #1 twice +# -z.s - z.d + z'.s + z'.d = z and z' have same tzinfo +# z'.d - z.d +# +# So z' is UTC-equivalent to x iff z'.d = z.d at this point. If they are equal, +# we've found the UTC-equivalent so are done. In fact, we stop with [7] and +# return z', not bothering to compute z'.d. +# +# How could z.d and z'd differ? z' = z + z.d [7], so merely moving z' by +# a dst() offset, and starting *from* a time already in DST (we know z.d != 0), +# would have to change the result dst() returns: we start in DST, and moving +# a little further into it takes us out of DST. +# +# There isn't a sane case where this can happen. The closest it gets is at +# the end of DST, where there's an hour in UTC with no spelling in a hybrid +# tzinfo class. In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT. During +# that hour, on an Eastern clock 1:MM is taken as being in standard time (6:MM +# UTC) because the docs insist on that, but 0:MM is taken as being in daylight +# time (4:MM UTC). There is no local time mapping to 5:MM UTC. The local +# clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in +# standard time. Since that's what the local clock *does*, we want to map both +# UTC hours 5:MM and 6:MM to 1:MM Eastern. The result is ambiguous +# in local time, but so it goes -- it's the way the local clock works. +# +# When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0, +# so z=0:MM. z.d=60 (minutes) then, so [5] doesn't hold and we keep going. +# z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8] +# (correctly) concludes that z' is not UTC-equivalent to x. +# +# Because we know z.d said z was in daylight time (else [5] would have held and +# we would have stopped then), and we know z.d != z'.d (else [8] would have held +# and we have stopped then), and there are only 2 possible values dst() can +# return in Eastern, it follows that z'.d must be 0 (which it is in the example, +# but the reasoning doesn't depend on the example -- it depends on there being +# two possible dst() outcomes, one zero and the other non-zero). Therefore +# z' must be in standard time, and is the spelling we want in this case. +# +# Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is +# concerned (because it takes z' as being in standard time rather than the +# daylight time we intend here), but returning it gives the real-life "local +# clock repeats an hour" behavior when mapping the "unspellable" UTC hour into +# tz. +# +# When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with +# the 1:MM standard time spelling we want. +# +# So how can this break? One of the assumptions must be violated. Two +# possibilities: +# +# 1) [2] effectively says that y.s is invariant across all y belong to a given +# time zone. This isn't true if, for political reasons or continental drift, +# a region decides to change its base offset from UTC. +# +# 2) There may be versions of "double daylight" time where the tail end of +# the analysis gives up a step too early. I haven't thought about that +# enough to say. +# +# In any case, it's clear that the default fromutc() is strong enough to handle +# "almost all" time zones: so long as the standard offset is invariant, it +# doesn't matter if daylight time transition points change from year to year, or +# if daylight time is skipped in some years; it doesn't matter how large or +# small dst() may get within its bounds; and it doesn't even matter if some +# perverse time zone returns a negative dst()). So a breaking case must be +# pretty bizarre, and a tzinfo subclass can override fromutc() if it is. + +try: + from _datetime import * +except ImportError: + pass +else: + # Clean up unused names + del (_DAYNAMES, _DAYS_BEFORE_MONTH, _DAYS_IN_MONTH, _DI100Y, _DI400Y, + _DI4Y, _EPOCH, _MAXORDINAL, _MONTHNAMES, _build_struct_time, + _check_date_fields, _check_int_field, _check_time_fields, + _check_tzinfo_arg, _check_tzname, _check_utc_offset, _cmp, _cmperror, + _date_class, _days_before_month, _days_before_year, _days_in_month, + _format_time, _is_leap, _isoweek1monday, _math, _ord2ymd, + _time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord, + _divide_and_round) + # XXX Since import * above excludes names that start with _, + # docstring does not get overwritten. In the future, it may be + # appropriate to maintain a single module level docstring and + # remove the following line. + from _datetime import __doc__ diff --git a/modules/language/python/module/email.py b/modules/language/python/module/email.py new file mode 100644 index 0000000..f50af12 --- /dev/null +++ b/modules/language/python/module/email.py @@ -0,0 +1,49 @@ +module(email) + +# Copyright (C) 2001-2007 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""A package for parsing, handling, and generating email messages.""" + +__all__ = [ + 'message_from_file', + 'message_from_binary_file', + 'message_from_string', + 'message_from_bytes', + ] + +# Some convenience routines. Don't import Parser and Message as side-effects +# of importing email since those cascadingly import most of the rest of the +# email package. +def message_from_string(s, *args, **kws): + """Parse a string into a Message object model. + + Optional _class and strict are passed to the Parser constructor. + """ + import email + return email.Parser(*args, **kws).parsestr(s) + +def message_from_bytes(s, *args, **kws): + """Parse a bytes string into a Message object model. + + Optional _class and strict are passed to the Parser constructor. + """ + import email + return email.BytesParser(*args, **kws).parsebytes(s) + +def message_from_file(fp, *args, **kws): + """Read a file and parse its contents into a Message object model. + + Optional _class and strict are passed to the Parser constructor. + """ + import email + return email.Parser(*args, **kws).parse(fp) + +def message_from_binary_file(fp, *args, **kws): + """Read a binary file and parse its contents into a Message object model. + + Optional _class and strict are passed to the Parser constructor. + """ + import email + return email.BytesParser(*args, **kws).parse(fp) diff --git a/modules/language/python/module/email/_parseaddr.py b/modules/language/python/module/email/_parseaddr.py new file mode 100644 index 0000000..0f53dab --- /dev/null +++ b/modules/language/python/module/email/_parseaddr.py @@ -0,0 +1,541 @@ +module(email,_parseaddr) +# Copyright (C) 2002-2007 Python Software Foundation +# Contact: email-sig@python.org + +"""Email address parsing code. + +Lifted directly from rfc822.py. This should eventually be rewritten. +""" + +__all__ = [ + 'mktime_tz', + 'parsedate', + 'parsedate_tz', + 'quote', + ] + +import time, calendar + +SPACE = ' ' +EMPTYSTRING = '' +COMMASPACE = ', ' + +# Parse a date field +_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', + 'aug', 'sep', 'oct', 'nov', 'dec', + 'january', 'february', 'march', 'april', 'may', 'june', 'july', + 'august', 'september', 'october', 'november', 'december'] + +_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] + +# The timezone table does not include the military time zones defined +# in RFC822, other than Z. According to RFC1123, the description in +# RFC822 gets the signs wrong, so we can't rely on any such time +# zones. RFC1123 recommends that numeric timezone indicators be used +# instead of timezone names. + +_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0, + 'AST': -400, 'ADT': -300, # Atlantic (used in Canada) + 'EST': -500, 'EDT': -400, # Eastern + 'CST': -600, 'CDT': -500, # Central + 'MST': -700, 'MDT': -600, # Mountain + 'PST': -800, 'PDT': -700 # Pacific + } + + +def parsedate_tz(data): + """Convert a date string to a time tuple. + + Accounts for military timezones. + """ + res = _parsedate_tz(data) + if not res: + return + if res[9] is None: + res[9] = 0 + return tuple(res) + +def _parsedate_tz(data): + """Convert date to extended time tuple. + + The last (additional) element is the time zone offset in seconds, except if + the timezone was specified as -0000. In that case the last element is + None. This indicates a UTC timestamp that explicitly declaims knowledge of + the source timezone, as opposed to a +0000 timestamp that indicates the + source timezone really was UTC. + + """ + if not data: + return + data = data.split() + # The FWS after the comma after the day-of-week is optional, so search and + # adjust for this. + if data[0].endswith(',') or data[0].lower() in _daynames: + # There's a dayname here. Skip it + del data[0] + else: + i = data[0].rfind(',') + if i >= 0: + data[0] = data[0][i+1:] + if len(data) == 3: # RFC 850 date, deprecated + stuff = data[0].split('-') + if len(stuff) == 3: + data = stuff + data[1:] + if len(data) == 4: + s = data[3] + i = s.find('+') + if i == -1: + i = s.find('-') + if i > 0: + data[3:] = [s[:i], s[i:]] + else: + data.append('') # Dummy tz + if len(data) < 5: + return None + data = data[:5] + [dd, mm, yy, tm, tz] = data + mm = mm.lower() + if mm not in _monthnames: + dd, mm = mm, dd.lower() + if mm not in _monthnames: + return None + mm = _monthnames.index(mm) + 1 + if mm > 12: + mm -= 12 + if dd[-1] == ',': + dd = dd[:-1] + i = yy.find(':') + if i > 0: + yy, tm = tm, yy + if yy[-1] == ',': + yy = yy[:-1] + if not yy[0].isdigit(): + yy, tz = tz, yy + if tm[-1] == ',': + tm = tm[:-1] + tm = tm.split(':') + if len(tm) == 2: + [thh, tmm] = tm + tss = '0' + elif len(tm) == 3: + [thh, tmm, tss] = tm + elif len(tm) == 1 and '.' in tm[0]: + # Some non-compliant MUAs use '.' to separate time elements. + tm = tm[0].split('.') + if len(tm) == 2: + [thh, tmm] = tm + tss = 0 + elif len(tm) == 3: + [thh, tmm, tss] = tm + else: + return None + try: + yy = int(yy) + dd = int(dd) + thh = int(thh) + tmm = int(tmm) + tss = int(tss) + except ValueError: + return None + # Check for a yy specified in two-digit format, then convert it to the + # appropriate four-digit format, according to the POSIX standard. RFC 822 + # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) + # mandates a 4-digit yy. For more information, see the documentation for + # the time module. + if yy < 100: + # The year is between 1969 and 1999 (inclusive). + if yy > 68: + yy += 1900 + # The year is between 2000 and 2068 (inclusive). + else: + yy += 2000 + tzoffset = None + tz = tz.upper() + if tz in _timezones: + tzoffset = _timezones[tz] + else: + try: + tzoffset = int(tz) + except ValueError: + pass + if tzoffset==0 and tz.startswith('-'): + tzoffset = None + # Convert a timezone offset into seconds ; -0500 -> -18000 + if tzoffset: + if tzoffset < 0: + tzsign = -1 + tzoffset = -tzoffset + else: + tzsign = 1 + tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60) + # Daylight Saving Time flag is set to -1, since DST is unknown. + return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset] + + +def parsedate(data): + """Convert a time string to a time tuple.""" + t = parsedate_tz(data) + if isinstance(t, tuple): + return t[:9] + else: + return t + + +def mktime_tz(data): + """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.""" + if data[9] is None: + # No zone info, so localtime is better assumption than GMT + return time.mktime(data[:8] + (-1,)) + else: + t = calendar.timegm(data) + return t - data[9] + + +def quote(str): + """Prepare string to be used in a quoted string. + + Turns backslash and double quote characters into quoted pairs. These + are the only characters that need to be quoted inside a quoted string. + Does not add the surrounding double quotes. + """ + return str.replace('\\', '\\\\').replace('"', '\\"') + + +class AddrlistClass: + """Address parser class by Ben Escoto. + + To understand what this class does, it helps to have a copy of RFC 2822 in + front of you. + + Note: this class interface is deprecated and may be removed in the future. + Use email.utils.AddressList instead. + """ + + def __init__(self, field): + """Initialize a new instance. + + `field' is an unparsed address header field, containing + one or more addresses. + """ + self.specials = '()<>@,:;.\"[]' + self.pos = 0 + self.LWS = ' \t' + self.CR = '\r\n' + self.FWS = self.LWS + self.CR + self.atomends = self.specials + self.LWS + self.CR + # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it + # is obsolete syntax. RFC 2822 requires that we recognize obsolete + # syntax, so allow dots in phrases. + self.phraseends = self.atomends.replace('.', '') + self.field = field + self.commentlist = [] + + def gotonext(self): + """Skip white space and extract comments.""" + wslist = [] + while self.pos < len(self.field): + if self.field[self.pos] in self.LWS + '\n\r': + if self.field[self.pos] not in '\n\r': + wslist.append(self.field[self.pos]) + self.pos += 1 + elif self.field[self.pos] == '(': + self.commentlist.append(self.getcomment()) + else: + break + return EMPTYSTRING.join(wslist) + + def getaddrlist(self): + """Parse all addresses. + + Returns a list containing all of the addresses. + """ + result = [] + while self.pos < len(self.field): + ad = self.getaddress() + if ad: + result += ad + else: + result.append(('', '')) + return result + + def getaddress(self): + """Parse the next address.""" + self.commentlist = [] + self.gotonext() + + oldpos = self.pos + oldcl = self.commentlist + plist = self.getphraselist() + + self.gotonext() + returnlist = [] + + if self.pos >= len(self.field): + # Bad email address technically, no domain. + if plist: + returnlist = [(SPACE.join(self.commentlist), plist[0])] + + elif self.field[self.pos] in '.@': + # email address is just an addrspec + # this isn't very efficient since we start over + self.pos = oldpos + self.commentlist = oldcl + addrspec = self.getaddrspec() + returnlist = [(SPACE.join(self.commentlist), addrspec)] + + elif self.field[self.pos] == ':': + # address is a group + returnlist = [] + + fieldlen = len(self.field) + self.pos += 1 + while self.pos < len(self.field): + self.gotonext() + if self.pos < fieldlen and self.field[self.pos] == ';': + self.pos += 1 + break + returnlist = returnlist + self.getaddress() + + elif self.field[self.pos] == '<': + # Address is a phrase then a route addr + routeaddr = self.getrouteaddr() + + if self.commentlist: + returnlist = [(SPACE.join(plist) + ' (' + + ' '.join(self.commentlist) + ')', routeaddr)] + else: + returnlist = [(SPACE.join(plist), routeaddr)] + + else: + if plist: + returnlist = [(SPACE.join(self.commentlist), plist[0])] + elif self.field[self.pos] in self.specials: + self.pos += 1 + + self.gotonext() + if self.pos < len(self.field) and self.field[self.pos] == ',': + self.pos += 1 + return returnlist + + def getrouteaddr(self): + """Parse a route address (Return-path value). + + This method just skips all the route stuff and returns the addrspec. + """ + if self.field[self.pos] != '<': + return + + expectroute = False + self.pos += 1 + self.gotonext() + adlist = '' + while self.pos < len(self.field): + if expectroute: + self.getdomain() + expectroute = False + elif self.field[self.pos] == '>': + self.pos += 1 + break + elif self.field[self.pos] == '@': + self.pos += 1 + expectroute = True + elif self.field[self.pos] == ':': + self.pos += 1 + else: + adlist = self.getaddrspec() + self.pos += 1 + break + self.gotonext() + + return adlist + + def getaddrspec(self): + """Parse an RFC 2822 addr-spec.""" + aslist = [] + + self.gotonext() + while self.pos < len(self.field): + preserve_ws = True + if self.field[self.pos] == '.': + if aslist and not aslist[-1].strip(): + aslist.pop() + aslist.append('.') + self.pos += 1 + preserve_ws = False + elif self.field[self.pos] == '"': + aslist.append('"%s"' % quote(self.getquote())) + elif self.field[self.pos] in self.atomends: + if aslist and not aslist[-1].strip(): + aslist.pop() + break + else: + aslist.append(self.getatom()) + ws = self.gotonext() + if preserve_ws and ws: + aslist.append(ws) + + if self.pos >= len(self.field) or self.field[self.pos] != '@': + return EMPTYSTRING.join(aslist) + + aslist.append('@') + self.pos += 1 + self.gotonext() + return EMPTYSTRING.join(aslist) + self.getdomain() + + def getdomain(self): + """Get the complete domain name from an address.""" + sdlist = [] + while self.pos < len(self.field): + if self.field[self.pos] in self.LWS: + self.pos += 1 + elif self.field[self.pos] == '(': + self.commentlist.append(self.getcomment()) + elif self.field[self.pos] == '[': + sdlist.append(self.getdomainliteral()) + elif self.field[self.pos] == '.': + self.pos += 1 + sdlist.append('.') + elif self.field[self.pos] in self.atomends: + break + else: + sdlist.append(self.getatom()) + return EMPTYSTRING.join(sdlist) + + def getdelimited(self, beginchar, endchars, allowcomments=True): + """Parse a header fragment delimited by special characters. + + `beginchar' is the start character for the fragment. + If self is not looking at an instance of `beginchar' then + getdelimited returns the empty string. + + `endchars' is a sequence of allowable end-delimiting characters. + Parsing stops when one of these is encountered. + + If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed + within the parsed fragment. + """ + if self.field[self.pos] != beginchar: + return '' + + slist = [''] + quote = False + self.pos += 1 + while self.pos < len(self.field): + if quote: + slist.append(self.field[self.pos]) + quote = False + elif self.field[self.pos] in endchars: + self.pos += 1 + break + elif allowcomments and self.field[self.pos] == '(': + slist.append(self.getcomment()) + continue # have already advanced pos from getcomment + elif self.field[self.pos] == '\\': + quote = True + else: + slist.append(self.field[self.pos]) + self.pos += 1 + + return EMPTYSTRING.join(slist) + + def getquote(self): + """Get a quote-delimited fragment from self's field.""" + return self.getdelimited('"', '"\r', False) + + def getcomment(self): + """Get a parenthesis-delimited fragment from self's field.""" + return self.getdelimited('(', ')\r', True) + + def getdomainliteral(self): + """Parse an RFC 2822 domain-literal.""" + return '[%s]' % self.getdelimited('[', ']\r', False) + + def getatom(self, atomends=None): + """Parse an RFC 2822 atom. + + Optional atomends specifies a different set of end token delimiters + (the default is to use self.atomends). This is used e.g. in + getphraselist() since phrase endings must not include the `.' (which + is legal in phrases).""" + atomlist = [''] + if atomends is None: + atomends = self.atomends + + while self.pos < len(self.field): + if self.field[self.pos] in atomends: + break + else: + atomlist.append(self.field[self.pos]) + self.pos += 1 + + return EMPTYSTRING.join(atomlist) + + def getphraselist(self): + """Parse a sequence of RFC 2822 phrases. + + A phrase is a sequence of words, which are in turn either RFC 2822 + atoms or quoted-strings. Phrases are canonicalized by squeezing all + runs of continuous whitespace into one space. + """ + plist = [] + + while self.pos < len(self.field): + if self.field[self.pos] in self.FWS: + self.pos += 1 + elif self.field[self.pos] == '"': + plist.append(self.getquote()) + elif self.field[self.pos] == '(': + self.commentlist.append(self.getcomment()) + elif self.field[self.pos] in self.phraseends: + break + else: + plist.append(self.getatom(self.phraseends)) + + return plist + +class AddressList(AddrlistClass): + """An AddressList encapsulates a list of parsed RFC 2822 addresses.""" + def __init__(self, field): + AddrlistClass.__init__(self, field) + if field: + self.addresslist = self.getaddrlist() + else: + self.addresslist = [] + + def __len__(self): + return len(self.addresslist) + + def __add__(self, other): + # Set union + newaddr = AddressList(None) + newaddr.addresslist = self.addresslist[:] + for x in other.addresslist: + if not x in self.addresslist: + newaddr.addresslist.append(x) + return newaddr + + def __iadd__(self, other): + # Set union, in-place + for x in other.addresslist: + if not x in self.addresslist: + self.addresslist.append(x) + return self + + def __sub__(self, other): + # Set difference + newaddr = AddressList(None) + for x in self.addresslist: + if not x in other.addresslist: + newaddr.addresslist.append(x) + return newaddr + + def __isub__(self, other): + # Set difference, in-place + for x in other.addresslist: + if x in self.addresslist: + self.addresslist.remove(x) + return self + + def __getitem__(self, index): + # Make indexing, slices, and 'in' work + return self.addresslist[index] diff --git a/modules/language/python/module/email/base64mime.py b/modules/language/python/module/email/base64mime.py new file mode 100644 index 0000000..26eb1ad --- /dev/null +++ b/modules/language/python/module/email/base64mime.py @@ -0,0 +1,119 @@ +module(email,base64mime) + +# Copyright (C) 2002-2007 Python Software Foundation +# Author: Ben Gertzfield +# Contact: email-sig@python.org + +"""Base64 content transfer encoding per RFCs 2045-2047. + +This module handles the content transfer encoding method defined in RFC 2045 +to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit +characters encoding known as Base64. + +It is used in the MIME standards for email to attach images, audio, and text +using some 8-bit character sets to messages. + +This module provides an interface to encode and decode both headers and bodies +with Base64 encoding. + +RFC 2045 defines a method for including character set information in an +`encoded-word' in a header. This method is commonly used for 8-bit real names +in To:, From:, Cc:, etc. fields, as well as Subject: lines. + +This module does not do the line wrapping or end-of-line character conversion +necessary for proper internationalized headers; it only does dumb encoding and +decoding. To deal with the various line wrapping issues, use the email.header +module. +""" + +__all__ = [ + 'body_decode', + 'body_encode', + 'decode', + 'decodestring', + 'header_encode', + 'header_length', + ] + +from base64 import b64encode +from binascii import b2a_base64, a2b_base64 + +CRLF = '\r\n' +NL = '\n' +EMPTYSTRING = '' + +# See also Charset.py +MISC_LEN = 7 + + +# Helpers +def header_length(bytearray): + """Return the length of s when it is encoded with base64.""" + groups_of_3, leftover = divmod(len(bytearray), 3) + # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in. + n = groups_of_3 * 4 + if leftover: + n += 4 + return n + + + +def header_encode(header_bytes, charset='iso-8859-1'): + """Encode a single header line with Base64 encoding in a given charset. + + charset names the character set to use to encode the header. It defaults + to iso-8859-1. Base64 encoding is defined in RFC 2045. + """ + if not header_bytes: + return "" + if isinstance(header_bytes, str): + header_bytes = header_bytes.encode(charset) + encoded = b64encode(header_bytes).decode("ascii") + return '=?%s?b?%s?=' % (charset, encoded) + + + +def body_encode(s, maxlinelen=76, eol=NL): + r"""Encode a string with base64. + + Each line will be wrapped at, at most, maxlinelen characters (defaults to + 76 characters). + + Each line of encoded text will end with eol, which defaults to "\n". Set + this to "\r\n" if you will be using the result of this function directly + in an email. + """ + if not s: + return s + + encvec = [] + max_unencoded = maxlinelen * 3 // 4 + for i in range(0, len(s), max_unencoded): + # BAW: should encode() inherit b2a_base64()'s dubious behavior in + # adding a newline to the encoded string? + enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii") + if enc.endswith(NL) and eol != NL: + enc = enc[:-1] + eol + encvec.append(enc) + return EMPTYSTRING.join(encvec) + + + +def decode(string): + """Decode a raw base64 string, returning a bytes object. + + This function does not parse a full MIME header value encoded with + base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high + level email.header class for that functionality. + """ + if not string: + return bytes() + elif isinstance(string, str): + return a2b_base64(string.encode('raw-unicode-escape')) + else: + return a2b_base64(string) + + +# For convenience and backwards compatibility w/ standard base64 module +body_decode = decode +decodestring = decode diff --git a/modules/language/python/module/email/charset.py b/modules/language/python/module/email/charset.py new file mode 100644 index 0000000..ba5a444 --- /dev/null +++ b/modules/language/python/module/email/charset.py @@ -0,0 +1,408 @@ +module(email,charset) + +# Copyright (C) 2001-2007 Python Software Foundation +# Author: Ben Gertzfield, Barry Warsaw +# Contact: email-sig@python.org + +__all__ = [ + 'Charset', + 'add_alias', + 'add_charset', + 'add_codec', + ] + +from functools import partial + +import email.base64mime as base64mime +import email.quoprimime as quoprimime + +import email.errors as errors +from email.encoders import encode_7or8bit + + + +# Flags for types of header encodings +QP = 1 # Quoted-Printable +BASE64 = 2 # Base64 +SHORTEST = 3 # the shorter of QP and base64, but only for headers + +# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7 +RFC2047_CHROME_LEN = 7 + +DEFAULT_CHARSET = 'us-ascii' +UNKNOWN8BIT = 'unknown-8bit' +EMPTYSTRING = '' + + + +# Defaults +CHARSETS = { + # input header enc body enc output conv + 'iso-8859-1': (QP, QP, None), + 'iso-8859-2': (QP, QP, None), + 'iso-8859-3': (QP, QP, None), + 'iso-8859-4': (QP, QP, None), + # iso-8859-5 is Cyrillic, and not especially used + # iso-8859-6 is Arabic, also not particularly used + # iso-8859-7 is Greek, QP will not make it readable + # iso-8859-8 is Hebrew, QP will not make it readable + 'iso-8859-9': (QP, QP, None), + 'iso-8859-10': (QP, QP, None), + # iso-8859-11 is Thai, QP will not make it readable + 'iso-8859-13': (QP, QP, None), + 'iso-8859-14': (QP, QP, None), + 'iso-8859-15': (QP, QP, None), + 'iso-8859-16': (QP, QP, None), + 'windows-1252':(QP, QP, None), + 'viscii': (QP, QP, None), + 'us-ascii': (None, None, None), + 'big5': (BASE64, BASE64, None), + 'gb2312': (BASE64, BASE64, None), + 'euc-jp': (BASE64, None, 'iso-2022-jp'), + 'shift_jis': (BASE64, None, 'iso-2022-jp'), + 'iso-2022-jp': (BASE64, None, None), + 'koi8-r': (BASE64, BASE64, None), + 'utf-8': (SHORTEST, BASE64, 'utf-8'), + } + +# Aliases for other commonly-used names for character sets. Map +# them to the real ones used in email. +ALIASES = { + 'latin_1': 'iso-8859-1', + 'latin-1': 'iso-8859-1', + 'latin_2': 'iso-8859-2', + 'latin-2': 'iso-8859-2', + 'latin_3': 'iso-8859-3', + 'latin-3': 'iso-8859-3', + 'latin_4': 'iso-8859-4', + 'latin-4': 'iso-8859-4', + 'latin_5': 'iso-8859-9', + 'latin-5': 'iso-8859-9', + 'latin_6': 'iso-8859-10', + 'latin-6': 'iso-8859-10', + 'latin_7': 'iso-8859-13', + 'latin-7': 'iso-8859-13', + 'latin_8': 'iso-8859-14', + 'latin-8': 'iso-8859-14', + 'latin_9': 'iso-8859-15', + 'latin-9': 'iso-8859-15', + 'latin_10':'iso-8859-16', + 'latin-10':'iso-8859-16', + 'cp949': 'ks_c_5601-1987', + 'euc_jp': 'euc-jp', + 'euc_kr': 'euc-kr', + 'ascii': 'us-ascii', + } + + +# Map charsets to their Unicode codec strings. +CODEC_MAP = { + 'gb2312': 'eucgb2312_cn', + 'big5': 'big5_tw', + # Hack: We don't want *any* conversion for stuff marked us-ascii, as all + # sorts of garbage might be sent to us in the guise of 7-bit us-ascii. + # Let that stuff pass through without conversion to/from Unicode. + 'us-ascii': None, + } + + + +# Convenience functions for extending the above mappings +def add_charset(charset, header_enc=None, body_enc=None, output_charset=None): + """Add character set properties to the global registry. + + charset is the input character set, and must be the canonical name of a + character set. + + Optional header_enc and body_enc is either Charset.QP for + quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for + the shortest of qp or base64 encoding, or None for no encoding. SHORTEST + is only valid for header_enc. It describes how message headers and + message bodies in the input charset are to be encoded. Default is no + encoding. + + Optional output_charset is the character set that the output should be + in. Conversions will proceed from input charset, to Unicode, to the + output charset when the method Charset.convert() is called. The default + is to output in the same character set as the input. + + Both input_charset and output_charset must have Unicode codec entries in + the module's charset-to-codec mapping; use add_codec(charset, codecname) + to add codecs the module does not know about. See the codecs module's + documentation for more information. + """ + if body_enc == SHORTEST: + raise ValueError('SHORTEST not allowed for body_enc') + CHARSETS[charset] = (header_enc, body_enc, output_charset) + + +def add_alias(alias, canonical): + """Add a character set alias. + + alias is the alias name, e.g. latin-1 + canonical is the character set's canonical name, e.g. iso-8859-1 + """ + ALIASES[alias] = canonical + + +def add_codec(charset, codecname): + """Add a codec that map characters in the given charset to/from Unicode. + + charset is the canonical name of a character set. codecname is the name + of a Python codec, as appropriate for the second argument to the unicode() + built-in, or to the encode() method of a Unicode string. + """ + CODEC_MAP[charset] = codecname + + + +# Convenience function for encoding strings, taking into account +# that they might be unknown-8bit (ie: have surrogate-escaped bytes) +def _encode(string, codec): + if codec == UNKNOWN8BIT: + return string.encode('ascii', 'surrogateescape') + else: + return string.encode(codec) + + + +class Charset: + """Map character sets to their email properties. + + This class provides information about the requirements imposed on email + for a specific character set. It also provides convenience routines for + converting between character sets, given the availability of the + applicable codecs. Given a character set, it will do its best to provide + information on how to use that character set in an email in an + RFC-compliant way. + + Certain character sets must be encoded with quoted-printable or base64 + when used in email headers or bodies. Certain character sets must be + converted outright, and are not allowed in email. Instances of this + module expose the following information about a character set: + + input_charset: The initial character set specified. Common aliases + are converted to their `official' email names (e.g. latin_1 + is converted to iso-8859-1). Defaults to 7-bit us-ascii. + + header_encoding: If the character set must be encoded before it can be + used in an email header, this attribute will be set to + Charset.QP (for quoted-printable), Charset.BASE64 (for + base64 encoding), or Charset.SHORTEST for the shortest of + QP or BASE64 encoding. Otherwise, it will be None. + + body_encoding: Same as header_encoding, but describes the encoding for the + mail message's body, which indeed may be different than the + header encoding. Charset.SHORTEST is not allowed for + body_encoding. + + output_charset: Some character sets must be converted before they can be + used in email headers or bodies. If the input_charset is + one of them, this attribute will contain the name of the + charset output will be converted to. Otherwise, it will + be None. + + input_codec: The name of the Python codec used to convert the + input_charset to Unicode. If no conversion codec is + necessary, this attribute will be None. + + output_codec: The name of the Python codec used to convert Unicode + to the output_charset. If no conversion codec is necessary, + this attribute will have the same value as the input_codec. + """ + def __init__(self, input_charset=DEFAULT_CHARSET): + # RFC 2046, $4.1.2 says charsets are not case sensitive. We coerce to + # unicode because its .lower() is locale insensitive. If the argument + # is already a unicode, we leave it at that, but ensure that the + # charset is ASCII, as the standard (RFC XXX) requires. + try: + if isinstance(input_charset, str): + input_charset.encode('ascii') + else: + input_charset = str(input_charset, 'ascii') + except UnicodeError: + raise errors.CharsetError(input_charset) + input_charset = input_charset.lower() + # Set the input charset after filtering through the aliases + self.input_charset = ALIASES.get(input_charset, input_charset) + # We can try to guess which encoding and conversion to use by the + # charset_map dictionary. Try that first, but let the user override + # it. + henc, benc, conv = CHARSETS.get(self.input_charset, + (SHORTEST, BASE64, None)) + if not conv: + conv = self.input_charset + # Set the attributes, allowing the arguments to override the default. + self.header_encoding = henc + self.body_encoding = benc + self.output_charset = ALIASES.get(conv, conv) + # Now set the codecs. If one isn't defined for input_charset, + # guess and try a Unicode codec with the same name as input_codec. + self.input_codec = CODEC_MAP.get(self.input_charset, + self.input_charset) + self.output_codec = CODEC_MAP.get(self.output_charset, + self.output_charset) + + def __str__(self): + return self.input_charset.lower() + + __repr__ = __str__ + + def __eq__(self, other): + return str(self) == str(other).lower() + + def get_body_encoding(self): + """Return the content-transfer-encoding used for body encoding. + + This is either the string `quoted-printable' or `base64' depending on + the encoding used, or it is a function in which case you should call + the function with a single argument, the Message object being + encoded. The function should then set the Content-Transfer-Encoding + header itself to whatever is appropriate. + + Returns "quoted-printable" if self.body_encoding is QP. + Returns "base64" if self.body_encoding is BASE64. + Returns conversion function otherwise. + """ + assert self.body_encoding != SHORTEST + if self.body_encoding == QP: + return 'quoted-printable' + elif self.body_encoding == BASE64: + return 'base64' + else: + return encode_7or8bit + + def get_output_charset(self): + """Return the output character set. + + This is self.output_charset if that is not None, otherwise it is + self.input_charset. + """ + return self.output_charset or self.input_charset + + def header_encode(self, string): + """Header-encode a string by converting it first to bytes. + + The type of encoding (base64 or quoted-printable) will be based on + this charset's `header_encoding`. + + :param string: A unicode string for the header. It must be possible + to encode this string to bytes using the character set's + output codec. + :return: The encoded string, with RFC 2047 chrome. + """ + codec = self.output_codec or 'us-ascii' + header_bytes = _encode(string, codec) + # 7bit/8bit encodings return the string unchanged (modulo conversions) + encoder_module = self._get_encoder(header_bytes) + if encoder_module is None: + return string + return encoder_module.header_encode(header_bytes, codec) + + def header_encode_lines(self, string, maxlengths): + """Header-encode a string by converting it first to bytes. + + This is similar to `header_encode()` except that the string is fit + into maximum line lengths as given by the argument. + + :param string: A unicode string for the header. It must be possible + to encode this string to bytes using the character set's + output codec. + :param maxlengths: Maximum line length iterator. Each element + returned from this iterator will provide the next maximum line + length. This parameter is used as an argument to built-in next() + and should never be exhausted. The maximum line lengths should + not count the RFC 2047 chrome. These line lengths are only a + hint; the splitter does the best it can. + :return: Lines of encoded strings, each with RFC 2047 chrome. + """ + # See which encoding we should use. + codec = self.output_codec or 'us-ascii' + header_bytes = _encode(string, codec) + encoder_module = self._get_encoder(header_bytes) + encoder = partial(encoder_module.header_encode, charset=codec) + # Calculate the number of characters that the RFC 2047 chrome will + # contribute to each line. + charset = self.get_output_charset() + extra = len(charset) + RFC2047_CHROME_LEN + # Now comes the hard part. We must encode bytes but we can't split on + # bytes because some character sets are variable length and each + # encoded word must stand on its own. So the problem is you have to + # encode to bytes to figure out this word's length, but you must split + # on characters. This causes two problems: first, we don't know how + # many octets a specific substring of unicode characters will get + # encoded to, and second, we don't know how many ASCII characters + # those octets will get encoded to. Unless we try it. Which seems + # inefficient. In the interest of being correct rather than fast (and + # in the hope that there will be few encoded headers in any such + # message), brute force it. :( + lines = [] + current_line = [] + maxlen = next(maxlengths) - extra + for character in string: + current_line.append(character) + this_line = EMPTYSTRING.join(current_line) + length = encoder_module.header_length(_encode(this_line, charset)) + if length > maxlen: + # This last character doesn't fit so pop it off. + current_line.pop() + # Does nothing fit on the first line? + if not lines and not current_line: + lines.append(None) + else: + separator = (' ' if lines else '') + joined_line = EMPTYSTRING.join(current_line) + header_bytes = _encode(joined_line, codec) + lines.append(encoder(header_bytes)) + current_line = [character] + maxlen = next(maxlengths) - extra + joined_line = EMPTYSTRING.join(current_line) + header_bytes = _encode(joined_line, codec) + lines.append(encoder(header_bytes)) + return lines + + def _get_encoder(self, header_bytes): + if self.header_encoding == BASE64: + return base64mime + elif self.header_encoding == QP: + return quoprimime + elif self.header_encoding == SHORTEST: + len64 = base64mime.header_length(header_bytes) + lenqp = quoprimime.header_length(header_bytes) + if len64 < lenqp: + return base64mime + else: + return quoprimime + else: + return None + + def body_encode(self, string): + """Body-encode a string by converting it first to bytes. + + The type of encoding (base64 or quoted-printable) will be based on + self.body_encoding. If body_encoding is None, we assume the + output charset is a 7bit encoding, so re-encoding the decoded + string using the ascii codec produces the correct string version + of the content. + """ + if not string: + return string + if self.body_encoding is BASE64: + if isinstance(string, str): + string = string.encode(self.output_charset) + return base64mime.body_encode(string) + elif self.body_encoding is QP: + # quopromime.body_encode takes a string, but operates on it as if + # it were a list of byte codes. For a (minimal) history on why + # this is so, see changeset 0cf700464177. To correctly encode a + # character set, then, we must turn it into pseudo bytes via the + # latin1 charset, which will encode any byte as a single code point + # between 0 and 255, which is what body_encode is expecting. + if isinstance(string, str): + string = string.encode(self.output_charset) + string = string.decode('latin1') + return quoprimime.body_encode(string) + else: + if isinstance(string, str): + string = string.encode(self.output_charset).decode('ascii') + return string diff --git a/modules/language/python/module/email/encoders.py b/modules/language/python/module/email/encoders.py new file mode 100644 index 0000000..e4eb928 --- /dev/null +++ b/modules/language/python/module/email/encoders.py @@ -0,0 +1,67 @@ +module(email,encoders) + +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Encodings and related functions.""" + +__all__ = [ + 'encode_7or8bit', + 'encode_base64', + 'encode_noop', + 'encode_quopri', + ] + + +from base64 import encodebytes as _bencode +from quopri import encodestring as _encodestring + + +def _qencode(s): + enc = _encodestring(s, quotetabs=True) + # Must encode spaces, which quopri.encodestring() doesn't do + return enc.replace(b' ', b'=20') + + +def encode_base64(msg): + """Encode the message's payload in Base64. + + Also, add an appropriate Content-Transfer-Encoding header. + """ + orig = msg.get_payload(decode=True) + encdata = str(_bencode(orig), 'ascii') + msg.set_payload(encdata) + msg['Content-Transfer-Encoding'] = 'base64' + + +def encode_quopri(msg): + """Encode the message's payload in quoted-printable. + + Also, add an appropriate Content-Transfer-Encoding header. + """ + orig = msg.get_payload(decode=True) + encdata = _qencode(orig) + msg.set_payload(encdata) + msg['Content-Transfer-Encoding'] = 'quoted-printable' + + +def encode_7or8bit(msg): + """Set the Content-Transfer-Encoding header to 7bit or 8bit.""" + orig = msg.get_payload(decode=True) + if orig is None: + # There's no payload. For backwards compatibility we use 7bit + msg['Content-Transfer-Encoding'] = '7bit' + return + # We play a trick to make this go fast. If decoding from ASCII succeeds, + # we know the data must be 7bit, otherwise treat it as 8bit. + try: + orig.decode('ascii') + except UnicodeError: + msg['Content-Transfer-Encoding'] = '8bit' + else: + msg['Content-Transfer-Encoding'] = '7bit' + + +def encode_noop(msg): + """Do nothing.""" diff --git a/modules/language/python/module/email/errors.py b/modules/language/python/module/email/errors.py new file mode 100644 index 0000000..c877442 --- /dev/null +++ b/modules/language/python/module/email/errors.py @@ -0,0 +1,111 @@ +module(email,errors) +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""email package exception classes.""" + +__all__=['MessageError', + 'MessageParseError', + 'HeaderParseError'] + +class MessageError(Exception): + """Base class for errors in the email package.""" + + +class MessageParseError(MessageError): + """Base class for message parsing errors.""" + + +class HeaderParseError(MessageParseError): + """Error while parsing headers.""" + + +class BoundaryError(MessageParseError): + """Couldn't find terminating boundary.""" + + +class MultipartConversionError(MessageError, TypeError): + """Conversion to a multipart is prohibited.""" + + +class CharsetError(MessageError): + """An illegal charset was given.""" + + +# These are parsing defects which the parser was able to work around. +class MessageDefect(ValueError): + """Base class for a message defect.""" + + def __init__(self, line=None): + if line is not None: + super().__init__(line) + self.line = line + +class NoBoundaryInMultipartDefect(MessageDefect): + """A message claimed to be a multipart but had no boundary parameter.""" + +class StartBoundaryNotFoundDefect(MessageDefect): + """The claimed start boundary was never found.""" + +class CloseBoundaryNotFoundDefect(MessageDefect): + """A start boundary was found, but not the corresponding close boundary.""" + +class FirstHeaderLineIsContinuationDefect(MessageDefect): + """A message had a continuation line as its first header line.""" + +class MisplacedEnvelopeHeaderDefect(MessageDefect): + """A 'Unix-from' header was found in the middle of a header block.""" + +class MissingHeaderBodySeparatorDefect(MessageDefect): + """Found line with no leading whitespace and no colon before blank line.""" +# XXX: backward compatibility, just in case (it was never emitted). +MalformedHeaderDefect = MissingHeaderBodySeparatorDefect + +class MultipartInvariantViolationDefect(MessageDefect): + """A message claimed to be a multipart but no subparts were found.""" + +class InvalidMultipartContentTransferEncodingDefect(MessageDefect): + """An invalid content transfer encoding was set on the multipart itself.""" + +class UndecodableBytesDefect(MessageDefect): + """Header contained bytes that could not be decoded""" + +class InvalidBase64PaddingDefect(MessageDefect): + """base64 encoded sequence had an incorrect length""" + +class InvalidBase64CharactersDefect(MessageDefect): + """base64 encoded sequence had characters not in base64 alphabet""" + +# These errors are specific to header parsing. + +class HeaderDefect(MessageDefect): + """Base class for a header defect.""" + + def __init__(self, *args, **kw): + super().__init__(*args, **kw) + +class InvalidHeaderDefect(HeaderDefect): + """Header is not valid, message gives details.""" + +class HeaderMissingRequiredValue(HeaderDefect): + """A header that must have a value had none""" + +class NonPrintableDefect(HeaderDefect): + """ASCII characters outside the ascii-printable range found""" + + def __init__(self, non_printables): + super().__init__(non_printables) + self.non_printables = non_printables + + def __str__(self): + return ("the following ASCII non-printables found in header: " + "{}".format(self.non_printables)) + +class ObsoleteHeaderDefect(HeaderDefect): + """Header uses syntax declared obsolete by RFC 5322""" + +class NonASCIILocalPartDefect(HeaderDefect): + """local_part contains non-ASCII characters""" + # This defect only occurs during unicode parsing, not when + # parsing messages decoded from binary. diff --git a/modules/language/python/module/email/header.py b/modules/language/python/module/email/header.py new file mode 100644 index 0000000..ac3f46c --- /dev/null +++ b/modules/language/python/module/email/header.py @@ -0,0 +1,579 @@ +module(email,header) +# Copyright (C) 2002-2007 Python Software Foundation +# Author: Ben Gertzfield, Barry Warsaw +# Contact: email-sig@python.org + +"""Header encoding and decoding functionality.""" + +__all__ = [ + 'Header', + 'decode_header', + 'make_header', + ] + +import re +import binascii + +import email.quoprimime as quoprimime +import email.base64mime as base64mime + +from email.errors import HeaderParseError +import email.charset as _charset +Charset = _charset.Charset + +NL = '\n' +SPACE = ' ' +BSPACE = b' ' +SPACE8 = ' ' * 8 +EMPTYSTRING = '' +MAXLINELEN = 78 +FWS = ' \t' + +USASCII = Charset('us-ascii') +UTF8 = Charset('utf-8') + +# Match encoded-word strings in the form =?charset?q?Hello_World?= +ecre = re.compile(r''' + =\? # literal =? + (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset + \? # literal ? + (?P<encoding>[qb]) # either a "q" or a "b", case insensitive + \? # literal ? + (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string + \?= # literal ?= + ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE) + +# Field name regexp, including trailing colon, but not separating whitespace, +# according to RFC 2822. Character range is from tilde to exclamation mark. +# For use with .match() +fcre = re.compile(r'[\041-\176]+:$') + +# Find a header embedded in a putative header value. Used to check for +# header injection attack. +_embedded_header = re.compile(r'\n[^ \t]+:') + + + +# Helpers +_max_append = quoprimime._max_append + + + +def decode_header(header): + """Decode a message header value without converting charset. + + Returns a list of (string, charset) pairs containing each of the decoded + parts of the header. Charset is None for non-encoded parts of the header, + otherwise a lower-case string containing the name of the character set + specified in the encoded string. + + header may be a string that may or may not contain RFC2047 encoded words, + or it may be a Header object. + + An email.errors.HeaderParseError may be raised when certain decoding error + occurs (e.g. a base64 decoding exception). + """ + # If it is a Header object, we can just return the encoded chunks. + if hasattr(header, '_chunks'): + return [(_charset._encode(string, str(charset)), str(charset)) + for string, charset in header._chunks] + # If no encoding, just return the header with no charset. + if not ecre.search(header): + return [(header, None)] + # First step is to parse all the encoded parts into triplets of the form + # (encoded_string, encoding, charset). For unencoded strings, the last + # two parts will be None. + words = [] + for line in header.splitlines(): + parts = ecre.split(line) + first = True + while parts: + unencoded = parts.pop(0) + if first: + unencoded = unencoded.lstrip() + first = False + if unencoded: + words.append((unencoded, None, None)) + if parts: + charset = parts.pop(0).lower() + encoding = parts.pop(0).lower() + encoded = parts.pop(0) + words.append((encoded, encoding, charset)) + # Now loop over words and remove words that consist of whitespace + # between two encoded strings. + droplist = [] + for n, w in enumerate(words): + if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace(): + droplist.append(n-1) + for d in reversed(droplist): + del words[d] + + # The next step is to decode each encoded word by applying the reverse + # base64 or quopri transformation. decoded_words is now a list of the + # form (decoded_word, charset). + decoded_words = [] + for encoded_string, encoding, charset in words: + if encoding is None: + # This is an unencoded word. + decoded_words.append((encoded_string, charset)) + elif encoding == 'q': + word = quoprimime.header_decode(encoded_string) + decoded_words.append((word, charset)) + elif encoding == 'b': + paderr = len(encoded_string) % 4 # Postel's law: add missing padding + if paderr: + encoded_string += '==='[:4 - paderr] + try: + word = base64mime.decode(encoded_string) + except binascii.Error: + raise HeaderParseError('Base64 decoding error') + else: + decoded_words.append((word, charset)) + else: + raise AssertionError('Unexpected encoding: ' + encoding) + # Now convert all words to bytes and collapse consecutive runs of + # similarly encoded words. + collapsed = [] + last_word = last_charset = None + for word, charset in decoded_words: + if isinstance(word, str): + word = bytes(word, 'raw-unicode-escape') + if last_word is None: + last_word = word + last_charset = charset + elif charset != last_charset: + collapsed.append((last_word, last_charset)) + last_word = word + last_charset = charset + elif last_charset is None: + last_word += BSPACE + word + else: + last_word += word + collapsed.append((last_word, last_charset)) + return collapsed + + + +def make_header(decoded_seq, maxlinelen=None, header_name=None, + continuation_ws=' '): + """Create a Header from a sequence of pairs as returned by decode_header() + + decode_header() takes a header value string and returns a sequence of + pairs of the format (decoded_string, charset) where charset is the string + name of the character set. + + This function takes one of those sequence of pairs and returns a Header + instance. Optional maxlinelen, header_name, and continuation_ws are as in + the Header constructor. + """ + h = Header(maxlinelen=maxlinelen, header_name=header_name, + continuation_ws=continuation_ws) + for s, charset in decoded_seq: + # None means us-ascii but we can simply pass it on to h.append() + if charset is not None and not isinstance(charset, Charset): + charset = Charset(charset) + h.append(s, charset) + return h + + + +class Header: + def __init__(self, s=None, charset=None, + maxlinelen=None, header_name=None, + continuation_ws=' ', errors='strict'): + """Create a MIME-compliant header that can contain many character sets. + + Optional s is the initial header value. If None, the initial header + value is not set. You can later append to the header with .append() + method calls. s may be a byte string or a Unicode string, but see the + .append() documentation for semantics. + + Optional charset serves two purposes: it has the same meaning as the + charset argument to the .append() method. It also sets the default + character set for all subsequent .append() calls that omit the charset + argument. If charset is not provided in the constructor, the us-ascii + charset is used both as s's initial charset and as the default for + subsequent .append() calls. + + The maximum line length can be specified explicitly via maxlinelen. For + splitting the first line to a shorter value (to account for the field + header which isn't included in s, e.g. `Subject') pass in the name of + the field in header_name. The default maxlinelen is 78 as recommended + by RFC 2822. + + continuation_ws must be RFC 2822 compliant folding whitespace (usually + either a space or a hard tab) which will be prepended to continuation + lines. + + errors is passed through to the .append() call. + """ + if charset is None: + charset = USASCII + elif not isinstance(charset, Charset): + charset = Charset(charset) + self._charset = charset + self._continuation_ws = continuation_ws + self._chunks = [] + if s is not None: + self.append(s, charset, errors) + if maxlinelen is None: + maxlinelen = MAXLINELEN + self._maxlinelen = maxlinelen + if header_name is None: + self._headerlen = 0 + else: + # Take the separating colon and space into account. + self._headerlen = len(header_name) + 2 + + def __str__(self): + """Return the string value of the header.""" + self._normalize() + uchunks = [] + lastcs = None + lastspace = None + for string, charset in self._chunks: + # We must preserve spaces between encoded and non-encoded word + # boundaries, which means for us we need to add a space when we go + # from a charset to None/us-ascii, or from None/us-ascii to a + # charset. Only do this for the second and subsequent chunks. + # Don't add a space if the None/us-ascii string already has + # a space (trailing or leading depending on transition) + nextcs = charset + if nextcs == _charset.UNKNOWN8BIT: + original_bytes = string.encode('ascii', 'surrogateescape') + string = original_bytes.decode('ascii', 'replace') + if uchunks: + hasspace = string and self._nonctext(string[0]) + if lastcs not in (None, 'us-ascii'): + if nextcs in (None, 'us-ascii') and not hasspace: + uchunks.append(SPACE) + nextcs = None + elif nextcs not in (None, 'us-ascii') and not lastspace: + uchunks.append(SPACE) + lastspace = string and self._nonctext(string[-1]) + lastcs = nextcs + uchunks.append(string) + return EMPTYSTRING.join(uchunks) + + # Rich comparison operators for equality only. BAW: does it make sense to + # have or explicitly disable <, <=, >, >= operators? + def __eq__(self, other): + # other may be a Header or a string. Both are fine so coerce + # ourselves to a unicode (of the unencoded header value), swap the + # args and do another comparison. + return other == str(self) + + def append(self, s, charset=None, errors='strict'): + """Append a string to the MIME header. + + Optional charset, if given, should be a Charset instance or the name + of a character set (which will be converted to a Charset instance). A + value of None (the default) means that the charset given in the + constructor is used. + + s may be a byte string or a Unicode string. If it is a byte string + (i.e. isinstance(s, str) is false), then charset is the encoding of + that byte string, and a UnicodeError will be raised if the string + cannot be decoded with that charset. If s is a Unicode string, then + charset is a hint specifying the character set of the characters in + the string. In either case, when producing an RFC 2822 compliant + header using RFC 2047 rules, the string will be encoded using the + output codec of the charset. If the string cannot be encoded to the + output codec, a UnicodeError will be raised. + + Optional `errors' is passed as the errors argument to the decode + call if s is a byte string. + """ + if charset is None: + charset = self._charset + elif not isinstance(charset, Charset): + charset = Charset(charset) + if not isinstance(s, str): + input_charset = charset.input_codec or 'us-ascii' + if input_charset == _charset.UNKNOWN8BIT: + s = s.decode('us-ascii', 'surrogateescape') + else: + s = s.decode(input_charset, errors) + # Ensure that the bytes we're storing can be decoded to the output + # character set, otherwise an early error is raised. + output_charset = charset.output_codec or 'us-ascii' + if output_charset != _charset.UNKNOWN8BIT: + try: + s.encode(output_charset, errors) + except UnicodeEncodeError: + if output_charset!='us-ascii': + raise + charset = UTF8 + self._chunks.append((s, charset)) + + def _nonctext(self, s): + """True if string s is not a ctext character of RFC822. + """ + return s.isspace() or s in ('(', ')', '\\') + + def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'): + r"""Encode a message header into an RFC-compliant format. + + There are many issues involved in converting a given string for use in + an email header. Only certain character sets are readable in most + email clients, and as header strings can only contain a subset of + 7-bit ASCII, care must be taken to properly convert and encode (with + Base64 or quoted-printable) header strings. In addition, there is a + 75-character length limit on any given encoded header field, so + line-wrapping must be performed, even with double-byte character sets. + + Optional maxlinelen specifies the maximum length of each generated + line, exclusive of the linesep string. Individual lines may be longer + than maxlinelen if a folding point cannot be found. The first line + will be shorter by the length of the header name plus ": " if a header + name was specified at Header construction time. The default value for + maxlinelen is determined at header construction time. + + Optional splitchars is a string containing characters which should be + given extra weight by the splitting algorithm during normal header + wrapping. This is in very rough support of RFC 2822's `higher level + syntactic breaks': split points preceded by a splitchar are preferred + during line splitting, with the characters preferred in the order in + which they appear in the string. Space and tab may be included in the + string to indicate whether preference should be given to one over the + other as a split point when other split chars do not appear in the line + being split. Splitchars does not affect RFC 2047 encoded lines. + + Optional linesep is a string to be used to separate the lines of + the value. The default value is the most useful for typical + Python applications, but it can be set to \r\n to produce RFC-compliant + line separators when needed. + """ + self._normalize() + if maxlinelen is None: + maxlinelen = self._maxlinelen + # A maxlinelen of 0 means don't wrap. For all practical purposes, + # choosing a huge number here accomplishes that and makes the + # _ValueFormatter algorithm much simpler. + if maxlinelen == 0: + maxlinelen = 1000000 + formatter = _ValueFormatter(self._headerlen, maxlinelen, + self._continuation_ws, splitchars) + lastcs = None + hasspace = lastspace = None + for string, charset in self._chunks: + if hasspace is not None: + hasspace = string and self._nonctext(string[0]) + if lastcs not in (None, 'us-ascii'): + if not hasspace or charset not in (None, 'us-ascii'): + formatter.add_transition() + elif charset not in (None, 'us-ascii') and not lastspace: + formatter.add_transition() + lastspace = string and self._nonctext(string[-1]) + lastcs = charset + hasspace = False + lines = string.splitlines() + if lines: + formatter.feed('', lines[0], charset) + else: + formatter.feed('', '', charset) + for line in lines[1:]: + formatter.newline() + if charset.header_encoding is not None: + formatter.feed(self._continuation_ws, ' ' + line.lstrip(), + charset) + else: + sline = line.lstrip() + fws = line[:len(line)-len(sline)] + formatter.feed(fws, sline, charset) + if len(lines) > 1: + formatter.newline() + if self._chunks: + formatter.add_transition() + value = formatter._str(linesep) + if _embedded_header.search(value): + raise HeaderParseError("header value appears to contain " + "an embedded header: {!r}".format(value)) + return value + + def _normalize(self): + # Step 1: Normalize the chunks so that all runs of identical charsets + # get collapsed into a single unicode string. + chunks = [] + last_charset = None + last_chunk = [] + for string, charset in self._chunks: + if charset == last_charset: + last_chunk.append(string) + else: + if last_charset is not None: + chunks.append((SPACE.join(last_chunk), last_charset)) + last_chunk = [string] + last_charset = charset + if last_chunk: + chunks.append((SPACE.join(last_chunk), last_charset)) + self._chunks = chunks + + + +class _ValueFormatter: + def __init__(self, headerlen, maxlen, continuation_ws, splitchars): + self._maxlen = maxlen + self._continuation_ws = continuation_ws + self._continuation_ws_len = len(continuation_ws) + self._splitchars = splitchars + self._lines = [] + self._current_line = _Accumulator(headerlen) + + def _str(self, linesep): + self.newline() + return linesep.join(self._lines) + + def __str__(self): + return self._str(NL) + + def newline(self): + end_of_line = self._current_line.pop() + if end_of_line != (' ', ''): + self._current_line.push(*end_of_line) + if len(self._current_line) > 0: + if self._current_line.is_onlyws(): + self._lines[-1] += str(self._current_line) + else: + self._lines.append(str(self._current_line)) + self._current_line.reset() + + def add_transition(self): + self._current_line.push(' ', '') + + def feed(self, fws, string, charset): + # If the charset has no header encoding (i.e. it is an ASCII encoding) + # then we must split the header at the "highest level syntactic break" + # possible. Note that we don't have a lot of smarts about field + # syntax; we just try to break on semi-colons, then commas, then + # whitespace. Eventually, this should be pluggable. + if charset.header_encoding is None: + self._ascii_split(fws, string, self._splitchars) + return + # Otherwise, we're doing either a Base64 or a quoted-printable + # encoding which means we don't need to split the line on syntactic + # breaks. We can basically just find enough characters to fit on the + # current line, minus the RFC 2047 chrome. What makes this trickier + # though is that we have to split at octet boundaries, not character + # boundaries but it's only safe to split at character boundaries so at + # best we can only get close. + encoded_lines = charset.header_encode_lines(string, self._maxlengths()) + # The first element extends the current line, but if it's None then + # nothing more fit on the current line so start a new line. + try: + first_line = encoded_lines.pop(0) + except IndexError: + # There are no encoded lines, so we're done. + return + if first_line is not None: + self._append_chunk(fws, first_line) + try: + last_line = encoded_lines.pop() + except IndexError: + # There was only one line. + return + self.newline() + self._current_line.push(self._continuation_ws, last_line) + # Everything else are full lines in themselves. + for line in encoded_lines: + self._lines.append(self._continuation_ws + line) + + def _maxlengths(self): + # The first line's length. + yield self._maxlen - len(self._current_line) + while True: + yield self._maxlen - self._continuation_ws_len + + def _ascii_split(self, fws, string, splitchars): + # The RFC 2822 header folding algorithm is simple in principle but + # complex in practice. Lines may be folded any place where "folding + # white space" appears by inserting a linesep character in front of the + # FWS. The complication is that not all spaces or tabs qualify as FWS, + # and we are also supposed to prefer to break at "higher level + # syntactic breaks". We can't do either of these without intimate + # knowledge of the structure of structured headers, which we don't have + # here. So the best we can do here is prefer to break at the specified + # splitchars, and hope that we don't choose any spaces or tabs that + # aren't legal FWS. (This is at least better than the old algorithm, + # where we would sometimes *introduce* FWS after a splitchar, or the + # algorithm before that, where we would turn all white space runs into + # single spaces or tabs.) + parts = re.split("(["+FWS+"]+)", fws+string) + if parts[0]: + parts[:0] = [''] + else: + parts.pop(0) + for fws, part in zip(*[iter(parts)]*2): + self._append_chunk(fws, part) + + def _append_chunk(self, fws, string): + self._current_line.push(fws, string) + if len(self._current_line) > self._maxlen: + # Find the best split point, working backward from the end. + # There might be none, on a long first line. + for ch in self._splitchars: + for i in range(self._current_line.part_count()-1, 0, -1): + if ch.isspace(): + fws = self._current_line[i][0] + if fws and fws[0]==ch: + break + prevpart = self._current_line[i-1][1] + if prevpart and prevpart[-1]==ch: + break + else: + continue + break + else: + fws, part = self._current_line.pop() + if self._current_line._initial_size > 0: + # There will be a header, so leave it on a line by itself. + self.newline() + if not fws: + # We don't use continuation_ws here because the whitespace + # after a header should always be a space. + fws = ' ' + self._current_line.push(fws, part) + return + remainder = self._current_line.pop_from(i) + self._lines.append(str(self._current_line)) + self._current_line.reset(remainder) + + +class _Accumulator(list): + + def __init__(self, initial_size=0): + self._initial_size = initial_size + super().__init__() + + def push(self, fws, string): + self.append((fws, string)) + + def pop_from(self, i=0): + popped = self[i:] + self[i:] = [] + return popped + + def pop(self): + if self.part_count()==0: + return ('', '') + return super().pop() + + def __len__(self): + return sum((len(fws)+len(part) for fws, part in self), + self._initial_size) + + def __str__(self): + return EMPTYSTRING.join((EMPTYSTRING.join((fws, part)) + for fws, part in self)) + + def reset(self, startval=None): + if startval is None: + startval = [] + self[:] = startval + self._initial_size = 0 + + def is_onlyws(self): + return self._initial_size==0 and (not self or str(self).isspace()) + + def part_count(self): + return super().__len__() diff --git a/modules/language/python/module/email/quoprimime.py b/modules/language/python/module/email/quoprimime.py new file mode 100644 index 0000000..21b6a6b --- /dev/null +++ b/modules/language/python/module/email/quoprimime.py @@ -0,0 +1,300 @@ +module(email,quoprimime) +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Ben Gertzfield +# Contact: email-sig@python.org + +"""Quoted-printable content transfer encoding per RFCs 2045-2047. + +This module handles the content transfer encoding method defined in RFC 2045 +to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to +safely encode text that is in a character set similar to the 7-bit US ASCII +character set, but that includes some 8-bit characters that are normally not +allowed in email bodies or headers. + +Quoted-printable is very space-inefficient for encoding binary files; use the +email.base64mime module for that instead. + +This module provides an interface to encode and decode both headers and bodies +with quoted-printable encoding. + +RFC 2045 defines a method for including character set information in an +`encoded-word' in a header. This method is commonly used for 8-bit real names +in To:/From:/Cc: etc. fields, as well as Subject: lines. + +This module does not do the line wrapping or end-of-line character +conversion necessary for proper internationalized headers; it only +does dumb encoding and decoding. To deal with the various line +wrapping issues, use the email.header module. +""" + +__all__ = [ + 'body_decode', + 'body_encode', + 'body_length', + 'decode', + 'decodestring', + 'header_decode', + 'header_encode', + 'header_length', + 'quote', + 'unquote', + ] + +import re + +from string import ascii_letters, digits, hexdigits + +CRLF = '\r\n' +NL = '\n' +EMPTYSTRING = '' + +# Build a mapping of octets to the expansion of that octet. Since we're only +# going to have 256 of these things, this isn't terribly inefficient +# space-wise. Remember that headers and bodies have different sets of safe +# characters. Initialize both maps with the full expansion, and then override +# the safe bytes with the more compact form. +_QUOPRI_MAP = ['=%02X' % c for c in range(256)] +_QUOPRI_HEADER_MAP = _QUOPRI_MAP[:] +_QUOPRI_BODY_MAP = _QUOPRI_MAP[:] + +# Safe header bytes which need no encoding. +for c in b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii'): + _QUOPRI_HEADER_MAP[c] = chr(c) +# Headers have one other special encoding; spaces become underscores. +_QUOPRI_HEADER_MAP[ord(' ')] = '_' + +# Safe body bytes which need no encoding. +for c in (b' !"#$%&\'()*+,-./0123456789:;<>' + b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`' + b'abcdefghijklmnopqrstuvwxyz{|}~\t'): + _QUOPRI_BODY_MAP[c] = chr(c) + + + +# Helpers +def header_check(octet): + """Return True if the octet should be escaped with header quopri.""" + return chr(octet) != _QUOPRI_HEADER_MAP[octet] + + +def body_check(octet): + """Return True if the octet should be escaped with body quopri.""" + return chr(octet) != _QUOPRI_BODY_MAP[octet] + + +def header_length(bytearray): + """Return a header quoted-printable encoding length. + + Note that this does not include any RFC 2047 chrome added by + `header_encode()`. + + :param bytearray: An array of bytes (a.k.a. octets). + :return: The length in bytes of the byte array when it is encoded with + quoted-printable for headers. + """ + return sum(len(_QUOPRI_HEADER_MAP[octet]) for octet in bytearray) + + +def body_length(bytearray): + """Return a body quoted-printable encoding length. + + :param bytearray: An array of bytes (a.k.a. octets). + :return: The length in bytes of the byte array when it is encoded with + quoted-printable for bodies. + """ + return sum(len(_QUOPRI_BODY_MAP[octet]) for octet in bytearray) + + +def _max_append(L, s, maxlen, extra=''): + if not isinstance(s, str): + s = chr(s) + if not L: + L.append(s.lstrip()) + elif len(L[-1]) + len(s) <= maxlen: + L[-1] += extra + s + else: + L.append(s.lstrip()) + + +def unquote(s): + """Turn a string in the form =AB to the ASCII character with value 0xab""" + return chr(int(s[1:3], 16)) + + +def quote(c): + return _QUOPRI_MAP[ord(c)] + + +def header_encode(header_bytes, charset='iso-8859-1'): + """Encode a single header line with quoted-printable (like) encoding. + + Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but + used specifically for email header fields to allow charsets with mostly 7 + bit characters (and some 8 bit) to remain more or less readable in non-RFC + 2045 aware mail clients. + + charset names the character set to use in the RFC 2046 header. It + defaults to iso-8859-1. + """ + # Return empty headers as an empty string. + if not header_bytes: + return '' + # Iterate over every byte, encoding if necessary. + encoded = header_bytes.decode('latin1').translate(_QUOPRI_HEADER_MAP) + # Now add the RFC chrome to each encoded chunk and glue the chunks + # together. + return '=?%s?q?%s?=' % (charset, encoded) + + +_QUOPRI_BODY_ENCODE_MAP = _QUOPRI_BODY_MAP[:] +for c in b'\r\n': + _QUOPRI_BODY_ENCODE_MAP[c] = chr(c) + +def body_encode(body, maxlinelen=76, eol=NL): + """Encode with quoted-printable, wrapping at maxlinelen characters. + + Each line of encoded text will end with eol, which defaults to "\\n". Set + this to "\\r\\n" if you will be using the result of this function directly + in an email. + + Each line will be wrapped at, at most, maxlinelen characters before the + eol string (maxlinelen defaults to 76 characters, the maximum value + permitted by RFC 2045). Long lines will have the 'soft line break' + quoted-printable character "=" appended to them, so the decoded text will + be identical to the original text. + + The minimum maxlinelen is 4 to have room for a quoted character ("=XX") + followed by a soft line break. Smaller values will generate a + ValueError. + + """ + + if maxlinelen < 4: + raise ValueError("maxlinelen must be at least 4") + if not body: + return body + + # quote special characters + body = body.translate(_QUOPRI_BODY_ENCODE_MAP) + + soft_break = '=' + eol + # leave space for the '=' at the end of a line + maxlinelen1 = maxlinelen - 1 + + encoded_body = [] + append = encoded_body.append + + for line in body.splitlines(): + # break up the line into pieces no longer than maxlinelen - 1 + start = 0 + laststart = len(line) - 1 - maxlinelen + while start <= laststart: + stop = start + maxlinelen1 + # make sure we don't break up an escape sequence + if line[stop - 2] == '=': + append(line[start:stop - 1]) + start = stop - 2 + elif line[stop - 1] == '=': + append(line[start:stop]) + start = stop - 1 + else: + append(line[start:stop] + '=') + start = stop + + # handle rest of line, special case if line ends in whitespace + if line and line[-1] in ' \t': + room = start - laststart + if room >= 3: + # It's a whitespace character at end-of-line, and we have room + # for the three-character quoted encoding. + q = quote(line[-1]) + elif room == 2: + # There's room for the whitespace character and a soft break. + q = line[-1] + soft_break + else: + # There's room only for a soft break. The quoted whitespace + # will be the only content on the subsequent line. + q = soft_break + quote(line[-1]) + append(line[start:-1] + q) + else: + append(line[start:]) + + # add back final newline if present + if body[-1] in CRLF: + append('') + + return eol.join(encoded_body) + + + +# BAW: I'm not sure if the intent was for the signature of this function to be +# the same as base64MIME.decode() or not... +def decode(encoded, eol=NL): + """Decode a quoted-printable string. + + Lines are separated with eol, which defaults to \\n. + """ + if not encoded: + return encoded + # BAW: see comment in encode() above. Again, we're building up the + # decoded string with string concatenation, which could be done much more + # efficiently. + decoded = '' + + for line in encoded.splitlines(): + line = line.rstrip() + if not line: + decoded += eol + continue + + i = 0 + n = len(line) + while i < n: + c = line[i] + if c != '=': + decoded += c + i += 1 + # Otherwise, c == "=". Are we at the end of the line? If so, add + # a soft line break. + elif i+1 == n: + i += 1 + continue + # Decode if in form =AB + elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits: + decoded += unquote(line[i:i+3]) + i += 3 + # Otherwise, not in form =AB, pass literally + else: + decoded += c + i += 1 + + if i == n: + decoded += eol + # Special case if original string did not end with eol + if encoded[-1] not in '\r\n' and decoded.endswith(eol): + decoded = decoded[:-1] + return decoded + + +# For convenience and backwards compatibility w/ standard base64 module +body_decode = decode +decodestring = decode + + + +def _unquote_match(match): + """Turn a match in the form =AB to the ASCII character with value 0xab""" + s = match.group(0) + return unquote(s) + + +# Header decoding is done a bit differently +def header_decode(s): + """Decode a string encoded with RFC 2045 MIME header `Q' encoding. + + This function does not parse a full MIME header value encoded with + quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use + the high level email.header class for that functionality. + """ + s = s.replace('_', ' ') + return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, flags=re.ASCII) diff --git a/modules/language/python/module/python.scm b/modules/language/python/module/python.scm index 00472b3..5c962e6 100644 --- a/modules/language/python/module/python.scm +++ b/modules/language/python/module/python.scm @@ -2,9 +2,11 @@ #:use-module (language python module _python) #:use-module (language python compile ) #:use-module (language python module ) - #:use-module (language python memoryview ) + #:use-module (language python memoryview ) + #:use-module ((oop pf-objects) #:select (define-python-class)) #:use-module ((language python format2) #:select ()) - #:re-export (memoryview)) + #:re-export (memoryview) + #:export (ClassMethod StaticMethod Funcobj)) (define-syntax re-export-all (syntax-rules () @@ -23,9 +25,19 @@ ((@ (guile) list) (pk name))))) (module-public-interface (resolve-module 'iface))))])) +(set! (@@ (language python format2) splitm) + (@@ (language python module re) splitm)) + +(set! (@@ (language python format2) splitmm) + (@@ (language python module re) splitmm)) + (set! (@ (language python module os) path) (Module '(path os module python language) '(path os))) (re-export-all (language python module _python)) (set! (@@ (language python eval) MM) (@@ (language python compile) void)) + +(define-python-class ClassMethod ()) +(define-python-class StaticMethod ()) +(define-python-class Funcobj ()) diff --git a/modules/language/python/module/re/compile.scm b/modules/language/python/module/re/compile.scm index d23ea71..ac2fad5 100644 --- a/modules/language/python/module/re/compile.scm +++ b/modules/language/python/module/re/compile.scm @@ -450,13 +450,23 @@ ((#:bracket not ch ...) (let ((f (apply f-or! (map (lambda (x) + (define (test x y) + (let ((a (char->integer + (string-ref x 0))) + (b (char->integer + (string-ref y 0)))) + (lambda (x) + (and (<= a x) (>= b x))))) + (match x - ((#:range ch1 ch2) - (if (and (<= (char->integer ch1) 10) - (>= (char->integer ch2) 10)) + ((#:range ch1 ch2) + (if (and (<= (char->integer + (string-ref ch1 0)) 10) + (>= (char->integer + (string-ref ch2 0)) 10)) (f-or! f-nl! - (f-reg! (format #f "[~a-~a]" ch1 ch2))) - (f-reg! (format #f "[~a-~a]" ch1 ch2)))) + (f-test! (test ch1 ch2))) + (f-test! (test ch1 ch2)))) ((#:ch (#:class ch)) (get-class ch)) ((#:ch ch) diff --git a/modules/language/python/module/re/parser.scm b/modules/language/python/module/re/parser.scm index 05ce4c6..40469e2 100644 --- a/modules/language/python/module/re/parser.scm +++ b/modules/language/python/module/re/parser.scm @@ -18,6 +18,10 @@ (define f-back (f-or (f-list #:class (mk-token (f-reg! "[AZbBdDsSwntr]"))) + (mk-token (f-seq (f-reg! "[0-7]") (f-reg! "[0-7]") (f-reg! "[0-7]")) + (lambda (x) + (list->string + (list (integer->char (string->number x 8)))))) (mk-token (f-reg! ".")))) (define anongroup (f-list #:?: "(?:" (Ds ee) ")")) diff --git a/modules/language/python/module/string.scm b/modules/language/python/module/string.scm index c431052..c99344c 100644 --- a/modules/language/python/module/string.scm +++ b/modules/language/python/module/string.scm @@ -10,7 +10,11 @@ #:use-module (language python def) #:use-module (language python string) #:use-module (parser stis-parser) - #:export (Formatter)) + #:export (Formatter ascii_letters digits hexdigits)) + +(define digits "0123456789") +(define ascii_letters "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") +(define hexdigits "0123456789abcdefABCDEF") (define (repr x) ((@ (guile) format) #f "~a" x)) (define int (mk-token (f+ (f-reg! "[0-9]")) string->number)) diff --git a/modules/language/python/number.scm b/modules/language/python/number.scm index 4976470..3d95ded 100644 --- a/modules/language/python/number.scm +++ b/modules/language/python/number.scm @@ -583,9 +583,10 @@ (define* (py-round x #:optional (digits 0)) (let* ((f (expt 10.0 digits))) - (if (equal? digits 0) - (round x) - (/ (round (* x f)) f)))) + (inexact->exact + (if (equal? digits 0) + (round x) + (/ (round (* x f)) f))))) (define-method (py-bin (o <integer>)) (number->string o 2)) diff --git a/modules/oop/pf-objects.scm b/modules/oop/pf-objects.scm index 988e282..2bf904f 100644 --- a/modules/oop/pf-objects.scm +++ b/modules/oop/pf-objects.scm @@ -1196,25 +1196,26 @@ explicitly tell it to not update etc. #'(ddname ...)) #'(let () - (define name - (letruc2 ((dname (make-up dval)) ...) - body - (let ((ret - (make-p-class 'name doc - parents - (lambda (dict) - (pylist-set! dict 'dname dname) - ... - dict)))) - (begin - (module-define! (current-module) 'ddname dname) - (name-object ddname)) - ... - ret))) - (module-define! (current-module) 'nname (rawref name '__goops__)) - (name-object nname) - (name-object name) - name)))))) + (define name + (let ((pa parents)) + (letruc2 ((dname (make-up dval)) ...) + body + (let ((ret + (make-p-class 'name doc + pa + (lambda (dict) + (pylist-set! dict 'dname dname) + ... + dict)))) + (begin + (module-define! (current-module) 'ddname dname) + (name-object ddname)) + ... + ret)))) + (module-define! (current-module) 'nname (rawref name '__goops__)) + (name-object nname) + (name-object name) + name)))))) (define-syntax mk-p-class-noname (lambda (x) |