Diffstat (limited to 'modules/language/python')
161 files changed, 15335 insertions, 3 deletions
diff --git a/modules/language/python/#eval.scm# b/modules/language/python/#eval.scm# new file mode 100644 index 0000000..5328fe5 --- /dev/null +++ b/modules/language/python/#eval.scm# @@ -0,0 +1,171 @@ +(define-module (language python eval) + #:use-module (language python guilemod) + #:use-module (parser stis-parser lang python3-parser) + #:use-module (language python exceptions) + #:use-module (language python module) + #:use-module (language python try) + #:use-module (language python list) + #:use-module (language python for) + #:use-module (language python dict) + #:use-module (oop pf-objects) + #:use-module ((ice-9 local-eval) #:select ((the-environment . locals))) + #:re-export (locals) + #:replace (eval) + #:export (local-eval local-compile globals compile exec)) + +(define seval (@ (guile) eval)) + +(define-syntax-rule (aif it p x y) (let ((it p)) (if it x y))) + +(define-syntax-rule (L x) (@@ (ice-9 local-eval) x)) + +(define-syntax globals + (lambda (x) + (syntax-case x () + ((g) + #'(M ((L env-module) (locals g))))))) + +(define-syntax-rule (call- self item a ...) + (let ((class (ref self '_module))) + ((rawref class item) class a ...))) + +(define-syntax-rule (apply- self item a ...) + (let ((class (ref self '_module))) + (apply (rawref class item) class a ...))) + +(define-syntax-rule (ref- self item) + (let ((class (ref self '_module))) + (rawref class item))) + + +(define-python-class GlobalModuleWrap (dict) + (define __init__ + (lambda (self module) + (set self '_module module))) + + (define __getitem__ + (lambda (self key) + (if (string? key) (set! key (string->symbol key))) + (call- self '__global_getitem__ key))) + + (define get + (lambda (self key . es) + (if (string? key) (set! key (string->symbol key))) + (apply- self '__global_get__ key es))) + + (define __setitem__ + (lambda (self key val) + (if (string? key) (set! key (string->symbol key))) + (call- self '__global_setitem__ key val))) + + (define __iter__ + (lambda (self) + (call- self '__global_iter__))) + + (define values + (lambda (self) + (for ((k v : (__iter__ self))) ((l '())) + (cons v l) + #:final l))) + + (define keys + (lambda (self) + (for ((k v : (__iter__ self))) ((l '())) + (cons k l) + #:final l))) + + (define __contains__ + (lambda (self key) + (if (string? key) (set! key (string->symbol key))) + (for ((k v : (__iter__ self))) () + (if (eq? k key) + (break #t)) + #:final + #f))) + + (define items __iter__) + + (define __repr__ + (lambda (self) + (format #f "globals(~a)" (ref- self '__name__))))) + + + +(define MM (list 'error)) +(define (M mod) + (set! mod (module-name mod)) + (if (and (> (length mod) 3) + (eq? (car mod) 'language) + (eq? (cadr mod) 'python) + (eq? (caddr mod) 'module)) + (set! mod (Module (reverse mod) + (reverse (cdddr mod)))) + (set! mod (Module (reverse mod) (reverse mod)))) + + (GlobalModuleWrap mod)) + + +(define* (local-eval x locals globals) + "Evaluate the expression @var{x} within the local environment @var{local} and +global environment @var{global}." + (if locals + (if globals + (apply (seval ((L local-wrap) x locals) globals) + ((L env-boxes) locals)) + (apply (seval ((L local-wrap) x locals) ((L env-module) locals)) + ((L env-boxes) locals))) + (seval x (current-module)))) + +(define* (local-compile x locals globals #:key (opts '())) + "Compile the expression @var{x} within the local environment @var{local} and +global environment @var{global}." 
+ (if locals + (if globals + (apply ((@ (system base compile) compile) + ((L local-wrap) x locals) #:env globals + #:from 'scheme #:opts opts) + ((L env-boxes) locals)) + (apply ((@ (system base compile) compile) ((L local-wrap) x locals) + #:env ((L env-module) locals) + #:from 'scheme #:opts opts) + ((L env-boxes) locals))) + ((@ (system base compile) compile) x #:env (current-module) + #:from 'scheme #:opts opts))) + +(define-syntax eval + (lambda (x) + (syntax-case x () + ((eval x) + #'(eval0 x (locals eval))) + ((eval x . l) + #'(eval0 x . l))))) + +(define* (eval0 x #:optional (locals #f) (globals #f)) + (cond + ((string? x) + (aif xp (p x) + (aif cp (comp xp) + (local-eval cp locals globals) + (raise SyntaxError)) + (raise SyntaxError))) + ((pair? x) + (local-eval x locals globals)))) + +(define* (compile x filename mode + #:optional (flags 0) (dont_inherit #f) (optiomize -1)) + (aif xp (p x) + (aif cp (comp xp) + cp + (raise SyntaxError)) + (raise SyntaxError))) + +(define-syntax exec + (lambda (x) + (syntax-case x () + ((exec x) + #'(eval0 x (locals exec))) + ((exec x . l) + #'(exec0 x . l))))) + +(define* (exec0 x #:optional (locals #f) (globals #f)) + (local-eval x locals globals)) diff --git a/modules/language/python/#persist.scm# b/modules/language/python/#persist.scm# new file mode 100644 index 0000000..4ee46fc --- /dev/null +++ b/modules/language/python/#persist.scm# @@ -0,0 +1,114 @@ +(define-module (language python persist) + #:use-module (ice-9 match) + #:use-module (ice-9 vlist) + #:use-module (ice-9 pretty-print) + #:use-module (oop goops) + #:use-module (oop pf-objects) + #:use-module (logic guile-log persistance) + #:re-export(pcopyable? deep-pcopyable? pcopy deep-pcopy name-object + name-object-deep) + #:export (reduce cp red cpit)) + +(define-syntax-rule (aif it p x y) (let ((it p)) (if it x y))) + +(define (vhash->assoc v) + (let ((t (make-hash-table))) + (vhash-fold + (lambda (k v s) + (if (hash-ref t k) + s + (begin + (hash-set! t k #t) + (cons (cons k v) s)))) + '() v))) + +(define-method (pcopyable? (o <p>)) #t) +(define-method (deep-pcopyable? (o <p>)) #t) + +(define (cp o) + (match (red o) + ((#:reduce mk f) + (let ((oo (mk))) + (for-each (lambda (x) (apply (car x) oo (cdr x))) f) + oo)))) + +(define (red o) + (fluid-set! first #t) + (list #:reduce + (let ((cl (class-of o))) + (lambda () (make cl))) + (reduce o))) + + +(define-method (pcopy (o <p>)) + (list #:obj + (aif it (ref o '__copy__) + (it) + (cp o)))) + +(define-method (deep-pcopy (o <p>) p?) + (aif it (and p? (ref o '__deepcopy__)) + (list #:obj (it)) + (red o))) + +(define first (make-fluid #f)) +(define-method (reduce o) '()) +(define-method (reduce (o <p>)) + (if (fluid-ref first) + (begin + (fluid-set! first #f) + (cons + (aif it (ref o '__reduce__) + (it) + (cons + (lambda (o args) + (let ((h (make-hash-table))) + (slot-set! o 'h h) + (for-each + (lambda (x) (hash-set! h (car x) (cdr x))) + args))) + (list + (hash-fold + (lambda (k v s) (cons (cons k v) s)) + '() + (slot-ref o 'h))))) + (next-method))) + (next-method))) + +(define (fold f s l) + (if (pair? l) + (fold f (f (car l) s) (cdr l)) + s)) + +(define-method (reduce (o <pf>)) + (if (fluid-ref first) + (begin + (fluid-set! first #f) + (cons* + (cons + (lambda (o n args) + (slot-set! o 'size n) + (slot-set! o 'n n) + (let ((h + (fold + (lambda (k v s) (vhash-assoc k v s)) + vlist-null + args))) + (slot-set! 
o 'h h))) + (list (slot-ref o 'n) (vhash->assoc (slot-ref o 'h)))) + (next-method))) + (next-method))) + +(define-syntax cpit + (lambda (x) + (syntax-case x () + ((_ <c> (o lam a)) + #'(begin + (define-method (pcopyable? (o <c>) ) #t) + (define-method (deep-pcopyable? (o <c>) ) #t) + (define-method (pcopy (o <c>) ) (cp o)) + (define-method (deep-pcopy (o <c>) p?) (red o)) + (define-method (reduce (o <c>) ) + (cons* + (cons lam a) + (next-method)))))))) diff --git a/modules/language/python/#python.scm# b/modules/language/python/#python.scm# new file mode 100644 index 0000000..cb36775 --- /dev/null +++ b/modules/language/python/#python.scm# @@ -0,0 +1,246 @@ +(define-module (language python python) + #:use-module (language python parser) + #:use-module (language python expr) + #:use-module (ice-9 match) + #:export (compile-python-string compile-python-file)) + +;;; VARIABLES ---------------------------------------------------------------- +(define (find-global-variables vars tree) + (define (for-each* f l) + (match l + ((x . l) + (f x) + (for-each* f l)) + (x + (f x)))) + + (define (local tree) + (match tree + ((#:global l) + (for-each* + (lambda (x) (hash-set! vars x #t)) l)) + ((x . l) + (for-each* local tree)) + (_ + #t))) + + (define (collect tree) + (match tree + ((#:lambdef . _) + #t) + ((#:identifier . l) + (hash-set! vars tree #t)) + ((_ . _) + (for-each* collect tree)) + (_ + #t))) + + (let lp ((tree tree)) + (match tree + ((#:def . l) + (for-each* local l)) + ((#:lambdef . l) + (for-each* local l)) + ((#:class . l) + (for-each* local l)) + ((#:expr-stmt + a (#:assign x ... e)) + (collect a) + (collect x)) + ((x . l) + (for-each* lp tree)) + (_ + #t)))) +;; COMPILATION + +(define (expr stx out tree) + (define (expr-lhs tree) + (match tree + ((#:test (#:power (#:identifier v . _))) + (datum->syntax stx (string->symbol v))))) + + + (define (expr-rhs tree) + (define (comp-tr op) + (match op + ("notin" #'py-notin) + ("isnot" #'py-isnot) + ("==" #'py_==) + (">=" #'py_>=) + ("<=" #'py_<=) + ("<>" #'py_<>) + ("!=" #'py_!=) + ("in" #'py_in) + ("is" #'py_is) + ("<" #'py_< ) + (">" #'py_> ))) + + (let lp ((tree tree)) + (match tree + ((#:test x #f) + (lp x)) + ((#:test x (a b)) + #`(if #,(py-true? (lp a)) #,(lp x) #,(lp b))) + ((#:or x . y) + #`(py-or #,(lp x) #,@(map lp y))) + ((#:and x y) + #`(py-and #,(lp x) #,@(map lp y))) + ((#:not x) + #`(py-not #,(lp x))) + ((#:comp x) + (lp x)) + ((#:comp x (op . y) . l) + #'(#,(comp-tr op) #,(lp x) #,(lp (cons* #:comp y l)))) + ((#:bor x y) + #`(py-bor #,(lp x) #,@(map lp y))) + ((#:bxor x y) + #`(py-bxor #,(lp x) #,@(map lp y))) + ((#:xand x y) + #`(py-band #,(lp x) #,@(map lp y))) + ((#:<< x y) + #`(py-<< #,(lp x) #,@(map lp y))) + ((#:>> x y) + #`(py->> #,(lp x) #,@(map lp y))) + ((#:+ x y) + #`(py-+ #,(lp x) #,@(map lp y))) + ((#:- x y) + #`(py-- #,(lp x) #,@(map lp y))) + ((#:* x y) + #`(py-* #,(lp x) #,@(map lp y))) + ((#:/ x y) + #`(py-/ #,(lp x) #,@(map lp y))) + ((#:// x y) + #`(py-// #,(lp x) #,@(map lp y))) + ((#:% x y) + #`(py-% #,(lp x) #,@(map lp y))) + ((#:u+ x) + #`(py-u+ #,(lp x))) + ((#:u- x) + #`(py-u- #,(lp x))) + ((#:u~ x) + #`(py-u~ #,(lp x))) + ((#:power x trailer . #f) + (compile-trailer trailer (lp x))) + ((#:power x trailer . l) + #'(py-power ,#(compile-trailer trailer (lp x)) #,(lp l))) + ((#:identifier x . _) + (datum->syntax stx (string->symbol x))) + ((not (_ . _)) + tree)))) + + + + (lambda (tree) + (match tree + ((test1 (#:assign)) + (expr-rhs test1)) + ((test1 (#:assign tests ... last)) + (with-syntax (((rhs ...) 
(map expr-rhs last)) + ((lhs1 ...) (map expr-lhs test1)) + (((lhs ...) ...) (reverse (map (lambda (l) + (map expr-lhs l)) + tests)))) + (with-syntax (((v ...) (generate-temporaries #'(lhs1 ...)))) + (out #'(call-with-values (lambda () (values rhs ...)) + (lambda (v ...) + (begin + (set! lhs v) ...) + ... + (set! lhs1 v) ...))))))))) + + +(define (compile-outer state out tree) + (define (compile-stmt state tree) + (match tree + ((#:expr-stmt l) + (compile-expr l)) + + ((#:del l) + (compile-del l)) + + (#:pass + (out #'(if #f #f))) + + (#:break + (break out)) + + (#:continue + (continue out)) + + ((#:return . l) + (compile-return state l)) + + ((#:raise . l) + (compile-raise state l)) + + ((#:import l) + (compile-import state l)) + + ((#:global . _) + #t) + + ((#:nonlocal . _) + #t) + + ((#:assert . l) + (compile-assert state l)))) + + (match tree + ((#:stmt x) + (for-each* compile-stmt tree)) + ((#:if . l) + (compile-if state l)) + ((#:while . l) + (compile-while state l)) + ((#:for . l) + (compile-for state l)) + ((#:try . l) + (compile-try state l)) + ((#:with . l) + (compile-with state l)) + ((#:def . l) + (compile-def state l)) + ((#:decorated . l) + (compile-decorated state l)))) + + +(define (compile-python0 stx tree output) + (define global-variables (make-hash-table)) + + (find-global-variables global-variables tree) + (set! all-variables + (hash-fold + (lambda (k v e) + (match k + ((_ v . _) + (cons (datum->syntax stx (string->symbol v)) e)))) + '() global-variables)) + (set! all-globals + (hash-fold + (lambda (k v e) + (match k + ((_ v) + (cons (datum->syntax stx (string->symbol v)) e)))) + '() global-variables)) + + (output (with-syntax (((v ...) all-variables)) + #'(begin (define v (if #f #f)) ...))) + + (output (with-syntax (((v ...) all-globals)) + #'(export v ...))) + + (output #`(begin #,@(compile-outer)))) + + +(define (compile-python1 stx tree) + (let ((out '())) + (define (out x) (set! out (cons x out))) + (compile-python0 stx tree out) + (cons* #'begin (reverse out)))) + +(define-syntax compile-python-string + (lambda (x) + (syntax-case x () + ((_ y) + (if (string? (syntax->datum #'y)) + (compile-python1 x (python-parser diff --git a/modules/language/python/#spec.scm# b/modules/language/python/#spec.scm# new file mode 100644 index 0000000..8291a14 --- /dev/null +++ b/modules/language/python/#spec.scm# @@ -0,0 +1,68 @@ +(define-module (language python spec) + #:use-module (language python guilemod) + #:use-module (parser stis-parser lang python3-parser) + #:use-module ((language python module python) #:select ()) + #:use-module (language python compile) + #:use-module (language python completer) + #:use-module (rnrs io ports) + #:use-module (ice-9 pretty-print) + #:use-module (ice-9 readline) + #:use-module (system base compile) + #:use-module (system base language) + #:use-module (language scheme compile-tree-il) + #:use-module (language scheme decompile-tree-il) + #:use-module (ice-9 rdelim) + #:export (python)) + +;;; +;;; Language definition +;;; + +(define (pr . x) + (define port (open-file "/home/stis/src/python-on-guile/log.txt" "a")) + (with-output-to-port port + (lambda () + (pretty-print x) (car (reverse x)))) + (close port) + (car (reverse x))) + +(define (c x) (pr (comp (pr (p (pr x)))))) +(define (cc port x) + (if (equal? x "") (read port) (c x))) + +(define (e x) (eval (c x) (current-module))) + +(catch #t + (lambda () + (set! 
(@@ (ice-9 readline) *readline-completion-function*) + (complete-fkn e))) + (lambda x #f)) + +(define-language python + #:title "python" + #:reader (lambda (port env) + (if (not (fluid-ref (@@ (system base compile) %in-compile))) + (cc port (read-line port)) + (cc port (read-string port)))) + + #:compilers `((tree-il . ,compile-tree-il)) + #:decompilers `((tree-il . ,decompile-tree-il)) + #:evaluator (lambda (x module) (primitive-eval x)) + #:printer write + #:make-default-environment + (lambda () + ;; Ideally we'd duplicate the whole module hierarchy so that `set!', + ;; `fluid-set!', etc. don't have any effect in the current environment. + (let ((m (make-fresh-user-module))) + ;; Provide a separate `current-reader' fluid so that + ;; compile-time changes to `current-reader' are + ;; limited to the current compilation unit. + (module-define! m 'current-reader (make-fluid)) + + ;; Default to `simple-format', as is the case until + ;; (ice-9 format) is loaded. This allows + ;; compile-time warnings to be emitted when using + ;; unsupported options. + (module-set! m 'format simple-format) + + m))) diff --git a/modules/language/python/#test.py# b/modules/language/python/#test.py# new file mode 100644 index 0000000..976e04f --- /dev/null +++ b/modules/language/python/#test.py# @@ -0,0 +1,13 @@ +def f(x): + def h(q): + return q + y + z + global y + y=x + z=1 + return y + x + +def g(): + return y,y + +x : x.a = 1 +x : x.f(10) diff --git a/modules/language/python/#util.scm# b/modules/language/python/#util.scm# new file mode 100644 index 0000000..95c54a2 --- /dev/null +++ b/modules/language/python/#util.scm# @@ -0,0 +1,3 @@ +(define-module (language python util) + #:export ()) + diff --git a/modules/language/python/#yield.scm# b/modules/language/python/#yield.scm# new file mode 100644 index 0000000..7488f42 --- /dev/null +++ b/modules/language/python/#yield.scm# @@ -0,0 +1,138 @@ +(define-module (language python yield) + #:use-module (oop pf-objects) + #:use-module (language python exceptions) + #:use-module (oop goops) + #:use-module (ice-9 control) + #:use-module (ice-9 match) + #:use-module (language python persist) + #:replace (send) + #:export (<yield> + in-yield define-generator + make-generator + sendException sendClose)) + +(define-syntax-rule (aif it p x y) (let ((it p)) (if it x y))) + +(define in-yield (make-fluid #f)) + +(define-syntax-parameter YIELD (lambda x #f)) + +(define-syntax yield + (lambda (x) + (syntax-case x () + ((_ x ...) + #'(begin + (fluid-set! in-yield #t) + ((abort-to-prompt YIELD x ...)))) + (x + #'(lambda x + (fluid-set! in-yield #t) + ((apply abort-to-prompt YIELD x))))))) + +(define-syntax make-generator + (syntax-rules () + ((_ closure) + (make-generator () closure)) + ((_ args closure) + (lambda a + (let () + (define obj (make <yield>)) + (define ab (make-prompt-tag)) + (syntax-parameterize ((YIELD (lambda x #'ab))) + (slot-set! obj 'k #f) + (slot-set! obj 'closed #f) + (slot-set! obj 's + (lambda () + (call-with-prompt + ab + (lambda () + (apply closure yield a) + (slot-set! obj 'closed #t) + (throw StopIteration)) + (letrec ((lam + (lambda (k . l) + (fluid-set! in-yield #f) + (slot-set! obj 'k + (lambda (a) + (call-with-prompt + ab + (lambda () + (k a)) + lam))) + (apply values l)))) + lam)))) + obj)))))) + +(define-syntax define-generator + (lambda (x) + (syntax-case x () + ((_ (f y . args) code ...) + #'(define f (make-generator args (lambda (y . 
args) code ...))))))) + +(define-class <yield> () s k closed) +(name-object <yield>) +(cpit <yield> (o (lambda (o s k closed) + (slot-set! o 's s ) + (slot-set! o 'k k ) + (slot-set! o 'closed closed)) + (list + (slot-ref o 's) + (slot-ref o 'k) + (slot-ref o 'closed)))) + +(define-method (send (l <yield>) . u) + (let ((k (slot-ref l 'k)) + (s (slot-ref l 's)) + (c (slot-ref l 'closed))) + (if (not c) + (if k + (k (lambda () + (if (null? u) + 'Null + (apply values u)))) + (throw 'python (Exception)))))) + + +(define-method (sendException (l <yield>) e . ls) + (let ((k (slot-ref l 'k)) + (s (slot-ref l 's)) + (c (slot-ref l 'closed))) + (if (not c) + (if k + (k (lambda () + (if (pyclass? e) + (throw 'python (apply e ls)) + (apply throw 'python e ls)))) + (throw 'python (Exception)))))) + +(define-method (sendClose (l <yield>)) + (let ((k (slot-ref l 'k)) + (s (slot-ref l 's)) + (c (slot-ref l 'closed))) + (if c + (values) + (if k + (catch #t + (lambda () + (k (lambda () (throw 'python GeneratorExit))) + (slot-set! l 'closed #t) + (throw 'python RuntimeError)) + (lambda (k tag . v) + (slot-set! l 'closed #t) + (if (eq? tag 'python) + (match v + ((tag . l) + (if (eq? tag GeneratorExit) + (values) + (apply throw tag l)))) + (apply throw tag v)))) + (slot-set! l 'closed #t))))) + +(define-method (send (l <p>) . u) + (apply (ref l '__send__) u)) + +(define-method (sendException (l <p>) . u) + (apply (ref l '__exception__) u)) + +(define-method (sendClose (l <p>)) + ((ref l '__close__))) diff --git a/modules/language/python/.#spec.scm b/modules/language/python/.#spec.scm new file mode 120000 index 0000000..bf7a10b --- /dev/null +++ b/modules/language/python/.#spec.scm @@ -0,0 +1 @@ +stis@lapwine.29401:1539964896
\ No newline at end of file diff --git a/modules/language/python/bool.go b/modules/language/python/bool.go Binary files differnew file mode 100644 index 0000000..c69d97a --- /dev/null +++ b/modules/language/python/bool.go diff --git a/modules/language/python/bytes.scm b/modules/language/python/bytes.scm index 1e3a0cd..e3d8245 100644 --- a/modules/language/python/bytes.scm +++ b/modules/language/python/bytes.scm @@ -2,6 +2,7 @@ #:use-module (oop goops) #:use-module (oop pf-objects) #:use-module (ice-9 match) + #:use-module (ice-9 format) #:use-module (ice-9 iconv) #:use-module (rnrs bytevectors) #:use-module (system foreign) diff --git a/modules/language/python/compile.log b/modules/language/python/compile.log new file mode 100644 index 0000000..fdee901 --- /dev/null +++ b/modules/language/python/compile.log @@ -0,0 +1,73 @@ +(let ((z #f)) + (begin + (values) + (set! y x) + (set! z 1) + (return193 (+ y x)))) +(let () (begin (return194 y y))) +return193 +ret +(let ((z #f)) + (begin + (values) + (set! y x) + (set! z 1) + (return193 (+ y x)))) +return194 +ret +(let () (begin (return194 y y))) +return193 +return193 +return193 +return193 +ret +(let ((z #f)) + (begin + (values) + (set! y x) + (set! z 1) + (return193 (+ y x)))) +return194 +return194 +return194 +return194 +ret +(let () (begin (return194 y y))) +return193 +return193 +return193 +return193 +return193 +(let ((z #f)) + (begin (values) (set! y x) (set! z 1) (+ y x))) +return194 +return194 +return194 +return194 +return194 +(let () (begin (values y y))) +(let/ec + return193 + let + ((z #f)) + (begin + (define (h q) + ((@@ (language python compile) with-return) + return194 + (let () (begin (return194 (+ q y z)))))) + (values) + (set! y x) + (set! z 1) + (+ y x))) +(let ((z #f)) + (begin + (define (h q) + ((@@ (language python compile) with-return) + return194 + (let () (begin (return194 (+ q y z)))))) + (values) + (set! y x) + (set! z 1) + (+ y x))) +(let () (begin (+ q y z))) +(let () (begin (values y y))) diff --git a/modules/language/python/def.go b/modules/language/python/def.go Binary files differnew file mode 100644 index 0000000..b0fdc60 --- /dev/null +++ b/modules/language/python/def.go diff --git a/modules/language/python/def.scm b/modules/language/python/def.scm index 569023c..25edc7e 100644 --- a/modules/language/python/def.scm +++ b/modules/language/python/def.scm @@ -1,7 +1,6 @@ (define-module (language python def) #:use-module (oop pf-objects) #:use-module (language python for) - #:use-module (language python list) #:use-module (language python exceptions) #:use-module (ice-9 match) #:use-module (srfi srfi-11) @@ -111,6 +110,7 @@ (define-syntax-rule (def (f . args) code ...) 
(define f (lam args code ...))) +(define to-list #f) (define (no x) (and-map diff --git a/modules/language/python/dict.scm b/modules/language/python/dict.scm index 9eb698d..2b446b9 100644 --- a/modules/language/python/dict.scm +++ b/modules/language/python/dict.scm @@ -9,6 +9,7 @@ #:use-module (language python exceptions) #:use-module (language python persist) #:use-module (ice-9 match) + #:use-module (ice-9 format) #:use-module (ice-9 control) #:use-module (oop goops) #:use-module (oop pf-objects) diff --git a/modules/language/python/eval.scm b/modules/language/python/eval.scm index 1cd92ad..5328fe5 100644 --- a/modules/language/python/eval.scm +++ b/modules/language/python/eval.scm @@ -1,4 +1,5 @@ (define-module (language python eval) + #:use-module (language python guilemod) #:use-module (parser stis-parser lang python3-parser) #:use-module (language python exceptions) #:use-module (language python module) diff --git a/modules/language/python/exceptions.go b/modules/language/python/exceptions.go Binary files differnew file mode 100644 index 0000000..c978f75 --- /dev/null +++ b/modules/language/python/exceptions.go diff --git a/modules/language/python/for.go b/modules/language/python/for.go Binary files differnew file mode 100644 index 0000000..6fc5dea --- /dev/null +++ b/modules/language/python/for.go diff --git a/modules/language/python/guilemod.go b/modules/language/python/guilemod.go Binary files differnew file mode 100644 index 0000000..37043ec --- /dev/null +++ b/modules/language/python/guilemod.go diff --git a/modules/language/python/hash.go b/modules/language/python/hash.go Binary files differnew file mode 100644 index 0000000..c39e3fb --- /dev/null +++ b/modules/language/python/hash.go diff --git a/modules/language/python/list.go b/modules/language/python/list.go Binary files differnew file mode 100644 index 0000000..99090c0 --- /dev/null +++ b/modules/language/python/list.go diff --git a/modules/language/python/list.scm b/modules/language/python/list.scm index c656edf..35be4fc 100644 --- a/modules/language/python/list.scm +++ b/modules/language/python/list.scm @@ -1,6 +1,7 @@ (define-module (language python list) #:use-module (ice-9 match) #:use-module (ice-9 control) + #:use-module (ice-9 format) #:use-module (oop pf-objects) #:use-module (oop goops) #:use-module (language python hash) @@ -1000,3 +1001,5 @@ #f)) (define py-list list) + +(set! (@@ (language python def) to-list) to-list) diff --git a/modules/language/python/memoryview.scm b/modules/language/python/memoryview.scm new file mode 100644 index 0000000..80b5c06 --- /dev/null +++ b/modules/language/python/memoryview.scm @@ -0,0 +1,24 @@ +(define-module (language python memoryview) + #:use-module (oop pf-objects) + #:use-module (language python exceptions) + #:use-module (language python try) + #:use-module ((language python module _python) #:select (isinstance bytes)) + #:export (memoryview)) + +(define-python-class memoryview () + (define __init__ + (lambda (self obj) + (cond + ((isinstance obj bytes) + (begin + (set self 'obj obj) + (set self 'format "B") + (set self 'ndim 1))) + (else + (raise (TypeError "not a supported memoryview object"))))))) + + + + + + diff --git a/modules/language/python/module.scm b/modules/language/python/module.scm index 49a4366..5e04fda 100644 --- a/modules/language/python/module.scm +++ b/modules/language/python/module.scm @@ -313,7 +313,8 @@ (else #f)) a ...))))) -(define (m? x) ((@ (language python module _python) isinstance) x Module)) +(define (m? x) #f) + (define (import-f x f . 
l) (if x (if (m? x) diff --git a/modules/language/python/module/#_md5.scm# b/modules/language/python/module/#_md5.scm# new file mode 100644 index 0000000..cc07ebd --- /dev/null +++ b/modules/language/python/module/#_md5.scm# @@ -0,0 +1,11 @@ +(define-module (language python module _md5) + #:use-module (language python checksum) + #:use-module (oop pf-objects) + #:export (md5)) + +(define-python-class md5 (Summer) + (define name "md5") + (define digest_size 16) + + (define _command "/usr/bin/md5sum")) + diff --git a/modules/language/python/module/#_sha1.scm# b/modules/language/python/module/#_sha1.scm# new file mode 100644 index 0000000..87a0adb --- /dev/null +++ b/modules/language/python/module/#_sha1.scm# @@ -0,0 +1,10 @@ +(define-module (language python module _sha1) + #:use-module (language python checksum) + #:use-module (oop pf-objects) + #:export (sha1)) + +(define-python-class sha1 (Summer) + (define name "sha1") + (define digest_size 20) + + (define _command "/usr/bin/sha1sum")) diff --git a/modules/language/python/module/#_sha256.scm# b/modules/language/python/module/#_sha256.scm# new file mode 100644 index 0000000..c87ea1a --- /dev/null +++ b/modules/language/python/module/#_sha256.scm# @@ -0,0 +1,10 @@ +(define-module (language python module _sha256) + #:use-module (language python checksum) + #:use-module (oop pf-objects) + #:export (sha256)) + +(define-python-class sha256 (Summer) + (define name "sha256") + (define digest_size 32) + + (define _command "/usr/bin/sha256sum")) diff --git a/modules/language/python/module/#bz2.py# b/modules/language/python/module/#bz2.py# new file mode 100644 index 0000000..3740792 --- /dev/null +++ b/modules/language/python/module/#bz2.py# @@ -0,0 +1,362 @@ +module(bz2) +"""Interface to the libbzip2 compression library. + +This module provides a file interface, classes for incremental +(de)compression, and functions for one-shot (de)compression. +""" + +__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor", + "open", "compress", "decompress"] + +__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>" + +from builtins import open as _builtin_open +import io +import os +import warnings +import _compression + +try: + from threading import RLock +except ImportError: + from dummy_threading import RLock + +from _bz2 import BZ2Compressor, BZ2Decompressor + + +_MODE_CLOSED = 0 +_MODE_READ = 1 +# Value 2 no longer used +_MODE_WRITE = 3 + + +class BZ2File(_compression.BaseStream): + + """A file object providing transparent bzip2 (de)compression. + + A BZ2File can act as a wrapper for an existing file object, or refer + directly to a named file on disk. + + Note that BZ2File provides a *binary* file interface - data read is + returned as bytes, and data to be written should be given as bytes. + """ + + def __init__(self, filename, mode="r", buffering=None, compresslevel=9): + """Open a bzip2-compressed file. + + If filename is a str, bytes, or PathLike object, it gives the + name of the file to be opened. Otherwise, it should be a file + object, which will be used to read or write the compressed data. + + mode can be 'r' for reading (default), 'w' for (over)writing, + 'x' for creating exclusively, or 'a' for appending. These can + equivalently be given as 'rb', 'wb', 'xb', and 'ab'. + + buffering is ignored. Its use is deprecated. + + If mode is 'w', 'x' or 'a', compresslevel can be a number between 1 + and 9 specifying the level of compression: 1 produces the least + compression, and 9 (default) produces the most compression. 
+ + If mode is 'r', the input file may be the concatenation of + multiple compressed streams. + """ + # This lock must be recursive, so that BufferedIOBase's + # writelines() does not deadlock. + self._lock = RLock() + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + + if buffering is not None: + warnings.warn("Use of 'buffering' argument is deprecated", + DeprecationWarning) + + if not (1 <= compresslevel <= 9): + raise ValueError("compresslevel must be between 1 and 9") + + if mode in ("", "r", "rb"): + mode = "rb" + mode_code = _MODE_READ + elif mode in ("w", "wb"): + mode = "wb" + mode_code = _MODE_WRITE + self._compressor = BZ2Compressor(compresslevel) + elif mode in ("x", "xb"): + mode = "xb" + mode_code = _MODE_WRITE + self._compressor = BZ2Compressor(compresslevel) + elif mode in ("a", "ab"): + mode = "ab" + mode_code = _MODE_WRITE + self._compressor = BZ2Compressor(compresslevel) + else: + raise ValueError("Invalid mode: %r" % (mode,)) + + if isinstance(filename, (str, bytes, os.PathLike)): + self._fp = _builtin_open(filename, mode) + self._closefp = True + self._mode = mode_code + elif hasattr(filename, "read") or hasattr(filename, "write"): + self._fp = filename + self._mode = mode_code + else: + raise TypeError("filename must be a str, bytes, file or PathLike object") + + if self._mode == _MODE_READ: + raw = _compression.DecompressReader(self._fp, + BZ2Decompressor, trailing_error=OSError) + self._buffer = io.BufferedReader(raw) + else: + self._pos = 0 + + def close(self): + """Flush and close the file. + + May be called more than once without error. Once the file is + closed, any other operation on it will raise a ValueError. + """ + with self._lock: + if self._mode == _MODE_CLOSED: + return + try: + if self._mode == _MODE_READ: + self._buffer.close() + elif self._mode == _MODE_WRITE: + self._fp.write(self._compressor.flush()) + self._compressor = None + finally: + try: + if self._closefp: + self._fp.close() + finally: + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + self._buffer = None + + @property + def closed(self): + """True if this file is closed.""" + return self._mode == _MODE_CLOSED + + def fileno(self): + """Return the file descriptor for the underlying file.""" + self._check_not_closed() + return self._fp.fileno() + + def seekable(self): + """Return whether the file supports seeking.""" + return self.readable() and self._buffer.seekable() + + def readable(self): + """Return whether the file was opened for reading.""" + self._check_not_closed() + return self._mode == _MODE_READ + + def writable(self): + """Return whether the file was opened for writing.""" + self._check_not_closed() + return self._mode == _MODE_WRITE + + def peek(self, n=0): + """Return buffered data without advancing the file position. + + Always returns at least one byte of data, unless at EOF. + The exact number of bytes returned is unspecified. + """ + with self._lock: + self._check_can_read() + # Relies on the undocumented fact that BufferedReader.peek() + # always returns at least one byte (except at EOF), independent + # of the value of n + return self._buffer.peek(n) + + def read(self, size=-1): + """Read up to size uncompressed bytes from the file. + + If size is negative or omitted, read until EOF is reached. + Returns b'' if the file is already at EOF. 
+ """ + with self._lock: + self._check_can_read() + return self._buffer.read(size) + + def read1(self, size=-1): + """Read up to size uncompressed bytes, while trying to avoid + making multiple reads from the underlying stream. Reads up to a + buffer's worth of data if size is negative. + + Returns b'' if the file is at EOF. + """ + with self._lock: + self._check_can_read() + if size < 0: + size = io.DEFAULT_BUFFER_SIZE + return self._buffer.read1(size) + + def readinto(self, b): + """Read bytes into b. + + Returns the number of bytes read (0 for EOF). + """ + with self._lock: + self._check_can_read() + return self._buffer.readinto(b) + + def readline(self, size=-1): + """Read a line of uncompressed bytes from the file. + + The terminating newline (if present) is retained. If size is + non-negative, no more than size bytes will be read (in which + case the line may be incomplete). Returns b'' if already at EOF. + """ + if not isinstance(size, int): + if not hasattr(size, "__index__"): + raise TypeError("Integer argument expected") + size = size.__index__() + with self._lock: + self._check_can_read() + return self._buffer.readline(size) + + def readlines(self, size=-1): + """Read a list of lines of uncompressed bytes from the file. + + size can be specified to control the number of lines read: no + further lines will be read once the total size of the lines read + so far equals or exceeds size. + """ + if not isinstance(size, int): + if not hasattr(size, "__index__"): + raise TypeError("Integer argument expected") + size = size.__index__() + with self._lock: + self._check_can_read() + return self._buffer.readlines(size) + + def write(self, data): + """Write a byte string to the file. + + Returns the number of uncompressed bytes written, which is + always len(data). Note that due to buffering, the file on disk + may not reflect the data written until close() is called. + """ + with self._lock: + self._check_can_write() + compressed = self._compressor.compress(data) + self._fp.write(compressed) + self._pos += len(data) + return len(data) + + def writelines(self, seq): + """Write a sequence of byte strings to the file. + + Returns the number of uncompressed bytes written. + seq can be any iterable yielding byte strings. + + Line separators are not added between the written byte strings. + """ + with self._lock: + return _compression.BaseStream.writelines(self, seq) + + def seek(self, offset, whence=io.SEEK_SET): + """Change the file position. + + The new position is specified by offset, relative to the + position indicated by whence. Values for whence are: + + 0: start of stream (default); offset must not be negative + 1: current stream position + 2: end of stream; offset must not be positive + + Returns the new file position. + + Note that seeking is emulated, so depending on the parameters, + this operation may be extremely slow. + """ + with self._lock: + self._check_can_seek() + return self._buffer.seek(offset, whence) + + def tell(self): + """Return the current file position.""" + with self._lock: + self._check_not_closed() + if self._mode == _MODE_READ: + return self._buffer.tell() + return self._pos + + +def open(filename, mode="rb", compresslevel=9, + encoding=None, errors=None, newline=None): + """Open a bzip2-compressed file in binary or text mode. + + The filename argument can be an actual filename (a str, bytes, or + PathLike object), or an existing file object to read from or write + to. 
+ + The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or + "ab" for binary mode, or "rt", "wt", "xt" or "at" for text mode. + The default mode is "rb", and the default compresslevel is 9. + + For binary mode, this function is equivalent to the BZ2File + constructor: BZ2File(filename, mode, compresslevel). In this case, + the encoding, errors and newline arguments must not be provided. + + For text mode, a BZ2File object is created, and wrapped in an + io.TextIOWrapper instance with the specified encoding, error + handling behavior, and line ending(s). + + """ + if "t" in mode: + if "b" in mode: + raise ValueError("Invalid mode: %r" % (mode,)) + else: + if encoding is not None: + raise ValueError("Argument 'encoding' not supported in binary mode") + if errors is not None: + raise ValueError("Argument 'errors' not supported in binary mode") + if newline is not None: + raise ValueError("Argument 'newline' not supported in binary mode") + + bz_mode = mode.replace("t", "") + binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel) + + if "t" in mode: + return io.TextIOWrapper(binary_file, encoding, errors, newline) + else: + return binary_file + + +def compress(data, compresslevel=9): + """Compress a block of data. + + compresslevel, if given, must be a number between 1 and 9. + + For incremental compression, use a BZ2Compressor object instead. + """ + comp = BZ2Compressor(compresslevel) + return comp.compress(data) + comp.flush() + + +def decompress(data): + """Decompress a block of data. + + For incremental decompression, use a BZ2Decompressor object instead. + """ + results = [] + while data: + decomp = BZ2Decompressor() + try: + res = decomp.decompress(data) + except OSError: + if results: + break # Leftover data is not a valid bzip2 stream; ignore it. + else: + raise # Error on the first iteration; bail out. + results.append(res) + if not decomp.eof: + raise ValueError("Compressed data ended before the " + "end-of-stream marker was reached") + data = decomp.unused_data + return b"".join(results) diff --git a/modules/language/python/module/#difflib.py# b/modules/language/python/module/#difflib.py# new file mode 100644 index 0000000..a808007 --- /dev/null +++ b/modules/language/python/module/#difflib.py# @@ -0,0 +1,212 @@ +module(difflib) + +""" +Module difflib -- helpers for computing deltas between objects. + +Function get_close_matches(word, possibilities, n=3, cutoff=0.6): + Use SequenceMatcher to return list of the best "good enough" matches. + +Function context_diff(a, b): + For two lists of strings, return a delta in context diff format. + +Function ndiff(a, b): + Return a delta: the difference between `a` and `b` (lists of strings). + +Function restore(delta, which): + Return one of the two sequences that generated an ndiff delta. + +Function unified_diff(a, b): + For two lists of strings, return a delta in unified diff format. + +Class SequenceMatcher: + A flexible class for comparing pairs of sequences of any type. + +Class Differ: + For producing human-readable deltas from sequences of lines of text. + +Class HtmlDiff: + For producing HTML side by side comparison with change highlights. 
+""" + +__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher', + 'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff', + 'unified_diff', 'diff_bytes', 'HtmlDiff', 'Match'] + +from heapq import nlargest as _nlargest +from collections import namedtuple as _namedtuple + +Match = _namedtuple('Match', 'a b size') + +def _calculate_ratio(matches, length): + if length: + return 2.0 * matches / length + return 1.0 + +class SequenceMatcher: + + """ + SequenceMatcher is a flexible class for comparing pairs of sequences of + any type, so long as the sequence elements are hashable. The basic + algorithm predates, and is a little fancier than, an algorithm + published in the late 1980's by Ratcliff and Obershelp under the + hyperbolic name "gestalt pattern matching". The basic idea is to find + the longest contiguous matching subsequence that contains no "junk" + elements (R-O doesn't address junk). The same idea is then applied + recursively to the pieces of the sequences to the left and to the right + of the matching subsequence. This does not yield minimal edit + sequences, but does tend to yield matches that "look right" to people. + + SequenceMatcher tries to compute a "human-friendly diff" between two + sequences. Unlike e.g. UNIX(tm) diff, the fundamental notion is the + longest *contiguous* & junk-free matching subsequence. That's what + catches peoples' eyes. The Windows(tm) windiff has another interesting + notion, pairing up elements that appear uniquely in each sequence. + That, and the method here, appear to yield more intuitive difference + reports than does diff. This method appears to be the least vulnerable + to synching up on blocks of "junk lines", though (like blank lines in + ordinary text files, or maybe "<P>" lines in HTML files). That may be + because this is the only method of the 3 that has a *concept* of + "junk" <wink>. + + Example, comparing two strings, and considering blanks to be "junk": + + >>> s = SequenceMatcher(lambda x: x == " ", + ... "private Thread currentThread;", + ... "private volatile Thread currentThread;") + >>> + + .ratio() returns a float in [0, 1], measuring the "similarity" of the + sequences. As a rule of thumb, a .ratio() value over 0.6 means the + sequences are close matches: + + >>> print(round(s.ratio(), 3)) + 0.866 + >>> + + If you're only interested in where the sequences match, + .get_matching_blocks() is handy: + + >>> for block in s.get_matching_blocks(): + ... print("a[%d] and b[%d] match for %d elements" % block) + a[0] and b[0] match for 8 elements + a[8] and b[17] match for 21 elements + a[29] and b[38] match for 0 elements + + Note that the last tuple returned by .get_matching_blocks() is always a + dummy, (len(a), len(b), 0), and this is the only case in which the last + tuple element (number of elements matched) is 0. + + If you want to know how to change the first sequence into the second, + use .get_opcodes(): + + >>> for opcode in s.get_opcodes(): + ... print("%6s a[%d:%d] b[%d:%d]" % opcode) + equal a[0:8] b[0:8] + insert a[8:8] b[8:17] + equal a[8:29] b[17:38] + + See the Differ class for a fancy human-friendly file differencer, which + uses SequenceMatcher both to compare sequences of lines, and to compare + sequences of characters within similar (near-matching) lines. + + See also function get_close_matches() in this module, which shows how + simple code building on SequenceMatcher can be used to do useful work. + + Timing: Basic R-O is cubic time worst case and quadratic time expected + case. 
SequenceMatcher is quadratic time for the worst case and has + expected-case behavior dependent in a complicated way on how many + elements the sequences have in common; best case time is linear. + + Methods: + + __init__(isjunk=None, a='', b='') + Construct a SequenceMatcher. + + set_seqs(a, b) + Set the two sequences to be compared. + + set_seq1(a) + Set the first sequence to be compared. + + set_seq2(b) + Set the second sequence to be compared. + + find_longest_match(alo, ahi, blo, bhi) + Find longest matching block in a[alo:ahi] and b[blo:bhi]. + + get_matching_blocks() + Return list of triples describing matching subsequences. + + get_opcodes() + Return list of 5-tuples describing how to turn a into b. + + ratio() + Return a measure of the sequences' similarity (float in [0,1]). + + quick_ratio() + Return an upper bound on .ratio() relatively quickly. + + real_quick_ratio() + Return an upper bound on ratio() very quickly. + """ + + def __init__(self, isjunk=None, a='', b='', autojunk=True): + """Construct a SequenceMatcher. + + Optional arg isjunk is None (the default), or a one-argument + function that takes a sequence element and returns true iff the + element is junk. None is equivalent to passing "lambda x: 0", i.e. + no elements are considered to be junk. For example, pass + lambda x: x in " \\t" + if you're comparing lines as sequences of characters, and don't + want to synch up on blanks or hard tabs. + + Optional arg a is the first of two sequences to be compared. By + default, an empty string. The elements of a must be hashable. See + also .set_seqs() and .set_seq1(). + + Optional arg b is the second of two sequences to be compared. By + default, an empty string. The elements of b must be hashable. See + also .set_seqs() and .set_seq2(). + + Optional arg autojunk should be set to False to disable the + "automatic junk heuristic" that treats popular elements as junk + (see module documentation for more information). + """ + + # Members: + # a + # first sequence + # b + # second sequence; differences are computed as "what do + # we need to do to 'a' to change it into 'b'?" + # b2j + # for x in b, b2j[x] is a list of the indices (into b) + # at which x appears; junk and popular elements do not appear + # fullbcount + # for x in b, fullbcount[x] == the number of times x + # appears in b; only materialized if really needed (used + # only for computing quick_ratio()) + # matching_blocks + # a list of (i, j, k) triples, where a[i:i+k] == b[j:j+k]; + # ascending & non-overlapping in i and in j; terminated by + # a dummy (len(a), len(b), 0) sentinel + # opcodes + # a list of (tag, i1, i2, j1, j2) tuples, where tag is + # one of + # 'replace' a[i1:i2] should be replaced by b[j1:j2] + # 'delete' a[i1:i2] should be deleted + # 'insert' b[j1:j2] should be inserted + # 'equal' a[i1:i2] == b[j1:j2] + # isjunk + # a user-supplied function taking a sequence element and + # returning true iff the element is "junk" -- this has + # subtle but helpful effects on the algorithm, which I'll + # get around to writing up someday <0.9 wink>. + # DON'T USE! Only __chain_b uses this. Use "in self.bjunk". + # bjunk + # the items in b for which isjunk is True. + # bpopular + # nonjunk items in b treated as junk by the heuristic (if used). 
+ + diff --git a/modules/language/python/module/#json.py# b/modules/language/python/module/#json.py# new file mode 100644 index 0000000..93a7b1c --- /dev/null +++ b/modules/language/python/module/#json.py# @@ -0,0 +1,369 @@ +module(json) + +r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format. + +:mod:`json` exposes an API familiar to users of the standard library +:mod:`marshal` and :mod:`pickle` modules. It is derived from a +version of the externally maintained simplejson library. + +Encoding basic Python object hierarchies:: + + >>> import json + >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) + '["foo", {"bar": ["baz", null, 1.0, 2]}]' + >>> print(json.dumps("\"foo\bar")) + "\"foo\bar" + >>> print(json.dumps('\u1234')) + "\u1234" + >>> print(json.dumps('\\')) + "\\" + >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)) + {"a": 0, "b": 0, "c": 0} + >>> from io import StringIO + >>> io = StringIO() + >>> json.dump(['streaming API'], io) + >>> io.getvalue() + '["streaming API"]' + +Compact encoding:: + + >>> import json + >>> from collections import OrderedDict + >>> mydict = OrderedDict([('4', 5), ('6', 7)]) + >>> json.dumps([1,2,3,mydict], separators=(',', ':')) + '[1,2,3,{"4":5,"6":7}]' + +Pretty printing:: + + >>> import json + >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)) + { + "4": 5, + "6": 7 + } + +Decoding JSON:: + + >>> import json + >>> obj = ['foo', {'bar': ['baz', None, 1.0, 2]}] + >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj + True + >>> json.loads('"\\"foo\\bar"') == '"foo\x08ar' + True + >>> from io import StringIO + >>> io = StringIO('["streaming API"]') + >>> json.load(io)[0] == 'streaming API' + True + +Specializing JSON object decoding:: + + >>> import json + >>> def as_complex(dct): + ... if '__complex__' in dct: + ... return complex(dct['real'], dct['imag']) + ... return dct + ... + >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', + ... object_hook=as_complex) + (1+2j) + >>> from decimal import Decimal + >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1') + True + +Specializing JSON object encoding:: + + >>> import json + >>> def encode_complex(obj): + ... if isinstance(obj, complex): + ... return [obj.real, obj.imag] + ... raise TypeError(repr(obj) + " is not JSON serializable") + ... 
+ >>> json.dumps(2 + 1j, default=encode_complex) + '[2.0, 1.0]' + >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) + '[2.0, 1.0]' + >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) + '[2.0, 1.0]' + + +Using json.tool from the shell to validate and pretty-print:: + + $ echo '{"json":"obj"}' | python -m json.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m json.tool + Expecting property name enclosed in double quotes: line 1 column 3 (char 2) +""" +__version__ = '2.0.9' +__all__ = [ + 'dump', 'dumps', 'load', 'loads', + 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', +] + +__author__ = 'Bob Ippolito <bob@redivi.com>' + +from json.decoder import JSONDecoder, JSONDecodeError +from json.encoder import JSONEncoder +import codecs + +_default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + default=None, +) + +def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + default=None, sort_keys=False, **kw): + """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a + ``.write()``-supporting file-like object). + + If ``skipkeys`` is true then ``dict`` keys that are not basic types + (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped + instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the strings written to ``fp`` can + contain non-ASCII characters if they appear in strings contained in + ``obj``. Otherwise, all such characters are escaped in JSON strings. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) + in strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. + + If specified, ``separators`` should be an ``(item_separator, key_separator)`` + tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and + ``(',', ': ')`` otherwise. To get the most compact JSON representation, + you should specify ``(',', ':')`` to eliminate whitespace. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + If *sort_keys* is true (default: ``False``), then the output of + dictionaries will be sorted by key. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. 
+ + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + default is None and not sort_keys and not kw): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, + default=default, sort_keys=sort_keys, **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + + +def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + default=None, sort_keys=False, **kw): + """Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is true then ``dict`` keys that are not basic types + (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped + instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the return value can contain non-ASCII + characters if they appear in strings contained in ``obj``. Otherwise, all + such characters are escaped in JSON strings. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. + + If specified, ``separators`` should be an ``(item_separator, key_separator)`` + tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and + ``(',', ': ')`` otherwise. To get the most compact JSON representation, + you should specify ``(',', ':')`` to eliminate whitespace. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + If *sort_keys* is true (default: ``False``), then the output of + dictionaries will be sorted by key. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. 
+ + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + default is None and not sort_keys and not kw): + return _default_encoder.encode(obj) + if cls is None: + cls = JSONEncoder + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, default=default, sort_keys=sort_keys, + **kw).encode(obj) + + +_default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None) + + +def detect_encoding(b): + bstartswith = b.startswith + if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)): + return 'utf-32' + if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)): + return 'utf-16' + if bstartswith(codecs.BOM_UTF8): + return 'utf-8-sig' + + if len(b) >= 4: + if not b[0]: + # 00 00 -- -- - utf-32-be + # 00 XX -- -- - utf-16-be + return 'utf-16-be' if b[1] else 'utf-32-be' + if not b[1]: + # XX 00 00 00 - utf-32-le + # XX 00 00 XX - utf-16-le + # XX 00 XX -- - utf-16-le + return 'utf-16-le' if b[2] or b[3] else 'utf-32-le' + elif len(b) == 2: + if not b[0]: + # 00 XX - utf-16-be + return 'utf-16-be' + if not b[1]: + # XX 00 - utf-16-le + return 'utf-16-le' + # default + return 'utf-8' + + +def load(fp, *, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): + """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + ``object_pairs_hook`` is an optional function that will be called with the + result of any object literal decoded with an ordered list of pairs. The + return value of ``object_pairs_hook`` will be used instead of the ``dict``. + This feature can be used to implement custom decoders that rely on the + order that the key and value pairs are decoded (for example, + collections.OrderedDict will remember the order of insertion). If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg; otherwise ``JSONDecoder`` is used. + + """ + return loads(fp.read(), + cls=cls, object_hook=object_hook, + parse_float=parse_float, parse_int=parse_int, + parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw) + + +def loads(s, *, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): + """Deserialize ``s`` (a ``str``, ``bytes`` or ``bytearray`` instance + containing a JSON document) to a Python object. + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + ``object_pairs_hook`` is an optional function that will be called with the + result of any object literal decoded with an ordered list of pairs. The + return value of ``object_pairs_hook`` will be used instead of the ``dict``. 
+ This feature can be used to implement custom decoders that rely on the + order that the key and value pairs are decoded (for example, + collections.OrderedDict will remember the order of insertion). If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. + + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg; otherwise ``JSONDecoder`` is used. + + The ``encoding`` argument is ignored and deprecated. + + """ + if isinstance(s, str): + if s.startswith('\ufeff'): + raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)", + s, 0) + else: + if not isinstance(s, (bytes, bytearray)): + raise TypeError('the JSON object must be str, bytes or bytearray, ' + 'not {!r}'.format(s.__class__.__name__)) + s = s.decode(detect_encoding(s), 'surrogatepass') + + if (cls is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and object_pairs_hook is None and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if object_pairs_hook is not None: + kw['object_pairs_hook'] = object_pairs_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + return cls(**kw).decode(s) diff --git a/modules/language/python/module/#string.scm# b/modules/language/python/module/#string.scm# new file mode 100644 index 0000000..3255d99 --- /dev/null +++ b/modules/language/python/module/#string.scm# @@ -0,0 +1,411 @@ +(define-module (language python module string) + #:use-module (oop pf-objects) + #:use-module (oop goops) + #:use-module (ice-9 match) + #:use-module (language python number) + #:use-module (language python exceptions) + #:use-module (language python yield) + #:use-module (language python list) + #:use-module (language python for) + #:use-module (language python def) + #:use-module (language python string) + #:use-module (language python bytes) + #:use-module ((parser stis-parser) #:select (*whitespace* f-n f-m)) + #:use-module (parser stis-parser lang python3 tool) + #:export (Formatter ascii_letters digits hexdigits)) + +(define digits "0123456789") +(define ascii_letters "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") +(define hexdigits "0123456789abcdefABCDEF") + +(define (repr x) ((@ (guile) format) #f "~a" x)) +(define int (mk-token (f+ (f-reg! "[0-9]")) string->number)) +(define id (mk-token (f-seq (f-reg! "[_a-zA-Z]") + (f* (f-reg! "[_0-9a-zA-Z]"))))) +(define str (mk-token (f+ (f-not! (f-char #\[))))) + +(define conversion (mk-token (f-reg "[rsa]"))) + +(define fill (mk-token (f-reg! "."))) +(define align (mk-token (f-reg! "[<>=^]"))) +(define sign (mk-token (f-reg! 
"[-+ ]"))) +(define width int) +(define precision int) +(define type (mk-token (f-reg! "[bcdeEfFgGnosxX%]"))) +(define formatSpec + (f-list + (gg? (f-list #:align (gg? fill) align)) + (gg? sign) + (gg? (mk-token (f-tag! "#"))) + (gg? (mk-token (f-tag! "0"))) + (gg? width) + (gg? (mk-token (f-tag ","))) + (gg? (f-seq "." precision)) + (gg? type))) + +(define (get-align s align width sign) + (define widthq (- width (len sign))) + (define (f s a l) + (match a + ("<" (apply py-ljust (+ sign s) width l)) + (">" (apply py-rjust (+ sign s) width l)) + ("^" (apply py-center (+ sign s) width l)) + ("=" (+ sign (apply py-rjust s widthq l))))) + + (match align + (#f + (f s "<" '())) + ((_ #f a) + (f s a '())) + ((_ fill a) + (f s a (list fill))))) + +(define (convert-string format-str s) + (match (with-fluids ((*whitespace* f-true)) + (stis-parse format-str (f-seq formatSpec f-eof))) + ((align sign sharp zero width comma rec type) + (if width + (get-align s align width "") + s)) + (_ (raise (ValueError (+ "wrong format " format-str)))))) + +(set! (@@ (language python string) format) + (lambda (f s) + (py-format s f))) + +(define (gen-sign s sign) + (let lp ((sign sign)) + (match sign + (#f (lp "-")) + ("+" (if (< s 0) + (values (- s) "-") + (values s "+"))) + ("-" (if (< s 0) + (values (- s) "-") + (values s ""))) + (" " (if (< s 0) + (values (- s) "-") + (values s " ")))))) + +(define (convert-float s format-str) + (match (with-fluids ((*whitespace* f-true)) + (stis-parse format-str (f-seq formatSpec f-eof))) + ((align sign sharp zero width comma prec type) + (call-with-values (lambda () (gen-sign s sign)) + (lambda (s s-sign) + (let* ((prec (if prec prec 6)) + (s (let lp ((type type)) + (match type + (#f (lp "g")) + + ("f" + (format #f (+ "~," (number->string prec) "f") s)) + + ("F" + (let ((s (format #f (+ "~," (number->string prec) + "f") + s))) + (py-replace + (py-replace s "nan" "NAN") + "inf" "INF"))) + + ("e" + (py-replace + (format #f (+ "~," (number->string prec) "e") s) + "E" "e")) + + ("E" + (format #f (+ "~," (number->string prec) "e") s)) + + ("g" + (let ((exp (log10 (abs s)))) + (if (and (<= -4 exp) + (<= exp (max 1 prec))) + (lp "f") + (lp "e")))) + ("G" + (let ((exp (log10 (abs s)))) + (if (and (<= -4 exp) + (<= exp (max 1 prec))) + (lp "F") + (lp "E")))) + ("n" + (let ((exp (log10 (abs s)))) + (if (and (<= -4 exp) + (<= exp (max 1 prec))) + (lp "f") + (format #f (+ "~," (number->string prec) "h") + s)))) + + ("%" + (set s (* s 100)) + (+ (lp "f") "%")))))) + + (if width + (if zero + (get-align s '(#:align "0" "=") width + s-sign) + (get-align s align width + s-sign)) + + (+ s-sign s)))))))) + +(define (convert-complex s format-str) + (match (with-fluids ((*whitespace* f-true)) + (stis-parse format-str (f-seq formatSpec f-eof))) + ((align sign sharp zero width comma prec type) + (let* ((prec (if prec prec 6)) + (s (let lp ((type type)) + (match type + (#f (lp "f")) + ("f" + (format #f (+ "~," (number->string prec) "i") s)))))) + (if width + (get-align s align width "") + s))))) + + +(define-method (py-format (s <real>) f) + (convert-float s f)) +(define-method (py-format (s <py-float>) f) + (convert-float s f)) + +(define-method (py-format (s <complex>) f) + (convert-complex s f)) +(define-method (py-format (s <py-complex>) f) + (convert-complex s f)) + + + + + +(define (convert-integer s format-str) + (match (with-fluids ((*whitespace* f-true)) + (stis-parse format-str (f-seq formatSpec f-eof))) + ((align sign sharp zero width comma prec type) + (call-with-values (lambda () 
(gen-sign s sign)) + (lambda (s s-sign) + (let ((prefix (if sharp + (match type + ("b" "0b") + ("x" "0x") + ("X" "0X") + ("o" "0o") + ("d" "") + (#f "")) + "")) + (s (let lp ((type type)) + (match type + ("b" + (if comma + (format #f "~:b" s) + (format #f "~b" s))) + ("x" + (if comma + (format #f "~:x" s) + (format #f "~x" s))) + ("X" + (if comma + (format #f "~:@(~:x~)" s) + (format #f "~:@(~x~)" s))) + ("o" + (if comma + (format #f "~:o" s) + (format #f "~o" s))) + ("d" + (if comma + (format #f "~:d" s) + (format #f "~d" s))) + (#f + (lp "d")))))) + (if width + (if zero + (get-align s '(#:align "0" "=") width + (+ s-sign prefix)) + (get-align (+ prefix s) align width + s-sign)) + + (+ s-sign prefix s)))))))) + +(define-method (py-format (s <integer>) f) + (convert-integer s f)) + +(define-method (py-format (o <py-int>) f) + (convert-integer (slot-ref o 'x) f)) + +(define argName (f-or! id int)) +(define attributeName id) +(define elementIndex (f-or! int str)) + +(define fieldName + (f-cons argName (ff* (f-or! (f-list #:attr "." attributeName) + (f-list #:elem "[" elementIndex "]"))))) + +(define (replField fieldName1) + (f-list + #:field + (ff? fieldName1 None) + (ff? (f-seq "!" (mk-token (f-scope conversion))) None) + (ff? (f-seq ":" (mk-token (f-scope formatSpec))) None))) + +(define (tag fieldName1) + (f-seq (f-tag "{") (replField fieldName1) (f-tag "}"))) + +(define nontag (f-list #:str + (mk-token (f+ + (f-or! + (f-tag! "{{") + (f-not! (tag (mk-token + (f-scope + fieldName))))))))) + +(define e (f-seq (ff* (f-or! (tag (mk-token (f-scope fieldName))) + nontag)) + f-eof)) + +(set! (@@ (parser stis-parser lang python3-parser) f-formatter) tag) + +(define mk-gen + (make-generator (l) + (lambda (yield l) + (let lp ((u l) (i 0)) + (match u + (() + (yield "" None None None)) + (((#:str str)) + (yield str None None None)) + (((#:field a b c)) + (if (eq? a None) + (yield "" (number->string i) c b) + (yield "" a b c))) + (((#:field a b c) . u) + (if (eq? a None) + (begin + (yield "" (number->string i) c b) + (lp u (+ i 1))) + (begin + (yield "" a b c) + (lp u i)))) + (((#:str s) (#:field a b c) . u) + (if (eq? a None) + (begin + (yield s (number->string i) c b) + (lp u (+ i 1))) + (begin + (yield s a c b) + (lp u i))))))))) + +(define (f-parse str) + (let ((l (with-fluids ((*whitespace* f-true)) + (parse str e)))) + (mk-gen l))) + +(define stis-parse parse) + +(define-python-class Formatter () + (define format + (lam (self format_string (* args) (** kwargs)) + ((ref self 'vformat) format_string args kwargs))) + + (define vformat2 + (lambda (self fn2 co fo) + (if (and (eq? fo None) (eq? co None)) + ((ref self 'convert_field) fn2 "r") + (let ((fn3 (if (eq? co None) + fn2 + ((ref self 'convert_field) + fn2 co)))) + (if (eq? fo None) + fn3 + ((ref self 'format_field ) fn3 fo)))))) + + (define vformat1 + (lambda (self s fn fo co ss args kwargs) + (if (eq? fn None) + (cons s ss) + (let* ((fn2 ((ref self 'get_field ) fn args kwargs)) + (fn3 (if (and (eq? fo None) (eq? co None)) + ((ref self 'convert_field) fn2 "r") + (let ((fn3 (if (eq? co None) + fn2 + ((ref self 'convert_field) + fn2 co)))) + (if (eq? 
fo None) + fn3 + ((ref self 'format_field ) + fn3 fo)))))) + (cons* fn3 s ss))))) + + (define vformat + (lambda (self format_string args kwargs) + (set self '_args '()) + (for ((s fn fo co : ((ref self 'parse) format_string))) ((ss '(""))) + (vformat1 self s fn fo co ss args kwargs) + #:final + (begin + ((ref self 'check_unused_args) (ref self '_args) args kwargs) + (apply string-append (reverse ss)))))) + + (define parse + (lambda (self format_string) + (f-parse format_string))) + + (define get_field + (lambda (self field_name args kwargs) + (match (with-fluids ((*whitespace* f-true)) + (stis-parse field_name fieldName)) + ((key a ...) + (set self '_args (cons key (ref self '_args))) + (let ((f ((ref self 'get_value) key args kwargs))) + (let lp ((a a) (f f)) + (match a + (((#:ref r) . l) + (lp l (ref f (string->symbol r)))) + (((#:elem k) . l) + (lp l (pylist-ref f k))) + (() + f))))) + (_ + (throw (TypeError (+ "wrong field name format" field_name))))))) + + (define get_value + (lambda (self key args kwargs) + (set self '__args (cons key args)) + (if (integer? key) + (pylist-ref args key) + (pylist-ref kwargs key)))) + + (define check_unused_args + (lambda (self used_args args kwargs) + (let ((n (len args))) + (let lp ((i 0)) + (if (< i n) + (if (member i used_args) + (lp (+ i 1)) + (warn "unused arg" i))))) + (for ((k v : kwargs)) () + (if (not (member k used_args)) + (warn "unused arg" k))))) + + + (define format_field + (lambda (self value format_spec) + (py-format value format_spec))) + + (define convert_field + (lambda (self value conversion) + (cond + ((equal? conversion "s") + (str value)) + ((equal? conversion "r") + (repr value)) + ((equal? conversion "a") + (ascii value)) + (else + (throw (TypeError (+ "conversion " conversion)))))))) + +(define (ascii x) (bytes x)) + +(define formatter (Formatter)) +(set! (@@ (language python string) formatter) formatter) +(set! (@@ (language python compile) formatter) (ref formatter 'vformat2)) diff --git a/modules/language/python/module/#textwrap.py# b/modules/language/python/module/#textwrap.py# new file mode 100644 index 0000000..150e3f9 --- /dev/null +++ b/modules/language/python/module/#textwrap.py# @@ -0,0 +1,479 @@ +module(textwrap) + +"""Text wrapping and filling. +""" + +# Copyright (C) 1999-2001 Gregory P. Ward. +# Copyright (C) 2002, 2003 Python Software Foundation. +# Written by Greg Ward <gward@python.net> + +import re + +__all__ = ['wrap', 'TextWrapper', 'fill', 'dedent', 'indent', 'shorten'] + +# Hardcode the recognized whitespace characters to the US-ASCII +# whitespace characters. The main reason for doing this is that +# some Unicode spaces (like \u00a0) are non-breaking whitespaces. +_whitespace = '\t\n\x0b\x0c\r ' + + +class TextWrapper: + """ + Object for wrapping/filling text. The public interface consists of + the wrap() and fill() methods; the other methods are just there for + subclasses to override in order to tweak the default behaviour. + If you want to completely replace the main wrapping algorithm, + you'll probably have to override _wrap_chunks(). + + Several instance attributes control various aspects of wrapping: + width (default: 70) + the maximum width of wrapped lines (unless break_long_words + is false) + initial_indent (default: "") + string that will be prepended to the first line of wrapped + output. Counts towards the line's width. + subsequent_indent (default: "") + string that will be prepended to all lines save the first + of wrapped output; also counts towards each line's width. 
+ expand_tabs (default: true) + Expand tabs in input text to spaces before further processing. + Each tab will become 0 .. 'tabsize' spaces, depending on its position + in its line. If false, each tab is treated as a single character. + tabsize (default: 8) + Expand tabs in input text to 0 .. 'tabsize' spaces, unless + 'expand_tabs' is false. + replace_whitespace (default: true) + Replace all whitespace characters in the input text by spaces + after tab expansion. Note that if expand_tabs is false and + replace_whitespace is true, every tab will be converted to a + single space! + fix_sentence_endings (default: false) + Ensure that sentence-ending punctuation is always followed + by two spaces. Off by default because the algorithm is + (unavoidably) imperfect. + break_long_words (default: true) + Break words longer than 'width'. If false, those words will not + be broken, and some lines might be longer than 'width'. + break_on_hyphens (default: true) + Allow breaking hyphenated words. If true, wrapping will occur + preferably on whitespaces and right after hyphens part of + compound words. + drop_whitespace (default: true) + Drop leading and trailing whitespace from lines. + max_lines (default: None) + Truncate wrapped lines. + placeholder (default: ' [...]') + Append to the last line of truncated text. + """ + + unicode_whitespace_trans = {} + uspace = ord(' ') + for x in _whitespace: + unicode_whitespace_trans[ord(x)] = uspace + + # This funky little regex is just the trick for splitting + # text up into word-wrappable chunks. E.g. + # "Hello there -- you goof-ball, use the -b option!" + # splits into + # Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option! + # (after stripping out empty strings). + word_punct = r'[\w!"\'&.,?]' + letter = r'[^\d\W]' + whitespace = r'[%s]' % re.escape(_whitespace) + nowhitespace = '[^' + whitespace[1:] + wordsep_re = re.compile(r''' + ( # any whitespace + %(ws)s+ + | # em-dash between words + (?<=%(wp)s) -{2,} (?=\w) + | # word, possibly hyphenated + %(nws)s+? (?: + # hyphenated word + -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-)) + (?= %(lt)s -? %(lt)s) + | # end of word + (?=%(ws)s|\Z) + | # em-dash + (?<=%(wp)s) (?=-{2,}\w) + ) + )''' % {'wp': word_punct, 'lt': letter, + 'ws': whitespace, 'nws': nowhitespace}, + re.VERBOSE) + del word_punct, letter, nowhitespace + + # This less funky little regex just split on recognized spaces. E.g. + # "Hello there -- you goof-ball, use the -b option!" + # splits into + # Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/ + wordsep_simple_re = re.compile(r'(%s+)' % whitespace) + del whitespace + + # XXX this is not locale- or charset-aware -- string.lowercase + # is US-ASCII only (and therefore English-only) + sentence_end_re = re.compile(r'[a-z]' # lowercase letter + r'[\.\!\?]' # sentence-ending punct. + r'[\"\']?' 
# optional end-of-quote + r'\Z') # end of chunk + + def __init__(self, + width=70, + initial_indent="", + subsequent_indent="", + expand_tabs=True, + replace_whitespace=True, + fix_sentence_endings=False, + break_long_words=True, + drop_whitespace=True, + break_on_hyphens=True, + tabsize=8, + *, + max_lines=None, + placeholder=' [...]'): + self.width = width + self.initial_indent = initial_indent + self.subsequent_indent = subsequent_indent + self.expand_tabs = expand_tabs + self.replace_whitespace = replace_whitespace + self.fix_sentence_endings = fix_sentence_endings + self.break_long_words = break_long_words + self.drop_whitespace = drop_whitespace + self.break_on_hyphens = break_on_hyphens + self.tabsize = tabsize + self.max_lines = max_lines + self.placeholder = placeholder + + + # -- Private methods ----------------------------------------------- + # (possibly useful for subclasses to override) + + def _munge_whitespace(self, text): + """_munge_whitespace(text : string) -> string + + Munge whitespace in text: expand tabs and convert all other + whitespace characters to spaces. Eg. " foo\\tbar\\n\\nbaz" + becomes " foo bar baz". + """ + if self.expand_tabs: + text = text.expandtabs(self.tabsize) + if self.replace_whitespace: + text = text.translate(self.unicode_whitespace_trans) + return text + + + def _split(self, text): + """_split(text : string) -> [string] + + Split the text to wrap into indivisible chunks. Chunks are + not quite the same as words; see _wrap_chunks() for full + details. As an example, the text + Look, goof-ball -- use the -b option! + breaks into the following chunks: + 'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ', + 'use', ' ', 'the', ' ', '-b', ' ', 'option!' + if break_on_hyphens is True, or in: + 'Look,', ' ', 'goof-ball', ' ', '--', ' ', + 'use', ' ', 'the', ' ', '-b', ' ', option!' + otherwise. + """ + if self.break_on_hyphens is True: + chunks = self.wordsep_re.split(text) + else: + chunks = self.wordsep_simple_re.split(text) + chunks = [c for c in chunks if c] + return chunks + + def _fix_sentence_endings(self, chunks): + """_fix_sentence_endings(chunks : [string]) + + Correct for sentence endings buried in 'chunks'. Eg. when the + original text contains "... foo.\\nBar ...", munge_whitespace() + and split() will convert that to [..., "foo.", " ", "Bar", ...] + which has one too few spaces; this method simply changes the one + space to two. + """ + i = 0 + patsearch = self.sentence_end_re.search + while i < len(chunks)-1: + if chunks[i+1] == " " and patsearch(chunks[i]): + chunks[i+1] = " " + i += 2 + else: + i += 1 + + def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): + """_handle_long_word(chunks : [string], + cur_line : [string], + cur_len : int, width : int) + + Handle a chunk of text (most likely a word, not whitespace) that + is too long to fit in any line. + """ + # Figure out when indent is larger than the specified width, and make + # sure at least one character is stripped off on every pass + if width < 1: + space_left = 1 + else: + space_left = width - cur_len + + # If we're allowed to break long words, then do so: put as much + # of the next chunk onto the current line as will fit. + if self.break_long_words: + cur_line.append(reversed_chunks[-1][:space_left]) + reversed_chunks[-1] = reversed_chunks[-1][space_left:] + + # Otherwise, we have to preserve the long word intact. Only add + # it to the current line if there's nothing already there -- + # that minimizes how much we violate the width constraint. 
+ elif not cur_line: + cur_line.append(reversed_chunks.pop()) + + # If we're not allowed to break long words, and there's already + # text on the current line, do nothing. Next time through the + # main loop of _wrap_chunks(), we'll wind up here again, but + # cur_len will be zero, so the next line will be entirely + # devoted to the long word that we can't handle right now. + + def _wrap_chunks(self, chunks): + """_wrap_chunks(chunks : [string]) -> [string] + + Wrap a sequence of text chunks and return a list of lines of + length 'self.width' or less. (If 'break_long_words' is false, + some lines may be longer than this.) Chunks correspond roughly + to words and the whitespace between them: each chunk is + indivisible (modulo 'break_long_words'), but a line break can + come between any two chunks. Chunks should not have internal + whitespace; ie. a chunk is either all whitespace or a "word". + Whitespace chunks will be removed from the beginning and end of + lines, but apart from that whitespace is preserved. + """ + lines = [] + if self.width <= 0: + raise ValueError("invalid width %r (must be > 0)" % self.width) + if self.max_lines is not None: + if self.max_lines > 1: + indent = self.subsequent_indent + else: + indent = self.initial_indent + if len(indent) + len(self.placeholder.lstrip()) > self.width: + raise ValueError("placeholder too large for max width") + + # Arrange in reverse order so items can be efficiently popped + # from a stack of chucks. + chunks.reverse() + + while chunks: + # Start the list of chunks that will make up the current line. + # cur_len is just the length of all the chunks in cur_line. + cur_line = [] + cur_len = 0 + + # Figure out which static string will prefix this line. + if lines: + indent = self.subsequent_indent + else: + indent = self.initial_indent + + # Maximum width for this line. + width = self.width - len(indent) + + # First chunk on line is whitespace -- drop it, unless this + # is the very beginning of the text (ie. no lines started yet). + if self.drop_whitespace and chunks[-1].strip() == '' and lines: + del chunks[-1] + + while chunks: + l = len(chunks[-1]) + + # Can at least squeeze this chunk onto the current line. + if cur_len + l <= width: + cur_line.append(chunks.pop()) + cur_len += l + + # Nope, this line is full. + else: + break + + # The current line is full, and the next chunk is too big to + # fit on *any* line (not just this one). + if chunks and len(chunks[-1]) > width: + self._handle_long_word(chunks, cur_line, cur_len, width) + cur_len = sum(map(len, cur_line)) + + # If the last chunk on this line is all whitespace, drop it. + if self.drop_whitespace and cur_line and cur_line[-1].strip() == '': + cur_len -= len(cur_line[-1]) + del cur_line[-1] + + if cur_line: + if (self.max_lines is None or + len(lines) + 1 < self.max_lines or + (not chunks or + self.drop_whitespace and + len(chunks) == 1 and + not chunks[0].strip()) and cur_len <= width): + # Convert current line back to a string and store it in + # list of all lines (return value). 
+ lines.append(indent + ''.join(cur_line)) + else: + while cur_line: + if (cur_line[-1].strip() and + cur_len + len(self.placeholder) <= width): + cur_line.append(self.placeholder) + lines.append(indent + ''.join(cur_line)) + break + cur_len -= len(cur_line[-1]) + del cur_line[-1] + else: + if lines: + prev_line = lines[-1].rstrip() + if (len(prev_line) + len(self.placeholder) <= + self.width): + lines[-1] = prev_line + self.placeholder + break + lines.append(indent + self.placeholder.lstrip()) + break + return lines + + def _split_chunks(self, text): + text = self._munge_whitespace(text) + return self._split(text) + + # -- Public interface ---------------------------------------------- + + def wrap(self, text): + """wrap(text : string) -> [string] + + Reformat the single paragraph in 'text' so it fits in lines of + no more than 'self.width' columns, and return a list of wrapped + lines. Tabs in 'text' are expanded with string.expandtabs(), + and all other whitespace characters (including newline) are + converted to space. + """ + chunks = self._split_chunks(text) + + if self.fix_sentence_endings: + self._fix_sentence_endings(chunks) + + return self._wrap_chunks(chunks) + + def fill(self, text): + """fill(text : string) -> string + + Reformat the single paragraph in 'text' to fit in lines of no + more than 'self.width' columns, and return a new string + containing the entire wrapped paragraph. + """ + return "\n".join(self.wrap(text)) + +# -- Convenience interface --------------------------------------------- + +def wrap(text, width=70, **kwargs): + """Wrap a single paragraph of text, returning a list of wrapped lines. + + Reformat the single paragraph in 'text' so it fits in lines of no + more than 'width' columns, and return a list of wrapped lines. By + default, tabs in 'text' are expanded with string.expandtabs(), and + all other whitespace characters (including newline) are converted to + space. See TextWrapper class for available keyword args to customize + wrapping behaviour. + """ + w = TextWrapper(width=width, **kwargs) + return w.wrap(text) + +def fill(text, width=70, **kwargs): + """Fill a single paragraph of text, returning a new string. + + Reformat the single paragraph in 'text' to fit in lines of no more + than 'width' columns, and return a new string containing the entire + wrapped paragraph. As with wrap(), tabs are expanded and other + whitespace characters converted to space. See TextWrapper class for + available keyword args to customize wrapping behaviour. + """ + w = TextWrapper(width=width, **kwargs) + return w.fill(text) + +def shorten(text, width, **kwargs): + """Collapse and truncate the given text to fit in the given width. + + The text first has its whitespace collapsed. If it then fits in + the *width*, it is returned as is. Otherwise, as many words + as possible are joined and then the placeholder is appended:: + + >>> textwrap.shorten("Hello world!", width=12) + 'Hello world!' + >>> textwrap.shorten("Hello world!", width=11) + 'Hello [...]' + """ + w = TextWrapper(width=width, max_lines=1, **kwargs) + return w.fill(' '.join(text.strip().split())) + +# -- Loosely related functionality ------------------------------------- + +_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE) +_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE) + +def dedent(text): + """Remove any common leading whitespace from every line in `text`. 
+ + This can be used to make triple-quoted strings line up with the left + edge of the display, while still presenting them in the source code + in indented form. + + Note that tabs and spaces are both treated as whitespace, but they + are not equal: the lines " hello" and "\\thello" are + considered to have no common leading whitespace. (This behaviour is + new in Python 2.5; older versions of this module incorrectly + expanded tabs before searching for common leading whitespace.) + """ + # Look for the longest leading string of spaces and tabs common to + # all lines. + margin = None + text = _whitespace_only_re.sub('', text) + indents = _leading_whitespace_re.findall(text) + + for indent in indents: + if margin is None: + margin = indent + + # Current line more deeply indented than previous winner: + # no change (previous winner is still on top). + elif indent.startswith(margin): + pass + + # Current line consistent with and no deeper than previous winner: + # it's the new winner. + elif margin.startswith(indent): + margin = indent + + + # Find the largest common whitespace between current line and previous + # winner. + else: + for i, (x, y) in enumerate(zip(margin, indent)): + if x != y: + margin = margin[:i] + break + else: + margin = margin[:len(indent)] + + if margin: + text = re.sub(r'(?m)^' + margin, '', text) + return text + + +def indent(text, prefix, predicate=None): + """Adds 'prefix' to the beginning of selected lines in 'text'. + + If 'predicate' is provided, 'prefix' will only be added to the lines + where 'predicate(line)' is True. If 'predicate' is not provided, + it will default to adding 'prefix' to all non-empty lines that do not + consist solely of whitespace characters. + """ + if predicate is None: + def predicate(line): + return line.strip() + + def prefixed_lines(): + for line in text.splitlines(True): + yield (prefix + line if predicate(line) else line) + return ''.join(prefixed_lines()) diff --git a/modules/language/python/module/.#_md5.scm b/modules/language/python/module/.#_md5.scm new file mode 120000 index 0000000..c8d0ed8 --- /dev/null +++ b/modules/language/python/module/.#_md5.scm @@ -0,0 +1 @@ +stis@lapwine.2086:1535196580
\ No newline at end of file diff --git a/modules/language/python/module/.#_sha1.scm b/modules/language/python/module/.#_sha1.scm new file mode 120000 index 0000000..c8d0ed8 --- /dev/null +++ b/modules/language/python/module/.#_sha1.scm @@ -0,0 +1 @@ +stis@lapwine.2086:1535196580
\ No newline at end of file diff --git a/modules/language/python/module/.#_sha256.scm b/modules/language/python/module/.#_sha256.scm new file mode 120000 index 0000000..c8d0ed8 --- /dev/null +++ b/modules/language/python/module/.#_sha256.scm @@ -0,0 +1 @@ +stis@lapwine.2086:1535196580
\ No newline at end of file diff --git a/modules/language/python/module/.#bz2.py b/modules/language/python/module/.#bz2.py new file mode 120000 index 0000000..c8d0ed8 --- /dev/null +++ b/modules/language/python/module/.#bz2.py @@ -0,0 +1 @@ +stis@lapwine.2086:1535196580
\ No newline at end of file diff --git a/modules/language/python/module/_python.scm b/modules/language/python/module/_python.scm index 6db20a2..e950496 100644 --- a/modules/language/python/module/_python.scm +++ b/modules/language/python/module/_python.scm @@ -429,3 +429,6 @@ (pf-set self 'y y) (pf-set self 'z z)))) + +(set! (@@ (language python module) m?) + (lambda (x) (isinstance x (@@ (language python module) Module)))) diff --git a/modules/language/python/module/a.py b/modules/language/python/module/a.py new file mode 100644 index 0000000..e30218b --- /dev/null +++ b/modules/language/python/module/a.py @@ -0,0 +1,4 @@ +module(a) + +from . import a +from .x import a diff --git a/modules/language/python/module/a.scm b/modules/language/python/module/a.scm new file mode 100644 index 0000000..b026de1 --- /dev/null +++ b/modules/language/python/module/a.scm @@ -0,0 +1,4 @@ +(define-module (language python module a) + #:export (a)) + +(define a 1) diff --git a/modules/language/python/module/b.py b/modules/language/python/module/b.py new file mode 100644 index 0000000..2f5a238 --- /dev/null +++ b/modules/language/python/module/b.py @@ -0,0 +1,6 @@ +module(b) +export(x) + +x=1+3 + +print(x) diff --git a/modules/language/python/module/bz2.py b/modules/language/python/module/bz2.py new file mode 100644 index 0000000..6f56328 --- /dev/null +++ b/modules/language/python/module/bz2.py @@ -0,0 +1,361 @@ +"""Interface to the libbzip2 compression library. + +This module provides a file interface, classes for incremental +(de)compression, and functions for one-shot (de)compression. +""" + +__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor", + "open", "compress", "decompress"] + +__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>" + +from builtins import open as _builtin_open +import io +import os +import warnings +import _compression + +try: + from threading import RLock +except ImportError: + from dummy_threading import RLock + +from _bz2 import BZ2Compressor, BZ2Decompressor + + +_MODE_CLOSED = 0 +_MODE_READ = 1 +# Value 2 no longer used +_MODE_WRITE = 3 + + +class BZ2File(_compression.BaseStream): + + """A file object providing transparent bzip2 (de)compression. + + A BZ2File can act as a wrapper for an existing file object, or refer + directly to a named file on disk. + + Note that BZ2File provides a *binary* file interface - data read is + returned as bytes, and data to be written should be given as bytes. + """ + + def __init__(self, filename, mode="r", buffering=None, compresslevel=9): + """Open a bzip2-compressed file. + + If filename is a str, bytes, or PathLike object, it gives the + name of the file to be opened. Otherwise, it should be a file + object, which will be used to read or write the compressed data. + + mode can be 'r' for reading (default), 'w' for (over)writing, + 'x' for creating exclusively, or 'a' for appending. These can + equivalently be given as 'rb', 'wb', 'xb', and 'ab'. + + buffering is ignored. Its use is deprecated. + + If mode is 'w', 'x' or 'a', compresslevel can be a number between 1 + and 9 specifying the level of compression: 1 produces the least + compression, and 9 (default) produces the most compression. + + If mode is 'r', the input file may be the concatenation of + multiple compressed streams. + """ + # This lock must be recursive, so that BufferedIOBase's + # writelines() does not deadlock. 
+ self._lock = RLock() + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + + if buffering is not None: + warnings.warn("Use of 'buffering' argument is deprecated", + DeprecationWarning) + + if not (1 <= compresslevel <= 9): + raise ValueError("compresslevel must be between 1 and 9") + + if mode in ("", "r", "rb"): + mode = "rb" + mode_code = _MODE_READ + elif mode in ("w", "wb"): + mode = "wb" + mode_code = _MODE_WRITE + self._compressor = BZ2Compressor(compresslevel) + elif mode in ("x", "xb"): + mode = "xb" + mode_code = _MODE_WRITE + self._compressor = BZ2Compressor(compresslevel) + elif mode in ("a", "ab"): + mode = "ab" + mode_code = _MODE_WRITE + self._compressor = BZ2Compressor(compresslevel) + else: + raise ValueError("Invalid mode: %r" % (mode,)) + + if isinstance(filename, (str, bytes, os.PathLike)): + self._fp = _builtin_open(filename, mode) + self._closefp = True + self._mode = mode_code + elif hasattr(filename, "read") or hasattr(filename, "write"): + self._fp = filename + self._mode = mode_code + else: + raise TypeError("filename must be a str, bytes, file or PathLike object") + + if self._mode == _MODE_READ: + raw = _compression.DecompressReader(self._fp, + BZ2Decompressor, trailing_error=OSError) + self._buffer = io.BufferedReader(raw) + else: + self._pos = 0 + + def close(self): + """Flush and close the file. + + May be called more than once without error. Once the file is + closed, any other operation on it will raise a ValueError. + """ + with self._lock: + if self._mode == _MODE_CLOSED: + return + try: + if self._mode == _MODE_READ: + self._buffer.close() + elif self._mode == _MODE_WRITE: + self._fp.write(self._compressor.flush()) + self._compressor = None + finally: + try: + if self._closefp: + self._fp.close() + finally: + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + self._buffer = None + + @property + def closed(self): + """True if this file is closed.""" + return self._mode == _MODE_CLOSED + + def fileno(self): + """Return the file descriptor for the underlying file.""" + self._check_not_closed() + return self._fp.fileno() + + def seekable(self): + """Return whether the file supports seeking.""" + return self.readable() and self._buffer.seekable() + + def readable(self): + """Return whether the file was opened for reading.""" + self._check_not_closed() + return self._mode == _MODE_READ + + def writable(self): + """Return whether the file was opened for writing.""" + self._check_not_closed() + return self._mode == _MODE_WRITE + + def peek(self, n=0): + """Return buffered data without advancing the file position. + + Always returns at least one byte of data, unless at EOF. + The exact number of bytes returned is unspecified. + """ + with self._lock: + self._check_can_read() + # Relies on the undocumented fact that BufferedReader.peek() + # always returns at least one byte (except at EOF), independent + # of the value of n + return self._buffer.peek(n) + + def read(self, size=-1): + """Read up to size uncompressed bytes from the file. + + If size is negative or omitted, read until EOF is reached. + Returns b'' if the file is already at EOF. + """ + with self._lock: + self._check_can_read() + return self._buffer.read(size) + + def read1(self, size=-1): + """Read up to size uncompressed bytes, while trying to avoid + making multiple reads from the underlying stream. Reads up to a + buffer's worth of data if size is negative. + + Returns b'' if the file is at EOF. 
+ """ + with self._lock: + self._check_can_read() + if size < 0: + size = io.DEFAULT_BUFFER_SIZE + return self._buffer.read1(size) + + def readinto(self, b): + """Read bytes into b. + + Returns the number of bytes read (0 for EOF). + """ + with self._lock: + self._check_can_read() + return self._buffer.readinto(b) + + def readline(self, size=-1): + """Read a line of uncompressed bytes from the file. + + The terminating newline (if present) is retained. If size is + non-negative, no more than size bytes will be read (in which + case the line may be incomplete). Returns b'' if already at EOF. + """ + if not isinstance(size, int): + if not hasattr(size, "__index__"): + raise TypeError("Integer argument expected") + size = size.__index__() + with self._lock: + self._check_can_read() + return self._buffer.readline(size) + + def readlines(self, size=-1): + """Read a list of lines of uncompressed bytes from the file. + + size can be specified to control the number of lines read: no + further lines will be read once the total size of the lines read + so far equals or exceeds size. + """ + if not isinstance(size, int): + if not hasattr(size, "__index__"): + raise TypeError("Integer argument expected") + size = size.__index__() + with self._lock: + self._check_can_read() + return self._buffer.readlines(size) + + def write(self, data): + """Write a byte string to the file. + + Returns the number of uncompressed bytes written, which is + always len(data). Note that due to buffering, the file on disk + may not reflect the data written until close() is called. + """ + with self._lock: + self._check_can_write() + compressed = self._compressor.compress(data) + self._fp.write(compressed) + self._pos += len(data) + return len(data) + + def writelines(self, seq): + """Write a sequence of byte strings to the file. + + Returns the number of uncompressed bytes written. + seq can be any iterable yielding byte strings. + + Line separators are not added between the written byte strings. + """ + with self._lock: + return _compression.BaseStream.writelines(self, seq) + + def seek(self, offset, whence=io.SEEK_SET): + """Change the file position. + + The new position is specified by offset, relative to the + position indicated by whence. Values for whence are: + + 0: start of stream (default); offset must not be negative + 1: current stream position + 2: end of stream; offset must not be positive + + Returns the new file position. + + Note that seeking is emulated, so depending on the parameters, + this operation may be extremely slow. + """ + with self._lock: + self._check_can_seek() + return self._buffer.seek(offset, whence) + + def tell(self): + """Return the current file position.""" + with self._lock: + self._check_not_closed() + if self._mode == _MODE_READ: + return self._buffer.tell() + return self._pos + + +def open(filename, mode="rb", compresslevel=9, + encoding=None, errors=None, newline=None): + """Open a bzip2-compressed file in binary or text mode. + + The filename argument can be an actual filename (a str, bytes, or + PathLike object), or an existing file object to read from or write + to. + + The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or + "ab" for binary mode, or "rt", "wt", "xt" or "at" for text mode. + The default mode is "rb", and the default compresslevel is 9. + + For binary mode, this function is equivalent to the BZ2File + constructor: BZ2File(filename, mode, compresslevel). In this case, + the encoding, errors and newline arguments must not be provided. 
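A short sketch of the binary-mode behaviour just described (added for illustration; the file path is only a placeholder):

    import bz2

    # write a compressed file in binary mode, then read it back
    with bz2.open("/tmp/example.bz2", "wb", compresslevel=9) as f:
        f.write(b"hello world\n" * 100)

    with bz2.open("/tmp/example.bz2", "rb") as f:
        assert f.read() == b"hello world\n" * 100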
+ + For text mode, a BZ2File object is created, and wrapped in an + io.TextIOWrapper instance with the specified encoding, error + handling behavior, and line ending(s). + + """ + if "t" in mode: + if "b" in mode: + raise ValueError("Invalid mode: %r" % (mode,)) + else: + if encoding is not None: + raise ValueError("Argument 'encoding' not supported in binary mode") + if errors is not None: + raise ValueError("Argument 'errors' not supported in binary mode") + if newline is not None: + raise ValueError("Argument 'newline' not supported in binary mode") + + bz_mode = mode.replace("t", "") + binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel) + + if "t" in mode: + return io.TextIOWrapper(binary_file, encoding, errors, newline) + else: + return binary_file + + +def compress(data, compresslevel=9): + """Compress a block of data. + + compresslevel, if given, must be a number between 1 and 9. + + For incremental compression, use a BZ2Compressor object instead. + """ + comp = BZ2Compressor(compresslevel) + return comp.compress(data) + comp.flush() + + +def decompress(data): + """Decompress a block of data. + + For incremental decompression, use a BZ2Decompressor object instead. + """ + results = [] + while data: + decomp = BZ2Decompressor() + try: + res = decomp.decompress(data) + except OSError: + if results: + break # Leftover data is not a valid bzip2 stream; ignore it. + else: + raise # Error on the first iteration; bail out. + results.append(res) + if not decomp.eof: + raise ValueError("Compressed data ended before the " + "end-of-stream marker was reached") + data = decomp.unused_data + return b"".join(results) diff --git a/modules/language/python/module/email/__init__.py b/modules/language/python/module/email/__init__.py new file mode 100644 index 0000000..fae8724 --- /dev/null +++ b/modules/language/python/module/email/__init__.py @@ -0,0 +1,62 @@ +# Copyright (C) 2001-2007 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""A package for parsing, handling, and generating email messages.""" + +__all__ = [ + 'base64mime', + 'charset', + 'encoders', + 'errors', + 'feedparser', + 'generator', + 'header', + 'iterators', + 'message', + 'message_from_file', + 'message_from_binary_file', + 'message_from_string', + 'message_from_bytes', + 'mime', + 'parser', + 'quoprimime', + 'utils', + ] + + + +# Some convenience routines. Don't import Parser and Message as side-effects +# of importing email since those cascadingly import most of the rest of the +# email package. +def message_from_string(s, *args, **kws): + """Parse a string into a Message object model. + + Optional _class and strict are passed to the Parser constructor. + """ + from email.parser import Parser + return Parser(*args, **kws).parsestr(s) + +def message_from_bytes(s, *args, **kws): + """Parse a bytes string into a Message object model. + + Optional _class and strict are passed to the Parser constructor. + """ + from email.parser import BytesParser + return BytesParser(*args, **kws).parsebytes(s) + +def message_from_file(fp, *args, **kws): + """Read a file and parse its contents into a Message object model. + + Optional _class and strict are passed to the Parser constructor. + """ + from email.parser import Parser + return Parser(*args, **kws).parse(fp) + +def message_from_binary_file(fp, *args, **kws): + """Read a binary file and parse its contents into a Message object model. + + Optional _class and strict are passed to the Parser constructor. 
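A minimal sketch of the convenience routines above (added for illustration; the addresses and message text are placeholders):

    from email import message_from_string

    raw = "Subject: Hello\nFrom: a@example.com\n\nBody text\n"
    msg = message_from_string(raw)
    # the returned Message object gives dict-like access to the headers
    assert msg["Subject"] == "Hello"
    assert msg.get_payload() == "Body text\n"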
+ """ + from email.parser import BytesParser + return BytesParser(*args, **kws).parse(fp) diff --git a/modules/language/python/module/email/__pycache__/__init__.cpython-36.pyc b/modules/language/python/module/email/__pycache__/__init__.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..2c8ef2d --- /dev/null +++ b/modules/language/python/module/email/__pycache__/__init__.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/_encoded_words.cpython-36.pyc b/modules/language/python/module/email/__pycache__/_encoded_words.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..16b86db --- /dev/null +++ b/modules/language/python/module/email/__pycache__/_encoded_words.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/_header_value_parser.cpython-36.pyc b/modules/language/python/module/email/__pycache__/_header_value_parser.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..18f3d03 --- /dev/null +++ b/modules/language/python/module/email/__pycache__/_header_value_parser.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/_parseaddr.cpython-36.pyc b/modules/language/python/module/email/__pycache__/_parseaddr.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..08e82d8 --- /dev/null +++ b/modules/language/python/module/email/__pycache__/_parseaddr.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/_policybase.cpython-36.pyc b/modules/language/python/module/email/__pycache__/_policybase.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..2d0888c --- /dev/null +++ b/modules/language/python/module/email/__pycache__/_policybase.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/base64mime.cpython-36.pyc b/modules/language/python/module/email/__pycache__/base64mime.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..00baa62 --- /dev/null +++ b/modules/language/python/module/email/__pycache__/base64mime.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/charset.cpython-36.pyc b/modules/language/python/module/email/__pycache__/charset.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..3035f1c --- /dev/null +++ b/modules/language/python/module/email/__pycache__/charset.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/contentmanager.cpython-36.pyc b/modules/language/python/module/email/__pycache__/contentmanager.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..260fe06 --- /dev/null +++ b/modules/language/python/module/email/__pycache__/contentmanager.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/encoders.cpython-36.pyc b/modules/language/python/module/email/__pycache__/encoders.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..3e79480 --- /dev/null +++ b/modules/language/python/module/email/__pycache__/encoders.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/errors.cpython-36.pyc b/modules/language/python/module/email/__pycache__/errors.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..4ee3c07 --- /dev/null +++ b/modules/language/python/module/email/__pycache__/errors.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/feedparser.cpython-36.pyc b/modules/language/python/module/email/__pycache__/feedparser.cpython-36.pyc Binary files differnew file mode 100644 index 
0000000..426ef0a --- /dev/null +++ b/modules/language/python/module/email/__pycache__/feedparser.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/generator.cpython-36.pyc b/modules/language/python/module/email/__pycache__/generator.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..d35527f --- /dev/null +++ b/modules/language/python/module/email/__pycache__/generator.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/header.cpython-36.pyc b/modules/language/python/module/email/__pycache__/header.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..64773a6 --- /dev/null +++ b/modules/language/python/module/email/__pycache__/header.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/headerregistry.cpython-36.pyc b/modules/language/python/module/email/__pycache__/headerregistry.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..07e5886 --- /dev/null +++ b/modules/language/python/module/email/__pycache__/headerregistry.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/iterators.cpython-36.pyc b/modules/language/python/module/email/__pycache__/iterators.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..c34d29a --- /dev/null +++ b/modules/language/python/module/email/__pycache__/iterators.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/message.cpython-36.pyc b/modules/language/python/module/email/__pycache__/message.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..942f356 --- /dev/null +++ b/modules/language/python/module/email/__pycache__/message.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/parser.cpython-36.pyc b/modules/language/python/module/email/__pycache__/parser.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..1b32ef6 --- /dev/null +++ b/modules/language/python/module/email/__pycache__/parser.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/policy.cpython-36.pyc b/modules/language/python/module/email/__pycache__/policy.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..63f6b6b --- /dev/null +++ b/modules/language/python/module/email/__pycache__/policy.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/quoprimime.cpython-36.pyc b/modules/language/python/module/email/__pycache__/quoprimime.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..359281d --- /dev/null +++ b/modules/language/python/module/email/__pycache__/quoprimime.cpython-36.pyc diff --git a/modules/language/python/module/email/__pycache__/utils.cpython-36.pyc b/modules/language/python/module/email/__pycache__/utils.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..e309ca5 --- /dev/null +++ b/modules/language/python/module/email/__pycache__/utils.cpython-36.pyc diff --git a/modules/language/python/module/email/architecture.rst b/modules/language/python/module/email/architecture.rst new file mode 100644 index 0000000..fcd10bd --- /dev/null +++ b/modules/language/python/module/email/architecture.rst @@ -0,0 +1,216 @@ +:mod:`email` Package Architecture +================================= + +Overview +-------- + +The email package consists of three major components: + + Model + An object structure that represents an email message, and provides an + API for creating, querying, and modifying a message. 
+ + Parser + Takes a sequence of characters or bytes and produces a model of the + email message represented by those characters or bytes. + + Generator + Takes a model and turns it into a sequence of characters or bytes. The + sequence can either be intended for human consumption (a printable + unicode string) or bytes suitable for transmission over the wire. In + the latter case all data is properly encoded using the content transfer + encodings specified by the relevant RFCs. + +Conceptually the package is organized around the model. The model provides both +"external" APIs intended for use by application programs using the library, +and "internal" APIs intended for use by the Parser and Generator components. +This division is intentionally a bit fuzzy; the API described by this +documentation is all a public, stable API. This allows for an application +with special needs to implement its own parser and/or generator. + +In addition to the three major functional components, there is a third key +component to the architecture: + + Policy + An object that specifies various behavioral settings and carries + implementations of various behavior-controlling methods. + +The Policy framework provides a simple and convenient way to control the +behavior of the library, making it possible for the library to be used in a +very flexible fashion while leveraging the common code required to parse, +represent, and generate message-like objects. For example, in addition to the +default :rfc:`5322` email message policy, we also have a policy that manages +HTTP headers in a fashion compliant with :rfc:`2616`. Individual policy +controls, such as the maximum line length produced by the generator, can also +be controlled individually to meet specialized application requirements. + + +The Model +--------- + +The message model is implemented by the :class:`~email.message.Message` class. +The model divides a message into the two fundamental parts discussed by the +RFC: the header section and the body. The `Message` object acts as a +pseudo-dictionary of named headers. Its dictionary interface provides +convenient access to individual headers by name. However, all headers are kept +internally in an ordered list, so that the information about the order of the +headers in the original message is preserved. + +The `Message` object also has a `payload` that holds the body. A `payload` can +be one of two things: data, or a list of `Message` objects. The latter is used +to represent a multipart MIME message. Lists can be nested arbitrarily deeply +in order to represent the message, with all terminal leaves having non-list +data payloads. + + +Message Lifecycle +----------------- + +The general lifecycle of a message is: + + Creation + A `Message` object can be created by a Parser, or it can be + instantiated as an empty message by an application. + + Manipulation + The application may examine one or more headers, and/or the + payload, and it may modify one or more headers and/or + the payload. This may be done on the top level `Message` + object, or on any sub-object. + + Finalization + The Model is converted into a unicode or binary stream, + or the model is discarded. + + + +Header Policy Control During Lifecycle +-------------------------------------- + +One of the major controls exerted by the Policy is the management of headers +during the `Message` lifecycle. Most applications don't need to be aware of +this. 
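As a concrete illustration of the dict-like model and header handling described
above, a short sketch (added for illustration; the header values are placeholders)::

    from email.message import Message

    m = Message()
    m["Subject"] = "Hello"            # headers go through the pseudo-dictionary interface
    m["To"] = "someone@example.com"
    m.set_payload("plain text body")  # the payload holds the body

    assert m["subject"] == "Hello"    # header lookup is case-insensitive
    assert m.get_payload() == "plain text body"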
+ +A header enters the model in one of two ways: via a Parser, or by being set to +a specific value by an application program after the Model already exists. +Similarly, a header exits the model in one of two ways: by being serialized by +a Generator, or by being retrieved from a Model by an application program. The +Policy object provides hooks for all four of these pathways. + +The model storage for headers is a list of (name, value) tuples. + +The Parser identifies headers during parsing, and passes them to the +:meth:`~email.policy.Policy.header_source_parse` method of the Policy. The +result of that method is the (name, value) tuple to be stored in the model. + +When an application program supplies a header value (for example, through the +`Message` object `__setitem__` interface), the name and the value are passed to +the :meth:`~email.policy.Policy.header_store_parse` method of the Policy, which +returns the (name, value) tuple to be stored in the model. + +When an application program retrieves a header (through any of the dict or list +interfaces of `Message`), the name and value are passed to the +:meth:`~email.policy.Policy.header_fetch_parse` method of the Policy to +obtain the value returned to the application. + +When a Generator requests a header during serialization, the name and value are +passed to the :meth:`~email.policy.Policy.fold` method of the Policy, which +returns a string containing line breaks in the appropriate places. The +:meth:`~email.policy.Policy.cte_type` Policy control determines whether or +not Content Transfer Encoding is performed on the data in the header. There is +also a :meth:`~email.policy.Policy.binary_fold` method for use by generators +that produce binary output, which returns the folded header as binary data, +possibly folded at different places than the corresponding string would be. + + +Handling Binary Data +-------------------- + +In an ideal world all message data would conform to the RFCs, meaning that the +parser could decode the message into the idealized unicode message that the +sender originally wrote. In the real world, the email package must also be +able to deal with badly formatted messages, including messages containing +non-ASCII characters that either have no indicated character set or are not +valid characters in the indicated character set. + +Since email messages are *primarily* text data, and operations on message data +are primarily text operations (except for binary payloads of course), the model +stores all text data as unicode strings. Un-decodable binary inside text +data is handled by using the `surrogateescape` error handler of the ASCII +codec. As with the binary filenames the error handler was introduced to +handle, this allows the email package to "carry" the binary data received +during parsing along until the output stage, at which time it is regenerated +in its original form. + +This carried binary data is almost entirely an implementation detail. The one +place where it is visible in the API is in the "internal" API. A Parser must +do the `surrogateescape` encoding of binary input data, and pass that data to +the appropriate Policy method. The "internal" interface used by the Generator +to access header values preserves the `surrogateescaped` bytes. All other +interfaces convert the binary data either back into bytes or into a safe form +(losing information in some cases). 
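To make the four pathways above concrete, a minimal sketch using the public API
(added for illustration; it exercises the policy indirectly rather than calling the
hooks themselves)::

    from email import message_from_string, policy

    raw = "Subject: Hello\n\nBody\n"
    # header_source_parse ran during parsing; header_fetch_parse runs on lookup
    msg = message_from_string(raw, policy=policy.default)
    subject = msg["Subject"]
    # fold() produces the serialized form, inserting line breaks where the policy allows
    folded = policy.default.fold("Subject", str(subject))
    # folded == "Subject: Hello\n"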
+
+
+Backward Compatibility
+----------------------
+
+The :class:`~email.policy.Compat32` Policy provides backward
+compatibility with version 5.1 of the email package. It does this via the
+following implementation of the five Policy methods described above:
+
+header_source_parse
+    Splits the first line on the colon to obtain the name, discards any spaces
+    after the colon, and joins the remainder of the line with all of the
+    remaining lines, preserving the linesep characters to obtain the value.
+    Trailing carriage return and/or linefeed characters are stripped from the
+    resulting value string.
+
+header_store_parse
+    Returns the name and value exactly as received from the application.
+
+header_fetch_parse
+    If the value contains any `surrogateescaped` binary data, return the value
+    as a :class:`~email.header.Header` object, using the character set
+    `unknown-8bit`. Otherwise just returns the value.
+
+fold
+    Uses :class:`~email.header.Header`'s folding to fold headers in the
+    same way the email 5.1 generator did.
+
+binary_fold
+    Same as fold, but encodes to 'ascii'.
+
+
+New Algorithm
+-------------
+
+header_source_parse
+    Same as legacy behavior.
+
+header_store_parse
+    Same as legacy behavior.
+
+header_fetch_parse
+    If the value is already a header object, returns it. Otherwise, parses the
+    value using the new parser, and returns the resulting object as the value.
+    `surrogateescaped` bytes get turned into unicode unknown character code
+    points.
+
+fold
+    Uses the new header folding algorithm, respecting the policy settings.
+    surrogateescaped bytes are encoded using the ``unknown-8bit`` charset for
+    ``cte_type=7bit`` or ``8bit``. Returns a string.
+
+    At some point there will also be a ``cte_type=unicode``, and for that
+    policy fold will serialize the idealized unicode message with RFC-like
+    folding, converting any surrogateescaped bytes into the unicode
+    unknown character glyph.
+
+binary_fold
+    Uses the new header folding algorithm, respecting the policy settings.
+    surrogateescaped bytes are encoded using the ``unknown-8bit`` charset for
+    ``cte_type=7bit``, and get turned back into bytes for ``cte_type=8bit``.
+    Returns bytes.
+
+    At some point there will also be a ``cte_type=unicode``, and for that
+    policy binary_fold will serialize the message according to :rfc:`5335`.
diff --git a/modules/language/python/module/email/mime/#audio.py# b/modules/language/python/module/email/mime/#audio.py#
new file mode 100644
index 0000000..ccd08fe
--- /dev/null
+++ b/modules/language/python/module/email/mime/#audio.py#
@@ -0,0 +1,76 @@
+module(email,mime,audio)
+
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Anthony Baxter
+# Contact: email-sig@python.org
+
+"""Class representing audio/* type MIME documents."""
+
+__all__ = ['MIMEAudio']
+
+import sndhdr
+
+from io import BytesIO
+import email.encoders as encoders
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+
+_sndhdr_MIMEmap = {'au'  : 'basic',
+                   'wav' :'x-wav',
+                   'aiff':'x-aiff',
+                   'aifc':'x-aiff',
+                   }
+
+# There are others in sndhdr that don't have MIME types. :(
+# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma??
+def _whatsnd(data):
+    """Try to identify a sound file type.
+
+    sndhdr.what() has a pretty cruddy interface, unfortunately. This is why
+    we re-do it here. It would be easier to reverse engineer the Unix 'file'
+    command and use the standard 'magic' file, as shipped with a modern Unix.
+ """ + hdr = data[:512] + fakefile = BytesIO(hdr) + for testfn in sndhdr.tests: + res = testfn(hdr, fakefile) + if res is not None: + return _sndhdr_MIMEmap.get(res[0]) + return None + + + +class MIMEAudio(MIMENonMultipart): + """Class for generating audio/* MIME documents.""" + + def __init__(self, _audiodata, _subtype=None, + _encoder=encoders.encode_base64, *, policy=None, **_params): + """Create an audio/* type MIME document. + + _audiodata is a string containing the raw audio data. If this data + can be decoded by the standard Python `sndhdr' module, then the + subtype will be automatically included in the Content-Type header. + Otherwise, you can specify the specific audio subtype via the + _subtype parameter. If _subtype is not given, and no subtype can be + guessed, a TypeError is raised. + + _encoder is a function which will perform the actual encoding for + transport of the image data. It takes one argument, which is this + Image instance. It should use get_payload() and set_payload() to + change the payload to the encoded form. It should also add any + Content-Transfer-Encoding or other headers to the message as + necessary. The default encoding is Base64. + + Any additional keyword arguments are passed to the base class + constructor, which turns them into parameters on the Content-Type + header. + """ + if _subtype is None: + _subtype = _whatsnd(_audiodata) + if _subtype is None: + raise TypeError('Could not find audio MIME subtype') + MIMENonMultipart.__init__(self, 'audio', _subtype, policy=policy, + **_params) + self.set_payload(_audiodata) + _encoder(self) diff --git a/modules/language/python/module/email/mime/#image.py# b/modules/language/python/module/email/mime/#image.py# new file mode 100644 index 0000000..5346dac --- /dev/null +++ b/modules/language/python/module/email/mime/#image.py# @@ -0,0 +1,49 @@ +module(email,mime,image) + +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Class representing image/* type MIME documents.""" + +__all__ = ['MIMEImage'] + +import imghdr + +import email.encoders as encoders +from email.mime.nonmultipart import MIMENonMultipart + + + +class MIMEImage(MIMENonMultipart): + """Class for generating image/* type MIME documents.""" + + def __init__(self, _imagedata, _subtype=None, + _encoder=encoders.encode_base64, *, policy=None, **_params): + """Create an image/* type MIME document. + + _imagedata is a string containing the raw image data. If this data + can be decoded by the standard Python `imghdr' module, then the + subtype will be automatically included in the Content-Type header. + Otherwise, you can specify the specific image subtype via the _subtype + parameter. + + _encoder is a function which will perform the actual encoding for + transport of the image data. It takes one argument, which is this + Image instance. It should use get_payload() and set_payload() to + change the payload to the encoded form. It should also add any + Content-Transfer-Encoding or other headers to the message as + necessary. The default encoding is Base64. + + Any additional keyword arguments are passed to the base class + constructor, which turns them into parameters on the Content-Type + header. 
+ """ + if _subtype is None: + _subtype = imghdr.what(None, _imagedata) + if _subtype is None: + raise TypeError('Could not guess image MIME subtype') + MIMENonMultipart.__init__(self, 'image', _subtype, policy=policy, + **_params) + self.set_payload(_imagedata) + _encoder(self) diff --git a/modules/language/python/module/email/mime/.#audio.py b/modules/language/python/module/email/mime/.#audio.py new file mode 120000 index 0000000..6876c49 --- /dev/null +++ b/modules/language/python/module/email/mime/.#audio.py @@ -0,0 +1 @@ +stis@lapwine.3569:1536593089
\ No newline at end of file diff --git a/modules/language/python/module/email/mime/__init__.py b/modules/language/python/module/email/mime/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/modules/language/python/module/email/mime/__init__.py diff --git a/modules/language/python/module/email/mime/__pycache__/__init__.cpython-36.pyc b/modules/language/python/module/email/mime/__pycache__/__init__.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..c9ca4cc --- /dev/null +++ b/modules/language/python/module/email/mime/__pycache__/__init__.cpython-36.pyc diff --git a/modules/language/python/module/email/mime/__pycache__/application.cpython-36.pyc b/modules/language/python/module/email/mime/__pycache__/application.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..cafb6ca --- /dev/null +++ b/modules/language/python/module/email/mime/__pycache__/application.cpython-36.pyc diff --git a/modules/language/python/module/email/mime/__pycache__/audio.cpython-36.pyc b/modules/language/python/module/email/mime/__pycache__/audio.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..5b737e7 --- /dev/null +++ b/modules/language/python/module/email/mime/__pycache__/audio.cpython-36.pyc diff --git a/modules/language/python/module/email/mime/__pycache__/base.cpython-36.pyc b/modules/language/python/module/email/mime/__pycache__/base.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..6ece8ab --- /dev/null +++ b/modules/language/python/module/email/mime/__pycache__/base.cpython-36.pyc diff --git a/modules/language/python/module/email/mime/__pycache__/image.cpython-36.pyc b/modules/language/python/module/email/mime/__pycache__/image.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..3ba9d75 --- /dev/null +++ b/modules/language/python/module/email/mime/__pycache__/image.cpython-36.pyc diff --git a/modules/language/python/module/email/mime/__pycache__/message.cpython-36.pyc b/modules/language/python/module/email/mime/__pycache__/message.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..5eac587 --- /dev/null +++ b/modules/language/python/module/email/mime/__pycache__/message.cpython-36.pyc diff --git a/modules/language/python/module/email/mime/__pycache__/multipart.cpython-36.pyc b/modules/language/python/module/email/mime/__pycache__/multipart.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..d0b31f4 --- /dev/null +++ b/modules/language/python/module/email/mime/__pycache__/multipart.cpython-36.pyc diff --git a/modules/language/python/module/email/mime/__pycache__/nonmultipart.cpython-36.pyc b/modules/language/python/module/email/mime/__pycache__/nonmultipart.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..555b295 --- /dev/null +++ b/modules/language/python/module/email/mime/__pycache__/nonmultipart.cpython-36.pyc diff --git a/modules/language/python/module/email/mime/__pycache__/text.cpython-36.pyc b/modules/language/python/module/email/mime/__pycache__/text.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..b34d005 --- /dev/null +++ b/modules/language/python/module/email/mime/__pycache__/text.cpython-36.pyc diff --git a/modules/language/python/module/email/mime/application.py~ b/modules/language/python/module/email/mime/application.py~ new file mode 100644 index 0000000..6877e55 --- /dev/null +++ b/modules/language/python/module/email/mime/application.py~ @@ -0,0 +1,37 @@ +# Copyright (C) 2001-2006 Python Software 
Foundation +# Author: Keith Dart +# Contact: email-sig@python.org + +"""Class representing application/* type MIME documents.""" + +__all__ = ["MIMEApplication"] + +from email import encoders +from email.mime.nonmultipart import MIMENonMultipart + + +class MIMEApplication(MIMENonMultipart): + """Class for generating application/* MIME documents.""" + + def __init__(self, _data, _subtype='octet-stream', + _encoder=encoders.encode_base64, *, policy=None, **_params): + """Create an application/* type MIME document. + + _data is a string containing the raw application data. + + _subtype is the MIME content type subtype, defaulting to + 'octet-stream'. + + _encoder is a function which will perform the actual encoding for + transport of the application data, defaulting to base64 encoding. + + Any additional keyword arguments are passed to the base class + constructor, which turns them into parameters on the Content-Type + header. + """ + if _subtype is None: + raise TypeError('Invalid application MIME subtype') + MIMENonMultipart.__init__(self, 'application', _subtype, policy=policy, + **_params) + self.set_payload(_data) + _encoder(self) diff --git a/modules/language/python/module/email/mime/multipart.py~ b/modules/language/python/module/email/mime/multipart.py~ new file mode 100644 index 0000000..2d3f288 --- /dev/null +++ b/modules/language/python/module/email/mime/multipart.py~ @@ -0,0 +1,48 @@ +# Copyright (C) 2002-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Base class for MIME multipart/* type messages.""" + +__all__ = ['MIMEMultipart'] + +from email.mime.base import MIMEBase + + + +class MIMEMultipart(MIMEBase): + """Base class for MIME multipart/* type messages.""" + + def __init__(self, _subtype='mixed', boundary=None, _subparts=None, + *, policy=None, + **_params): + """Creates a multipart/* type message. + + By default, creates a multipart/mixed message, with proper + Content-Type and MIME-Version headers. + + _subtype is the subtype of the multipart content type, defaulting to + `mixed'. + + boundary is the multipart boundary string. By default it is + calculated as needed. + + _subparts is a sequence of initial subparts for the payload. It + must be an iterable object, such as a list. You can always + attach new subparts to the message by using the attach() method. + + Additional parameters for the Content-Type header are taken from the + keyword arguments (or passed into the _params argument). + """ + MIMEBase.__init__(self, 'multipart', _subtype, policy=policy, **_params) + + # Initialise _payload to an empty list as the Message superclass's + # implementation of is_multipart assumes that _payload is a list for + # multipart messages. 
+ self._payload = [] + + if _subparts: + for p in _subparts: + self.attach(p) + if boundary: + self.set_boundary(boundary) diff --git a/modules/language/python/module/f.py b/modules/language/python/module/f.py new file mode 100644 index 0000000..6de30e7 --- /dev/null +++ b/modules/language/python/module/f.py @@ -0,0 +1,45 @@ +module(f) + +from enum import Enum, unique, auto, IntEnum + +class Color (Enum): + RED = 1 + GREEN = 2 + BLUE = 3 + +class Shape(Enum): + SQUARE = 2 + DIAMOND = 1 + CIRCLE = 3 + ALIAS_FOR_SQUARE = 2 + +class Color2 (Enum): + RED = auto() + GREEN = auto() + BLUE = auto() + +@unique +class Misstake(Enum): + ONE = 1 + TWO = 2 + THREE = 3 + FOUR = 4 + +class AutoName(Enum): + def _generate_next_value_(name, start, count, last_values): + return name + + +class Ordinal(AutoName): + NORTH = auto() + SOUTH = auto() + EAST = auto() + WEST = auto() + +class Num(IntEnum): + One = 1 + Two = 2 + Three = 3 + + +__all__ = ['Color','Shape','Color2','Misstake','Ordinal','Num'] diff --git a/modules/language/python/module/g.py b/modules/language/python/module/g.py new file mode 100644 index 0000000..dafef95 --- /dev/null +++ b/modules/language/python/module/g.py @@ -0,0 +1,9 @@ +module(g) + +class A(dict): + def __getitem__(self,k): + pk(k) + return super().__getitem__(k) + + +__all__= ['A'] diff --git a/modules/language/python/module/html/__pycache__/__init__.cpython-36.pyc b/modules/language/python/module/html/__pycache__/__init__.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..15fcdf5 --- /dev/null +++ b/modules/language/python/module/html/__pycache__/__init__.cpython-36.pyc diff --git a/modules/language/python/module/html/__pycache__/entities.cpython-36.pyc b/modules/language/python/module/html/__pycache__/entities.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..35806d5 --- /dev/null +++ b/modules/language/python/module/html/__pycache__/entities.cpython-36.pyc diff --git a/modules/language/python/module/html/__pycache__/parser.cpython-36.pyc b/modules/language/python/module/html/__pycache__/parser.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..27f74fc --- /dev/null +++ b/modules/language/python/module/html/__pycache__/parser.cpython-36.pyc diff --git a/modules/language/python/module/http/__init__.py b/modules/language/python/module/http/__init__.py new file mode 100644 index 0000000..d4334cc --- /dev/null +++ b/modules/language/python/module/http/__init__.py @@ -0,0 +1,134 @@ +from enum import IntEnum + +__all__ = ['HTTPStatus'] + +class HTTPStatus(IntEnum): + """HTTP status codes and reason phrases + + Status codes from the following RFCs are all observed: + + * RFC 7231: Hypertext Transfer Protocol (HTTP/1.1), obsoletes 2616 + * RFC 6585: Additional HTTP Status Codes + * RFC 3229: Delta encoding in HTTP + * RFC 4918: HTTP Extensions for WebDAV, obsoletes 2518 + * RFC 5842: Binding Extensions to WebDAV + * RFC 7238: Permanent Redirect + * RFC 2295: Transparent Content Negotiation in HTTP + * RFC 2774: An HTTP Extension Framework + """ + def __new__(cls, value, phrase, description=''): + obj = int.__new__(cls, value) + obj._value_ = value + + obj.phrase = phrase + obj.description = description + return obj + + # informational + CONTINUE = 100, 'Continue', 'Request received, please continue' + SWITCHING_PROTOCOLS = (101, 'Switching Protocols', + 'Switching to new protocol; obey Upgrade header') + PROCESSING = 102, 'Processing' + + # success + OK = 200, 'OK', 'Request fulfilled, document follows' + CREATED = 201, 
'Created', 'Document created, URL follows' + ACCEPTED = (202, 'Accepted', + 'Request accepted, processing continues off-line') + NON_AUTHORITATIVE_INFORMATION = (203, + 'Non-Authoritative Information', 'Request fulfilled from cache') + NO_CONTENT = 204, 'No Content', 'Request fulfilled, nothing follows' + RESET_CONTENT = 205, 'Reset Content', 'Clear input form for further input' + PARTIAL_CONTENT = 206, 'Partial Content', 'Partial content follows' + MULTI_STATUS = 207, 'Multi-Status' + ALREADY_REPORTED = 208, 'Already Reported' + IM_USED = 226, 'IM Used' + + # redirection + MULTIPLE_CHOICES = (300, 'Multiple Choices', + 'Object has several resources -- see URI list') + MOVED_PERMANENTLY = (301, 'Moved Permanently', + 'Object moved permanently -- see URI list') + FOUND = 302, 'Found', 'Object moved temporarily -- see URI list' + SEE_OTHER = 303, 'See Other', 'Object moved -- see Method and URL list' + NOT_MODIFIED = (304, 'Not Modified', + 'Document has not changed since given time') + USE_PROXY = (305, 'Use Proxy', + 'You must use proxy specified in Location to access this resource') + TEMPORARY_REDIRECT = (307, 'Temporary Redirect', + 'Object moved temporarily -- see URI list') + PERMANENT_REDIRECT = (308, 'Permanent Redirect', + 'Object moved temporarily -- see URI list') + + # client error + BAD_REQUEST = (400, 'Bad Request', + 'Bad request syntax or unsupported method') + UNAUTHORIZED = (401, 'Unauthorized', + 'No permission -- see authorization schemes') + PAYMENT_REQUIRED = (402, 'Payment Required', + 'No payment -- see charging schemes') + FORBIDDEN = (403, 'Forbidden', + 'Request forbidden -- authorization will not help') + NOT_FOUND = (404, 'Not Found', + 'Nothing matches the given URI') + METHOD_NOT_ALLOWED = (405, 'Method Not Allowed', + 'Specified method is invalid for this resource') + NOT_ACCEPTABLE = (406, 'Not Acceptable', + 'URI not available in preferred format') + PROXY_AUTHENTICATION_REQUIRED = (407, + 'Proxy Authentication Required', + 'You must authenticate with this proxy before proceeding') + REQUEST_TIMEOUT = (408, 'Request Timeout', + 'Request timed out; try again later') + CONFLICT = 409, 'Conflict', 'Request conflict' + GONE = (410, 'Gone', + 'URI no longer exists and has been permanently removed') + LENGTH_REQUIRED = (411, 'Length Required', + 'Client must specify Content-Length') + PRECONDITION_FAILED = (412, 'Precondition Failed', + 'Precondition in headers is false') + REQUEST_ENTITY_TOO_LARGE = (413, 'Request Entity Too Large', + 'Entity is too large') + REQUEST_URI_TOO_LONG = (414, 'Request-URI Too Long', + 'URI is too long') + UNSUPPORTED_MEDIA_TYPE = (415, 'Unsupported Media Type', + 'Entity body in unsupported format') + REQUESTED_RANGE_NOT_SATISFIABLE = (416, + 'Requested Range Not Satisfiable', + 'Cannot satisfy request range') + EXPECTATION_FAILED = (417, 'Expectation Failed', + 'Expect condition could not be satisfied') + UNPROCESSABLE_ENTITY = 422, 'Unprocessable Entity' + LOCKED = 423, 'Locked' + FAILED_DEPENDENCY = 424, 'Failed Dependency' + UPGRADE_REQUIRED = 426, 'Upgrade Required' + PRECONDITION_REQUIRED = (428, 'Precondition Required', + 'The origin server requires the request to be conditional') + TOO_MANY_REQUESTS = (429, 'Too Many Requests', + 'The user has sent too many requests in ' + 'a given amount of time ("rate limiting")') + REQUEST_HEADER_FIELDS_TOO_LARGE = (431, + 'Request Header Fields Too Large', + 'The server is unwilling to process the request because its header ' + 'fields are too large') + + # server errors + 
INTERNAL_SERVER_ERROR = (500, 'Internal Server Error', + 'Server got itself in trouble') + NOT_IMPLEMENTED = (501, 'Not Implemented', + 'Server does not support this operation') + BAD_GATEWAY = (502, 'Bad Gateway', + 'Invalid responses from another server/proxy') + SERVICE_UNAVAILABLE = (503, 'Service Unavailable', + 'The server cannot process the request due to a high load') + GATEWAY_TIMEOUT = (504, 'Gateway Timeout', + 'The gateway server did not receive a timely response') + HTTP_VERSION_NOT_SUPPORTED = (505, 'HTTP Version Not Supported', + 'Cannot fulfill request') + VARIANT_ALSO_NEGOTIATES = 506, 'Variant Also Negotiates' + INSUFFICIENT_STORAGE = 507, 'Insufficient Storage' + LOOP_DETECTED = 508, 'Loop Detected' + NOT_EXTENDED = 510, 'Not Extended' + NETWORK_AUTHENTICATION_REQUIRED = (511, + 'Network Authentication Required', + 'The client needs to authenticate to gain network access') diff --git a/modules/language/python/module/http/__pycache__/__init__.cpython-36.pyc b/modules/language/python/module/http/__pycache__/__init__.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..59e9320 --- /dev/null +++ b/modules/language/python/module/http/__pycache__/__init__.cpython-36.pyc diff --git a/modules/language/python/module/http/__pycache__/client.cpython-36.pyc b/modules/language/python/module/http/__pycache__/client.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..cdf1038 --- /dev/null +++ b/modules/language/python/module/http/__pycache__/client.cpython-36.pyc diff --git a/modules/language/python/module/http/__pycache__/cookiejar.cpython-36.pyc b/modules/language/python/module/http/__pycache__/cookiejar.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..56b828a --- /dev/null +++ b/modules/language/python/module/http/__pycache__/cookiejar.cpython-36.pyc diff --git a/modules/language/python/module/http/__pycache__/cookies.cpython-36.pyc b/modules/language/python/module/http/__pycache__/cookies.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..e2c9c21 --- /dev/null +++ b/modules/language/python/module/http/__pycache__/cookies.cpython-36.pyc diff --git a/modules/language/python/module/http/__pycache__/server.cpython-36.pyc b/modules/language/python/module/http/__pycache__/server.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..45e16b6 --- /dev/null +++ b/modules/language/python/module/http/__pycache__/server.cpython-36.pyc diff --git a/modules/language/python/module/http/cookiejar.py~ b/modules/language/python/module/http/cookiejar.py~ new file mode 100644 index 0000000..adf956d --- /dev/null +++ b/modules/language/python/module/http/cookiejar.py~ @@ -0,0 +1,2098 @@ +r"""HTTP cookie handling for web clients. + +This module has (now fairly distant) origins in Gisle Aas' Perl module +HTTP::Cookies, from the libwww-perl library. + +Docstrings, comments and debug strings in this code refer to the +attributes of the HTTP cookie system as cookie-attributes, to distinguish +them clearly from Python attributes. 
+ +Class diagram (note that BSDDBCookieJar and the MSIE* classes are not +distributed with the Python standard library, but are available from +http://wwwsearch.sf.net/): + + CookieJar____ + / \ \ + FileCookieJar \ \ + / | \ \ \ + MozillaCookieJar | LWPCookieJar \ \ + | | \ + | ---MSIEBase | \ + | / | | \ + | / MSIEDBCookieJar BSDDBCookieJar + |/ + MSIECookieJar + +""" + +__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy', + 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar'] + +import copy +import datetime +import re +import time +import urllib.parse, urllib.request +try: + import threading as _threading +except ImportError: + import dummy_threading as _threading +import http.client # only for the default HTTP port +from calendar import timegm + +debug = False # set to True to enable debugging via the logging module +logger = None + +def _debug(*args): + if not debug: + return + global logger + if not logger: + import logging + logger = logging.getLogger("http.cookiejar") + return logger.debug(*args) + + +DEFAULT_HTTP_PORT = str(http.client.HTTP_PORT) +MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " + "instance initialised with one)") + +def _warn_unhandled_exception(): + # There are a few catch-all except: statements in this module, for + # catching input that's bad in unexpected ways. Warn if any + # exceptions are caught there. + import io, warnings, traceback + f = io.StringIO() + traceback.print_exc(None, f) + msg = f.getvalue() + warnings.warn("http.cookiejar bug!\n%s" % msg, stacklevel=2) + + +# Date/time conversion +# ----------------------------------------------------------------------------- + +EPOCH_YEAR = 1970 +def _timegm(tt): + year, month, mday, hour, min, sec = tt[:6] + if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and + (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)): + return timegm(tt) + else: + return None + +DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] +MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] +MONTHS_LOWER = [] +for month in MONTHS: MONTHS_LOWER.append(month.lower()) + +def time2isoz(t=None): + """Return a string representing time in seconds since epoch, t. + + If the function is called without an argument, it will use the current + time. + + The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ", + representing Universal Time (UTC, aka GMT). An example of this format is: + + 1994-11-24 08:49:37Z + + """ + if t is None: + dt = datetime.datetime.utcnow() + else: + dt = datetime.datetime.utcfromtimestamp(t) + return "%04d-%02d-%02d %02d:%02d:%02dZ" % ( + dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second) + +def time2netscape(t=None): + """Return a string representing time in seconds since epoch, t. + + If the function is called without an argument, it will use the current + time. 
+ + The format of the returned string is like this: + + Wed, DD-Mon-YYYY HH:MM:SS GMT + + """ + if t is None: + dt = datetime.datetime.utcnow() + else: + dt = datetime.datetime.utcfromtimestamp(t) + return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % ( + DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1], + dt.year, dt.hour, dt.minute, dt.second) + + +UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None} + +TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII) +def offset_from_tz_string(tz): + offset = None + if tz in UTC_ZONES: + offset = 0 + else: + m = TIMEZONE_RE.search(tz) + if m: + offset = 3600 * int(m.group(2)) + if m.group(3): + offset = offset + 60 * int(m.group(3)) + if m.group(1) == '-': + offset = -offset + return offset + +def _str2time(day, mon, yr, hr, min, sec, tz): + yr = int(yr) + if yr > datetime.MAXYEAR: + return None + + # translate month name to number + # month numbers start with 1 (January) + try: + mon = MONTHS_LOWER.index(mon.lower())+1 + except ValueError: + # maybe it's already a number + try: + imon = int(mon) + except ValueError: + return None + if 1 <= imon <= 12: + mon = imon + else: + return None + + # make sure clock elements are defined + if hr is None: hr = 0 + if min is None: min = 0 + if sec is None: sec = 0 + + day = int(day) + hr = int(hr) + min = int(min) + sec = int(sec) + + if yr < 1000: + # find "obvious" year + cur_yr = time.localtime(time.time())[0] + m = cur_yr % 100 + tmp = yr + yr = yr + cur_yr - m + m = m - tmp + if abs(m) > 50: + if m > 0: yr = yr + 100 + else: yr = yr - 100 + + # convert UTC time tuple to seconds since epoch (not timezone-adjusted) + t = _timegm((yr, mon, day, hr, min, sec, tz)) + + if t is not None: + # adjust time using timezone string, to get absolute time since epoch + if tz is None: + tz = "UTC" + tz = tz.upper() + offset = offset_from_tz_string(tz) + if offset is None: + return None + t = t - offset + + return t + +STRICT_DATE_RE = re.compile( + r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " + r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII) +WEEKDAY_RE = re.compile( + r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII) +LOOSE_HTTP_DATE_RE = re.compile( + r"""^ + (\d\d?) # day + (?:\s+|[-\/]) + (\w+) # month + (?:\s+|[-\/]) + (\d+) # year + (?: + (?:\s+|:) # separator before clock + (\d\d?):(\d\d) # hour:min + (?::(\d\d))? # optional seconds + )? # optional clock + \s* + ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone + \s* + (?:\(\w+\))? # ASCII representation of timezone in parens. + \s*$""", re.X | re.ASCII) +def http2time(text): + """Returns time in seconds since epoch of time represented by a string. + + Return value is an integer. + + None is returned if the format of str is unrecognized, the time is outside + the representable range, or the timezone string is not recognized. If the + string contains no timezone, UTC is assumed. + + The timezone in the string may be numerical (like "-0800" or "+0100") or a + string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the + timezone strings equivalent to UTC (zero offset) are known to the function. 
+ + The function loosely parses the following formats: + + Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format + Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format + Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format + 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday) + 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday) + 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday) + + The parser ignores leading and trailing whitespace. The time may be + absent. + + If the year is given with only 2 digits, the function will select the + century that makes the year closest to the current date. + + """ + # fast exit for strictly conforming string + m = STRICT_DATE_RE.search(text) + if m: + g = m.groups() + mon = MONTHS_LOWER.index(g[1].lower()) + 1 + tt = (int(g[2]), mon, int(g[0]), + int(g[3]), int(g[4]), float(g[5])) + return _timegm(tt) + + # No, we need some messy parsing... + + # clean up + text = text.lstrip() + text = WEEKDAY_RE.sub("", text, 1) # Useless weekday + + # tz is time zone specifier string + day, mon, yr, hr, min, sec, tz = [None]*7 + + # loose regexp parse + m = LOOSE_HTTP_DATE_RE.search(text) + if m is not None: + day, mon, yr, hr, min, sec, tz = m.groups() + else: + return None # bad format + + return _str2time(day, mon, yr, hr, min, sec, tz) + +ISO_DATE_RE = re.compile( + r"""^ + (\d{4}) # year + [-\/]? + (\d\d?) # numerical month + [-\/]? + (\d\d?) # day + (?: + (?:\s+|[-:Tt]) # separator before clock + (\d\d?):?(\d\d) # hour:min + (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional) + )? # optional clock + \s* + ([-+]?\d\d?:?(:?\d\d)? + |Z|z)? # timezone (Z is "zero meridian", i.e. GMT) + \s*$""", re.X | re. ASCII) +def iso2time(text): + """ + As for http2time, but parses the ISO 8601 formats: + + 1994-02-03 14:15:29 -0100 -- ISO 8601 format + 1994-02-03 14:15:29 -- zone is optional + 1994-02-03 -- only date + 1994-02-03T14:15:29 -- Use T as separator + 19940203T141529Z -- ISO 8601 compact format + 19940203 -- only date + + """ + # clean up + text = text.lstrip() + + # tz is time zone specifier string + day, mon, yr, hr, min, sec, tz = [None]*7 + + # loose regexp parse + m = ISO_DATE_RE.search(text) + if m is not None: + # XXX there's an extra bit of the timezone I'm ignoring here: is + # this the right thing to do? + yr, mon, day, hr, min, sec, tz, _ = m.groups() + else: + return None # bad format + + return _str2time(day, mon, yr, hr, min, sec, tz) + + +# Header parsing +# ----------------------------------------------------------------------------- + +def unmatched(match): + """Return unmatched part of re.Match object.""" + start, end = match.span(0) + return match.string[:start]+match.string[end:] + +HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)") +HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"") +HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)") +HEADER_ESCAPE_RE = re.compile(r"\\(.)") +def split_header_words(header_values): + r"""Parse header values into a list of lists containing key,value pairs. + + The function knows how to deal with ",", ";" and "=" as well as quoted + values after "=". A list of space separated tokens are parsed as if they + were separated by ";". + + If the header_values passed as argument contains multiple values, then they + are treated as if they were a single value separated by comma ",". + + This means that this function is useful for parsing header fields that + follow this syntax (BNF as from the HTTP/1.1 specification, but we relax + the requirement for tokens). 
+ + headers = #header + header = (token | parameter) *( [";"] (token | parameter)) + + token = 1*<any CHAR except CTLs or separators> + separators = "(" | ")" | "<" | ">" | "@" + | "," | ";" | ":" | "\" | <"> + | "/" | "[" | "]" | "?" | "=" + | "{" | "}" | SP | HT + + quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) + qdtext = <any TEXT except <">> + quoted-pair = "\" CHAR + + parameter = attribute "=" value + attribute = token + value = token | quoted-string + + Each header is represented by a list of key/value pairs. The value for a + simple token (not part of a parameter) is None. Syntactically incorrect + headers will not necessarily be parsed as you would want. + + This is easier to describe with some examples: + + >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz']) + [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]] + >>> split_header_words(['text/html; charset="iso-8859-1"']) + [[('text/html', None), ('charset', 'iso-8859-1')]] + >>> split_header_words([r'Basic realm="\"foo\bar\""']) + [[('Basic', None), ('realm', '"foobar"')]] + + """ + assert not isinstance(header_values, str) + result = [] + for text in header_values: + orig_text = text + pairs = [] + while text: + m = HEADER_TOKEN_RE.search(text) + if m: + text = unmatched(m) + name = m.group(1) + m = HEADER_QUOTED_VALUE_RE.search(text) + if m: # quoted value + text = unmatched(m) + value = m.group(1) + value = HEADER_ESCAPE_RE.sub(r"\1", value) + else: + m = HEADER_VALUE_RE.search(text) + if m: # unquoted value + text = unmatched(m) + value = m.group(1) + value = value.rstrip() + else: + # no value, a lone token + value = None + pairs.append((name, value)) + elif text.lstrip().startswith(","): + # concatenated headers, as per RFC 2616 section 4.2 + text = text.lstrip()[1:] + if pairs: result.append(pairs) + pairs = [] + else: + # skip junk + non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text) + assert nr_junk_chars > 0, ( + "split_header_words bug: '%s', '%s', %s" % + (orig_text, text, pairs)) + text = non_junk + if pairs: result.append(pairs) + return result + +HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])") +def join_header_words(lists): + """Do the inverse (almost) of the conversion done by split_header_words. + + Takes a list of lists of (key, value) pairs and produces a single header + value. Attribute values are quoted if needed. + + >>> join_header_words([[("text/plain", None), ("charset", "iso-8859-1")]]) + 'text/plain; charset="iso-8859-1"' + >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859-1")]]) + 'text/plain, charset="iso-8859-1"' + + """ + headers = [] + for pairs in lists: + attr = [] + for k, v in pairs: + if v is not None: + if not re.search(r"^\w+$", v): + v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \ + v = '"%s"' % v + k = "%s=%s" % (k, v) + attr.append(k) + if attr: headers.append("; ".join(attr)) + return ", ".join(headers) + +def strip_quotes(text): + if text.startswith('"'): + text = text[1:] + if text.endswith('"'): + text = text[:-1] + return text + +def parse_ns_headers(ns_headers): + """Ad-hoc parser for Netscape protocol cookie-attributes. + + The old Netscape cookie format for Set-Cookie can for instance contain + an unquoted "," in the expires field, so we have to use this ad-hoc + parser instead of split_header_words. + + XXX This may not make the best possible effort to parse all the crap + that Netscape Cookie headers contain. 
Ronald Tschalar's HTTPClient + parser is probably better, so could do worse than following that if + this ever gives any trouble. + + Currently, this is also used for parsing RFC 2109 cookies. + + """ + known_attrs = ("expires", "domain", "path", "secure", + # RFC 2109 attrs (may turn up in Netscape cookies, too) + "version", "port", "max-age") + + result = [] + for ns_header in ns_headers: + pairs = [] + version_set = False + + # XXX: The following does not strictly adhere to RFCs in that empty + # names and values are legal (the former will only appear once and will + # be overwritten if multiple occurrences are present). This is + # mostly to deal with backwards compatibility. + for ii, param in enumerate(ns_header.split(';')): + param = param.strip() + + key, sep, val = param.partition('=') + key = key.strip() + + if not key: + if ii == 0: + break + else: + continue + + # allow for a distinction between present and empty and missing + # altogether + val = val.strip() if sep else None + + if ii != 0: + lc = key.lower() + if lc in known_attrs: + key = lc + + if key == "version": + # This is an RFC 2109 cookie. + if val is not None: + val = strip_quotes(val) + version_set = True + elif key == "expires": + # convert expires date to seconds since epoch + if val is not None: + val = http2time(strip_quotes(val)) # None if invalid + pairs.append((key, val)) + + if pairs: + if not version_set: + pairs.append(("version", "0")) + result.append(pairs) + + return result + + +IPV4_RE = re.compile(r"\.\d+$", re.ASCII) +def is_HDN(text): + """Return True if text is a host domain name.""" + # XXX + # This may well be wrong. Which RFC is HDN defined in, if any (for + # the purposes of RFC 2965)? + # For the current implementation, what about IPv6? Remember to look + # at other uses of IPV4_RE also, if change this. + if IPV4_RE.search(text): + return False + if text == "": + return False + if text[0] == "." or text[-1] == ".": + return False + return True + +def domain_match(A, B): + """Return True if domain A domain-matches domain B, according to RFC 2965. + + A and B may be host domain names or IP addresses. + + RFC 2965, section 1: + + Host names can be specified either as an IP address or a HDN string. + Sometimes we compare one host name with another. (Such comparisons SHALL + be case-insensitive.) Host A's name domain-matches host B's if + + * their host name strings string-compare equal; or + + * A is a HDN string and has the form NB, where N is a non-empty + name string, B has the form .B', and B' is a HDN string. (So, + x.y.com domain-matches .Y.com but not Y.com.) + + Note that domain-match is not a commutative operation: a.b.c.com + domain-matches .c.com, but not the reverse. + + """ + # Note that, if A or B are IP addresses, the only relevant part of the + # definition of the domain-match algorithm is the direct string-compare. + A = A.lower() + B = B.lower() + if A == B: + return True + if not is_HDN(A): + return False + i = A.rfind(B) + if i == -1 or i == 0: + # A does not have form NB, or N is the empty string + return False + if not B.startswith("."): + return False + if not is_HDN(B[1:]): + return False + return True + +def liberal_is_HDN(text): + """Return True if text is a sort-of-like a host domain name. + + For accepting/blocking domains. + + """ + if IPV4_RE.search(text): + return False + return True + +def user_domain_match(A, B): + """For blocking/accepting domains. + + A and B may be host domain names or IP addresses. 
+ + """ + A = A.lower() + B = B.lower() + if not (liberal_is_HDN(A) and liberal_is_HDN(B)): + if A == B: + # equal IP addresses + return True + return False + initial_dot = B.startswith(".") + if initial_dot and A.endswith(B): + return True + if not initial_dot and A == B: + return True + return False + +cut_port_re = re.compile(r":\d+$", re.ASCII) +def request_host(request): + """Return request-host, as defined by RFC 2965. + + Variation from RFC: returned value is lowercased, for convenient + comparison. + + """ + url = request.get_full_url() + host = urllib.parse.urlparse(url)[1] + if host == "": + host = request.get_header("Host", "") + + # remove port, if present + host = cut_port_re.sub("", host, 1) + return host.lower() + +def eff_request_host(request): + """Return a tuple (request-host, effective request-host name). + + As defined by RFC 2965, except both are lowercased. + + """ + erhn = req_host = request_host(request) + if req_host.find(".") == -1 and not IPV4_RE.search(req_host): + erhn = req_host + ".local" + return req_host, erhn + +def request_path(request): + """Path component of request-URI, as defined by RFC 2965.""" + url = request.get_full_url() + parts = urllib.parse.urlsplit(url) + path = escape_path(parts.path) + if not path.startswith("/"): + # fix bad RFC 2396 absoluteURI + path = "/" + path + return path + +def request_port(request): + host = request.host + i = host.find(':') + if i >= 0: + port = host[i+1:] + try: + int(port) + except ValueError: + _debug("nonnumeric port: '%s'", port) + return None + else: + port = DEFAULT_HTTP_PORT + return port + +# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't +# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). +HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()" +ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])") +def uppercase_escaped_char(match): + return "%%%s" % match.group(1).upper() +def escape_path(path): + """Escape any invalid characters in HTTP URL, and uppercase all escapes.""" + # There's no knowing what character encoding was used to create URLs + # containing %-escapes, but since we have to pick one to escape invalid + # path characters, we pick UTF-8, as recommended in the HTML 4.0 + # specification: + # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1 + # And here, kind of: draft-fielding-uri-rfc2396bis-03 + # (And in draft IRI specification: draft-duerst-iri-05) + # (And here, for new URI schemes: RFC 2718) + path = urllib.parse.quote(path, HTTP_PATH_SAFE) + path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) + return path + +def reach(h): + """Return reach of host h, as defined by RFC 2965, section 1. + + The reach R of a host name H is defined as follows: + + * If + + - H is the host domain name of a host; and, + + - H has the form A.B; and + + - A has no embedded (that is, interior) dots; and + + - B has at least one embedded dot, or B is the string "local". + then the reach of H is .B. + + * Otherwise, the reach of H is H. 
+ + >>> reach("www.acme.com") + '.acme.com' + >>> reach("acme.com") + 'acme.com' + >>> reach("acme.local") + '.local' + + """ + i = h.find(".") + if i >= 0: + #a = h[:i] # this line is only here to show what a is + b = h[i+1:] + i = b.find(".") + if is_HDN(h) and (i >= 0 or b == "local"): + return "."+b + return h + +def is_third_party(request): + """ + + RFC 2965, section 3.3.6: + + An unverifiable transaction is to a third-party host if its request- + host U does not domain-match the reach R of the request-host O in the + origin transaction. + + """ + req_host = request_host(request) + if not domain_match(req_host, reach(request.origin_req_host)): + return True + else: + return False + + +class Cookie: + """HTTP Cookie. + + This class represents both Netscape and RFC 2965 cookies. + + This is deliberately a very simple class. It just holds attributes. It's + possible to construct Cookie instances that don't comply with the cookie + standards. CookieJar.make_cookies is the factory function for Cookie + objects -- it deals with cookie parsing, supplying defaults, and + normalising to the representation used in this class. CookiePolicy is + responsible for checking them to see whether they should be accepted from + and returned to the server. + + Note that the port may be present in the headers, but unspecified ("Port" + rather than"Port=80", for example); if this is the case, port is None. + + """ + + def __init__(self, version, name, value, + port, port_specified, + domain, domain_specified, domain_initial_dot, + path, path_specified, + secure, + expires, + discard, + comment, + comment_url, + rest, + rfc2109=False, + ): + + if version is not None: version = int(version) + if expires is not None: expires = int(float(expires)) + if port is None and port_specified is True: + raise ValueError("if port is None, port_specified must be false") + + self.version = version + self.name = name + self.value = value + self.port = port + self.port_specified = port_specified + # normalise case, as per RFC 2965 section 3.3.3 + self.domain = domain.lower() + self.domain_specified = domain_specified + # Sigh. We need to know whether the domain given in the + # cookie-attribute had an initial dot, in order to follow RFC 2965 + # (as clarified in draft errata). Needed for the returned $Domain + # value. 
+ self.domain_initial_dot = domain_initial_dot + self.path = path + self.path_specified = path_specified + self.secure = secure + self.expires = expires + self.discard = discard + self.comment = comment + self.comment_url = comment_url + self.rfc2109 = rfc2109 + + self._rest = copy.copy(rest) + + def has_nonstandard_attr(self, name): + return name in self._rest + def get_nonstandard_attr(self, name, default=None): + return self._rest.get(name, default) + def set_nonstandard_attr(self, name, value): + self._rest[name] = value + + def is_expired(self, now=None): + if now is None: now = time.time() + if (self.expires is not None) and (self.expires <= now): + return True + return False + + def __str__(self): + if self.port is None: p = "" + else: p = ":"+self.port + limit = self.domain + p + self.path + if self.value is not None: + namevalue = "%s=%s" % (self.name, self.value) + else: + namevalue = self.name + return "<Cookie %s for %s>" % (namevalue, limit) + + def __repr__(self): + args = [] + for name in ("version", "name", "value", + "port", "port_specified", + "domain", "domain_specified", "domain_initial_dot", + "path", "path_specified", + "secure", "expires", "discard", "comment", "comment_url", + ): + attr = getattr(self, name) + args.append("%s=%s" % (name, repr(attr))) + args.append("rest=%s" % repr(self._rest)) + args.append("rfc2109=%s" % repr(self.rfc2109)) + return "%s(%s)" % (self.__class__.__name__, ", ".join(args)) + + +class CookiePolicy: + """Defines which cookies get accepted from and returned to server. + + May also modify cookies, though this is probably a bad idea. + + The subclass DefaultCookiePolicy defines the standard rules for Netscape + and RFC 2965 cookies -- override that if you want a customized policy. + + """ + def set_ok(self, cookie, request): + """Return true if (and only if) cookie should be accepted from server. + + Currently, pre-expired cookies never get this far -- the CookieJar + class deletes such cookies itself. + + """ + raise NotImplementedError() + + def return_ok(self, cookie, request): + """Return true if (and only if) cookie should be returned to server.""" + raise NotImplementedError() + + def domain_return_ok(self, domain, request): + """Return false if cookies should not be returned, given cookie domain. + """ + return True + + def path_return_ok(self, path, request): + """Return false if cookies should not be returned, given cookie path. 
+ """ + return True + + +class DefaultCookiePolicy(CookiePolicy): + """Implements the standard rules for accepting and returning cookies.""" + + DomainStrictNoDots = 1 + DomainStrictNonDomain = 2 + DomainRFC2965Match = 4 + + DomainLiberal = 0 + DomainStrict = DomainStrictNoDots|DomainStrictNonDomain + + def __init__(self, + blocked_domains=None, allowed_domains=None, + netscape=True, rfc2965=False, + rfc2109_as_netscape=None, + hide_cookie2=False, + strict_domain=False, + strict_rfc2965_unverifiable=True, + strict_ns_unverifiable=False, + strict_ns_domain=DomainLiberal, + strict_ns_set_initial_dollar=False, + strict_ns_set_path=False, + ): + """Constructor arguments should be passed as keyword arguments only.""" + self.netscape = netscape + self.rfc2965 = rfc2965 + self.rfc2109_as_netscape = rfc2109_as_netscape + self.hide_cookie2 = hide_cookie2 + self.strict_domain = strict_domain + self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable + self.strict_ns_unverifiable = strict_ns_unverifiable + self.strict_ns_domain = strict_ns_domain + self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar + self.strict_ns_set_path = strict_ns_set_path + + if blocked_domains is not None: + self._blocked_domains = tuple(blocked_domains) + else: + self._blocked_domains = () + + if allowed_domains is not None: + allowed_domains = tuple(allowed_domains) + self._allowed_domains = allowed_domains + + def blocked_domains(self): + """Return the sequence of blocked domains (as a tuple).""" + return self._blocked_domains + def set_blocked_domains(self, blocked_domains): + """Set the sequence of blocked domains.""" + self._blocked_domains = tuple(blocked_domains) + + def is_blocked(self, domain): + for blocked_domain in self._blocked_domains: + if user_domain_match(domain, blocked_domain): + return True + return False + + def allowed_domains(self): + """Return None, or the sequence of allowed domains (as a tuple).""" + return self._allowed_domains + def set_allowed_domains(self, allowed_domains): + """Set the sequence of allowed domains, or None.""" + if allowed_domains is not None: + allowed_domains = tuple(allowed_domains) + self._allowed_domains = allowed_domains + + def is_not_allowed(self, domain): + if self._allowed_domains is None: + return False + for allowed_domain in self._allowed_domains: + if user_domain_match(domain, allowed_domain): + return False + return True + + def set_ok(self, cookie, request): + """ + If you override .set_ok(), be sure to call this method. If it returns + false, so should your subclass (assuming your subclass wants to be more + strict about which cookies to accept). + + """ + _debug(" - checking cookie %s=%s", cookie.name, cookie.value) + + assert cookie.name is not None + + for n in "version", "verifiability", "name", "path", "domain", "port": + fn_name = "set_ok_"+n + fn = getattr(self, fn_name) + if not fn(cookie, request): + return False + + return True + + def set_ok_version(self, cookie, request): + if cookie.version is None: + # Version is always set to 0 by parse_ns_headers if it's a Netscape + # cookie, so this must be an invalid RFC 2965 cookie. 
+ _debug(" Set-Cookie2 without version attribute (%s=%s)", + cookie.name, cookie.value) + return False + if cookie.version > 0 and not self.rfc2965: + _debug(" RFC 2965 cookies are switched off") + return False + elif cookie.version == 0 and not self.netscape: + _debug(" Netscape cookies are switched off") + return False + return True + + def set_ok_verifiability(self, cookie, request): + if request.unverifiable and is_third_party(request): + if cookie.version > 0 and self.strict_rfc2965_unverifiable: + _debug(" third-party RFC 2965 cookie during " + "unverifiable transaction") + return False + elif cookie.version == 0 and self.strict_ns_unverifiable: + _debug(" third-party Netscape cookie during " + "unverifiable transaction") + return False + return True + + def set_ok_name(self, cookie, request): + # Try and stop servers setting V0 cookies designed to hack other + # servers that know both V0 and V1 protocols. + if (cookie.version == 0 and self.strict_ns_set_initial_dollar and + cookie.name.startswith("$")): + _debug(" illegal name (starts with '$'): '%s'", cookie.name) + return False + return True + + def set_ok_path(self, cookie, request): + if cookie.path_specified: + req_path = request_path(request) + if ((cookie.version > 0 or + (cookie.version == 0 and self.strict_ns_set_path)) and + not req_path.startswith(cookie.path)): + _debug(" path attribute %s is not a prefix of request " + "path %s", cookie.path, req_path) + return False + return True + + def set_ok_domain(self, cookie, request): + if self.is_blocked(cookie.domain): + _debug(" domain %s is in user block-list", cookie.domain) + return False + if self.is_not_allowed(cookie.domain): + _debug(" domain %s is not in user allow-list", cookie.domain) + return False + if cookie.domain_specified: + req_host, erhn = eff_request_host(request) + domain = cookie.domain + if self.strict_domain and (domain.count(".") >= 2): + # XXX This should probably be compared with the Konqueror + # (kcookiejar.cpp) and Mozilla implementations, but it's a + # losing battle. 
+ i = domain.rfind(".") + j = domain.rfind(".", 0, i) + if j == 0: # domain like .foo.bar + tld = domain[i+1:] + sld = domain[j+1:i] + if sld.lower() in ("co", "ac", "com", "edu", "org", "net", + "gov", "mil", "int", "aero", "biz", "cat", "coop", + "info", "jobs", "mobi", "museum", "name", "pro", + "travel", "eu") and len(tld) == 2: + # domain like .co.uk + _debug(" country-code second level domain %s", domain) + return False + if domain.startswith("."): + undotted_domain = domain[1:] + else: + undotted_domain = domain + embedded_dots = (undotted_domain.find(".") >= 0) + if not embedded_dots and domain != ".local": + _debug(" non-local domain %s contains no embedded dot", + domain) + return False + if cookie.version == 0: + if (not erhn.endswith(domain) and + (not erhn.startswith(".") and + not ("."+erhn).endswith(domain))): + _debug(" effective request-host %s (even with added " + "initial dot) does not end with %s", + erhn, domain) + return False + if (cookie.version > 0 or + (self.strict_ns_domain & self.DomainRFC2965Match)): + if not domain_match(erhn, domain): + _debug(" effective request-host %s does not domain-match " + "%s", erhn, domain) + return False + if (cookie.version > 0 or + (self.strict_ns_domain & self.DomainStrictNoDots)): + host_prefix = req_host[:-len(domain)] + if (host_prefix.find(".") >= 0 and + not IPV4_RE.search(req_host)): + _debug(" host prefix %s for domain %s contains a dot", + host_prefix, domain) + return False + return True + + def set_ok_port(self, cookie, request): + if cookie.port_specified: + req_port = request_port(request) + if req_port is None: + req_port = "80" + else: + req_port = str(req_port) + for p in cookie.port.split(","): + try: + int(p) + except ValueError: + _debug(" bad port %s (not numeric)", p) + return False + if p == req_port: + break + else: + _debug(" request port (%s) not found in %s", + req_port, cookie.port) + return False + return True + + def return_ok(self, cookie, request): + """ + If you override .return_ok(), be sure to call this method. If it + returns false, so should your subclass (assuming your subclass wants to + be more strict about which cookies to return). + + """ + # Path has already been checked by .path_return_ok(), and domain + # blocking done by .domain_return_ok(). 
+ _debug(" - checking cookie %s=%s", cookie.name, cookie.value) + + for n in "version", "verifiability", "secure", "expires", "port", "domain": + fn_name = "return_ok_"+n + fn = getattr(self, fn_name) + if not fn(cookie, request): + return False + return True + + def return_ok_version(self, cookie, request): + if cookie.version > 0 and not self.rfc2965: + _debug(" RFC 2965 cookies are switched off") + return False + elif cookie.version == 0 and not self.netscape: + _debug(" Netscape cookies are switched off") + return False + return True + + def return_ok_verifiability(self, cookie, request): + if request.unverifiable and is_third_party(request): + if cookie.version > 0 and self.strict_rfc2965_unverifiable: + _debug(" third-party RFC 2965 cookie during unverifiable " + "transaction") + return False + elif cookie.version == 0 and self.strict_ns_unverifiable: + _debug(" third-party Netscape cookie during unverifiable " + "transaction") + return False + return True + + def return_ok_secure(self, cookie, request): + if cookie.secure and request.type != "https": + _debug(" secure cookie with non-secure request") + return False + return True + + def return_ok_expires(self, cookie, request): + if cookie.is_expired(self._now): + _debug(" cookie expired") + return False + return True + + def return_ok_port(self, cookie, request): + if cookie.port: + req_port = request_port(request) + if req_port is None: + req_port = "80" + for p in cookie.port.split(","): + if p == req_port: + break + else: + _debug(" request port %s does not match cookie port %s", + req_port, cookie.port) + return False + return True + + def return_ok_domain(self, cookie, request): + req_host, erhn = eff_request_host(request) + domain = cookie.domain + + # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't + if (cookie.version == 0 and + (self.strict_ns_domain & self.DomainStrictNonDomain) and + not cookie.domain_specified and domain != erhn): + _debug(" cookie with unspecified domain does not string-compare " + "equal to request domain") + return False + + if cookie.version > 0 and not domain_match(erhn, domain): + _debug(" effective request-host name %s does not domain-match " + "RFC 2965 cookie domain %s", erhn, domain) + return False + if cookie.version == 0 and not ("."+erhn).endswith(domain): + _debug(" request-host %s does not match Netscape cookie domain " + "%s", req_host, domain) + return False + return True + + def domain_return_ok(self, domain, request): + # Liberal check of. This is here as an optimization to avoid + # having to load lots of MSIE cookie files unless necessary. 
+ req_host, erhn = eff_request_host(request) + if not req_host.startswith("."): + req_host = "."+req_host + if not erhn.startswith("."): + erhn = "."+erhn + if not (req_host.endswith(domain) or erhn.endswith(domain)): + #_debug(" request domain %s does not match cookie domain %s", + # req_host, domain) + return False + + if self.is_blocked(domain): + _debug(" domain %s is in user block-list", domain) + return False + if self.is_not_allowed(domain): + _debug(" domain %s is not in user allow-list", domain) + return False + + return True + + def path_return_ok(self, path, request): + _debug("- checking cookie path=%s", path) + req_path = request_path(request) + if not req_path.startswith(path): + _debug(" %s does not path-match %s", req_path, path) + return False + return True + + +def vals_sorted_by_key(adict): + keys = sorted(adict.keys()) + return map(adict.get, keys) + +def deepvalues(mapping): + """Iterates over nested mapping, depth-first, in sorted order by key.""" + values = vals_sorted_by_key(mapping) + for obj in values: + mapping = False + try: + obj.items + except AttributeError: + pass + else: + mapping = True + yield from deepvalues(obj) + if not mapping: + yield obj + + +# Used as second parameter to dict.get() method, to distinguish absent +# dict key from one with a None value. +class Absent: pass + +class CookieJar: + """Collection of HTTP cookies. + + You may not need to know about this class: try + urllib.request.build_opener(HTTPCookieProcessor).open(url). + """ + + non_word_re = re.compile(r"\W") + quote_re = re.compile(r"([\"\\])") + strict_domain_re = re.compile(r"\.?[^.]*") + domain_re = re.compile(r"[^.]*") + dots_re = re.compile(r"^\.+") + + magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII) + + def __init__(self, policy=None): + if policy is None: + policy = DefaultCookiePolicy() + self._policy = policy + + self._cookies_lock = _threading.RLock() + self._cookies = {} + + def set_policy(self, policy): + self._policy = policy + + def _cookies_for_domain(self, domain, request): + cookies = [] + if not self._policy.domain_return_ok(domain, request): + return [] + _debug("Checking %s for cookies to return", domain) + cookies_by_path = self._cookies[domain] + for path in cookies_by_path.keys(): + if not self._policy.path_return_ok(path, request): + continue + cookies_by_name = cookies_by_path[path] + for cookie in cookies_by_name.values(): + if not self._policy.return_ok(cookie, request): + _debug(" not returning cookie") + continue + _debug(" it's a match") + cookies.append(cookie) + return cookies + + def _cookies_for_request(self, request): + """Return a list of cookies to be returned to server.""" + cookies = [] + for domain in self._cookies.keys(): + cookies.extend(self._cookies_for_domain(domain, request)) + return cookies + + def _cookie_attrs(self, cookies): + """Return a list of cookie-attributes to be returned to server. + + like ['foo="bar"; $Path="/"', ...] + + The $Version attribute is also added when appropriate (currently only + once per request). + + """ + # add cookies in order of most specific (ie. longest) path first + cookies.sort(key=lambda a: len(a.path), reverse=True) + + version_set = False + + attrs = [] + for cookie in cookies: + # set version of Cookie header + # XXX + # What should it be if multiple matching Set-Cookie headers have + # different versions themselves? + # Answer: there is no answer; was supposed to be settled by + # RFC 2965 errata, but that may never appear... 
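domain_return_ok() consults the user block-list and allow-list, and the set_ok_domain()/return_ok_domain() checks above are driven by the strict_ns_domain bit flags. A configuration sketch using the public DefaultCookiePolicy constructor (the blocked domain is a placeholder):

from http.cookiejar import CookieJar, DefaultCookiePolicy

policy = DefaultCookiePolicy(
    blocked_domains=["ads.example.net"],      # rejected by is_blocked()
    strict_ns_domain=(DefaultCookiePolicy.DomainStrictNoDots |
                      DefaultCookiePolicy.DomainStrictNonDomain |
                      DefaultCookiePolicy.DomainRFC2965Match))
jar = CookieJar(policy)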
+ version = cookie.version + if not version_set: + version_set = True + if version > 0: + attrs.append("$Version=%s" % version) + + # quote cookie value if necessary + # (not for Netscape protocol, which already has any quotes + # intact, due to the poorly-specified Netscape Cookie: syntax) + if ((cookie.value is not None) and + self.non_word_re.search(cookie.value) and version > 0): + value = self.quote_re.sub(r"\\\1", cookie.value) + else: + value = cookie.value + + # add cookie-attributes to be returned in Cookie header + if cookie.value is None: + attrs.append(cookie.name) + else: + attrs.append("%s=%s" % (cookie.name, value)) + if version > 0: + if cookie.path_specified: + attrs.append('$Path="%s"' % cookie.path) + if cookie.domain.startswith("."): + domain = cookie.domain + if (not cookie.domain_initial_dot and + domain.startswith(".")): + domain = domain[1:] + attrs.append('$Domain="%s"' % domain) + if cookie.port is not None: + p = "$Port" + if cookie.port_specified: + p = p + ('="%s"' % cookie.port) + attrs.append(p) + + return attrs + + def add_cookie_header(self, request): + """Add correct Cookie: header to request (urllib.request.Request object). + + The Cookie2 header is also added unless policy.hide_cookie2 is true. + + """ + _debug("add_cookie_header") + self._cookies_lock.acquire() + try: + + self._policy._now = self._now = int(time.time()) + + cookies = self._cookies_for_request(request) + + attrs = self._cookie_attrs(cookies) + if attrs: + if not request.has_header("Cookie"): + request.add_unredirected_header( + "Cookie", "; ".join(attrs)) + + # if necessary, advertise that we know RFC 2965 + if (self._policy.rfc2965 and not self._policy.hide_cookie2 and + not request.has_header("Cookie2")): + for cookie in cookies: + if cookie.version != 1: + request.add_unredirected_header("Cookie2", '$Version="1"') + break + + finally: + self._cookies_lock.release() + + self.clear_expired_cookies() + + def _normalized_cookie_tuples(self, attrs_set): + """Return list of tuples containing normalised cookie information. + + attrs_set is the list of lists of key,value pairs extracted from + the Set-Cookie or Set-Cookie2 headers. + + Tuples are name, value, standard, rest, where name and value are the + cookie name and value, standard is a dictionary containing the standard + cookie-attributes (discard, secure, version, expires or max-age, + domain, path and port) and rest is a dictionary containing the rest of + the cookie-attributes. + + """ + cookie_tuples = [] + + boolean_attrs = "discard", "secure" + value_attrs = ("version", + "expires", "max-age", + "domain", "path", "port", + "comment", "commenturl") + + for cookie_attrs in attrs_set: + name, value = cookie_attrs[0] + + # Build dictionary of standard cookie-attributes (standard) and + # dictionary of other cookie-attributes (rest). + + # Note: expiry time is normalised to seconds since epoch. V0 + # cookies should have the Expires cookie-attribute, and V1 cookies + # should have Max-Age, but since V1 includes RFC 2109 cookies (and + # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we + # accept either (but prefer Max-Age). 
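add_cookie_header() is the half of the jar that urllib.request calls on outgoing requests (extract_cookies(), further below, handles responses); the CookieJar docstring above already points at HTTPCookieProcessor. A minimal wiring sketch (the URL is a placeholder):

import urllib.request
from http.cookiejar import CookieJar

jar = CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(jar))
# add_cookie_header() runs on the request, extract_cookies() on the response.
response = opener.open("http://example.com/")
for cookie in jar:
    print(cookie.name, cookie.value)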
+ max_age_set = False + + bad_cookie = False + + standard = {} + rest = {} + for k, v in cookie_attrs[1:]: + lc = k.lower() + # don't lose case distinction for unknown fields + if lc in value_attrs or lc in boolean_attrs: + k = lc + if k in boolean_attrs and v is None: + # boolean cookie-attribute is present, but has no value + # (like "discard", rather than "port=80") + v = True + if k in standard: + # only first value is significant + continue + if k == "domain": + if v is None: + _debug(" missing value for domain attribute") + bad_cookie = True + break + # RFC 2965 section 3.3.3 + v = v.lower() + if k == "expires": + if max_age_set: + # Prefer max-age to expires (like Mozilla) + continue + if v is None: + _debug(" missing or invalid value for expires " + "attribute: treating as session cookie") + continue + if k == "max-age": + max_age_set = True + try: + v = int(v) + except ValueError: + _debug(" missing or invalid (non-numeric) value for " + "max-age attribute") + bad_cookie = True + break + # convert RFC 2965 Max-Age to seconds since epoch + # XXX Strictly you're supposed to follow RFC 2616 + # age-calculation rules. Remember that zero Max-Age + # is a request to discard (old and new) cookie, though. + k = "expires" + v = self._now + v + if (k in value_attrs) or (k in boolean_attrs): + if (v is None and + k not in ("port", "comment", "commenturl")): + _debug(" missing value for %s attribute" % k) + bad_cookie = True + break + standard[k] = v + else: + rest[k] = v + + if bad_cookie: + continue + + cookie_tuples.append((name, value, standard, rest)) + + return cookie_tuples + + def _cookie_from_cookie_tuple(self, tup, request): + # standard is dict of standard cookie-attributes, rest is dict of the + # rest of them + name, value, standard, rest = tup + + domain = standard.get("domain", Absent) + path = standard.get("path", Absent) + port = standard.get("port", Absent) + expires = standard.get("expires", Absent) + + # set the easy defaults + version = standard.get("version", None) + if version is not None: + try: + version = int(version) + except ValueError: + return None # invalid version, ignore cookie + secure = standard.get("secure", False) + # (discard is also set if expires is Absent) + discard = standard.get("discard", False) + comment = standard.get("comment", None) + comment_url = standard.get("commenturl", None) + + # set default path + if path is not Absent and path != "": + path_specified = True + path = escape_path(path) + else: + path_specified = False + path = request_path(request) + i = path.rfind("/") + if i != -1: + if version == 0: + # Netscape spec parts company from reality here + path = path[:i] + else: + path = path[:i+1] + if len(path) == 0: path = "/" + + # set default domain + domain_specified = domain is not Absent + # but first we have to remember whether it starts with a dot + domain_initial_dot = False + if domain_specified: + domain_initial_dot = bool(domain.startswith(".")) + if domain is Absent: + req_host, erhn = eff_request_host(request) + domain = erhn + elif not domain.startswith("."): + domain = "."+domain + + # set default port + port_specified = False + if port is not Absent: + if port is None: + # Port attr present, but has no value: default to request port. + # Cookie should then only be sent back on that port. + port = request_port(request) + else: + port_specified = True + port = re.sub(r"\s+", "", port) + else: + # No port attr present. Cookie can be sent back on any port. 
+ port = None + + # set default expires and discard + if expires is Absent: + expires = None + discard = True + elif expires <= self._now: + # Expiry date in past is request to delete cookie. This can't be + # in DefaultCookiePolicy, because can't delete cookies there. + try: + self.clear(domain, path, name) + except KeyError: + pass + _debug("Expiring cookie, domain='%s', path='%s', name='%s'", + domain, path, name) + return None + + return Cookie(version, + name, value, + port, port_specified, + domain, domain_specified, domain_initial_dot, + path, path_specified, + secure, + expires, + discard, + comment, + comment_url, + rest) + + def _cookies_from_attrs_set(self, attrs_set, request): + cookie_tuples = self._normalized_cookie_tuples(attrs_set) + + cookies = [] + for tup in cookie_tuples: + cookie = self._cookie_from_cookie_tuple(tup, request) + if cookie: cookies.append(cookie) + return cookies + + def _process_rfc2109_cookies(self, cookies): + rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None) + if rfc2109_as_ns is None: + rfc2109_as_ns = not self._policy.rfc2965 + for cookie in cookies: + if cookie.version == 1: + cookie.rfc2109 = True + if rfc2109_as_ns: + # treat 2109 cookies as Netscape cookies rather than + # as RFC2965 cookies + cookie.version = 0 + + def make_cookies(self, response, request): + """Return sequence of Cookie objects extracted from response object.""" + # get cookie-attributes for RFC 2965 and Netscape protocols + headers = response.info() + rfc2965_hdrs = headers.get_all("Set-Cookie2", []) + ns_hdrs = headers.get_all("Set-Cookie", []) + + rfc2965 = self._policy.rfc2965 + netscape = self._policy.netscape + + if ((not rfc2965_hdrs and not ns_hdrs) or + (not ns_hdrs and not rfc2965) or + (not rfc2965_hdrs and not netscape) or + (not netscape and not rfc2965)): + return [] # no relevant cookie headers: quick exit + + try: + cookies = self._cookies_from_attrs_set( + split_header_words(rfc2965_hdrs), request) + except Exception: + _warn_unhandled_exception() + cookies = [] + + if ns_hdrs and netscape: + try: + # RFC 2109 and Netscape cookies + ns_cookies = self._cookies_from_attrs_set( + parse_ns_headers(ns_hdrs), request) + except Exception: + _warn_unhandled_exception() + ns_cookies = [] + self._process_rfc2109_cookies(ns_cookies) + + # Look for Netscape cookies (from Set-Cookie headers) that match + # corresponding RFC 2965 cookies (from Set-Cookie2 headers). + # For each match, keep the RFC 2965 cookie and ignore the Netscape + # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are + # bundled in with the Netscape cookies for this purpose, which is + # reasonable behaviour. 
+ if rfc2965: + lookup = {} + for cookie in cookies: + lookup[(cookie.domain, cookie.path, cookie.name)] = None + + def no_matching_rfc2965(ns_cookie, lookup=lookup): + key = ns_cookie.domain, ns_cookie.path, ns_cookie.name + return key not in lookup + ns_cookies = filter(no_matching_rfc2965, ns_cookies) + + if ns_cookies: + cookies.extend(ns_cookies) + + return cookies + + def set_cookie_if_ok(self, cookie, request): + """Set a cookie if policy says it's OK to do so.""" + self._cookies_lock.acquire() + try: + self._policy._now = self._now = int(time.time()) + + if self._policy.set_ok(cookie, request): + self.set_cookie(cookie) + + + finally: + self._cookies_lock.release() + + def set_cookie(self, cookie): + """Set a cookie, without checking whether or not it should be set.""" + c = self._cookies + self._cookies_lock.acquire() + try: + if cookie.domain not in c: c[cookie.domain] = {} + c2 = c[cookie.domain] + if cookie.path not in c2: c2[cookie.path] = {} + c3 = c2[cookie.path] + c3[cookie.name] = cookie + finally: + self._cookies_lock.release() + + def extract_cookies(self, response, request): + """Extract cookies from response, where allowable given the request.""" + _debug("extract_cookies: %s", response.info()) + self._cookies_lock.acquire() + try: + self._policy._now = self._now = int(time.time()) + + for cookie in self.make_cookies(response, request): + if self._policy.set_ok(cookie, request): + _debug(" setting cookie: %s", cookie) + self.set_cookie(cookie) + finally: + self._cookies_lock.release() + + def clear(self, domain=None, path=None, name=None): + """Clear some cookies. + + Invoking this method without arguments will clear all cookies. If + given a single argument, only cookies belonging to that domain will be + removed. If given two arguments, cookies belonging to the specified + path within that domain are removed. If given three arguments, then + the cookie with the specified name, path and domain is removed. + + Raises KeyError if no matching cookie exists. + + """ + if name is not None: + if (domain is None) or (path is None): + raise ValueError( + "domain and path must be given to remove a cookie by name") + del self._cookies[domain][path][name] + elif path is not None: + if domain is None: + raise ValueError( + "domain must be given to remove cookies by path") + del self._cookies[domain][path] + elif domain is not None: + del self._cookies[domain] + else: + self._cookies = {} + + def clear_session_cookies(self): + """Discard all session cookies. + + Note that the .save() method won't save session cookies anyway, unless + you ask otherwise by passing a true ignore_discard argument. + + """ + self._cookies_lock.acquire() + try: + for cookie in self: + if cookie.discard: + self.clear(cookie.domain, cookie.path, cookie.name) + finally: + self._cookies_lock.release() + + def clear_expired_cookies(self): + """Discard all expired cookies. + + You probably don't need to call this method: expired cookies are never + sent back to the server (provided you're using DefaultCookiePolicy), + this method is called by CookieJar itself every so often, and the + .save() method won't save expired cookies anyway (unless you ask + otherwise by passing a true ignore_expires argument). 
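clear() above distinguishes three argument forms and raises KeyError when nothing matches. A short sketch (domain, path and name are placeholders):

from http.cookiejar import CookieJar

jar = CookieJar()
try:
    jar.clear("example.com", "/", "sessionid")  # one cookie: domain, path, name
    jar.clear("example.com", "/")               # all cookies under that path
    jar.clear("example.com")                    # all cookies for the domain
except KeyError:
    pass  # nothing matched -- clear() raises KeyError, as documented above
jar.clear()                                     # no arguments: drop everything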
+ + """ + self._cookies_lock.acquire() + try: + now = time.time() + for cookie in self: + if cookie.is_expired(now): + self.clear(cookie.domain, cookie.path, cookie.name) + finally: + self._cookies_lock.release() + + def __iter__(self): + return deepvalues(self._cookies) + + def __len__(self): + """Return number of contained cookies.""" + i = 0 + for cookie in self: i = i + 1 + return i + + def __repr__(self): + r = [] + for cookie in self: r.append(repr(cookie)) + return "<%s[%s]>" % (self.__class__.__name__, ", ".join(r)) + + def __str__(self): + r = [] + for cookie in self: r.append(str(cookie)) + return "<%s[%s]>" % (self.__class__.__name__, ", ".join(r)) + + +# derives from OSError for backwards-compatibility with Python 2.4.0 +class LoadError(OSError): pass + +class FileCookieJar(CookieJar): + """CookieJar that can be loaded from and saved to a file.""" + + def __init__(self, filename=None, delayload=False, policy=None): + """ + Cookies are NOT loaded from the named file until either the .load() or + .revert() method is called. + + """ + CookieJar.__init__(self, policy) + if filename is not None: + try: + filename+"" + except: + raise ValueError("filename must be string-like") + self.filename = filename + self.delayload = bool(delayload) + + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + """Save cookies to a file.""" + raise NotImplementedError() + + def load(self, filename=None, ignore_discard=False, ignore_expires=False): + """Load cookies from a file.""" + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + with open(filename) as f: + self._really_load(f, filename, ignore_discard, ignore_expires) + + def revert(self, filename=None, + ignore_discard=False, ignore_expires=False): + """Clear all cookies and reload cookies from a saved file. + + Raises LoadError (or OSError) if reversion is not successful; the + object's state will not be altered if this happens. + + """ + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + self._cookies_lock.acquire() + try: + + old_state = copy.deepcopy(self._cookies) + self._cookies = {} + try: + self.load(filename, ignore_discard, ignore_expires) + except OSError: + self._cookies = old_state + raise + + finally: + self._cookies_lock.release() + + +def lwp_cookie_str(cookie): + """Return string representation of Cookie in the LWP cookie file format. + + Actually, the format is extended a bit -- see module docstring. + + """ + h = [(cookie.name, cookie.value), + ("path", cookie.path), + ("domain", cookie.domain)] + if cookie.port is not None: h.append(("port", cookie.port)) + if cookie.path_specified: h.append(("path_spec", None)) + if cookie.port_specified: h.append(("port_spec", None)) + if cookie.domain_initial_dot: h.append(("domain_dot", None)) + if cookie.secure: h.append(("secure", None)) + if cookie.expires: h.append(("expires", + time2isoz(float(cookie.expires)))) + if cookie.discard: h.append(("discard", None)) + if cookie.comment: h.append(("comment", cookie.comment)) + if cookie.comment_url: h.append(("commenturl", cookie.comment_url)) + + keys = sorted(cookie._rest.keys()) + for k in keys: + h.append((k, str(cookie._rest[k]))) + + h.append(("version", str(cookie.version))) + + return join_header_words([h]) + +class LWPCookieJar(FileCookieJar): + """ + The LWPCookieJar saves a sequence of "Set-Cookie3" lines. 
+ "Set-Cookie3" is the format used by the libwww-perl library, not known + to be compatible with any browser, but which is easy to read and + doesn't lose information about RFC 2965 cookies. + + Additional methods + + as_lwp_str(ignore_discard=True, ignore_expired=True) + + """ + + def as_lwp_str(self, ignore_discard=True, ignore_expires=True): + """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers. + + ignore_discard and ignore_expires: see docstring for FileCookieJar.save + + """ + now = time.time() + r = [] + for cookie in self: + if not ignore_discard and cookie.discard: + continue + if not ignore_expires and cookie.is_expired(now): + continue + r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie)) + return "\n".join(r+[""]) + + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + with open(filename, "w") as f: + # There really isn't an LWP Cookies 2.0 format, but this indicates + # that there is extra information in here (domain_dot and + # port_spec) while still being compatible with libwww-perl, I hope. + f.write("#LWP-Cookies-2.0\n") + f.write(self.as_lwp_str(ignore_discard, ignore_expires)) + + def _really_load(self, f, filename, ignore_discard, ignore_expires): + magic = f.readline() + if not self.magic_re.search(magic): + msg = ("%r does not look like a Set-Cookie3 (LWP) format " + "file" % filename) + raise LoadError(msg) + + now = time.time() + + header = "Set-Cookie3:" + boolean_attrs = ("port_spec", "path_spec", "domain_dot", + "secure", "discard") + value_attrs = ("version", + "port", "path", "domain", + "expires", + "comment", "commenturl") + + try: + while 1: + line = f.readline() + if line == "": break + if not line.startswith(header): + continue + line = line[len(header):].strip() + + for data in split_header_words([line]): + name, value = data[0] + standard = {} + rest = {} + for k in boolean_attrs: + standard[k] = False + for k, v in data[1:]: + if k is not None: + lc = k.lower() + else: + lc = None + # don't lose case distinction for unknown fields + if (lc in value_attrs) or (lc in boolean_attrs): + k = lc + if k in boolean_attrs: + if v is None: v = True + standard[k] = v + elif k in value_attrs: + standard[k] = v + else: + rest[k] = v + + h = standard.get + expires = h("expires") + discard = h("discard") + if expires is not None: + expires = iso2time(expires) + if expires is None: + discard = True + domain = h("domain") + domain_specified = domain.startswith(".") + c = Cookie(h("version"), name, value, + h("port"), h("port_spec"), + domain, domain_specified, h("domain_dot"), + h("path"), h("path_spec"), + h("secure"), + expires, + discard, + h("comment"), + h("commenturl"), + rest) + if not ignore_discard and c.discard: + continue + if not ignore_expires and c.is_expired(now): + continue + self.set_cookie(c) + except OSError: + raise + except Exception: + _warn_unhandled_exception() + raise LoadError("invalid Set-Cookie3 format file %r: %r" % + (filename, line)) + + +class MozillaCookieJar(FileCookieJar): + """ + + WARNING: you may want to backup your browser's cookies file if you use + this class to save cookies. I *think* it works, but there have been + bugs in the past! + + This class differs from CookieJar only in the format it uses to save and + load cookies to and from a file. This class uses the Mozilla/Netscape + `cookies.txt' format. lynx uses this file format, too. 
+ + Don't expect cookies saved while the browser is running to be noticed by + the browser (in fact, Mozilla on unix will overwrite your saved cookies if + you change them on disk while it's running; on Windows, you probably can't + save at all while the browser is running). + + Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to + Netscape cookies on saving. + + In particular, the cookie version and port number information is lost, + together with information about whether or not Path, Port and Discard were + specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the + domain as set in the HTTP header started with a dot (yes, I'm aware some + domains in Netscape files start with a dot and some don't -- trust me, you + really don't want to know any more about this). + + Note that though Mozilla and Netscape use the same format, they use + slightly different headers. The class saves cookies using the Netscape + header by default (Mozilla can cope with that). + + """ + magic_re = re.compile("#( Netscape)? HTTP Cookie File") + header = """\ +# Netscape HTTP Cookie File +# http://curl.haxx.se/rfc/cookie_spec.html +# This is a generated file! Do not edit. + +""" + + def _really_load(self, f, filename, ignore_discard, ignore_expires): + now = time.time() + + magic = f.readline() + if not self.magic_re.search(magic): + raise LoadError( + "%r does not look like a Netscape format cookies file" % + filename) + + try: + while 1: + line = f.readline() + if line == "": break + + # last field may be absent, so keep any trailing tab + if line.endswith("\n"): line = line[:-1] + + # skip comments and blank lines XXX what is $ for? + if (line.strip().startswith(("#", "$")) or + line.strip() == ""): + continue + + domain, domain_specified, path, secure, expires, name, value = \ + line.split("\t") + secure = (secure == "TRUE") + domain_specified = (domain_specified == "TRUE") + if name == "": + # cookies.txt regards 'Set-Cookie: foo' as a cookie + # with no name, whereas http.cookiejar regards it as a + # cookie with no value. 
+ name = value + value = None + + initial_dot = domain.startswith(".") + assert domain_specified == initial_dot + + discard = False + if expires == "": + expires = None + discard = True + + # assume path_specified is false + c = Cookie(0, name, value, + None, False, + domain, domain_specified, initial_dot, + path, False, + secure, + expires, + discard, + None, + None, + {}) + if not ignore_discard and c.discard: + continue + if not ignore_expires and c.is_expired(now): + continue + self.set_cookie(c) + + except OSError: + raise + except Exception: + _warn_unhandled_exception() + raise LoadError("invalid Netscape format cookies file %r: %r" % + (filename, line)) + + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + with open(filename, "w") as f: + f.write(self.header) + now = time.time() + for cookie in self: + if not ignore_discard and cookie.discard: + continue + if not ignore_expires and cookie.is_expired(now): + continue + if cookie.secure: secure = "TRUE" + else: secure = "FALSE" + if cookie.domain.startswith("."): initial_dot = "TRUE" + else: initial_dot = "FALSE" + if cookie.expires is not None: + expires = str(cookie.expires) + else: + expires = "" + if cookie.value is None: + # cookies.txt regards 'Set-Cookie: foo' as a cookie + # with no name, whereas http.cookiejar regards it as a + # cookie with no value. + name = "" + value = cookie.name + else: + name = cookie.name + value = cookie.value + f.write( + "\t".join([cookie.domain, initial_dot, cookie.path, + secure, expires, name, value])+ + "\n") diff --git a/modules/language/python/module/http/cookies.py~ b/modules/language/python/module/http/cookies.py~ new file mode 100644 index 0000000..be3b080 --- /dev/null +++ b/modules/language/python/module/http/cookies.py~ @@ -0,0 +1,635 @@ +#### +# Copyright 2000 by Timothy O'Malley <timo@alum.mit.edu> +# +# All Rights Reserved +# +# Permission to use, copy, modify, and distribute this software +# and its documentation for any purpose and without fee is hereby +# granted, provided that the above copyright notice appear in all +# copies and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Timothy O'Malley not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS +# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR +# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +# PERFORMANCE OF THIS SOFTWARE. +# +#### +# +# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp +# by Timothy O'Malley <timo@alum.mit.edu> +# +# Cookie.py is a Python module for the handling of HTTP +# cookies as a Python dictionary. See RFC 2109 for more +# information on cookies. +# +# The original idea to treat Cookies as a dictionary came from +# Dave Mitchell (davem@magnet.com) in 1995, when he released the +# first version of nscookie.py. +# +#### + +r""" +Here's a sample session to show how to use this module. +At the moment, this is the only documentation. 
+ +The Basics +---------- + +Importing is easy... + + >>> from http import cookies + +Most of the time you start by creating a cookie. + + >>> C = cookies.SimpleCookie() + +Once you've created your Cookie, you can add values just as if it were +a dictionary. + + >>> C = cookies.SimpleCookie() + >>> C["fig"] = "newton" + >>> C["sugar"] = "wafer" + >>> C.output() + 'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer' + +Notice that the printable representation of a Cookie is the +appropriate format for a Set-Cookie: header. This is the +default behavior. You can change the header and printed +attributes by using the .output() function + + >>> C = cookies.SimpleCookie() + >>> C["rocky"] = "road" + >>> C["rocky"]["path"] = "/cookie" + >>> print(C.output(header="Cookie:")) + Cookie: rocky=road; Path=/cookie + >>> print(C.output(attrs=[], header="Cookie:")) + Cookie: rocky=road + +The load() method of a Cookie extracts cookies from a string. In a +CGI script, you would use this method to extract the cookies from the +HTTP_COOKIE environment variable. + + >>> C = cookies.SimpleCookie() + >>> C.load("chips=ahoy; vienna=finger") + >>> C.output() + 'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger' + +The load() method is darn-tootin smart about identifying cookies +within a string. Escaped quotation marks, nested semicolons, and other +such trickeries do not confuse it. + + >>> C = cookies.SimpleCookie() + >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') + >>> print(C) + Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" + +Each element of the Cookie also supports all of the RFC 2109 +Cookie attributes. Here's an example which sets the Path +attribute. + + >>> C = cookies.SimpleCookie() + >>> C["oreo"] = "doublestuff" + >>> C["oreo"]["path"] = "/" + >>> print(C) + Set-Cookie: oreo=doublestuff; Path=/ + +Each dictionary element has a 'value' attribute, which gives you +back the value associated with the key. + + >>> C = cookies.SimpleCookie() + >>> C["twix"] = "none for you" + >>> C["twix"].value + 'none for you' + +The SimpleCookie expects that all values should be standard strings. +Just to be sure, SimpleCookie invokes the str() builtin to convert +the value to a string, when the values are set dictionary-style. + + >>> C = cookies.SimpleCookie() + >>> C["number"] = 7 + >>> C["string"] = "seven" + >>> C["number"].value + '7' + >>> C["string"].value + 'seven' + >>> C.output() + 'Set-Cookie: number=7\r\nSet-Cookie: string=seven' + +Finis. +""" + +# +# Import our required modules +# +import re +import string + +__all__ = ["CookieError", "BaseCookie", "SimpleCookie"] + +_nulljoin = ''.join +_semispacejoin = '; '.join +_spacejoin = ' '.join + +def _warn_deprecated_setter(setter): + import warnings + msg = ('The .%s setter is deprecated. The attribute will be read-only in ' + 'future releases. Please use the set() method instead.' % setter) + warnings.warn(msg, DeprecationWarning, stacklevel=3) + +# +# Define an exception visible to External modules +# +class CookieError(Exception): + pass + + +# These quoting routines conform to the RFC2109 specification, which in +# turn references the character definitions from RFC2068. They provide +# a two-way quoting algorithm. Any non-text character is translated +# into a 4 character sequence: a forward-slash followed by the +# three-digit octal equivalent of the character. Any '\' or '"' is +# quoted with a preceding '\' slash. 
+# Because of the way browsers really handle cookies (as opposed to what +# the RFC says) we also encode "," and ";". +# +# These are taken from RFC2068 and RFC2109. +# _LegalChars is the list of chars which don't require "'s +# _Translator hash-table for fast quoting +# +_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~:" +_UnescapedChars = _LegalChars + ' ()/<=>?@[]{}' + +_Translator = {n: '\\%03o' % n + for n in set(range(256)) - set(map(ord, _UnescapedChars))} +_Translator.update({ + ord('"'): '\\"', + ord('\\'): '\\\\', +}) + +_is_legal_key = re.compile('[%s]+' % re.escape(_LegalChars)).fullmatch + +def _quote(str): + r"""Quote a string for use in a cookie header. + + If the string does not need to be double-quoted, then just return the + string. Otherwise, surround the string in doublequotes and quote + (with a \) special characters. + """ + if str is None or _is_legal_key(str): + return str + else: + return '"' + str.translate(_Translator) + '"' + + +_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") +_QuotePatt = re.compile(r"[\\].") + +def _unquote(str): + # If there aren't any doublequotes, + # then there can't be any special characters. See RFC 2109. + if str is None or len(str) < 2: + return str + if str[0] != '"' or str[-1] != '"': + return str + + # We have to assume that we must decode this string. + # Down to work. + + # Remove the "s + str = str[1:-1] + + # Check for special sequences. Examples: + # \012 --> \n + # \" --> " + # + i = 0 + n = len(str) + res = [] + while 0 <= i < n: + o_match = _OctalPatt.search(str, i) + q_match = _QuotePatt.search(str, i) + if not o_match and not q_match: # Neither matched + res.append(str[i:]) + break + # else: + j = k = -1 + if o_match: + j = o_match.start(0) + if q_match: + k = q_match.start(0) + if q_match and (not o_match or k < j): # QuotePatt matched + res.append(str[i:k]) + res.append(str[k+1]) + i = k + 2 + else: # OctalPatt matched + res.append(str[i:j]) + res.append(chr(int(str[j+1:j+4], 8))) + i = j + 4 + return _nulljoin(res) + +# The _getdate() routine is used to set the expiration time in the cookie's HTTP +# header. By default, _getdate() returns the current time in the appropriate +# "expires" format for a Set-Cookie header. The one optional argument is an +# offset from now, in seconds. For example, an offset of -3600 means "one hour +# ago". The offset may be a floating point number. +# + +_weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] + +_monthname = [None, + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + +def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname): + from time import gmtime, time + now = time() + year, month, day, hh, mm, ss, wd, y, z = gmtime(now + future) + return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % \ + (weekdayname[wd], day, monthname[month], year, hh, mm, ss) + + +class Morsel(dict): + """A class to hold ONE (key, value) pair. + + In a cookie, each such pair may have several attributes, so this class is + used to keep the attributes associated with the appropriate key,value pair. + This class also includes a coded_value attribute, which is used to hold + the network representation of the value. This is most useful when Python + objects are pickled for network transit. 
+ """ + # RFC 2109 lists these attributes as reserved: + # path comment domain + # max-age secure version + # + # For historical reasons, these attributes are also reserved: + # expires + # + # This is an extension from Microsoft: + # httponly + # + # This dictionary provides a mapping from the lowercase + # variant on the left to the appropriate traditional + # formatting on the right. + _reserved = { + "expires" : "expires", + "path" : "Path", + "comment" : "Comment", + "domain" : "Domain", + "max-age" : "Max-Age", + "secure" : "Secure", + "httponly" : "HttpOnly", + "version" : "Version", + } + + _flags = {'secure', 'httponly'} + + def __init__(self): + # Set defaults + self._key = self._value = self._coded_value = None + + # Set default attributes + for key in self._reserved: + dict.__setitem__(self, key, "") + + @property + def key(self): + return self._key + + @key.setter + def key(self, key): + _warn_deprecated_setter('key') + self._key = key + + @property + def value(self): + return self._value + + @value.setter + def value(self, value): + _warn_deprecated_setter('value') + self._value = value + + @property + def coded_value(self): + return self._coded_value + + @coded_value.setter + def coded_value(self, coded_value): + _warn_deprecated_setter('coded_value') + self._coded_value = coded_value + + def __setitem__(self, K, V): + K = K.lower() + if not K in self._reserved: + raise CookieError("Invalid attribute %r" % (K,)) + dict.__setitem__(self, K, V) + + def setdefault(self, key, val=None): + key = key.lower() + if key not in self._reserved: + raise CookieError("Invalid attribute %r" % (key,)) + return dict.setdefault(self, key, val) + + def __eq__(self, morsel): + if not isinstance(morsel, Morsel): + return NotImplemented + return (dict.__eq__(self, morsel) and + self._value == morsel._value and + self._key == morsel._key and + self._coded_value == morsel._coded_value) + + __ne__ = object.__ne__ + + def copy(self): + morsel = Morsel() + dict.update(morsel, self) + morsel.__dict__.update(self.__dict__) + return morsel + + def update(self, values): + data = {} + for key, val in dict(values).items(): + key = key.lower() + if key not in self._reserved: + raise CookieError("Invalid attribute %r" % (key,)) + data[key] = val + dict.update(self, data) + + def isReservedKey(self, K): + return K.lower() in self._reserved + + def set(self, key, val, coded_val, LegalChars=_LegalChars): + if LegalChars != _LegalChars: + import warnings + warnings.warn( + 'LegalChars parameter is deprecated, ignored and will ' + 'be removed in future versions.', DeprecationWarning, + stacklevel=2) + + if key.lower() in self._reserved: + raise CookieError('Attempt to set a reserved key %r' % (key,)) + if not _is_legal_key(key): + raise CookieError('Illegal key %r' % (key,)) + + # It's a good key, so save it. 
+ self._key = key + self._value = val + self._coded_value = coded_val + + def __getstate__(self): + return { + 'key': self._key, + 'value': self._value, + 'coded_value': self._coded_value, + } + + def __setstate__(self, state): + self._key = state['key'] + self._value = state['value'] + self._coded_value = state['coded_value'] + + def output(self, attrs=None, header="Set-Cookie:"): + return "%s %s" % (header, self.OutputString(attrs)) + + __str__ = output + + def __repr__(self): + return '<%s: %s>' % (self.__class__.__name__, self.OutputString()) + + def js_output(self, attrs=None): + # Print javascript + return """ + <script type="text/javascript"> + <!-- begin hiding + document.cookie = \"%s\"; + // end hiding --> + </script> + """ % (self.OutputString(attrs).replace('"', r'\"')) + + def OutputString(self, attrs=None): + # Build up our result + # + result = [] + append = result.append + + # First, the key=value pair + append("%s=%s" % (self.key, self.coded_value)) + + # Now add any defined attributes + if attrs is None: + attrs = self._reserved + items = sorted(self.items()) + for key, value in items: + if value == "": + continue + if key not in attrs: + continue + if key == "expires" and isinstance(value, int): + append("%s=%s" % (self._reserved[key], _getdate(value))) + elif key == "max-age" and isinstance(value, int): + append("%s=%d" % (self._reserved[key], value)) + elif key in self._flags: + if value: + append(str(self._reserved[key])) + else: + append("%s=%s" % (self._reserved[key], value)) + + # Return the result + return _semispacejoin(result) + + +# +# Pattern for finding cookie +# +# This used to be strict parsing based on the RFC2109 and RFC2068 +# specifications. I have since discovered that MSIE 3.0x doesn't +# follow the character rules outlined in those specs. As a +# result, the parsing rules here are less strict. +# + +_LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=" +_LegalValueChars = _LegalKeyChars + r'\[\]' +_CookiePattern = re.compile(r""" + \s* # Optional whitespace at start of cookie + (?P<key> # Start of group 'key' + [""" + _LegalKeyChars + r"""]+? # Any word of at least one letter + ) # End of group 'key' + ( # Optional group: there may not be a value. + \s*=\s* # Equal Sign + (?P<val> # Start of group 'val' + "(?:[^\\"]|\\.)*" # Any doublequoted string + | # or + \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr + | # or + [""" + _LegalValueChars + r"""]* # Any word or empty string + ) # End of group 'val' + )? # End of optional value group + \s* # Any number of spaces. + (\s+|;|$) # Ending either at space, semicolon, or EOS. + """, re.ASCII | re.VERBOSE) # re.ASCII may be removed if safe. + + +# At long last, here is the cookie class. Using this class is almost just like +# using a dictionary. See this module's docstring for example usage. +# +class BaseCookie(dict): + """A container class for a set of Morsels.""" + + def value_decode(self, val): + """real_value, coded_value = value_decode(STRING) + Called prior to setting a cookie's value from the network + representation. The VALUE is the value read from HTTP + header. + Override this function to modify the behavior of cookies. + """ + return val, val + + def value_encode(self, val): + """real_value, coded_value = value_encode(VALUE) + Called prior to setting a cookie's value from the dictionary + representation. The VALUE is the value being assigned. + Override this function to modify the behavior of cookies. 
+ """ + strval = str(val) + return strval, strval + + def __init__(self, input=None): + if input: + self.load(input) + + def __set(self, key, real_value, coded_value): + """Private method for setting a cookie's value""" + M = self.get(key, Morsel()) + M.set(key, real_value, coded_value) + dict.__setitem__(self, key, M) + + def __setitem__(self, key, value): + """Dictionary style assignment.""" + if isinstance(value, Morsel): + # allow assignment of constructed Morsels (e.g. for pickling) + dict.__setitem__(self, key, value) + else: + rval, cval = self.value_encode(value) + self.__set(key, rval, cval) + + def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"): + """Return a string suitable for HTTP.""" + result = [] + items = sorted(self.items()) + for key, value in items: + result.append(value.output(attrs, header)) + return sep.join(result) + + __str__ = output + + def __repr__(self): + l = [] + items = sorted(self.items()) + for key, value in items: + l.append('%s=%s' % (key, repr(value.value))) + return '<%s: %s>' % (self.__class__.__name__, _spacejoin(l)) + + def js_output(self, attrs=None): + """Return a string suitable for JavaScript.""" + result = [] + items = sorted(self.items()) + for key, value in items: + result.append(value.js_output(attrs)) + return _nulljoin(result) + + def load(self, rawdata): + """Load cookies from a string (presumably HTTP_COOKIE) or + from a dictionary. Loading cookies from a dictionary 'd' + is equivalent to calling: + map(Cookie.__setitem__, d.keys(), d.values()) + """ + if isinstance(rawdata, str): + self.__parse_string(rawdata) + else: + # self.update() wouldn't call our custom __setitem__ + for key, value in rawdata.items(): + self[key] = value + return + + def __parse_string(self, str, patt=_CookiePattern): + i = 0 # Our starting point + n = len(str) # Length of string + parsed_items = [] # Parsed (type, key, value) triples + morsel_seen = False # A key=value pair was previously encountered + + TYPE_ATTRIBUTE = 1 + TYPE_KEYVALUE = 2 + + # We first parse the whole cookie string and reject it if it's + # syntactically invalid (this helps avoid some classes of injection + # attacks). + while 0 <= i < n: + # Start looking for a cookie + match = patt.match(str, i) + if not match: + # No more cookies + break + + key, value = match.group("key"), match.group("val") + i = match.end(0) + + if key[0] == "$": + if not morsel_seen: + # We ignore attributes which pertain to the cookie + # mechanism as a whole, such as "$Version". + # See RFC 2965. (Does anyone care?) + continue + parsed_items.append((TYPE_ATTRIBUTE, key[1:], value)) + elif key.lower() in Morsel._reserved: + if not morsel_seen: + # Invalid cookie string + return + if value is None: + if key.lower() in Morsel._flags: + parsed_items.append((TYPE_ATTRIBUTE, key, True)) + else: + # Invalid cookie string + return + else: + parsed_items.append((TYPE_ATTRIBUTE, key, _unquote(value))) + elif value is not None: + parsed_items.append((TYPE_KEYVALUE, key, self.value_decode(value))) + morsel_seen = True + else: + # Invalid cookie string + return + + # The cookie string is valid, apply it. + M = None # current morsel + for tp, key, value in parsed_items: + if tp == TYPE_ATTRIBUTE: + assert M is not None + M[key] = value + else: + assert tp == TYPE_KEYVALUE + rval, cval = value + self.__set(key, rval, cval) + M = self[key] + + +class SimpleCookie(BaseCookie): + """ + SimpleCookie supports strings as cookie values. 
When setting + the value using the dictionary assignment notation, SimpleCookie + calls the builtin str() to convert the value to a string. Values + received from HTTP are kept as strings. + """ + def value_decode(self, val): + return _unquote(val), val + + def value_encode(self, val): + strval = str(val) + return strval, _quote(strval) diff --git a/modules/language/python/module/http/server.py~ b/modules/language/python/module/http/server.py~ new file mode 100644 index 0000000..e12e45b --- /dev/null +++ b/modules/language/python/module/http/server.py~ @@ -0,0 +1,1211 @@ +"""HTTP server classes. + +Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see +SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST, +and CGIHTTPRequestHandler for CGI scripts. + +It does, however, optionally implement HTTP/1.1 persistent connections, +as of version 0.3. + +Notes on CGIHTTPRequestHandler +------------------------------ + +This class implements GET and POST requests to cgi-bin scripts. + +If the os.fork() function is not present (e.g. on Windows), +subprocess.Popen() is used as a fallback, with slightly altered semantics. + +In all cases, the implementation is intentionally naive -- all +requests are executed synchronously. + +SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL +-- it may execute arbitrary Python code or external programs. + +Note that status code 200 is sent prior to execution of a CGI script, so +scripts cannot send other status codes such as 302 (redirect). + +XXX To do: + +- log requests even later (to capture byte count) +- log user-agent header and other interesting goodies +- send error log to separate file +""" + + +# See also: +# +# HTTP Working Group T. Berners-Lee +# INTERNET-DRAFT R. T. Fielding +# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen +# Expires September 8, 1995 March 8, 1995 +# +# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt +# +# and +# +# Network Working Group R. Fielding +# Request for Comments: 2616 et al +# Obsoletes: 2068 June 1999 +# Category: Standards Track +# +# URL: http://www.faqs.org/rfcs/rfc2616.html + +# Log files +# --------- +# +# Here's a quote from the NCSA httpd docs about log file format. +# +# | The logfile format is as follows. Each line consists of: +# | +# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb +# | +# | host: Either the DNS name or the IP number of the remote client +# | rfc931: Any information returned by identd for this person, +# | - otherwise. +# | authuser: If user sent a userid for authentication, the user name, +# | - otherwise. +# | DD: Day +# | Mon: Month (calendar name) +# | YYYY: Year +# | hh: hour (24-hour format, the machine's timezone) +# | mm: minutes +# | ss: seconds +# | request: The first line of the HTTP request as sent by the client. +# | ddd: the status code returned by the server, - if not available. +# | bbbb: the total number of bytes sent, +# | *not including the HTTP/1.0 header*, - if not available +# | +# | You can determine the name of the file accessed through request. +# +# (Actually, the latter is only true if you know the server configuration +# at the time the request was made!) 
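For orientation, the smallest way to run the server classes this module defines (the port number is arbitrary):

from http.server import HTTPServer, SimpleHTTPRequestHandler

# Serve the current directory on port 8000; serve_forever() blocks.
httpd = HTTPServer(("", 8000), SimpleHTTPRequestHandler)
httpd.serve_forever()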
+ +__version__ = "0.6" + +__all__ = [ + "HTTPServer", "BaseHTTPRequestHandler", + "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler", +] + +import email.utils +import html +import http.client +import io +import mimetypes +import os +import posixpath +import select +import shutil +import socket # For gethostbyaddr() +import socketserver +import sys +import time +import urllib.parse +import copy +import argparse + +from http import HTTPStatus + + +# Default error message template +DEFAULT_ERROR_MESSAGE = """\ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" + "http://www.w3.org/TR/html4/strict.dtd"> +<html> + <head> + <meta http-equiv="Content-Type" content="text/html;charset=utf-8"> + <title>Error response</title> + </head> + <body> + <h1>Error response</h1> + <p>Error code: %(code)d</p> + <p>Message: %(message)s.</p> + <p>Error code explanation: %(code)s - %(explain)s.</p> + </body> +</html> +""" + +DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" + +class HTTPServer(socketserver.TCPServer): + + allow_reuse_address = 1 # Seems to make sense in testing environment + + def server_bind(self): + """Override server_bind to store the server name.""" + socketserver.TCPServer.server_bind(self) + host, port = self.server_address[:2] + self.server_name = socket.getfqdn(host) + self.server_port = port + + +class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): + + """HTTP request handler base class. + + The following explanation of HTTP serves to guide you through the + code as well as to expose any misunderstandings I may have about + HTTP (so you don't need to read the code to figure out I'm wrong + :-). + + HTTP (HyperText Transfer Protocol) is an extensible protocol on + top of a reliable stream transport (e.g. TCP/IP). The protocol + recognizes three parts to a request: + + 1. One line identifying the request type and path + 2. An optional set of RFC-822-style headers + 3. An optional data part + + The headers and data are separated by a blank line. + + The first line of the request has the form + + <command> <path> <version> + + where <command> is a (case-sensitive) keyword such as GET or POST, + <path> is a string containing path information for the request, + and <version> should be the string "HTTP/1.0" or "HTTP/1.1". + <path> is encoded using the URL encoding scheme (using %xx to signify + the ASCII character with hex code xx). + + The specification specifies that lines are separated by CRLF but + for compatibility with the widest range of clients recommends + servers also handle LF. Similarly, whitespace in the request line + is treated sensibly (allowing multiple spaces between components + and allowing trailing whitespace). + + Similarly, for output, lines ought to be separated by CRLF pairs + but most clients grok LF characters just fine. + + If the first line of the request has the form + + <command> <path> + + (i.e. <version> is left out) then this is assumed to be an HTTP + 0.9 request; this form has no optional headers and data part and + the reply consists of just the data. + + The reply form of the HTTP 1.x protocol again has three parts: + + 1. One line giving the response code + 2. An optional set of RFC-822-style headers + 3. The data + + Again, the headers and data are separated by a blank line. 
+ + The response code line has the form + + <version> <responsecode> <responsestring> + + where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"), + <responsecode> is a 3-digit response code indicating success or + failure of the request, and <responsestring> is an optional + human-readable string explaining what the response code means. + + This server parses the request and the headers, and then calls a + function specific to the request type (<command>). Specifically, + a request SPAM will be handled by a method do_SPAM(). If no + such method exists the server sends an error response to the + client. If it exists, it is called with no arguments: + + do_SPAM() + + Note that the request name is case sensitive (i.e. SPAM and spam + are different requests). + + The various request details are stored in instance variables: + + - client_address is the client IP address in the form (host, + port); + + - command, path and version are the broken-down request line; + + - headers is an instance of email.message.Message (or a derived + class) containing the header information; + + - rfile is a file object open for reading positioned at the + start of the optional input data part; + + - wfile is a file object open for writing. + + IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! + + The first thing to be written must be the response line. Then + follow 0 or more header lines, then a blank line, and then the + actual data (if any). The meaning of the header lines depends on + the command executed by the server; in most cases, when data is + returned, there should be at least one header line of the form + + Content-type: <type>/<subtype> + + where <type> and <subtype> should be registered MIME types, + e.g. "text/html" or "text/plain". + + """ + + # The Python system version, truncated to its first component. + sys_version = "Python/" + sys.version.split()[0] + + # The server software version. You may want to override this. + # The format is multiple whitespace-separated strings, + # where each string is of the form name[/version]. + server_version = "BaseHTTP/" + __version__ + + error_message_format = DEFAULT_ERROR_MESSAGE + error_content_type = DEFAULT_ERROR_CONTENT_TYPE + + # The default request version. This only affects responses up until + # the point where the request line is parsed, so it mainly decides what + # the client gets back when sending a malformed request line. + # Most web servers default to HTTP 0.9, i.e. don't send a status line. + default_request_version = "HTTP/0.9" + + def parse_request(self): + """Parse a request (internal). + + The request should be stored in self.raw_requestline; the results + are in self.command, self.path, self.request_version and + self.headers. + + Return True for success, False for failure; on failure, an + error is sent back. + + """ + self.command = None # set in case of error on the first line + self.request_version = version = self.default_request_version + self.close_connection = True + requestline = str(self.raw_requestline, 'iso-8859-1') + requestline = requestline.rstrip('\r\n') + self.requestline = requestline + words = requestline.split() + if len(words) == 3: + command, path, version = words + try: + if version[:5] != 'HTTP/': + raise ValueError + base_version_number = version.split('/', 1)[1] + version_number = base_version_number.split(".") + # RFC 2145 section 3.1 says there can be only one "." 
and + # - major and minor numbers MUST be treated as + # separate integers; + # - HTTP/2.4 is a lower version than HTTP/2.13, which in + # turn is lower than HTTP/12.3; + # - Leading zeros MUST be ignored by recipients. + if len(version_number) != 2: + raise ValueError + version_number = int(version_number[0]), int(version_number[1]) + except (ValueError, IndexError): + self.send_error( + HTTPStatus.BAD_REQUEST, + "Bad request version (%r)" % version) + return False + if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1": + self.close_connection = False + if version_number >= (2, 0): + self.send_error( + HTTPStatus.HTTP_VERSION_NOT_SUPPORTED, + "Invalid HTTP version (%s)" % base_version_number) + return False + elif len(words) == 2: + command, path = words + self.close_connection = True + if command != 'GET': + self.send_error( + HTTPStatus.BAD_REQUEST, + "Bad HTTP/0.9 request type (%r)" % command) + return False + elif not words: + return False + else: + self.send_error( + HTTPStatus.BAD_REQUEST, + "Bad request syntax (%r)" % requestline) + return False + self.command, self.path, self.request_version = command, path, version + + # Examine the headers and look for a Connection directive. + try: + self.headers = http.client.parse_headers(self.rfile, + _class=self.MessageClass) + except http.client.LineTooLong as err: + self.send_error( + HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, + "Line too long", + str(err)) + return False + except http.client.HTTPException as err: + self.send_error( + HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, + "Too many headers", + str(err) + ) + return False + + conntype = self.headers.get('Connection', "") + if conntype.lower() == 'close': + self.close_connection = True + elif (conntype.lower() == 'keep-alive' and + self.protocol_version >= "HTTP/1.1"): + self.close_connection = False + # Examine the headers and look for an Expect directive + expect = self.headers.get('Expect', "") + if (expect.lower() == "100-continue" and + self.protocol_version >= "HTTP/1.1" and + self.request_version >= "HTTP/1.1"): + if not self.handle_expect_100(): + return False + return True + + def handle_expect_100(self): + """Decide what to do with an "Expect: 100-continue" header. + + If the client is expecting a 100 Continue response, we must + respond with either a 100 Continue or a final response before + waiting for the request body. The default is to always respond + with a 100 Continue. You can behave differently (for example, + reject unauthorized requests) by overriding this method. + + This method should either return True (possibly after sending + a 100 Continue response) or send an error response and return + False. + + """ + self.send_response_only(HTTPStatus.CONTINUE) + self.end_headers() + return True + + def handle_one_request(self): + """Handle a single HTTP request. + + You normally don't need to override this method; see the class + __doc__ string for information on how to handle specific HTTP + commands such as GET and POST. 
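handle_one_request() dispatches to do_<COMMAND>() methods, and the class docstring above spells out the write order: response line, headers, blank line, body. A minimal handler following that protocol (names are illustrative):

from http.server import BaseHTTPRequestHandler, HTTPServer

class HelloHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        body = b"hello\n"
        self.send_response(200)                        # response line (+ Server/Date)
        self.send_header("Content-Type", "text/plain")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()                             # blank line ending the headers
        self.wfile.write(body)                         # then the actual data

# HTTPServer(("", 8000), HelloHandler).serve_forever()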
+ + """ + try: + self.raw_requestline = self.rfile.readline(65537) + if len(self.raw_requestline) > 65536: + self.requestline = '' + self.request_version = '' + self.command = '' + self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG) + return + if not self.raw_requestline: + self.close_connection = True + return + if not self.parse_request(): + # An error code has been sent, just exit + return + mname = 'do_' + self.command + if not hasattr(self, mname): + self.send_error( + HTTPStatus.NOT_IMPLEMENTED, + "Unsupported method (%r)" % self.command) + return + method = getattr(self, mname) + method() + self.wfile.flush() #actually send the response if not already done. + except socket.timeout as e: + #a read or a write timed out. Discard this connection + self.log_error("Request timed out: %r", e) + self.close_connection = True + return + + def handle(self): + """Handle multiple requests if necessary.""" + self.close_connection = True + + self.handle_one_request() + while not self.close_connection: + self.handle_one_request() + + def send_error(self, code, message=None, explain=None): + """Send and log an error reply. + + Arguments are + * code: an HTTP error code + 3 digits + * message: a simple optional 1 line reason phrase. + *( HTAB / SP / VCHAR / %x80-FF ) + defaults to short entry matching the response code + * explain: a detailed message defaults to the long entry + matching the response code. + + This sends an error response (so it must be called before any + output has been generated), logs the error, and finally sends + a piece of HTML explaining the error to the user. + + """ + + try: + shortmsg, longmsg = self.responses[code] + except KeyError: + shortmsg, longmsg = '???', '???' + if message is None: + message = shortmsg + if explain is None: + explain = longmsg + self.log_error("code %d, message %s", code, message) + self.send_response(code, message) + self.send_header('Connection', 'close') + + # Message body is omitted for cases described in: + # - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified) + # - RFC7231: 6.3.6. 205(Reset Content) + body = None + if (code >= 200 and + code not in (HTTPStatus.NO_CONTENT, + HTTPStatus.RESET_CONTENT, + HTTPStatus.NOT_MODIFIED)): + # HTML encode to prevent Cross Site Scripting attacks + # (see bug #1100201) + content = (self.error_message_format % { + 'code': code, + 'message': html.escape(message, quote=False), + 'explain': html.escape(explain, quote=False) + }) + body = content.encode('UTF-8', 'replace') + self.send_header("Content-Type", self.error_content_type) + self.send_header('Content-Length', int(len(body))) + self.end_headers() + + if self.command != 'HEAD' and body: + self.wfile.write(body) + + def send_response(self, code, message=None): + """Add the response header to the headers buffer and log the + response code. + + Also send two standard headers with the server software + version and the current date. 
+ + """ + self.log_request(code) + self.send_response_only(code, message) + self.send_header('Server', self.version_string()) + self.send_header('Date', self.date_time_string()) + + def send_response_only(self, code, message=None): + """Send the response header only.""" + if self.request_version != 'HTTP/0.9': + if message is None: + if code in self.responses: + message = self.responses[code][0] + else: + message = '' + if not hasattr(self, '_headers_buffer'): + self._headers_buffer = [] + self._headers_buffer.append(("%s %d %s\r\n" % + (self.protocol_version, code, message)).encode( + 'latin-1', 'strict')) + + def send_header(self, keyword, value): + """Send a MIME header to the headers buffer.""" + if self.request_version != 'HTTP/0.9': + if not hasattr(self, '_headers_buffer'): + self._headers_buffer = [] + self._headers_buffer.append( + ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict')) + + if keyword.lower() == 'connection': + if value.lower() == 'close': + self.close_connection = True + elif value.lower() == 'keep-alive': + self.close_connection = False + + def end_headers(self): + """Send the blank line ending the MIME headers.""" + if self.request_version != 'HTTP/0.9': + self._headers_buffer.append(b"\r\n") + self.flush_headers() + + def flush_headers(self): + if hasattr(self, '_headers_buffer'): + self.wfile.write(b"".join(self._headers_buffer)) + self._headers_buffer = [] + + def log_request(self, code='-', size='-'): + """Log an accepted request. + + This is called by send_response(). + + """ + if isinstance(code, HTTPStatus): + code = code.value + self.log_message('"%s" %s %s', + self.requestline, str(code), str(size)) + + def log_error(self, format, *args): + """Log an error. + + This is called when a request cannot be fulfilled. By + default it passes the message on to log_message(). + + Arguments are the same as for log_message(). + + XXX This should go to the separate error log. + + """ + + self.log_message(format, *args) + + def log_message(self, format, *args): + """Log an arbitrary message. + + This is used by all other logging functions. Override + it if you have specific logging wishes. + + The first argument, FORMAT, is a format string for the + message to be logged. If the format string contains + any % escapes requiring parameters, they should be + specified as subsequent arguments (it's just like + printf!). + + The client ip and current date/time are prefixed to + every message. 
+ + """ + + sys.stderr.write("%s - - [%s] %s\n" % + (self.address_string(), + self.log_date_time_string(), + format%args)) + + def version_string(self): + """Return the server software version string.""" + return self.server_version + ' ' + self.sys_version + + def date_time_string(self, timestamp=None): + """Return the current date and time formatted for a message header.""" + if timestamp is None: + timestamp = time.time() + return email.utils.formatdate(timestamp, usegmt=True) + + def log_date_time_string(self): + """Return the current time formatted for logging.""" + now = time.time() + year, month, day, hh, mm, ss, x, y, z = time.localtime(now) + s = "%02d/%3s/%04d %02d:%02d:%02d" % ( + day, self.monthname[month], year, hh, mm, ss) + return s + + weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] + + monthname = [None, + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + + def address_string(self): + """Return the client address.""" + + return self.client_address[0] + + # Essentially static class variables + + # The version of the HTTP protocol we support. + # Set this to HTTP/1.1 to enable automatic keepalive + protocol_version = "HTTP/1.0" + + # MessageClass used to parse headers + MessageClass = http.client.HTTPMessage + + # hack to maintain backwards compatibility + responses = { + v: (v.phrase, v.description) + for v in HTTPStatus.__members__.values() + } + + +class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): + + """Simple HTTP request handler with GET and HEAD commands. + + This serves files from the current directory and any of its + subdirectories. The MIME type for files is determined by + calling the .guess_type() method. + + The GET and HEAD requests are identical except that the HEAD + request omits the actual contents of the file. + + """ + + server_version = "SimpleHTTP/" + __version__ + + def do_GET(self): + """Serve a GET request.""" + f = self.send_head() + if f: + try: + self.copyfile(f, self.wfile) + finally: + f.close() + + def do_HEAD(self): + """Serve a HEAD request.""" + f = self.send_head() + if f: + f.close() + + def send_head(self): + """Common code for GET and HEAD commands. + + This sends the response code and MIME headers. + + Return value is either a file object (which has to be copied + to the outputfile by the caller unless the command was HEAD, + and must be closed by the caller under all circumstances), or + None, in which case the caller has nothing further to do. 
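For illustration only (not part of the patch), the SimpleHTTPRequestHandler described above can be exercised directly with the HTTPServer class from this module:

    from http.server import HTTPServer, SimpleHTTPRequestHandler

    # serve files from the current working directory on port 8000
    with HTTPServer(("", 8000), SimpleHTTPRequestHandler) as httpd:
        httpd.serve_forever()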
+ + """ + path = self.translate_path(self.path) + f = None + if os.path.isdir(path): + parts = urllib.parse.urlsplit(self.path) + if not parts.path.endswith('/'): + # redirect browser - doing basically what apache does + self.send_response(HTTPStatus.MOVED_PERMANENTLY) + new_parts = (parts[0], parts[1], parts[2] + '/', + parts[3], parts[4]) + new_url = urllib.parse.urlunsplit(new_parts) + self.send_header("Location", new_url) + self.end_headers() + return None + for index in "index.html", "index.htm": + index = os.path.join(path, index) + if os.path.exists(index): + path = index + break + else: + return self.list_directory(path) + ctype = self.guess_type(path) + try: + f = open(path, 'rb') + except OSError: + self.send_error(HTTPStatus.NOT_FOUND, "File not found") + return None + try: + self.send_response(HTTPStatus.OK) + self.send_header("Content-type", ctype) + fs = os.fstat(f.fileno()) + self.send_header("Content-Length", str(fs[6])) + self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) + self.end_headers() + return f + except: + f.close() + raise + + def list_directory(self, path): + """Helper to produce a directory listing (absent index.html). + + Return value is either a file object, or None (indicating an + error). In either case, the headers are sent, making the + interface the same as for send_head(). + + """ + try: + list = os.listdir(path) + except OSError: + self.send_error( + HTTPStatus.NOT_FOUND, + "No permission to list directory") + return None + list.sort(key=lambda a: a.lower()) + r = [] + try: + displaypath = urllib.parse.unquote(self.path, + errors='surrogatepass') + except UnicodeDecodeError: + displaypath = urllib.parse.unquote(path) + displaypath = html.escape(displaypath, quote=False) + enc = sys.getfilesystemencoding() + title = 'Directory listing for %s' % displaypath + r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" ' + '"http://www.w3.org/TR/html4/strict.dtd">') + r.append('<html>\n<head>') + r.append('<meta http-equiv="Content-Type" ' + 'content="text/html; charset=%s">' % enc) + r.append('<title>%s</title>\n</head>' % title) + r.append('<body>\n<h1>%s</h1>' % title) + r.append('<hr>\n<ul>') + for name in list: + fullname = os.path.join(path, name) + displayname = linkname = name + # Append / for directories or @ for symbolic links + if os.path.isdir(fullname): + displayname = name + "/" + linkname = name + "/" + if os.path.islink(fullname): + displayname = name + "@" + # Note: a link to a directory displays with @ and links with / + r.append('<li><a href="%s">%s</a></li>' + % (urllib.parse.quote(linkname, + errors='surrogatepass'), + html.escape(displayname, quote=False))) + r.append('</ul>\n<hr>\n</body>\n</html>\n') + encoded = '\n'.join(r).encode(enc, 'surrogateescape') + f = io.BytesIO() + f.write(encoded) + f.seek(0) + self.send_response(HTTPStatus.OK) + self.send_header("Content-type", "text/html; charset=%s" % enc) + self.send_header("Content-Length", str(len(encoded))) + self.end_headers() + return f + + def translate_path(self, path): + """Translate a /-separated PATH to the local filename syntax. + + Components that mean special things to the local file system + (e.g. drive or directory names) are ignored. (XXX They should + probably be diagnosed.) + + """ + # abandon query parameters + path = path.split('?',1)[0] + path = path.split('#',1)[0] + # Don't forget explicit trailing slash when normalizing. 
Issue17324 + trailing_slash = path.rstrip().endswith('/') + try: + path = urllib.parse.unquote(path, errors='surrogatepass') + except UnicodeDecodeError: + path = urllib.parse.unquote(path) + path = posixpath.normpath(path) + words = path.split('/') + words = filter(None, words) + path = os.getcwd() + for word in words: + if os.path.dirname(word) or word in (os.curdir, os.pardir): + # Ignore components that are not a simple file/directory name + continue + path = os.path.join(path, word) + if trailing_slash: + path += '/' + return path + + def copyfile(self, source, outputfile): + """Copy all data between two file objects. + + The SOURCE argument is a file object open for reading + (or anything with a read() method) and the DESTINATION + argument is a file object open for writing (or + anything with a write() method). + + The only reason for overriding this would be to change + the block size or perhaps to replace newlines by CRLF + -- note however that this the default server uses this + to copy binary data as well. + + """ + shutil.copyfileobj(source, outputfile) + + def guess_type(self, path): + """Guess the type of a file. + + Argument is a PATH (a filename). + + Return value is a string of the form type/subtype, + usable for a MIME Content-type header. + + The default implementation looks the file's extension + up in the table self.extensions_map, using application/octet-stream + as a default; however it would be permissible (if + slow) to look inside the data to make a better guess. + + """ + + base, ext = posixpath.splitext(path) + if ext in self.extensions_map: + return self.extensions_map[ext] + ext = ext.lower() + if ext in self.extensions_map: + return self.extensions_map[ext] + else: + return self.extensions_map[''] + + if not mimetypes.inited: + mimetypes.init() # try to read system mime.types + extensions_map = mimetypes.types_map.copy() + extensions_map.update({ + '': 'application/octet-stream', # Default + '.py': 'text/plain', + '.c': 'text/plain', + '.h': 'text/plain', + }) + + +# Utilities for CGIHTTPRequestHandler + +def _url_collapse_path(path): + """ + Given a URL path, remove extra '/'s and '.' path elements and collapse + any '..' references and returns a collapsed path. + + Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. + The utility of this function is limited to is_cgi method and helps + preventing some security attacks. + + Returns: The reconstituted URL, which will always start with a '/'. + + Raises: IndexError if too many '..' occur within the path. + + """ + # Query component should not be involved. + path, _, query = path.partition('?') + path = urllib.parse.unquote(path) + + # Similar to os.path.split(os.path.normpath(path)) but specific to URL + # path semantics rather than local operating system semantics. + path_parts = path.split('/') + head_parts = [] + for part in path_parts[:-1]: + if part == '..': + head_parts.pop() # IndexError if more '..' 
than prior parts + elif part and part != '.': + head_parts.append( part ) + if path_parts: + tail_part = path_parts.pop() + if tail_part: + if tail_part == '..': + head_parts.pop() + tail_part = '' + elif tail_part == '.': + tail_part = '' + else: + tail_part = '' + + if query: + tail_part = '?'.join((tail_part, query)) + + splitpath = ('/' + '/'.join(head_parts), tail_part) + collapsed_path = "/".join(splitpath) + + return collapsed_path + + + +nobody = None + +def nobody_uid(): + """Internal routine to get nobody's uid""" + global nobody + if nobody: + return nobody + try: + import pwd + except ImportError: + return -1 + try: + nobody = pwd.getpwnam('nobody')[2] + except KeyError: + nobody = 1 + max(x[2] for x in pwd.getpwall()) + return nobody + + +def executable(path): + """Test for executable file.""" + return os.access(path, os.X_OK) + + +class CGIHTTPRequestHandler(SimpleHTTPRequestHandler): + + """Complete HTTP server with GET, HEAD and POST commands. + + GET and HEAD also support running CGI scripts. + + The POST command is *only* implemented for CGI scripts. + + """ + + # Determine platform specifics + have_fork = hasattr(os, 'fork') + + # Make rfile unbuffered -- we need to read one line and then pass + # the rest to a subprocess, so we can't use buffered input. + rbufsize = 0 + + def do_POST(self): + """Serve a POST request. + + This is only implemented for CGI scripts. + + """ + + if self.is_cgi(): + self.run_cgi() + else: + self.send_error( + HTTPStatus.NOT_IMPLEMENTED, + "Can only POST to CGI scripts") + + def send_head(self): + """Version of send_head that support CGI scripts""" + if self.is_cgi(): + return self.run_cgi() + else: + return SimpleHTTPRequestHandler.send_head(self) + + def is_cgi(self): + """Test whether self.path corresponds to a CGI script. + + Returns True and updates the cgi_info attribute to the tuple + (dir, rest) if self.path requires running a CGI script. + Returns False otherwise. + + If any exception is raised, the caller should assume that + self.path was rejected as invalid and act accordingly. + + The default implementation tests whether the normalized url + path begins with one of the strings in self.cgi_directories + (and the next character is a '/' or the end of the string). + + """ + collapsed_path = _url_collapse_path(self.path) + dir_sep = collapsed_path.find('/', 1) + head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:] + if head in self.cgi_directories: + self.cgi_info = head, tail + return True + return False + + + cgi_directories = ['/cgi-bin', '/htbin'] + + def is_executable(self, path): + """Test whether argument path is an executable file.""" + return executable(path) + + def is_python(self, path): + """Test whether argument path is a Python script.""" + head, tail = os.path.splitext(path) + return tail.lower() in (".py", ".pyw") + + def run_cgi(self): + """Execute a CGI script.""" + dir, rest = self.cgi_info + path = dir + '/' + rest + i = path.find('/', len(dir)+1) + while i >= 0: + nextdir = path[:i] + nextrest = path[i+1:] + + scriptdir = self.translate_path(nextdir) + if os.path.isdir(scriptdir): + dir, rest = nextdir, nextrest + i = path.find('/', len(dir)+1) + else: + break + + # find an explicit query string, if present. + rest, _, query = rest.partition('?') + + # dissect the part after the directory name into a script name & + # a possible additional path, to be stored in PATH_INFO. 
+ i = rest.find('/') + if i >= 0: + script, rest = rest[:i], rest[i:] + else: + script, rest = rest, '' + + scriptname = dir + '/' + script + scriptfile = self.translate_path(scriptname) + if not os.path.exists(scriptfile): + self.send_error( + HTTPStatus.NOT_FOUND, + "No such CGI script (%r)" % scriptname) + return + if not os.path.isfile(scriptfile): + self.send_error( + HTTPStatus.FORBIDDEN, + "CGI script is not a plain file (%r)" % scriptname) + return + ispy = self.is_python(scriptname) + if self.have_fork or not ispy: + if not self.is_executable(scriptfile): + self.send_error( + HTTPStatus.FORBIDDEN, + "CGI script is not executable (%r)" % scriptname) + return + + # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html + # XXX Much of the following could be prepared ahead of time! + env = copy.deepcopy(os.environ) + env['SERVER_SOFTWARE'] = self.version_string() + env['SERVER_NAME'] = self.server.server_name + env['GATEWAY_INTERFACE'] = 'CGI/1.1' + env['SERVER_PROTOCOL'] = self.protocol_version + env['SERVER_PORT'] = str(self.server.server_port) + env['REQUEST_METHOD'] = self.command + uqrest = urllib.parse.unquote(rest) + env['PATH_INFO'] = uqrest + env['PATH_TRANSLATED'] = self.translate_path(uqrest) + env['SCRIPT_NAME'] = scriptname + if query: + env['QUERY_STRING'] = query + env['REMOTE_ADDR'] = self.client_address[0] + authorization = self.headers.get("authorization") + if authorization: + authorization = authorization.split() + if len(authorization) == 2: + import base64, binascii + env['AUTH_TYPE'] = authorization[0] + if authorization[0].lower() == "basic": + try: + authorization = authorization[1].encode('ascii') + authorization = base64.decodebytes(authorization).\ + decode('ascii') + except (binascii.Error, UnicodeError): + pass + else: + authorization = authorization.split(':') + if len(authorization) == 2: + env['REMOTE_USER'] = authorization[0] + # XXX REMOTE_IDENT + if self.headers.get('content-type') is None: + env['CONTENT_TYPE'] = self.headers.get_content_type() + else: + env['CONTENT_TYPE'] = self.headers['content-type'] + length = self.headers.get('content-length') + if length: + env['CONTENT_LENGTH'] = length + referer = self.headers.get('referer') + if referer: + env['HTTP_REFERER'] = referer + accept = [] + for line in self.headers.getallmatchingheaders('accept'): + if line[:1] in "\t\n\r ": + accept.append(line.strip()) + else: + accept = accept + line[7:].split(',') + env['HTTP_ACCEPT'] = ','.join(accept) + ua = self.headers.get('user-agent') + if ua: + env['HTTP_USER_AGENT'] = ua + co = filter(None, self.headers.get_all('cookie', [])) + cookie_str = ', '.join(co) + if cookie_str: + env['HTTP_COOKIE'] = cookie_str + # XXX Other HTTP_* headers + # Since we're setting the env in the parent, provide empty + # values to override previously set values + for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', + 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): + env.setdefault(k, "") + + self.send_response(HTTPStatus.OK, "Script output follows") + self.flush_headers() + + decoded_query = query.replace('+', ' ') + + if self.have_fork: + # Unix -- fork as we should + args = [script] + if '=' not in decoded_query: + args.append(decoded_query) + nobody = nobody_uid() + self.wfile.flush() # Always flush before forking + pid = os.fork() + if pid != 0: + # Parent + pid, sts = os.waitpid(pid, 0) + # throw away additional data [see bug #427345] + while select.select([self.rfile], [], [], 0)[0]: + if not self.rfile.read(1): + break + if sts: + self.log_error("CGI script 
exit status %#x", sts) + return + # Child + try: + try: + os.setuid(nobody) + except OSError: + pass + os.dup2(self.rfile.fileno(), 0) + os.dup2(self.wfile.fileno(), 1) + os.execve(scriptfile, args, env) + except: + self.server.handle_error(self.request, self.client_address) + os._exit(127) + + else: + # Non-Unix -- use subprocess + import subprocess + cmdline = [scriptfile] + if self.is_python(scriptfile): + interp = sys.executable + if interp.lower().endswith("w.exe"): + # On Windows, use python.exe, not pythonw.exe + interp = interp[:-5] + interp[-4:] + cmdline = [interp, '-u'] + cmdline + if '=' not in query: + cmdline.append(query) + self.log_message("command: %s", subprocess.list2cmdline(cmdline)) + try: + nbytes = int(length) + except (TypeError, ValueError): + nbytes = 0 + p = subprocess.Popen(cmdline, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env = env + ) + if self.command.lower() == "post" and nbytes > 0: + data = self.rfile.read(nbytes) + else: + data = None + # throw away additional data [see bug #427345] + while select.select([self.rfile._sock], [], [], 0)[0]: + if not self.rfile._sock.recv(1): + break + stdout, stderr = p.communicate(data) + self.wfile.write(stdout) + if stderr: + self.log_error('%s', stderr) + p.stderr.close() + p.stdout.close() + status = p.returncode + if status: + self.log_error("CGI script exit status %#x", status) + else: + self.log_message("CGI script exited OK") + + +def test(HandlerClass=BaseHTTPRequestHandler, + ServerClass=HTTPServer, protocol="HTTP/1.0", port=8000, bind=""): + """Test the HTTP request handler class. + + This runs an HTTP server on port 8000 (or the port argument). + + """ + server_address = (bind, port) + + HandlerClass.protocol_version = protocol + with ServerClass(server_address, HandlerClass) as httpd: + sa = httpd.socket.getsockname() + serve_message = "Serving HTTP on {host} port {port} (http://{host}:{port}/) ..." + print(serve_message.format(host=sa[0], port=sa[1])) + try: + httpd.serve_forever() + except KeyboardInterrupt: + print("\nKeyboard interrupt received, exiting.") + sys.exit(0) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--cgi', action='store_true', + help='Run as CGI Server') + parser.add_argument('--bind', '-b', default='', metavar='ADDRESS', + help='Specify alternate bind address ' + '[default: all interfaces]') + parser.add_argument('port', action='store', + default=8000, type=int, + nargs='?', + help='Specify alternate port [default: 8000]') + args = parser.parse_args() + if args.cgi: + handler_class = CGIHTTPRequestHandler + else: + handler_class = SimpleHTTPRequestHandler + test(HandlerClass=handler_class, port=args.port, bind=args.bind) diff --git a/modules/language/python/module/json/__init__.py b/modules/language/python/module/json/__init__.py new file mode 100644 index 0000000..6d0511e --- /dev/null +++ b/modules/language/python/module/json/__init__.py @@ -0,0 +1,367 @@ +r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format. + +:mod:`json` exposes an API familiar to users of the standard library +:mod:`marshal` and :mod:`pickle` modules. It is derived from a +version of the externally maintained simplejson library. 
+ +Encoding basic Python object hierarchies:: + + >>> import json + >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) + '["foo", {"bar": ["baz", null, 1.0, 2]}]' + >>> print(json.dumps("\"foo\bar")) + "\"foo\bar" + >>> print(json.dumps('\u1234')) + "\u1234" + >>> print(json.dumps('\\')) + "\\" + >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)) + {"a": 0, "b": 0, "c": 0} + >>> from io import StringIO + >>> io = StringIO() + >>> json.dump(['streaming API'], io) + >>> io.getvalue() + '["streaming API"]' + +Compact encoding:: + + >>> import json + >>> from collections import OrderedDict + >>> mydict = OrderedDict([('4', 5), ('6', 7)]) + >>> json.dumps([1,2,3,mydict], separators=(',', ':')) + '[1,2,3,{"4":5,"6":7}]' + +Pretty printing:: + + >>> import json + >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)) + { + "4": 5, + "6": 7 + } + +Decoding JSON:: + + >>> import json + >>> obj = ['foo', {'bar': ['baz', None, 1.0, 2]}] + >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj + True + >>> json.loads('"\\"foo\\bar"') == '"foo\x08ar' + True + >>> from io import StringIO + >>> io = StringIO('["streaming API"]') + >>> json.load(io)[0] == 'streaming API' + True + +Specializing JSON object decoding:: + + >>> import json + >>> def as_complex(dct): + ... if '__complex__' in dct: + ... return complex(dct['real'], dct['imag']) + ... return dct + ... + >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', + ... object_hook=as_complex) + (1+2j) + >>> from decimal import Decimal + >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1') + True + +Specializing JSON object encoding:: + + >>> import json + >>> def encode_complex(obj): + ... if isinstance(obj, complex): + ... return [obj.real, obj.imag] + ... raise TypeError(repr(obj) + " is not JSON serializable") + ... + >>> json.dumps(2 + 1j, default=encode_complex) + '[2.0, 1.0]' + >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) + '[2.0, 1.0]' + >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) + '[2.0, 1.0]' + + +Using json.tool from the shell to validate and pretty-print:: + + $ echo '{"json":"obj"}' | python -m json.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m json.tool + Expecting property name enclosed in double quotes: line 1 column 3 (char 2) +""" +__version__ = '2.0.9' +__all__ = [ + 'dump', 'dumps', 'load', 'loads', + 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', +] + +__author__ = 'Bob Ippolito <bob@redivi.com>' + +from .decoder import JSONDecoder, JSONDecodeError +from .encoder import JSONEncoder +import codecs + +_default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + default=None, +) + +def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + default=None, sort_keys=False, **kw): + """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a + ``.write()``-supporting file-like object). + + If ``skipkeys`` is true then ``dict`` keys that are not basic types + (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped + instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the strings written to ``fp`` can + contain non-ASCII characters if they appear in strings contained in + ``obj``. Otherwise, all such characters are escaped in JSON strings. 
+ + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) + in strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. + + If specified, ``separators`` should be an ``(item_separator, key_separator)`` + tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and + ``(',', ': ')`` otherwise. To get the most compact JSON representation, + you should specify ``(',', ':')`` to eliminate whitespace. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + If *sort_keys* is true (default: ``False``), then the output of + dictionaries will be sorted by key. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. + + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + default is None and not sort_keys and not kw): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, + default=default, sort_keys=sort_keys, **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + + +def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + default=None, sort_keys=False, **kw): + """Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is true then ``dict`` keys that are not basic types + (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped + instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the return value can contain non-ASCII + characters if they appear in strings contained in ``obj``. Otherwise, all + such characters are escaped in JSON strings. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. + + If specified, ``separators`` should be an ``(item_separator, key_separator)`` + tuple. 
The default is ``(', ', ': ')`` if *indent* is ``None`` and + ``(',', ': ')`` otherwise. To get the most compact JSON representation, + you should specify ``(',', ':')`` to eliminate whitespace. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + If *sort_keys* is true (default: ``False``), then the output of + dictionaries will be sorted by key. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. + + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + default is None and not sort_keys and not kw): + return _default_encoder.encode(obj) + if cls is None: + cls = JSONEncoder + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, default=default, sort_keys=sort_keys, + **kw).encode(obj) + + +_default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None) + + +def detect_encoding(b): + bstartswith = b.startswith + if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)): + return 'utf-32' + if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)): + return 'utf-16' + if bstartswith(codecs.BOM_UTF8): + return 'utf-8-sig' + + if len(b) >= 4: + if not b[0]: + # 00 00 -- -- - utf-32-be + # 00 XX -- -- - utf-16-be + return 'utf-16-be' if b[1] else 'utf-32-be' + if not b[1]: + # XX 00 00 00 - utf-32-le + # XX 00 00 XX - utf-16-le + # XX 00 XX -- - utf-16-le + return 'utf-16-le' if b[2] or b[3] else 'utf-32-le' + elif len(b) == 2: + if not b[0]: + # 00 XX - utf-16-be + return 'utf-16-be' + if not b[1]: + # XX 00 - utf-16-le + return 'utf-16-le' + # default + return 'utf-8' + + +def load(fp, *, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): + """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + ``object_pairs_hook`` is an optional function that will be called with the + result of any object literal decoded with an ordered list of pairs. The + return value of ``object_pairs_hook`` will be used instead of the ``dict``. + This feature can be used to implement custom decoders that rely on the + order that the key and value pairs are decoded (for example, + collections.OrderedDict will remember the order of insertion). If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg; otherwise ``JSONDecoder`` is used. 
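A small illustrative round trip through dump() and load() with the hooks documented above (hypothetical file name, not part of the patch):

    import json
    from collections import OrderedDict

    with open("data.json", "w") as fp:
        json.dump({"b": 1, "a": 2}, fp, indent=2, sort_keys=True)

    with open("data.json") as fp:
        obj = json.load(fp, object_pairs_hook=OrderedDict)
    # obj == OrderedDict([("a", 2), ("b", 1)])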
+ + """ + return loads(fp.read(), + cls=cls, object_hook=object_hook, + parse_float=parse_float, parse_int=parse_int, + parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw) + + +def loads(s, *, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): + """Deserialize ``s`` (a ``str``, ``bytes`` or ``bytearray`` instance + containing a JSON document) to a Python object. + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + ``object_pairs_hook`` is an optional function that will be called with the + result of any object literal decoded with an ordered list of pairs. The + return value of ``object_pairs_hook`` will be used instead of the ``dict``. + This feature can be used to implement custom decoders that rely on the + order that the key and value pairs are decoded (for example, + collections.OrderedDict will remember the order of insertion). If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. + + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg; otherwise ``JSONDecoder`` is used. + + The ``encoding`` argument is ignored and deprecated. 
+ + """ + if isinstance(s, str): + if s.startswith('\ufeff'): + raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)", + s, 0) + else: + if not isinstance(s, (bytes, bytearray)): + raise TypeError('the JSON object must be str, bytes or bytearray, ' + 'not {!r}'.format(s.__class__.__name__)) + s = s.decode(detect_encoding(s), 'surrogatepass') + + if (cls is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and object_pairs_hook is None and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if object_pairs_hook is not None: + kw['object_pairs_hook'] = object_pairs_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + return cls(**kw).decode(s) diff --git a/modules/language/python/module/json/__pycache__/__init__.cpython-36.pyc b/modules/language/python/module/json/__pycache__/__init__.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..3fe2fb8 --- /dev/null +++ b/modules/language/python/module/json/__pycache__/__init__.cpython-36.pyc diff --git a/modules/language/python/module/json/__pycache__/decoder.cpython-36.pyc b/modules/language/python/module/json/__pycache__/decoder.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..8d6e017 --- /dev/null +++ b/modules/language/python/module/json/__pycache__/decoder.cpython-36.pyc diff --git a/modules/language/python/module/json/__pycache__/encoder.cpython-36.pyc b/modules/language/python/module/json/__pycache__/encoder.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..402c319 --- /dev/null +++ b/modules/language/python/module/json/__pycache__/encoder.cpython-36.pyc diff --git a/modules/language/python/module/json/__pycache__/scanner.cpython-36.pyc b/modules/language/python/module/json/__pycache__/scanner.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..7aab373 --- /dev/null +++ b/modules/language/python/module/json/__pycache__/scanner.cpython-36.pyc diff --git a/modules/language/python/module/json/__pycache__/tool.cpython-36.pyc b/modules/language/python/module/json/__pycache__/tool.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..1353ca5 --- /dev/null +++ b/modules/language/python/module/json/__pycache__/tool.cpython-36.pyc diff --git a/modules/language/python/module/lzma.py b/modules/language/python/module/lzma.py new file mode 100644 index 0000000..0817b87 --- /dev/null +++ b/modules/language/python/module/lzma.py @@ -0,0 +1,347 @@ +"""Interface to the liblzma compression library. + +This module provides a class for reading and writing compressed files, +classes for incremental (de)compression, and convenience functions for +one-shot (de)compression. + +These classes and functions support both the XZ and legacy LZMA +container formats, as well as raw compressed data streams. 
+""" + +__all__ = [ + "CHECK_NONE", "CHECK_CRC32", "CHECK_CRC64", "CHECK_SHA256", + "CHECK_ID_MAX", "CHECK_UNKNOWN", + "FILTER_LZMA1", "FILTER_LZMA2", "FILTER_DELTA", "FILTER_X86", "FILTER_IA64", + "FILTER_ARM", "FILTER_ARMTHUMB", "FILTER_POWERPC", "FILTER_SPARC", + "FORMAT_AUTO", "FORMAT_XZ", "FORMAT_ALONE", "FORMAT_RAW", + "MF_HC3", "MF_HC4", "MF_BT2", "MF_BT3", "MF_BT4", + "MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME", + + "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError", + "open", "compress", "decompress", "is_check_supported", +] + +import builtins +import io +import os +from _lzma import * +from _lzma import _encode_filter_properties, _decode_filter_properties +import _compression + + +_MODE_CLOSED = 0 +_MODE_READ = 1 +# Value 2 no longer used +_MODE_WRITE = 3 + + +class LZMAFile(_compression.BaseStream): + + """A file object providing transparent LZMA (de)compression. + + An LZMAFile can act as a wrapper for an existing file object, or + refer directly to a named file on disk. + + Note that LZMAFile provides a *binary* file interface - data read + is returned as bytes, and data to be written must be given as bytes. + """ + + def __init__(self, filename=None, mode="r", *, + format=None, check=-1, preset=None, filters=None): + """Open an LZMA-compressed file in binary mode. + + filename can be either an actual file name (given as a str, + bytes, or PathLike object), in which case the named file is + opened, or it can be an existing file object to read from or + write to. + + mode can be "r" for reading (default), "w" for (over)writing, + "x" for creating exclusively, or "a" for appending. These can + equivalently be given as "rb", "wb", "xb" and "ab" respectively. + + format specifies the container format to use for the file. + If mode is "r", this defaults to FORMAT_AUTO. Otherwise, the + default is FORMAT_XZ. + + check specifies the integrity check to use. This argument can + only be used when opening a file for writing. For FORMAT_XZ, + the default is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not + support integrity checks - for these formats, check must be + omitted, or be CHECK_NONE. + + When opening a file for reading, the *preset* argument is not + meaningful, and should be omitted. The *filters* argument should + also be omitted, except when format is FORMAT_RAW (in which case + it is required). + + When opening a file for writing, the settings used by the + compressor can be specified either as a preset compression + level (with the *preset* argument), or in detail as a custom + filter chain (with the *filters* argument). For FORMAT_XZ and + FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset + level. For FORMAT_RAW, the caller must always specify a filter + chain; the raw compressor does not support preset compression + levels. + + preset (if provided) should be an integer in the range 0-9, + optionally OR-ed with the constant PRESET_EXTREME. + + filters (if provided) should be a sequence of dicts. Each dict + should have an entry for "id" indicating ID of the filter, plus + additional entries for options to the filter. 
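Illustrative only (not part of the patch): a basic write/read round trip using the constructor arguments described above:

    import lzma

    with lzma.LZMAFile("example.xz", "w", preset=6) as f:
        f.write(b"hello world\n")

    with lzma.LZMAFile("example.xz") as f:   # mode "r", FORMAT_AUTO
        data = f.read()                      # b"hello world\n"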
+ """ + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + + if mode in ("r", "rb"): + if check != -1: + raise ValueError("Cannot specify an integrity check " + "when opening a file for reading") + if preset is not None: + raise ValueError("Cannot specify a preset compression " + "level when opening a file for reading") + if format is None: + format = FORMAT_AUTO + mode_code = _MODE_READ + elif mode in ("w", "wb", "a", "ab", "x", "xb"): + if format is None: + format = FORMAT_XZ + mode_code = _MODE_WRITE + self._compressor = LZMACompressor(format=format, check=check, + preset=preset, filters=filters) + self._pos = 0 + else: + raise ValueError("Invalid mode: {!r}".format(mode)) + + if isinstance(filename, (str, bytes, os.PathLike)): + if "b" not in mode: + mode += "b" + self._fp = builtins.open(filename, mode) + self._closefp = True + self._mode = mode_code + elif hasattr(filename, "read") or hasattr(filename, "write"): + self._fp = filename + self._mode = mode_code + else: + raise TypeError("filename must be a str, bytes, file or PathLike object") + + if self._mode == _MODE_READ: + raw = _compression.DecompressReader(self._fp, LZMADecompressor, + trailing_error=LZMAError, format=format, filters=filters) + self._buffer = io.BufferedReader(raw) + + def close(self): + """Flush and close the file. + + May be called more than once without error. Once the file is + closed, any other operation on it will raise a ValueError. + """ + if self._mode == _MODE_CLOSED: + return + try: + if self._mode == _MODE_READ: + self._buffer.close() + self._buffer = None + elif self._mode == _MODE_WRITE: + self._fp.write(self._compressor.flush()) + self._compressor = None + finally: + try: + if self._closefp: + self._fp.close() + finally: + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + + @property + def closed(self): + """True if this file is closed.""" + return self._mode == _MODE_CLOSED + + def fileno(self): + """Return the file descriptor for the underlying file.""" + self._check_not_closed() + return self._fp.fileno() + + def seekable(self): + """Return whether the file supports seeking.""" + return self.readable() and self._buffer.seekable() + + def readable(self): + """Return whether the file was opened for reading.""" + self._check_not_closed() + return self._mode == _MODE_READ + + def writable(self): + """Return whether the file was opened for writing.""" + self._check_not_closed() + return self._mode == _MODE_WRITE + + def peek(self, size=-1): + """Return buffered data without advancing the file position. + + Always returns at least one byte of data, unless at EOF. + The exact number of bytes returned is unspecified. + """ + self._check_can_read() + # Relies on the undocumented fact that BufferedReader.peek() always + # returns at least one byte (except at EOF) + return self._buffer.peek(size) + + def read(self, size=-1): + """Read up to size uncompressed bytes from the file. + + If size is negative or omitted, read until EOF is reached. + Returns b"" if the file is already at EOF. + """ + self._check_can_read() + return self._buffer.read(size) + + def read1(self, size=-1): + """Read up to size uncompressed bytes, while trying to avoid + making multiple reads from the underlying stream. Reads up to a + buffer's worth of data if size is negative. + + Returns b"" if the file is at EOF. 
+ """ + self._check_can_read() + if size < 0: + size = io.DEFAULT_BUFFER_SIZE + return self._buffer.read1(size) + + def readline(self, size=-1): + """Read a line of uncompressed bytes from the file. + + The terminating newline (if present) is retained. If size is + non-negative, no more than size bytes will be read (in which + case the line may be incomplete). Returns b'' if already at EOF. + """ + self._check_can_read() + return self._buffer.readline(size) + + def write(self, data): + """Write a bytes object to the file. + + Returns the number of uncompressed bytes written, which is + always len(data). Note that due to buffering, the file on disk + may not reflect the data written until close() is called. + """ + self._check_can_write() + compressed = self._compressor.compress(data) + self._fp.write(compressed) + self._pos += len(data) + return len(data) + + def seek(self, offset, whence=io.SEEK_SET): + """Change the file position. + + The new position is specified by offset, relative to the + position indicated by whence. Possible values for whence are: + + 0: start of stream (default): offset must not be negative + 1: current stream position + 2: end of stream; offset must not be positive + + Returns the new file position. + + Note that seeking is emulated, so depending on the parameters, + this operation may be extremely slow. + """ + self._check_can_seek() + return self._buffer.seek(offset, whence) + + def tell(self): + """Return the current file position.""" + self._check_not_closed() + if self._mode == _MODE_READ: + return self._buffer.tell() + return self._pos + + +def open(filename, mode="rb", *, + format=None, check=-1, preset=None, filters=None, + encoding=None, errors=None, newline=None): + """Open an LZMA-compressed file in binary or text mode. + + filename can be either an actual file name (given as a str, bytes, + or PathLike object), in which case the named file is opened, or it + can be an existing file object to read from or write to. + + The mode argument can be "r", "rb" (default), "w", "wb", "x", "xb", + "a", or "ab" for binary mode, or "rt", "wt", "xt", or "at" for text + mode. + + The format, check, preset and filters arguments specify the + compression settings, as for LZMACompressor, LZMADecompressor and + LZMAFile. + + For binary mode, this function is equivalent to the LZMAFile + constructor: LZMAFile(filename, mode, ...). In this case, the + encoding, errors and newline arguments must not be provided. + + For text mode, an LZMAFile object is created, and wrapped in an + io.TextIOWrapper instance with the specified encoding, error + handling behavior, and line ending(s). + + """ + if "t" in mode: + if "b" in mode: + raise ValueError("Invalid mode: %r" % (mode,)) + else: + if encoding is not None: + raise ValueError("Argument 'encoding' not supported in binary mode") + if errors is not None: + raise ValueError("Argument 'errors' not supported in binary mode") + if newline is not None: + raise ValueError("Argument 'newline' not supported in binary mode") + + lz_mode = mode.replace("t", "") + binary_file = LZMAFile(filename, lz_mode, format=format, check=check, + preset=preset, filters=filters) + + if "t" in mode: + return io.TextIOWrapper(binary_file, encoding, errors, newline) + else: + return binary_file + + +def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None): + """Compress a block of data. + + Refer to LZMACompressor's docstring for a description of the + optional arguments *format*, *check*, *preset* and *filters*. 
+ + For incremental compression, use an LZMACompressor instead. + """ + comp = LZMACompressor(format, check, preset, filters) + return comp.compress(data) + comp.flush() + + +def decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None): + """Decompress a block of data. + + Refer to LZMADecompressor's docstring for a description of the + optional arguments *format*, *check* and *filters*. + + For incremental decompression, use an LZMADecompressor instead. + """ + results = [] + while True: + decomp = LZMADecompressor(format, memlimit, filters) + try: + res = decomp.decompress(data) + except LZMAError: + if results: + break # Leftover data is not a valid LZMA/XZ stream; ignore it. + else: + raise # Error on the first iteration; bail out. + results.append(res) + if not decomp.eof: + raise LZMAError("Compressed data ended before the " + "end-of-stream marker was reached") + data = decomp.unused_data + if not data: + break + return b"".join(results) diff --git a/modules/language/python/module/string.scm b/modules/language/python/module/string.scm index 3255d99..9c82365 100644 --- a/modules/language/python/module/string.scm +++ b/modules/language/python/module/string.scm @@ -2,6 +2,7 @@ #:use-module (oop pf-objects) #:use-module (oop goops) #:use-module (ice-9 match) + #:use-module (ice-9 format) #:use-module (language python number) #:use-module (language python exceptions) #:use-module (language python yield) diff --git a/modules/language/python/module/test.py b/modules/language/python/module/test.py new file mode 100644 index 0000000..84dd3de --- /dev/null +++ b/modules/language/python/module/test.py @@ -0,0 +1,10 @@ +module(test) + +class A (): + def f(x): + x + +A .f (A). \ + f(A) + + diff --git a/modules/language/python/module/test2.py b/modules/language/python/module/test2.py new file mode 100644 index 0000000..0d59a6c --- /dev/null +++ b/modules/language/python/module/test2.py @@ -0,0 +1,28 @@ +module(test2) +from optparse import OptionParser + +def f(args): + parser = OptionParser(usage="usage: %prog [options] filename", + version="%prog 1.0") + + parser.add_option("-x", "--xhtml", + action="store_true", + dest="xhtml_flag", + default=False, + help="create a XHTML template instead of HTML") + + parser.add_option("-c", "--cssfile", + action="store", # optional because action defaults to "store" + dest="cssfile", + default="style.css", + help="CSS file to link") + + (options, args) = parser.parse_args(args) + + if len(args) != 1: + parser.error("wrong number of arguments") + + print(options) + print(args) + +__all__=['f'] diff --git a/modules/language/python/module/urllib/#error.py# b/modules/language/python/module/urllib/#error.py# new file mode 100644 index 0000000..7df0238 --- /dev/null +++ b/modules/language/python/module/urllib/#error.py# @@ -0,0 +1,77 @@ +module(urllib,error) + +"""Exception classes raised by urllib. + +The base exception class is URLError, which inherits from OSError. It +doesn't define any behavior of its own, but is the base class for all +exceptions defined in this package. + +HTTPError is an exception class that is also a valid HTTP response +instance. It behaves this way because HTTP protocol errors are valid +responses, with a status code, headers, and a body. In some contexts, +an application may want to handle an exception like a regular +response. 
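As an illustrative sketch (not part of the patch, and assuming the usual urllib.request front end is available), an HTTPError can be caught and treated like a response as described above:

    from urllib.request import urlopen
    from urllib.error import HTTPError, URLError

    try:
        data = urlopen("http://example.org/missing").read()
    except HTTPError as e:
        # e is an exception, but also carries a status code, headers and a body
        print(e.code, e.reason)
        print(e.headers.get("Content-Type"))
    except URLError as e:
        print("failed to reach the server:", e.reason)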
+""" + +import urllib.response + +__all__ = ['URLError', 'HTTPError', 'ContentTooShortError'] + +class URLError(OSError): + # URLError is a sub-type of OSError, but it doesn't share any of + # the implementation. need to override __init__ and __str__. + # It sets self.args for compatibility with other EnvironmentError + # subclasses, but args doesn't have the typical format with errno in + # slot 0 and strerror in slot 1. This may be better than nothing. + def __init__(self, reason, filename=None): + self.args = reason, + self.reason = reason + if filename is not None: + self.filename = filename + + def __str__(self): + return '<urlopen error %s>' % self.reason + + +class HTTPError(URLError, urllib.response.addinfourl): + """Raised when HTTP error occurs, but also acts like non-error return""" + __super_init = urllib.response.addinfourl.__init__ + + def __init__(self, url, code, msg, hdrs, fp): + self.code = code + self.msg = msg + self.hdrs = hdrs + self.fp = fp + self.filename = url + # The addinfourl classes depend on fp being a valid file + # object. In some cases, the HTTPError may not have a valid + # file object. If this happens, the simplest workaround is to + # not initialize the base classes. + if fp is not None: + self.__super_init(fp, hdrs, url, code) + + def __str__(self): + return 'HTTP Error %s: %s' % (self.code, self.msg) + + def __repr__(self): + return '<HTTPError %s: %r>' % (self.code, self.msg) + + # since URLError specifies a .reason attribute, HTTPError should also + # provide this attribute. See issue13211 for discussion. + @property + def reason(self): + return self.msg + + @property + def headers(self): + return self.hdrs + + @headers.setter + def headers(self, headers): + self.hdrs = headers + +class ContentTooShortError(URLError): + """Exception raised when downloaded size does not match content-length.""" + def __init__(self, message, content): + URLError.__init__(self, message) + self.content = content diff --git a/modules/language/python/module/urllib/.#error.py b/modules/language/python/module/urllib/.#error.py new file mode 120000 index 0000000..c8d0ed8 --- /dev/null +++ b/modules/language/python/module/urllib/.#error.py @@ -0,0 +1 @@ +stis@lapwine.2086:1535196580
\ No newline at end of file diff --git a/modules/language/python/module/xml.py~ b/modules/language/python/module/xml.py~ new file mode 100644 index 0000000..bf6d8dd --- /dev/null +++ b/modules/language/python/module/xml.py~ @@ -0,0 +1,20 @@ +"""Core XML support for Python. + +This package contains four sub-packages: + +dom -- The W3C Document Object Model. This supports DOM Level 1 + + Namespaces. + +parsers -- Python wrappers for XML parsers (currently only supports Expat). + +sax -- The Simple API for XML, developed by XML-Dev, led by David + Megginson and ported to Python by Lars Marius Garshol. This + supports the SAX 2 API. + +etree -- The ElementTree XML library. This is a subset of the full + ElementTree XML release. + +""" + + +__all__ = ["dom", "parsers", "sax", "etree"] diff --git a/modules/language/python/module/xml/#sax.py# b/modules/language/python/module/xml/#sax.py# new file mode 100644 index 0000000..eccc3fe --- /dev/null +++ b/modules/language/python/module/xml/#sax.py# @@ -0,0 +1,108 @@ +module(xml.sax) +"""Simple API for XML (SAX) implementation for Python. + +This module provides an implementation of the SAX 2 interface; +information about the Java version of the interface can be found at +http://www.megginson.com/SAX/. The Python version of the interface is +documented at <...>. + +This package contains the following modules: + +handler -- Base classes and constants which define the SAX 2 API for + the 'client-side' of SAX for Python. + +saxutils -- Implementation of the convenience classes commonly used to + work with SAX. + +xmlreader -- Base classes and constants which define the SAX 2 API for + the parsers used with SAX for Python. + +expatreader -- Driver that allows use of the Expat parser with SAX. +""" + +from .xmlreader import InputSource +from .handler import ContentHandler, ErrorHandler +from ._exceptions import SAXException, SAXNotRecognizedException, \ + SAXParseException, SAXNotSupportedException, \ + SAXReaderNotAvailable + + +def parse(source, handler, errorHandler=ErrorHandler()): + parser = make_parser() + parser.setContentHandler(handler) + parser.setErrorHandler(errorHandler) + parser.parse(source) + +def parseString(string, handler, errorHandler=ErrorHandler()): + import io + if errorHandler is None: + errorHandler = ErrorHandler() + parser = make_parser() + parser.setContentHandler(handler) + parser.setErrorHandler(errorHandler) + + inpsrc = InputSource() + if isinstance(string, str): + inpsrc.setCharacterStream(io.StringIO(string)) + else: + inpsrc.setByteStream(io.BytesIO(string)) + parser.parse(inpsrc) + +# this is the parser list used by the make_parser function if no +# alternatives are given as parameters to the function + +default_parser_list = ["xml.sax.expatreader"] + +# tell modulefinder that importing sax potentially imports expatreader +_false = 0 +if _false: + import xml.sax.expatreader + +import os, sys +if "PY_SAX_PARSER" in os.environ: + default_parser_list = os.environ["PY_SAX_PARSER"].split(",") +del os + +_key = "python.xml.sax.parser" +if sys.platform[:4] == "java" and sys.registry.containsKey(_key): + default_parser_list = sys.registry.getProperty(_key).split(",") + + +def make_parser(parser_list = []): + """Creates and returns a SAX parser. + + Creates the first parser it is able to instantiate of the ones + given in the list created by doing parser_list + + default_parser_list. 
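For illustration only (not part of the patch, and assuming an Expat-backed reader is available to make_parser), the SAX API described above can be driven with a small content handler:

    import xml.sax

    class TitleHandler(xml.sax.ContentHandler):
        def __init__(self):
            super().__init__()
            self.in_title = False

        def startElement(self, name, attrs):
            self.in_title = (name == "title")

        def characters(self, content):
            if self.in_title:
                print(content)

        def endElement(self, name):
            self.in_title = False

    xml.sax.parseString(b"<doc><title>hi</title></doc>", TitleHandler())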
The lists must contain the names of Python + modules containing both a SAX parser and a create_parser function.""" + + for parser_name in parser_list + default_parser_list: + try: + return _create_parser(parser_name) + except ImportError as e: + import sys + if parser_name in sys.modules: + # The parser module was found, but importing it + # failed unexpectedly, pass this exception through + raise + except SAXReaderNotAvailable: + # The parser module detected that it won't work properly, + # so try the next one + pass + + raise SAXReaderNotAvailable("No parsers found", None) + +# --- Internal utility methods used by make_parser + +if sys.platform[ : 4] == "java": + def _create_parser(parser_name): + from org.python.core import imp + drv_module = imp.importName(parser_name, 0, globals()) + return drv_module.create_parser() + +else: + def _create_parser(parser_name): + drv_module = __import__(parser_name,{},{},['create_parser']) + return drv_module.create_parser() + +del sys diff --git a/modules/language/python/module/xml/__init__.py b/modules/language/python/module/xml/__init__.py new file mode 100644 index 0000000..bf6d8dd --- /dev/null +++ b/modules/language/python/module/xml/__init__.py @@ -0,0 +1,20 @@ +"""Core XML support for Python. + +This package contains four sub-packages: + +dom -- The W3C Document Object Model. This supports DOM Level 1 + + Namespaces. + +parsers -- Python wrappers for XML parsers (currently only supports Expat). + +sax -- The Simple API for XML, developed by XML-Dev, led by David + Megginson and ported to Python by Lars Marius Garshol. This + supports the SAX 2 API. + +etree -- The ElementTree XML library. This is a subset of the full + ElementTree XML release. + +""" + + +__all__ = ["dom", "parsers", "sax", "etree"] diff --git a/modules/language/python/module/xml/__pycache__/__init__.cpython-36.pyc b/modules/language/python/module/xml/__pycache__/__init__.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..e273178 --- /dev/null +++ b/modules/language/python/module/xml/__pycache__/__init__.cpython-36.pyc diff --git a/modules/language/python/module/xml/dom.py~ b/modules/language/python/module/xml/dom.py~ new file mode 100644 index 0000000..97cf9a6 --- /dev/null +++ b/modules/language/python/module/xml/dom.py~ @@ -0,0 +1,140 @@ +"""W3C Document Object Model implementation for Python. + +The Python mapping of the Document Object Model is documented in the +Python Library Reference in the section on the xml.dom package. + +This package contains the following modules: + +minidom -- A simple implementation of the Level 1 DOM with namespace + support added (based on the Level 2 specification) and other + minor Level 2 functionality. + +pulldom -- DOM builder supporting on-demand tree-building for selected + subtrees of the document. + +""" + + +class Node: + """Class giving the NodeType constants.""" + __slots__ = () + + # DOM implementations may use this as a base class for their own + # Node implementations. If they don't, the constants defined here + # should still be used as the canonical definitions as they match + # the values given in the W3C recommendation. Client code can + # safely refer to these values in all tests of Node.nodeType + # values. 
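Client code is expected to branch on the nodeType constants listed below rather than on concrete node classes. A minimal sketch of that pattern, using CPython's stock minidom and an input document invented for illustration:

    from xml.dom import Node, minidom

    doc = minidom.parseString("<note>hi<!-- remark --><b>there</b></note>")
    for child in doc.documentElement.childNodes:
        if child.nodeType == Node.TEXT_NODE:
            print("text:", child.data)
        elif child.nodeType == Node.COMMENT_NODE:
            print("comment:", child.data)
        elif child.nodeType == Node.ELEMENT_NODE:
            print("element:", child.tagName)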
+ + ELEMENT_NODE = 1 + ATTRIBUTE_NODE = 2 + TEXT_NODE = 3 + CDATA_SECTION_NODE = 4 + ENTITY_REFERENCE_NODE = 5 + ENTITY_NODE = 6 + PROCESSING_INSTRUCTION_NODE = 7 + COMMENT_NODE = 8 + DOCUMENT_NODE = 9 + DOCUMENT_TYPE_NODE = 10 + DOCUMENT_FRAGMENT_NODE = 11 + NOTATION_NODE = 12 + + +#ExceptionCode +INDEX_SIZE_ERR = 1 +DOMSTRING_SIZE_ERR = 2 +HIERARCHY_REQUEST_ERR = 3 +WRONG_DOCUMENT_ERR = 4 +INVALID_CHARACTER_ERR = 5 +NO_DATA_ALLOWED_ERR = 6 +NO_MODIFICATION_ALLOWED_ERR = 7 +NOT_FOUND_ERR = 8 +NOT_SUPPORTED_ERR = 9 +INUSE_ATTRIBUTE_ERR = 10 +INVALID_STATE_ERR = 11 +SYNTAX_ERR = 12 +INVALID_MODIFICATION_ERR = 13 +NAMESPACE_ERR = 14 +INVALID_ACCESS_ERR = 15 +VALIDATION_ERR = 16 + + +class DOMException(Exception): + """Abstract base class for DOM exceptions. + Exceptions with specific codes are specializations of this class.""" + + def __init__(self, *args, **kw): + if self.__class__ is DOMException: + raise RuntimeError( + "DOMException should not be instantiated directly") + Exception.__init__(self, *args, **kw) + + def _get_code(self): + return self.code + + +class IndexSizeErr(DOMException): + code = INDEX_SIZE_ERR + +class DomstringSizeErr(DOMException): + code = DOMSTRING_SIZE_ERR + +class HierarchyRequestErr(DOMException): + code = HIERARCHY_REQUEST_ERR + +class WrongDocumentErr(DOMException): + code = WRONG_DOCUMENT_ERR + +class InvalidCharacterErr(DOMException): + code = INVALID_CHARACTER_ERR + +class NoDataAllowedErr(DOMException): + code = NO_DATA_ALLOWED_ERR + +class NoModificationAllowedErr(DOMException): + code = NO_MODIFICATION_ALLOWED_ERR + +class NotFoundErr(DOMException): + code = NOT_FOUND_ERR + +class NotSupportedErr(DOMException): + code = NOT_SUPPORTED_ERR + +class InuseAttributeErr(DOMException): + code = INUSE_ATTRIBUTE_ERR + +class InvalidStateErr(DOMException): + code = INVALID_STATE_ERR + +class SyntaxErr(DOMException): + code = SYNTAX_ERR + +class InvalidModificationErr(DOMException): + code = INVALID_MODIFICATION_ERR + +class NamespaceErr(DOMException): + code = NAMESPACE_ERR + +class InvalidAccessErr(DOMException): + code = INVALID_ACCESS_ERR + +class ValidationErr(DOMException): + code = VALIDATION_ERR + +class UserDataHandler: + """Class giving the operation constants for UserDataHandler.handle().""" + + # Based on DOM Level 3 (WD 9 April 2002) + + NODE_CLONED = 1 + NODE_IMPORTED = 2 + NODE_DELETED = 3 + NODE_RENAMED = 4 + +XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace" +XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/" +XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml" +EMPTY_NAMESPACE = None +EMPTY_PREFIX = None + +from .domreg import getDOMImplementation, registerDOMImplementation diff --git a/modules/language/python/module/xml/dom/#xmlbuilder.py# b/modules/language/python/module/xml/dom/#xmlbuilder.py# new file mode 100644 index 0000000..a2a4d1b --- /dev/null +++ b/modules/language/python/module/xml/dom/#xmlbuilder.py# @@ -0,0 +1,412 @@ +module(xml,dom,xmlbuilder) + +"""Implementation of the DOM Level 3 'LS-Load' feature.""" + +import copy +import warnings +import xml.dom + +from xml.dom.NodeFilter import NodeFilter + + +__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"] + + +class Options: + """Features object that has variables set for each DOMBuilder feature. + + The DOMBuilder class uses an instance of this class to pass settings to + the ExpatBuilder class. + """ + + # Note that the DOMBuilder class in LoadSave constrains which of these + # values can be set using the DOM Level 3 LoadSave feature. 
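These options are normally toggled through the DOMBuilder class defined further down in this file rather than set directly on an Options instance. A rough usage sketch, with the input bytes invented for illustration:

    import io
    from xml.dom.xmlbuilder import DOMBuilder, DOMInputSource

    builder = DOMBuilder()
    if builder.canSetFeature("comments", 0):
        builder.setFeature("comments", 0)   # drop comment nodes while building

    src = DOMInputSource()
    src.byteStream = io.BytesIO(b"<root><!-- dropped --><item/></root>")
    doc = builder.parse(src)                # returns a minidom Document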
+ + namespaces = 1 + namespace_declarations = True + validation = False + external_parameter_entities = True + external_general_entities = True + external_dtd_subset = True + validate_if_schema = False + validate = False + datatype_normalization = False + create_entity_ref_nodes = True + entities = True + whitespace_in_element_content = True + cdata_sections = True + comments = True + charset_overrides_xml_encoding = True + infoset = False + supported_mediatypes_only = False + + errorHandler = None + filter = None + + +class DOMBuilder: + entityResolver = None + errorHandler = None + filter = None + + ACTION_REPLACE = 1 + ACTION_APPEND_AS_CHILDREN = 2 + ACTION_INSERT_AFTER = 3 + ACTION_INSERT_BEFORE = 4 + + _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN, + ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE) + + def __init__(self): + self._options = Options() + + def _get_entityResolver(self): + return self.entityResolver + def _set_entityResolver(self, entityResolver): + self.entityResolver = entityResolver + + def _get_errorHandler(self): + return self.errorHandler + def _set_errorHandler(self, errorHandler): + self.errorHandler = errorHandler + + def _get_filter(self): + return self.filter + def _set_filter(self, filter): + self.filter = filter + + def setFeature(self, name, state): + if self.supportsFeature(name): + state = state and 1 or 0 + try: + settings = self._settings[(_name_xform(name), state)] + except KeyError: + raise xml.dom.NotSupportedErr( + "unsupported feature: %r" % (name,)) + else: + for name, value in settings: + setattr(self._options, name, value) + else: + raise xml.dom.NotFoundErr("unknown feature: " + repr(name)) + + def supportsFeature(self, name): + return hasattr(self._options, _name_xform(name)) + + def canSetFeature(self, name, state): + key = (_name_xform(name), state and 1 or 0) + return key in self._settings + + # This dictionary maps from (feature,value) to a list of + # (option,value) pairs that should be set on the Options object. + # If a (feature,value) setting is not in this dictionary, it is + # not supported by the DOMBuilder. 
+ # + _settings = { + ("namespace_declarations", 0): [ + ("namespace_declarations", 0)], + ("namespace_declarations", 1): [ + ("namespace_declarations", 1)], + ("validation", 0): [ + ("validation", 0)], + ("external_general_entities", 0): [ + ("external_general_entities", 0)], + ("external_general_entities", 1): [ + ("external_general_entities", 1)], + ("external_parameter_entities", 0): [ + ("external_parameter_entities", 0)], + ("external_parameter_entities", 1): [ + ("external_parameter_entities", 1)], + ("validate_if_schema", 0): [ + ("validate_if_schema", 0)], + ("create_entity_ref_nodes", 0): [ + ("create_entity_ref_nodes", 0)], + ("create_entity_ref_nodes", 1): [ + ("create_entity_ref_nodes", 1)], + ("entities", 0): [ + ("create_entity_ref_nodes", 0), + ("entities", 0)], + ("entities", 1): [ + ("entities", 1)], + ("whitespace_in_element_content", 0): [ + ("whitespace_in_element_content", 0)], + ("whitespace_in_element_content", 1): [ + ("whitespace_in_element_content", 1)], + ("cdata_sections", 0): [ + ("cdata_sections", 0)], + ("cdata_sections", 1): [ + ("cdata_sections", 1)], + ("comments", 0): [ + ("comments", 0)], + ("comments", 1): [ + ("comments", 1)], + ("charset_overrides_xml_encoding", 0): [ + ("charset_overrides_xml_encoding", 0)], + ("charset_overrides_xml_encoding", 1): [ + ("charset_overrides_xml_encoding", 1)], + ("infoset", 0): [], + ("infoset", 1): [ + ("namespace_declarations", 0), + ("validate_if_schema", 0), + ("create_entity_ref_nodes", 0), + ("entities", 0), + ("cdata_sections", 0), + ("datatype_normalization", 1), + ("whitespace_in_element_content", 1), + ("comments", 1), + ("charset_overrides_xml_encoding", 1)], + ("supported_mediatypes_only", 0): [ + ("supported_mediatypes_only", 0)], + ("namespaces", 0): [ + ("namespaces", 0)], + ("namespaces", 1): [ + ("namespaces", 1)], + } + + def getFeature(self, name): + xname = _name_xform(name) + try: + return getattr(self._options, xname) + except AttributeError: + if name == "infoset": + options = self._options + return (options.datatype_normalization + and options.whitespace_in_element_content + and options.comments + and options.charset_overrides_xml_encoding + and not (options.namespace_declarations + or options.validate_if_schema + or options.create_entity_ref_nodes + or options.entities + or options.cdata_sections)) + raise xml.dom.NotFoundErr("feature %s not known" % repr(name)) + + def parseURI(self, uri): + if self.entityResolver: + input = self.entityResolver.resolveEntity(None, uri) + else: + input = DOMEntityResolver().resolveEntity(None, uri) + return self.parse(input) + + def parse(self, input): + options = copy.copy(self._options) + options.filter = self.filter + options.errorHandler = self.errorHandler + fp = input.byteStream + if fp is None and options.systemId: + import urllib.request + fp = urllib.request.urlopen(input.systemId) + return self._parse_bytestream(fp, options) + + def parseWithContext(self, input, cnode, action): + if action not in self._legal_actions: + raise ValueError("not a legal action") + raise NotImplementedError("Haven't written this yet...") + + def _parse_bytestream(self, stream, options): + import xml.dom.expatbuilder + builder = xml.dom.expatbuilder.makeBuilder(options) + return builder.parseFile(stream) + + +def _name_xform(name): + return name.lower().replace('-', '_') + + +class DOMEntityResolver(object): + __slots__ = '_opener', + + def resolveEntity(self, publicId, systemId): + assert systemId is not None + source = DOMInputSource() + source.publicId = publicId + 
source.systemId = systemId + source.byteStream = self._get_opener().open(systemId) + + # determine the encoding if the transport provided it + source.encoding = self._guess_media_encoding(source) + + # determine the base URI is we can + import posixpath, urllib.parse + parts = urllib.parse.urlparse(systemId) + scheme, netloc, path, params, query, fragment = parts + # XXX should we check the scheme here as well? + if path and not path.endswith("/"): + path = posixpath.dirname(path) + "/" + parts = scheme, netloc, path, params, query, fragment + source.baseURI = urllib.parse.urlunparse(parts) + + return source + + def _get_opener(self): + try: + return self._opener + except AttributeError: + self._opener = self._create_opener() + return self._opener + + def _create_opener(self): + import urllib.request + return urllib.request.build_opener() + + def _guess_media_encoding(self, source): + info = source.byteStream.info() + if "Content-Type" in info: + for param in info.getplist(): + if param.startswith("charset="): + return param.split("=", 1)[1].lower() + + +class DOMInputSource(object): + __slots__ = ('byteStream', 'characterStream', 'stringData', + 'encoding', 'publicId', 'systemId', 'baseURI') + + def __init__(self): + self.byteStream = None + self.characterStream = None + self.stringData = None + self.encoding = None + self.publicId = None + self.systemId = None + self.baseURI = None + + def _get_byteStream(self): + return self.byteStream + def _set_byteStream(self, byteStream): + self.byteStream = byteStream + + def _get_characterStream(self): + return self.characterStream + def _set_characterStream(self, characterStream): + self.characterStream = characterStream + + def _get_stringData(self): + return self.stringData + def _set_stringData(self, data): + self.stringData = data + + def _get_encoding(self): + return self.encoding + def _set_encoding(self, encoding): + self.encoding = encoding + + def _get_publicId(self): + return self.publicId + def _set_publicId(self, publicId): + self.publicId = publicId + + def _get_systemId(self): + return self.systemId + def _set_systemId(self, systemId): + self.systemId = systemId + + def _get_baseURI(self): + return self.baseURI + def _set_baseURI(self, uri): + self.baseURI = uri + + +class DOMBuilderFilter: + """Element filter which can be used to tailor construction of + a DOM instance. + """ + + # There's really no need for this class; concrete implementations + # should just implement the endElement() and startElement() + # methods as appropriate. Using this makes it easy to only + # implement one of them. 
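A minimal sketch of such a concrete filter, relying on the FILTER_* constants and whatToShow attribute defined just below; the class name is invented for illustration:

    from xml.dom.NodeFilter import NodeFilter
    from xml.dom.xmlbuilder import DOMBuilder, DOMBuilderFilter

    class DropComments(DOMBuilderFilter):
        whatToShow = NodeFilter.SHOW_COMMENT    # only comment nodes reach acceptNode

        def acceptNode(self, node):
            return self.FILTER_REJECT           # remove every comment from the tree

    builder = DOMBuilder()
    builder.filter = DropComments()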
+ + FILTER_ACCEPT = 1 + FILTER_REJECT = 2 + FILTER_SKIP = 3 + FILTER_INTERRUPT = 4 + + whatToShow = NodeFilter.SHOW_ALL + + def _get_whatToShow(self): + return self.whatToShow + + def acceptNode(self, element): + return self.FILTER_ACCEPT + + def startContainer(self, element): + return self.FILTER_ACCEPT + +del NodeFilter + + +class _AsyncDeprecatedProperty: + def warn(self, cls): + clsname = cls.__name__ + warnings.warn( + "{cls}.async is deprecated; use {cls}.async_".format(cls=clsname), + DeprecationWarning) + + def __get__(self, instance, cls): + self.warn(cls) + if instance is not None: + return instance.async_ + return False + + def __set__(self, instance, value): + self.warn(type(instance)) + setattr(instance, 'async_', value) + + +class DocumentLS: + """Mixin to create documents that conform to the load/save spec.""" + + async_ = False + locals()['async'] = _AsyncDeprecatedProperty() # Avoid DeprecationWarning + + def _get_async(self): + return False + + def _set_async(self, flag): + if flag: + raise xml.dom.NotSupportedErr( + "asynchronous document loading is not supported") + + def abort(self): + # What does it mean to "clear" a document? Does the + # documentElement disappear? + raise NotImplementedError( + "haven't figured out what this means yet") + + def load(self, uri): + raise NotImplementedError("haven't written this yet") + + def loadXML(self, source): + raise NotImplementedError("haven't written this yet") + + def saveXML(self, snode): + if snode is None: + snode = self + elif snode.ownerDocument is not self: + raise xml.dom.WrongDocumentErr() + return snode.toxml() + + +del _AsyncDeprecatedProperty + + +class DOMImplementationLS: + MODE_SYNCHRONOUS = 1 + MODE_ASYNCHRONOUS = 2 + + def createDOMBuilder(self, mode, schemaType): + if schemaType is not None: + raise xml.dom.NotSupportedErr( + "schemaType not yet supported") + if mode == self.MODE_SYNCHRONOUS: + return DOMBuilder() + if mode == self.MODE_ASYNCHRONOUS: + raise xml.dom.NotSupportedErr( + "asynchronous builders are not supported") + raise ValueError("unknown value for mode") + + def createDOMWriter(self): + raise NotImplementedError( + "the writer interface hasn't been written yet!") + + def createDOMInputSource(self): + return DOMInputSource() diff --git a/modules/language/python/module/xml/dom/NodeFilter.py~ b/modules/language/python/module/xml/dom/NodeFilter.py~ new file mode 100644 index 0000000..640e0bf --- /dev/null +++ b/modules/language/python/module/xml/dom/NodeFilter.py~ @@ -0,0 +1,27 @@ +# This is the Python mapping for interface NodeFilter from +# DOM2-Traversal-Range. It contains only constants. + +class NodeFilter: + """ + This is the DOM2 NodeFilter interface. It contains only constants. 
+ """ + FILTER_ACCEPT = 1 + FILTER_REJECT = 2 + FILTER_SKIP = 3 + + SHOW_ALL = 0xFFFFFFFF + SHOW_ELEMENT = 0x00000001 + SHOW_ATTRIBUTE = 0x00000002 + SHOW_TEXT = 0x00000004 + SHOW_CDATA_SECTION = 0x00000008 + SHOW_ENTITY_REFERENCE = 0x00000010 + SHOW_ENTITY = 0x00000020 + SHOW_PROCESSING_INSTRUCTION = 0x00000040 + SHOW_COMMENT = 0x00000080 + SHOW_DOCUMENT = 0x00000100 + SHOW_DOCUMENT_TYPE = 0x00000200 + SHOW_DOCUMENT_FRAGMENT = 0x00000400 + SHOW_NOTATION = 0x00000800 + + def acceptNode(self, node): + raise NotImplementedError diff --git a/modules/language/python/module/xml/dom/__pycache__/NodeFilter.cpython-36.pyc b/modules/language/python/module/xml/dom/__pycache__/NodeFilter.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..7c51e14 --- /dev/null +++ b/modules/language/python/module/xml/dom/__pycache__/NodeFilter.cpython-36.pyc diff --git a/modules/language/python/module/xml/dom/__pycache__/__init__.cpython-36.pyc b/modules/language/python/module/xml/dom/__pycache__/__init__.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..a2e0a09 --- /dev/null +++ b/modules/language/python/module/xml/dom/__pycache__/__init__.cpython-36.pyc diff --git a/modules/language/python/module/xml/dom/__pycache__/domreg.cpython-36.pyc b/modules/language/python/module/xml/dom/__pycache__/domreg.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..34b5fa7 --- /dev/null +++ b/modules/language/python/module/xml/dom/__pycache__/domreg.cpython-36.pyc diff --git a/modules/language/python/module/xml/dom/__pycache__/expatbuilder.cpython-36.pyc b/modules/language/python/module/xml/dom/__pycache__/expatbuilder.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..67f963c --- /dev/null +++ b/modules/language/python/module/xml/dom/__pycache__/expatbuilder.cpython-36.pyc diff --git a/modules/language/python/module/xml/dom/__pycache__/minicompat.cpython-36.pyc b/modules/language/python/module/xml/dom/__pycache__/minicompat.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..1fc7a36 --- /dev/null +++ b/modules/language/python/module/xml/dom/__pycache__/minicompat.cpython-36.pyc diff --git a/modules/language/python/module/xml/dom/__pycache__/minidom.cpython-36.pyc b/modules/language/python/module/xml/dom/__pycache__/minidom.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..2155a98 --- /dev/null +++ b/modules/language/python/module/xml/dom/__pycache__/minidom.cpython-36.pyc diff --git a/modules/language/python/module/xml/dom/__pycache__/pulldom.cpython-36.pyc b/modules/language/python/module/xml/dom/__pycache__/pulldom.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..a3fec5e --- /dev/null +++ b/modules/language/python/module/xml/dom/__pycache__/pulldom.cpython-36.pyc diff --git a/modules/language/python/module/xml/dom/__pycache__/xmlbuilder.cpython-36.pyc b/modules/language/python/module/xml/dom/__pycache__/xmlbuilder.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..ed69780 --- /dev/null +++ b/modules/language/python/module/xml/dom/__pycache__/xmlbuilder.cpython-36.pyc diff --git a/modules/language/python/module/xml/dom/domreg.py~ b/modules/language/python/module/xml/dom/domreg.py~ new file mode 100644 index 0000000..8c3d901 --- /dev/null +++ b/modules/language/python/module/xml/dom/domreg.py~ @@ -0,0 +1,97 @@ +"""Registration facilities for DOM. This module should not be used +directly. 
Instead, the functions getDOMImplementation and +registerDOMImplementation should be imported from xml.dom.""" + +# This is a list of well-known implementations. Well-known names +# should be published by posting to xml-sig@python.org, and are +# subsequently recorded in this file. + +well_known_implementations = { + 'minidom':'xml.dom.minidom', + '4DOM': 'xml.dom.DOMImplementation', + } + +# DOM implementations not officially registered should register +# themselves with their + +registered = {} + +def registerDOMImplementation(name, factory): + """registerDOMImplementation(name, factory) + + Register the factory function with the name. The factory function + should return an object which implements the DOMImplementation + interface. The factory function can either return the same object, + or a new one (e.g. if that implementation supports some + customization).""" + + registered[name] = factory + +def _good_enough(dom, features): + "_good_enough(dom, features) -> Return 1 if the dom offers the features" + for f,v in features: + if not dom.hasFeature(f,v): + return 0 + return 1 + +def getDOMImplementation(name=None, features=()): + """getDOMImplementation(name = None, features = ()) -> DOM implementation. + + Return a suitable DOM implementation. The name is either + well-known, the module name of a DOM implementation, or None. If + it is not None, imports the corresponding module and returns + DOMImplementation object if the import succeeds. + + If name is not given, consider the available implementations to + find one with the required feature set. If no implementation can + be found, raise an ImportError. The features list must be a sequence + of (feature, version) pairs which are passed to hasFeature.""" + + import os + creator = None + mod = well_known_implementations.get(name) + if mod: + mod = __import__(mod, {}, {}, ['getDOMImplementation']) + return mod.getDOMImplementation() + elif name: + return registered[name]() + elif "PYTHON_DOM" in os.environ: + return getDOMImplementation(name = os.environ["PYTHON_DOM"]) + + # User did not specify a name, try implementations in arbitrary + # order, returning the one that has the required features + if isinstance(features, str): + features = _parse_feature_string(features) + for creator in registered.values(): + dom = creator() + if _good_enough(dom, features): + return dom + + for creator in well_known_implementations.keys(): + try: + dom = getDOMImplementation(name = creator) + except Exception: # typically ImportError, or AttributeError + continue + if _good_enough(dom, features): + return dom + + raise ImportError("no suitable DOM implementation found") + +def _parse_feature_string(s): + features = [] + parts = s.split() + i = 0 + length = len(parts) + while i < length: + feature = parts[i] + if feature[0] in "0123456789": + raise ValueError("bad feature name: %r" % (feature,)) + i = i + 1 + version = None + if i < length: + v = parts[i] + if v[0] in "0123456789": + i = i + 1 + version = v + features.append((feature, version)) + return tuple(features) diff --git a/modules/language/python/module/xml/dom/expatbuilder.py~ b/modules/language/python/module/xml/dom/expatbuilder.py~ new file mode 100644 index 0000000..2bd835b --- /dev/null +++ b/modules/language/python/module/xml/dom/expatbuilder.py~ @@ -0,0 +1,965 @@ +"""Facility to use the Expat parser to load a minidom instance +from a string or file. + +This avoids all the overhead of SAX and pulldom to gain performance. +""" + +# Warning! 
+# +# This module is tightly bound to the implementation details of the +# minidom DOM and can't be used with other DOM implementations. This +# is due, in part, to a lack of appropriate methods in the DOM (there is +# no way to create Entity and Notation nodes via the DOM Level 2 +# interface), and for performance. The latter is the cause of some fairly +# cryptic code. +# +# Performance hacks: +# +# - .character_data_handler() has an extra case in which continuing +# data is appended to an existing Text node; this can be a +# speedup since pyexpat can break up character data into multiple +# callbacks even though we set the buffer_text attribute on the +# parser. This also gives us the advantage that we don't need a +# separate normalization pass. +# +# - Determining that a node exists is done using an identity comparison +# with None rather than a truth test; this avoids searching for and +# calling any methods on the node object if it exists. (A rather +# nice speedup is achieved this way as well!) + +from xml.dom import xmlbuilder, minidom, Node +from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE +from xml.parsers import expat +from xml.dom.minidom import _append_child, _set_attribute_node +from xml.dom.NodeFilter import NodeFilter + +TEXT_NODE = Node.TEXT_NODE +CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE +DOCUMENT_NODE = Node.DOCUMENT_NODE + +FILTER_ACCEPT = xmlbuilder.DOMBuilderFilter.FILTER_ACCEPT +FILTER_REJECT = xmlbuilder.DOMBuilderFilter.FILTER_REJECT +FILTER_SKIP = xmlbuilder.DOMBuilderFilter.FILTER_SKIP +FILTER_INTERRUPT = xmlbuilder.DOMBuilderFilter.FILTER_INTERRUPT + +theDOMImplementation = minidom.getDOMImplementation() + +# Expat typename -> TypeInfo +_typeinfo_map = { + "CDATA": minidom.TypeInfo(None, "cdata"), + "ENUM": minidom.TypeInfo(None, "enumeration"), + "ENTITY": minidom.TypeInfo(None, "entity"), + "ENTITIES": minidom.TypeInfo(None, "entities"), + "ID": minidom.TypeInfo(None, "id"), + "IDREF": minidom.TypeInfo(None, "idref"), + "IDREFS": minidom.TypeInfo(None, "idrefs"), + "NMTOKEN": minidom.TypeInfo(None, "nmtoken"), + "NMTOKENS": minidom.TypeInfo(None, "nmtokens"), + } + +class ElementInfo(object): + __slots__ = '_attr_info', '_model', 'tagName' + + def __init__(self, tagName, model=None): + self.tagName = tagName + self._attr_info = [] + self._model = model + + def __getstate__(self): + return self._attr_info, self._model, self.tagName + + def __setstate__(self, state): + self._attr_info, self._model, self.tagName = state + + def getAttributeType(self, aname): + for info in self._attr_info: + if info[1] == aname: + t = info[-2] + if t[0] == "(": + return _typeinfo_map["ENUM"] + else: + return _typeinfo_map[info[-2]] + return minidom._no_type + + def getAttributeTypeNS(self, namespaceURI, localName): + return minidom._no_type + + def isElementContent(self): + if self._model: + type = self._model[0] + return type not in (expat.model.XML_CTYPE_ANY, + expat.model.XML_CTYPE_MIXED) + else: + return False + + def isEmpty(self): + if self._model: + return self._model[0] == expat.model.XML_CTYPE_EMPTY + else: + return False + + def isId(self, aname): + for info in self._attr_info: + if info[1] == aname: + return info[-2] == "ID" + return False + + def isIdNS(self, euri, ename, auri, aname): + # not sure this is meaningful + return self.isId((auri, aname)) + +def _intern(builder, s): + return builder._intern_setdefault(s, s) + +def _parse_ns_name(builder, name): + assert ' ' in name + parts = name.split(' ') + intern = builder._intern_setdefault + if 
len(parts) == 3: + uri, localname, prefix = parts + prefix = intern(prefix, prefix) + qname = "%s:%s" % (prefix, localname) + qname = intern(qname, qname) + localname = intern(localname, localname) + elif len(parts) == 2: + uri, localname = parts + prefix = EMPTY_PREFIX + qname = localname = intern(localname, localname) + else: + raise ValueError("Unsupported syntax: spaces in URIs not supported: %r" % name) + return intern(uri, uri), localname, prefix, qname + + +class ExpatBuilder: + """Document builder that uses Expat to build a ParsedXML.DOM document + instance.""" + + def __init__(self, options=None): + if options is None: + options = xmlbuilder.Options() + self._options = options + if self._options.filter is not None: + self._filter = FilterVisibilityController(self._options.filter) + else: + self._filter = None + # This *really* doesn't do anything in this case, so + # override it with something fast & minimal. + self._finish_start_element = id + self._parser = None + self.reset() + + def createParser(self): + """Create a new parser object.""" + return expat.ParserCreate() + + def getParser(self): + """Return the parser object, creating a new one if needed.""" + if not self._parser: + self._parser = self.createParser() + self._intern_setdefault = self._parser.intern.setdefault + self._parser.buffer_text = True + self._parser.ordered_attributes = True + self._parser.specified_attributes = True + self.install(self._parser) + return self._parser + + def reset(self): + """Free all data structures used during DOM construction.""" + self.document = theDOMImplementation.createDocument( + EMPTY_NAMESPACE, None, None) + self.curNode = self.document + self._elem_info = self.document._elem_info + self._cdata = False + + def install(self, parser): + """Install the callbacks needed to build the DOM into the parser.""" + # This creates circular references! 
+ parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler + parser.StartElementHandler = self.first_element_handler + parser.EndElementHandler = self.end_element_handler + parser.ProcessingInstructionHandler = self.pi_handler + if self._options.entities: + parser.EntityDeclHandler = self.entity_decl_handler + parser.NotationDeclHandler = self.notation_decl_handler + if self._options.comments: + parser.CommentHandler = self.comment_handler + if self._options.cdata_sections: + parser.StartCdataSectionHandler = self.start_cdata_section_handler + parser.EndCdataSectionHandler = self.end_cdata_section_handler + parser.CharacterDataHandler = self.character_data_handler_cdata + else: + parser.CharacterDataHandler = self.character_data_handler + parser.ExternalEntityRefHandler = self.external_entity_ref_handler + parser.XmlDeclHandler = self.xml_decl_handler + parser.ElementDeclHandler = self.element_decl_handler + parser.AttlistDeclHandler = self.attlist_decl_handler + + def parseFile(self, file): + """Parse a document from a file object, returning the document + node.""" + parser = self.getParser() + first_buffer = True + try: + while 1: + buffer = file.read(16*1024) + if not buffer: + break + parser.Parse(buffer, 0) + if first_buffer and self.document.documentElement: + self._setup_subset(buffer) + first_buffer = False + parser.Parse("", True) + except ParseEscape: + pass + doc = self.document + self.reset() + self._parser = None + return doc + + def parseString(self, string): + """Parse a document from a string, returning the document node.""" + parser = self.getParser() + try: + parser.Parse(string, True) + self._setup_subset(string) + except ParseEscape: + pass + doc = self.document + self.reset() + self._parser = None + return doc + + def _setup_subset(self, buffer): + """Load the internal subset if there might be one.""" + if self.document.doctype: + extractor = InternalSubsetExtractor() + extractor.parseString(buffer) + subset = extractor.getSubset() + self.document.doctype.internalSubset = subset + + def start_doctype_decl_handler(self, doctypeName, systemId, publicId, + has_internal_subset): + doctype = self.document.implementation.createDocumentType( + doctypeName, publicId, systemId) + doctype.ownerDocument = self.document + _append_child(self.document, doctype) + self.document.doctype = doctype + if self._filter and self._filter.acceptNode(doctype) == FILTER_REJECT: + self.document.doctype = None + del self.document.childNodes[-1] + doctype = None + self._parser.EntityDeclHandler = None + self._parser.NotationDeclHandler = None + if has_internal_subset: + if doctype is not None: + doctype.entities._seq = [] + doctype.notations._seq = [] + self._parser.CommentHandler = None + self._parser.ProcessingInstructionHandler = None + self._parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler + + def end_doctype_decl_handler(self): + if self._options.comments: + self._parser.CommentHandler = self.comment_handler + self._parser.ProcessingInstructionHandler = self.pi_handler + if not (self._elem_info or self._filter): + self._finish_end_element = id + + def pi_handler(self, target, data): + node = self.document.createProcessingInstruction(target, data) + _append_child(self.curNode, node) + if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: + self.curNode.removeChild(node) + + def character_data_handler_cdata(self, data): + childNodes = self.curNode.childNodes + if self._cdata: + if ( self._cdata_continue + and childNodes[-1].nodeType == CDATA_SECTION_NODE): + 
childNodes[-1].appendData(data) + return + node = self.document.createCDATASection(data) + self._cdata_continue = True + elif childNodes and childNodes[-1].nodeType == TEXT_NODE: + node = childNodes[-1] + value = node.data + data + node.data = value + return + else: + node = minidom.Text() + node.data = data + node.ownerDocument = self.document + _append_child(self.curNode, node) + + def character_data_handler(self, data): + childNodes = self.curNode.childNodes + if childNodes and childNodes[-1].nodeType == TEXT_NODE: + node = childNodes[-1] + node.data = node.data + data + return + node = minidom.Text() + node.data = node.data + data + node.ownerDocument = self.document + _append_child(self.curNode, node) + + def entity_decl_handler(self, entityName, is_parameter_entity, value, + base, systemId, publicId, notationName): + if is_parameter_entity: + # we don't care about parameter entities for the DOM + return + if not self._options.entities: + return + node = self.document._create_entity(entityName, publicId, + systemId, notationName) + if value is not None: + # internal entity + # node *should* be readonly, but we'll cheat + child = self.document.createTextNode(value) + node.childNodes.append(child) + self.document.doctype.entities._seq.append(node) + if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: + del self.document.doctype.entities._seq[-1] + + def notation_decl_handler(self, notationName, base, systemId, publicId): + node = self.document._create_notation(notationName, publicId, systemId) + self.document.doctype.notations._seq.append(node) + if self._filter and self._filter.acceptNode(node) == FILTER_ACCEPT: + del self.document.doctype.notations._seq[-1] + + def comment_handler(self, data): + node = self.document.createComment(data) + _append_child(self.curNode, node) + if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: + self.curNode.removeChild(node) + + def start_cdata_section_handler(self): + self._cdata = True + self._cdata_continue = False + + def end_cdata_section_handler(self): + self._cdata = False + self._cdata_continue = False + + def external_entity_ref_handler(self, context, base, systemId, publicId): + return 1 + + def first_element_handler(self, name, attributes): + if self._filter is None and not self._elem_info: + self._finish_end_element = id + self.getParser().StartElementHandler = self.start_element_handler + self.start_element_handler(name, attributes) + + def start_element_handler(self, name, attributes): + node = self.document.createElement(name) + _append_child(self.curNode, node) + self.curNode = node + + if attributes: + for i in range(0, len(attributes), 2): + a = minidom.Attr(attributes[i], EMPTY_NAMESPACE, + None, EMPTY_PREFIX) + value = attributes[i+1] + a.value = value + a.ownerDocument = self.document + _set_attribute_node(node, a) + + if node is not self.document.documentElement: + self._finish_start_element(node) + + def _finish_start_element(self, node): + if self._filter: + # To be general, we'd have to call isSameNode(), but this + # is sufficient for minidom: + if node is self.document.documentElement: + return + filt = self._filter.startContainer(node) + if filt == FILTER_REJECT: + # ignore this node & all descendents + Rejecter(self) + elif filt == FILTER_SKIP: + # ignore this node, but make it's children become + # children of the parent node + Skipper(self) + else: + return + self.curNode = node.parentNode + node.parentNode.removeChild(node) + node.unlink() + + # If this ever changes, 
Namespaces.end_element_handler() needs to + # be changed to match. + # + def end_element_handler(self, name): + curNode = self.curNode + self.curNode = curNode.parentNode + self._finish_end_element(curNode) + + def _finish_end_element(self, curNode): + info = self._elem_info.get(curNode.tagName) + if info: + self._handle_white_text_nodes(curNode, info) + if self._filter: + if curNode is self.document.documentElement: + return + if self._filter.acceptNode(curNode) == FILTER_REJECT: + self.curNode.removeChild(curNode) + curNode.unlink() + + def _handle_white_text_nodes(self, node, info): + if (self._options.whitespace_in_element_content + or not info.isElementContent()): + return + + # We have element type information and should remove ignorable + # whitespace; identify for text nodes which contain only + # whitespace. + L = [] + for child in node.childNodes: + if child.nodeType == TEXT_NODE and not child.data.strip(): + L.append(child) + + # Remove ignorable whitespace from the tree. + for child in L: + node.removeChild(child) + + def element_decl_handler(self, name, model): + info = self._elem_info.get(name) + if info is None: + self._elem_info[name] = ElementInfo(name, model) + else: + assert info._model is None + info._model = model + + def attlist_decl_handler(self, elem, name, type, default, required): + info = self._elem_info.get(elem) + if info is None: + info = ElementInfo(elem) + self._elem_info[elem] = info + info._attr_info.append( + [None, name, None, None, default, 0, type, required]) + + def xml_decl_handler(self, version, encoding, standalone): + self.document.version = version + self.document.encoding = encoding + # This is still a little ugly, thanks to the pyexpat API. ;-( + if standalone >= 0: + if standalone: + self.document.standalone = True + else: + self.document.standalone = False + + +# Don't include FILTER_INTERRUPT, since that's checked separately +# where allowed. 
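Before the controller itself, a short sketch of the interrupt path it implements: a filter that returns FILTER_INTERRUPT makes the controller raise ParseEscape, which parseFile()/parseString() swallow, so parsing simply stops with whatever has been built so far. The class name and markup below are invented for illustration:

    from xml.dom import expatbuilder
    from xml.dom.xmlbuilder import DOMBuilderFilter, Options

    class StopEarly(DOMBuilderFilter):
        def acceptNode(self, node):
            return self.FILTER_INTERRUPT    # abort at the first accepted node

    opts = Options()
    opts.filter = StopEarly()
    builder = expatbuilder.makeBuilder(opts)
    doc = builder.parseString("<root><a/><b/><c/></root>")  # stops after the first child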
+_ALLOWED_FILTER_RETURNS = (FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP) + +class FilterVisibilityController(object): + """Wrapper around a DOMBuilderFilter which implements the checks + to make the whatToShow filter attribute work.""" + + __slots__ = 'filter', + + def __init__(self, filter): + self.filter = filter + + def startContainer(self, node): + mask = self._nodetype_mask[node.nodeType] + if self.filter.whatToShow & mask: + val = self.filter.startContainer(node) + if val == FILTER_INTERRUPT: + raise ParseEscape + if val not in _ALLOWED_FILTER_RETURNS: + raise ValueError( + "startContainer() returned illegal value: " + repr(val)) + return val + else: + return FILTER_ACCEPT + + def acceptNode(self, node): + mask = self._nodetype_mask[node.nodeType] + if self.filter.whatToShow & mask: + val = self.filter.acceptNode(node) + if val == FILTER_INTERRUPT: + raise ParseEscape + if val == FILTER_SKIP: + # move all child nodes to the parent, and remove this node + parent = node.parentNode + for child in node.childNodes[:]: + parent.appendChild(child) + # node is handled by the caller + return FILTER_REJECT + if val not in _ALLOWED_FILTER_RETURNS: + raise ValueError( + "acceptNode() returned illegal value: " + repr(val)) + return val + else: + return FILTER_ACCEPT + + _nodetype_mask = { + Node.ELEMENT_NODE: NodeFilter.SHOW_ELEMENT, + Node.ATTRIBUTE_NODE: NodeFilter.SHOW_ATTRIBUTE, + Node.TEXT_NODE: NodeFilter.SHOW_TEXT, + Node.CDATA_SECTION_NODE: NodeFilter.SHOW_CDATA_SECTION, + Node.ENTITY_REFERENCE_NODE: NodeFilter.SHOW_ENTITY_REFERENCE, + Node.ENTITY_NODE: NodeFilter.SHOW_ENTITY, + Node.PROCESSING_INSTRUCTION_NODE: NodeFilter.SHOW_PROCESSING_INSTRUCTION, + Node.COMMENT_NODE: NodeFilter.SHOW_COMMENT, + Node.DOCUMENT_NODE: NodeFilter.SHOW_DOCUMENT, + Node.DOCUMENT_TYPE_NODE: NodeFilter.SHOW_DOCUMENT_TYPE, + Node.DOCUMENT_FRAGMENT_NODE: NodeFilter.SHOW_DOCUMENT_FRAGMENT, + Node.NOTATION_NODE: NodeFilter.SHOW_NOTATION, + } + + +class FilterCrutch(object): + __slots__ = '_builder', '_level', '_old_start', '_old_end' + + def __init__(self, builder): + self._level = 0 + self._builder = builder + parser = builder._parser + self._old_start = parser.StartElementHandler + self._old_end = parser.EndElementHandler + parser.StartElementHandler = self.start_element_handler + parser.EndElementHandler = self.end_element_handler + +class Rejecter(FilterCrutch): + __slots__ = () + + def __init__(self, builder): + FilterCrutch.__init__(self, builder) + parser = builder._parser + for name in ("ProcessingInstructionHandler", + "CommentHandler", + "CharacterDataHandler", + "StartCdataSectionHandler", + "EndCdataSectionHandler", + "ExternalEntityRefHandler", + ): + setattr(parser, name, None) + + def start_element_handler(self, *args): + self._level = self._level + 1 + + def end_element_handler(self, *args): + if self._level == 0: + # restore the old handlers + parser = self._builder._parser + self._builder.install(parser) + parser.StartElementHandler = self._old_start + parser.EndElementHandler = self._old_end + else: + self._level = self._level - 1 + +class Skipper(FilterCrutch): + __slots__ = () + + def start_element_handler(self, *args): + node = self._builder.curNode + self._old_start(*args) + if self._builder.curNode is not node: + self._level = self._level + 1 + + def end_element_handler(self, *args): + if self._level == 0: + # We're popping back out of the node we're skipping, so we + # shouldn't need to do anything but reset the handlers. 
+ self._builder._parser.StartElementHandler = self._old_start + self._builder._parser.EndElementHandler = self._old_end + self._builder = None + else: + self._level = self._level - 1 + self._old_end(*args) + + +# framework document used by the fragment builder. +# Takes a string for the doctype, subset string, and namespace attrs string. + +_FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID = \ + "http://xml.python.org/entities/fragment-builder/internal" + +_FRAGMENT_BUILDER_TEMPLATE = ( + '''\ +<!DOCTYPE wrapper + %%s [ + <!ENTITY fragment-builder-internal + SYSTEM "%s"> +%%s +]> +<wrapper %%s +>&fragment-builder-internal;</wrapper>''' + % _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID) + + +class FragmentBuilder(ExpatBuilder): + """Builder which constructs document fragments given XML source + text and a context node. + + The context node is expected to provide information about the + namespace declarations which are in scope at the start of the + fragment. + """ + + def __init__(self, context, options=None): + if context.nodeType == DOCUMENT_NODE: + self.originalDocument = context + self.context = context + else: + self.originalDocument = context.ownerDocument + self.context = context + ExpatBuilder.__init__(self, options) + + def reset(self): + ExpatBuilder.reset(self) + self.fragment = None + + def parseFile(self, file): + """Parse a document fragment from a file object, returning the + fragment node.""" + return self.parseString(file.read()) + + def parseString(self, string): + """Parse a document fragment from a string, returning the + fragment node.""" + self._source = string + parser = self.getParser() + doctype = self.originalDocument.doctype + ident = "" + if doctype: + subset = doctype.internalSubset or self._getDeclarations() + if doctype.publicId: + ident = ('PUBLIC "%s" "%s"' + % (doctype.publicId, doctype.systemId)) + elif doctype.systemId: + ident = 'SYSTEM "%s"' % doctype.systemId + else: + subset = "" + nsattrs = self._getNSattrs() # get ns decls from node's ancestors + document = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs) + try: + parser.Parse(document, 1) + except: + self.reset() + raise + fragment = self.fragment + self.reset() +## self._parser = None + return fragment + + def _getDeclarations(self): + """Re-create the internal subset from the DocumentType node. + + This is only needed if we don't already have the + internalSubset as a string. + """ + doctype = self.context.ownerDocument.doctype + s = "" + if doctype: + for i in range(doctype.notations.length): + notation = doctype.notations.item(i) + if s: + s = s + "\n " + s = "%s<!NOTATION %s" % (s, notation.nodeName) + if notation.publicId: + s = '%s PUBLIC "%s"\n "%s">' \ + % (s, notation.publicId, notation.systemId) + else: + s = '%s SYSTEM "%s">' % (s, notation.systemId) + for i in range(doctype.entities.length): + entity = doctype.entities.item(i) + if s: + s = s + "\n " + s = "%s<!ENTITY %s" % (s, entity.nodeName) + if entity.publicId: + s = '%s PUBLIC "%s"\n "%s"' \ + % (s, entity.publicId, entity.systemId) + elif entity.systemId: + s = '%s SYSTEM "%s"' % (s, entity.systemId) + else: + s = '%s "%s"' % (s, entity.firstChild.data) + if entity.notationName: + s = "%s NOTATION %s" % (s, entity.notationName) + s = s + ">" + return s + + def _getNSattrs(self): + return "" + + def external_entity_ref_handler(self, context, base, systemId, publicId): + if systemId == _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID: + # this entref is the one that we made to put the subtree + # in; all of our given input is parsed in here. 
+ old_document = self.document + old_cur_node = self.curNode + parser = self._parser.ExternalEntityParserCreate(context) + # put the real document back, parse into the fragment to return + self.document = self.originalDocument + self.fragment = self.document.createDocumentFragment() + self.curNode = self.fragment + try: + parser.Parse(self._source, 1) + finally: + self.curNode = old_cur_node + self.document = old_document + self._source = None + return -1 + else: + return ExpatBuilder.external_entity_ref_handler( + self, context, base, systemId, publicId) + + +class Namespaces: + """Mix-in class for builders; adds support for namespaces.""" + + def _initNamespaces(self): + # list of (prefix, uri) ns declarations. Namespace attrs are + # constructed from this and added to the element's attrs. + self._ns_ordered_prefixes = [] + + def createParser(self): + """Create a new namespace-handling parser.""" + parser = expat.ParserCreate(namespace_separator=" ") + parser.namespace_prefixes = True + return parser + + def install(self, parser): + """Insert the namespace-handlers onto the parser.""" + ExpatBuilder.install(self, parser) + if self._options.namespace_declarations: + parser.StartNamespaceDeclHandler = ( + self.start_namespace_decl_handler) + + def start_namespace_decl_handler(self, prefix, uri): + """Push this namespace declaration on our storage.""" + self._ns_ordered_prefixes.append((prefix, uri)) + + def start_element_handler(self, name, attributes): + if ' ' in name: + uri, localname, prefix, qname = _parse_ns_name(self, name) + else: + uri = EMPTY_NAMESPACE + qname = name + localname = None + prefix = EMPTY_PREFIX + node = minidom.Element(qname, uri, prefix, localname) + node.ownerDocument = self.document + _append_child(self.curNode, node) + self.curNode = node + + if self._ns_ordered_prefixes: + for prefix, uri in self._ns_ordered_prefixes: + if prefix: + a = minidom.Attr(_intern(self, 'xmlns:' + prefix), + XMLNS_NAMESPACE, prefix, "xmlns") + else: + a = minidom.Attr("xmlns", XMLNS_NAMESPACE, + "xmlns", EMPTY_PREFIX) + a.value = uri + a.ownerDocument = self.document + _set_attribute_node(node, a) + del self._ns_ordered_prefixes[:] + + if attributes: + node._ensure_attributes() + _attrs = node._attrs + _attrsNS = node._attrsNS + for i in range(0, len(attributes), 2): + aname = attributes[i] + value = attributes[i+1] + if ' ' in aname: + uri, localname, prefix, qname = _parse_ns_name(self, aname) + a = minidom.Attr(qname, uri, localname, prefix) + _attrs[qname] = a + _attrsNS[(uri, localname)] = a + else: + a = minidom.Attr(aname, EMPTY_NAMESPACE, + aname, EMPTY_PREFIX) + _attrs[aname] = a + _attrsNS[(EMPTY_NAMESPACE, aname)] = a + a.ownerDocument = self.document + a.value = value + a.ownerElement = node + + if __debug__: + # This only adds some asserts to the original + # end_element_handler(), so we only define this when -O is not + # used. If changing one, be sure to check the other to see if + # it needs to be changed as well. + # + def end_element_handler(self, name): + curNode = self.curNode + if ' ' in name: + uri, localname, prefix, qname = _parse_ns_name(self, name) + assert (curNode.namespaceURI == uri + and curNode.localName == localname + and curNode.prefix == prefix), \ + "element stack messed up! 
(namespace)" + else: + assert curNode.nodeName == name, \ + "element stack messed up - bad nodeName" + assert curNode.namespaceURI == EMPTY_NAMESPACE, \ + "element stack messed up - bad namespaceURI" + self.curNode = curNode.parentNode + self._finish_end_element(curNode) + + +class ExpatBuilderNS(Namespaces, ExpatBuilder): + """Document builder that supports namespaces.""" + + def reset(self): + ExpatBuilder.reset(self) + self._initNamespaces() + + +class FragmentBuilderNS(Namespaces, FragmentBuilder): + """Fragment builder that supports namespaces.""" + + def reset(self): + FragmentBuilder.reset(self) + self._initNamespaces() + + def _getNSattrs(self): + """Return string of namespace attributes from this element and + ancestors.""" + # XXX This needs to be re-written to walk the ancestors of the + # context to build up the namespace information from + # declarations, elements, and attributes found in context. + # Otherwise we have to store a bunch more data on the DOM + # (though that *might* be more reliable -- not clear). + attrs = "" + context = self.context + L = [] + while context: + if hasattr(context, '_ns_prefix_uri'): + for prefix, uri in context._ns_prefix_uri.items(): + # add every new NS decl from context to L and attrs string + if prefix in L: + continue + L.append(prefix) + if prefix: + declname = "xmlns:" + prefix + else: + declname = "xmlns" + if attrs: + attrs = "%s\n %s='%s'" % (attrs, declname, uri) + else: + attrs = " %s='%s'" % (declname, uri) + context = context.parentNode + return attrs + + +class ParseEscape(Exception): + """Exception raised to short-circuit parsing in InternalSubsetExtractor.""" + pass + +class InternalSubsetExtractor(ExpatBuilder): + """XML processor which can rip out the internal document type subset.""" + + subset = None + + def getSubset(self): + """Return the internal subset as a string.""" + return self.subset + + def parseFile(self, file): + try: + ExpatBuilder.parseFile(self, file) + except ParseEscape: + pass + + def parseString(self, string): + try: + ExpatBuilder.parseString(self, string) + except ParseEscape: + pass + + def install(self, parser): + parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler + parser.StartElementHandler = self.start_element_handler + + def start_doctype_decl_handler(self, name, publicId, systemId, + has_internal_subset): + if has_internal_subset: + parser = self.getParser() + self.subset = [] + parser.DefaultHandler = self.subset.append + parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler + else: + raise ParseEscape() + + def end_doctype_decl_handler(self): + s = ''.join(self.subset).replace('\r\n', '\n').replace('\r', '\n') + self.subset = s + raise ParseEscape() + + def start_element_handler(self, name, attrs): + raise ParseEscape() + + +def parse(file, namespaces=True): + """Parse a document, returning the resulting Document node. + + 'file' may be either a file name or an open file object. + """ + if namespaces: + builder = ExpatBuilderNS() + else: + builder = ExpatBuilder() + + if isinstance(file, str): + with open(file, 'rb') as fp: + result = builder.parseFile(fp) + else: + result = builder.parseFile(file) + return result + + +def parseString(string, namespaces=True): + """Parse a document from a string, returning the resulting + Document node. 
+ """ + if namespaces: + builder = ExpatBuilderNS() + else: + builder = ExpatBuilder() + return builder.parseString(string) + + +def parseFragment(file, context, namespaces=True): + """Parse a fragment of a document, given the context from which it + was originally extracted. context should be the parent of the + node(s) which are in the fragment. + + 'file' may be either a file name or an open file object. + """ + if namespaces: + builder = FragmentBuilderNS(context) + else: + builder = FragmentBuilder(context) + + if isinstance(file, str): + with open(file, 'rb') as fp: + result = builder.parseFile(fp) + else: + result = builder.parseFile(file) + return result + + +def parseFragmentString(string, context, namespaces=True): + """Parse a fragment of a document from a string, given the context + from which it was originally extracted. context should be the + parent of the node(s) which are in the fragment. + """ + if namespaces: + builder = FragmentBuilderNS(context) + else: + builder = FragmentBuilder(context) + return builder.parseString(string) + + +def makeBuilder(options): + """Create a builder based on an Options object.""" + if options.namespaces: + return ExpatBuilderNS(options) + else: + return ExpatBuilder(options) diff --git a/modules/language/python/module/xml/dom/minicompat.py~ b/modules/language/python/module/xml/dom/minicompat.py~ new file mode 100644 index 0000000..5d6fae9 --- /dev/null +++ b/modules/language/python/module/xml/dom/minicompat.py~ @@ -0,0 +1,109 @@ +"""Python version compatibility support for minidom. + +This module contains internal implementation details and +should not be imported; use xml.dom.minidom instead. +""" + +# This module should only be imported using "import *". +# +# The following names are defined: +# +# NodeList -- lightest possible NodeList implementation +# +# EmptyNodeList -- lightest possible NodeList that is guaranteed to +# remain empty (immutable) +# +# StringTypes -- tuple of defined string types +# +# defproperty -- function used in conjunction with GetattrMagic; +# using these together is needed to make them work +# as efficiently as possible in both Python 2.2+ +# and older versions. For example: +# +# class MyClass(GetattrMagic): +# def _get_myattr(self): +# return something +# +# defproperty(MyClass, "myattr", +# "return some value") +# +# For Python 2.2 and newer, this will construct a +# property object on the class, which avoids +# needing to override __getattr__(). It will only +# work for read-only attributes. +# +# For older versions of Python, inheriting from +# GetattrMagic will use the traditional +# __getattr__() hackery to achieve the same effect, +# but less efficiently. +# +# defproperty() should be used for each version of +# the relevant _get_<property>() function. 
+ +__all__ = ["NodeList", "EmptyNodeList", "StringTypes", "defproperty"] + +import xml.dom + +StringTypes = (str,) + + +class NodeList(list): + __slots__ = () + + def item(self, index): + if 0 <= index < len(self): + return self[index] + + def _get_length(self): + return len(self) + + def _set_length(self, value): + raise xml.dom.NoModificationAllowedErr( + "attempt to modify read-only attribute 'length'") + + length = property(_get_length, _set_length, + doc="The number of nodes in the NodeList.") + + # For backward compatibility + def __setstate__(self, state): + if state is None: + state = [] + self[:] = state + + +class EmptyNodeList(tuple): + __slots__ = () + + def __add__(self, other): + NL = NodeList() + NL.extend(other) + return NL + + def __radd__(self, other): + NL = NodeList() + NL.extend(other) + return NL + + def item(self, index): + return None + + def _get_length(self): + return 0 + + def _set_length(self, value): + raise xml.dom.NoModificationAllowedErr( + "attempt to modify read-only attribute 'length'") + + length = property(_get_length, _set_length, + doc="The number of nodes in the NodeList.") + + +def defproperty(klass, name, doc): + get = getattr(klass, ("_get_" + name)) + def set(self, value, name=name): + raise xml.dom.NoModificationAllowedErr( + "attempt to modify read-only attribute " + repr(name)) + assert not hasattr(klass, "_set_" + name), \ + "expected not to find _set_" + name + prop = property(get, set, doc=doc) + setattr(klass, name, prop) diff --git a/modules/language/python/module/xml/dom/pulldom.py~ b/modules/language/python/module/xml/dom/pulldom.py~ new file mode 100644 index 0000000..43504f7 --- /dev/null +++ b/modules/language/python/module/xml/dom/pulldom.py~ @@ -0,0 +1,342 @@ +import xml.sax +import xml.sax.handler + +START_ELEMENT = "START_ELEMENT" +END_ELEMENT = "END_ELEMENT" +COMMENT = "COMMENT" +START_DOCUMENT = "START_DOCUMENT" +END_DOCUMENT = "END_DOCUMENT" +PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION" +IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE" +CHARACTERS = "CHARACTERS" + +class PullDOM(xml.sax.ContentHandler): + _locator = None + document = None + + def __init__(self, documentFactory=None): + from xml.dom import XML_NAMESPACE + self.documentFactory = documentFactory + self.firstEvent = [None, None] + self.lastEvent = self.firstEvent + self.elementStack = [] + self.push = self.elementStack.append + try: + self.pop = self.elementStack.pop + except AttributeError: + # use class' pop instead + pass + self._ns_contexts = [{XML_NAMESPACE:'xml'}] # contains uri -> prefix dicts + self._current_context = self._ns_contexts[-1] + self.pending_events = [] + + def pop(self): + result = self.elementStack[-1] + del self.elementStack[-1] + return result + + def setDocumentLocator(self, locator): + self._locator = locator + + def startPrefixMapping(self, prefix, uri): + if not hasattr(self, '_xmlns_attrs'): + self._xmlns_attrs = [] + self._xmlns_attrs.append((prefix or 'xmlns', uri)) + self._ns_contexts.append(self._current_context.copy()) + self._current_context[uri] = prefix or None + + def endPrefixMapping(self, prefix): + self._current_context = self._ns_contexts.pop() + + def startElementNS(self, name, tagName , attrs): + # Retrieve xml namespace declaration attributes. 
+ xmlns_uri = 'http://www.w3.org/2000/xmlns/' + xmlns_attrs = getattr(self, '_xmlns_attrs', None) + if xmlns_attrs is not None: + for aname, value in xmlns_attrs: + attrs._attrs[(xmlns_uri, aname)] = value + self._xmlns_attrs = [] + uri, localname = name + if uri: + # When using namespaces, the reader may or may not + # provide us with the original name. If not, create + # *a* valid tagName from the current context. + if tagName is None: + prefix = self._current_context[uri] + if prefix: + tagName = prefix + ":" + localname + else: + tagName = localname + if self.document: + node = self.document.createElementNS(uri, tagName) + else: + node = self.buildDocument(uri, tagName) + else: + # When the tagname is not prefixed, it just appears as + # localname + if self.document: + node = self.document.createElement(localname) + else: + node = self.buildDocument(None, localname) + + for aname,value in attrs.items(): + a_uri, a_localname = aname + if a_uri == xmlns_uri: + if a_localname == 'xmlns': + qname = a_localname + else: + qname = 'xmlns:' + a_localname + attr = self.document.createAttributeNS(a_uri, qname) + node.setAttributeNodeNS(attr) + elif a_uri: + prefix = self._current_context[a_uri] + if prefix: + qname = prefix + ":" + a_localname + else: + qname = a_localname + attr = self.document.createAttributeNS(a_uri, qname) + node.setAttributeNodeNS(attr) + else: + attr = self.document.createAttribute(a_localname) + node.setAttributeNode(attr) + attr.value = value + + self.lastEvent[1] = [(START_ELEMENT, node), None] + self.lastEvent = self.lastEvent[1] + self.push(node) + + def endElementNS(self, name, tagName): + self.lastEvent[1] = [(END_ELEMENT, self.pop()), None] + self.lastEvent = self.lastEvent[1] + + def startElement(self, name, attrs): + if self.document: + node = self.document.createElement(name) + else: + node = self.buildDocument(None, name) + + for aname,value in attrs.items(): + attr = self.document.createAttribute(aname) + attr.value = value + node.setAttributeNode(attr) + + self.lastEvent[1] = [(START_ELEMENT, node), None] + self.lastEvent = self.lastEvent[1] + self.push(node) + + def endElement(self, name): + self.lastEvent[1] = [(END_ELEMENT, self.pop()), None] + self.lastEvent = self.lastEvent[1] + + def comment(self, s): + if self.document: + node = self.document.createComment(s) + self.lastEvent[1] = [(COMMENT, node), None] + self.lastEvent = self.lastEvent[1] + else: + event = [(COMMENT, s), None] + self.pending_events.append(event) + + def processingInstruction(self, target, data): + if self.document: + node = self.document.createProcessingInstruction(target, data) + self.lastEvent[1] = [(PROCESSING_INSTRUCTION, node), None] + self.lastEvent = self.lastEvent[1] + else: + event = [(PROCESSING_INSTRUCTION, target, data), None] + self.pending_events.append(event) + + def ignorableWhitespace(self, chars): + node = self.document.createTextNode(chars) + self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None] + self.lastEvent = self.lastEvent[1] + + def characters(self, chars): + node = self.document.createTextNode(chars) + self.lastEvent[1] = [(CHARACTERS, node), None] + self.lastEvent = self.lastEvent[1] + + def startDocument(self): + if self.documentFactory is None: + import xml.dom.minidom + self.documentFactory = xml.dom.minidom.Document.implementation + + def buildDocument(self, uri, tagname): + # Can't do that in startDocument, since we need the tagname + # XXX: obtain DocumentType + node = self.documentFactory.createDocument(uri, tagname, None) + self.document = node 
+ self.lastEvent[1] = [(START_DOCUMENT, node), None] + self.lastEvent = self.lastEvent[1] + self.push(node) + # Put everything we have seen so far into the document + for e in self.pending_events: + if e[0][0] == PROCESSING_INSTRUCTION: + _,target,data = e[0] + n = self.document.createProcessingInstruction(target, data) + e[0] = (PROCESSING_INSTRUCTION, n) + elif e[0][0] == COMMENT: + n = self.document.createComment(e[0][1]) + e[0] = (COMMENT, n) + else: + raise AssertionError("Unknown pending event ",e[0][0]) + self.lastEvent[1] = e + self.lastEvent = e + self.pending_events = None + return node.firstChild + + def endDocument(self): + self.lastEvent[1] = [(END_DOCUMENT, self.document), None] + self.pop() + + def clear(self): + "clear(): Explicitly release parsing structures" + self.document = None + +class ErrorHandler: + def warning(self, exception): + print(exception) + def error(self, exception): + raise exception + def fatalError(self, exception): + raise exception + +class DOMEventStream: + def __init__(self, stream, parser, bufsize): + self.stream = stream + self.parser = parser + self.bufsize = bufsize + if not hasattr(self.parser, 'feed'): + self.getEvent = self._slurp + self.reset() + + def reset(self): + self.pulldom = PullDOM() + # This content handler relies on namespace support + self.parser.setFeature(xml.sax.handler.feature_namespaces, 1) + self.parser.setContentHandler(self.pulldom) + + def __getitem__(self, pos): + rc = self.getEvent() + if rc: + return rc + raise IndexError + + def __next__(self): + rc = self.getEvent() + if rc: + return rc + raise StopIteration + + def __iter__(self): + return self + + def expandNode(self, node): + event = self.getEvent() + parents = [node] + while event: + token, cur_node = event + if cur_node is node: + return + if token != END_ELEMENT: + parents[-1].appendChild(cur_node) + if token == START_ELEMENT: + parents.append(cur_node) + elif token == END_ELEMENT: + del parents[-1] + event = self.getEvent() + + def getEvent(self): + # use IncrementalParser interface, so we get the desired + # pull effect + if not self.pulldom.firstEvent[1]: + self.pulldom.lastEvent = self.pulldom.firstEvent + while not self.pulldom.firstEvent[1]: + buf = self.stream.read(self.bufsize) + if not buf: + self.parser.close() + return None + self.parser.feed(buf) + rc = self.pulldom.firstEvent[1][0] + self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1] + return rc + + def _slurp(self): + """ Fallback replacement for getEvent() using the + standard SAX2 interface, which means we slurp the + SAX events into memory (no performance gain, but + we are compatible to all SAX parsers). + """ + self.parser.parse(self.stream) + self.getEvent = self._emit + return self._emit() + + def _emit(self): + """ Fallback replacement for getEvent() that emits + the events that _slurp() read previously. 
+ """ + rc = self.pulldom.firstEvent[1][0] + self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1] + return rc + + def clear(self): + """clear(): Explicitly release parsing objects""" + self.pulldom.clear() + del self.pulldom + self.parser = None + self.stream = None + +class SAX2DOM(PullDOM): + + def startElementNS(self, name, tagName , attrs): + PullDOM.startElementNS(self, name, tagName, attrs) + curNode = self.elementStack[-1] + parentNode = self.elementStack[-2] + parentNode.appendChild(curNode) + + def startElement(self, name, attrs): + PullDOM.startElement(self, name, attrs) + curNode = self.elementStack[-1] + parentNode = self.elementStack[-2] + parentNode.appendChild(curNode) + + def processingInstruction(self, target, data): + PullDOM.processingInstruction(self, target, data) + node = self.lastEvent[0][1] + parentNode = self.elementStack[-1] + parentNode.appendChild(node) + + def ignorableWhitespace(self, chars): + PullDOM.ignorableWhitespace(self, chars) + node = self.lastEvent[0][1] + parentNode = self.elementStack[-1] + parentNode.appendChild(node) + + def characters(self, chars): + PullDOM.characters(self, chars) + node = self.lastEvent[0][1] + parentNode = self.elementStack[-1] + parentNode.appendChild(node) + + +default_bufsize = (2 ** 14) - 20 + +def parse(stream_or_string, parser=None, bufsize=None): + if bufsize is None: + bufsize = default_bufsize + if isinstance(stream_or_string, str): + stream = open(stream_or_string, 'rb') + else: + stream = stream_or_string + if not parser: + parser = xml.sax.make_parser() + return DOMEventStream(stream, parser, bufsize) + +def parseString(string, parser=None): + from io import StringIO + + bufsize = len(string) + buf = StringIO(string) + if not parser: + parser = xml.sax.make_parser() + return DOMEventStream(buf, parser, bufsize) diff --git a/modules/language/python/module/xml/dom/xmlbuilder.py~ b/modules/language/python/module/xml/dom/xmlbuilder.py~ new file mode 100644 index 0000000..e9a1536 --- /dev/null +++ b/modules/language/python/module/xml/dom/xmlbuilder.py~ @@ -0,0 +1,410 @@ +"""Implementation of the DOM Level 3 'LS-Load' feature.""" + +import copy +import warnings +import xml.dom + +from xml.dom.NodeFilter import NodeFilter + + +__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"] + + +class Options: + """Features object that has variables set for each DOMBuilder feature. + + The DOMBuilder class uses an instance of this class to pass settings to + the ExpatBuilder class. + """ + + # Note that the DOMBuilder class in LoadSave constrains which of these + # values can be set using the DOM Level 3 LoadSave feature. 
+ + namespaces = 1 + namespace_declarations = True + validation = False + external_parameter_entities = True + external_general_entities = True + external_dtd_subset = True + validate_if_schema = False + validate = False + datatype_normalization = False + create_entity_ref_nodes = True + entities = True + whitespace_in_element_content = True + cdata_sections = True + comments = True + charset_overrides_xml_encoding = True + infoset = False + supported_mediatypes_only = False + + errorHandler = None + filter = None + + +class DOMBuilder: + entityResolver = None + errorHandler = None + filter = None + + ACTION_REPLACE = 1 + ACTION_APPEND_AS_CHILDREN = 2 + ACTION_INSERT_AFTER = 3 + ACTION_INSERT_BEFORE = 4 + + _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN, + ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE) + + def __init__(self): + self._options = Options() + + def _get_entityResolver(self): + return self.entityResolver + def _set_entityResolver(self, entityResolver): + self.entityResolver = entityResolver + + def _get_errorHandler(self): + return self.errorHandler + def _set_errorHandler(self, errorHandler): + self.errorHandler = errorHandler + + def _get_filter(self): + return self.filter + def _set_filter(self, filter): + self.filter = filter + + def setFeature(self, name, state): + if self.supportsFeature(name): + state = state and 1 or 0 + try: + settings = self._settings[(_name_xform(name), state)] + except KeyError: + raise xml.dom.NotSupportedErr( + "unsupported feature: %r" % (name,)) + else: + for name, value in settings: + setattr(self._options, name, value) + else: + raise xml.dom.NotFoundErr("unknown feature: " + repr(name)) + + def supportsFeature(self, name): + return hasattr(self._options, _name_xform(name)) + + def canSetFeature(self, name, state): + key = (_name_xform(name), state and 1 or 0) + return key in self._settings + + # This dictionary maps from (feature,value) to a list of + # (option,value) pairs that should be set on the Options object. + # If a (feature,value) setting is not in this dictionary, it is + # not supported by the DOMBuilder. 
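For illustration only (a usage sketch, not part of the patch): feature names are normalized by _name_xform() and looked up in the _settings table below, so setFeature() and canSetFeature() only accept the (feature, value) pairs listed there.

    from xml.dom.xmlbuilder import DOMBuilder

    builder = DOMBuilder()
    builder.setFeature("namespaces", 1)             # maps onto Options.namespaces
    print(builder.getFeature("comments"))           # -> True  (Options default)
    print(builder.canSetFeature("validation", 0))   # -> True  (listed in _settings)
    print(builder.canSetFeature("validation", 1))   # -> False (not listed)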
+ # + _settings = { + ("namespace_declarations", 0): [ + ("namespace_declarations", 0)], + ("namespace_declarations", 1): [ + ("namespace_declarations", 1)], + ("validation", 0): [ + ("validation", 0)], + ("external_general_entities", 0): [ + ("external_general_entities", 0)], + ("external_general_entities", 1): [ + ("external_general_entities", 1)], + ("external_parameter_entities", 0): [ + ("external_parameter_entities", 0)], + ("external_parameter_entities", 1): [ + ("external_parameter_entities", 1)], + ("validate_if_schema", 0): [ + ("validate_if_schema", 0)], + ("create_entity_ref_nodes", 0): [ + ("create_entity_ref_nodes", 0)], + ("create_entity_ref_nodes", 1): [ + ("create_entity_ref_nodes", 1)], + ("entities", 0): [ + ("create_entity_ref_nodes", 0), + ("entities", 0)], + ("entities", 1): [ + ("entities", 1)], + ("whitespace_in_element_content", 0): [ + ("whitespace_in_element_content", 0)], + ("whitespace_in_element_content", 1): [ + ("whitespace_in_element_content", 1)], + ("cdata_sections", 0): [ + ("cdata_sections", 0)], + ("cdata_sections", 1): [ + ("cdata_sections", 1)], + ("comments", 0): [ + ("comments", 0)], + ("comments", 1): [ + ("comments", 1)], + ("charset_overrides_xml_encoding", 0): [ + ("charset_overrides_xml_encoding", 0)], + ("charset_overrides_xml_encoding", 1): [ + ("charset_overrides_xml_encoding", 1)], + ("infoset", 0): [], + ("infoset", 1): [ + ("namespace_declarations", 0), + ("validate_if_schema", 0), + ("create_entity_ref_nodes", 0), + ("entities", 0), + ("cdata_sections", 0), + ("datatype_normalization", 1), + ("whitespace_in_element_content", 1), + ("comments", 1), + ("charset_overrides_xml_encoding", 1)], + ("supported_mediatypes_only", 0): [ + ("supported_mediatypes_only", 0)], + ("namespaces", 0): [ + ("namespaces", 0)], + ("namespaces", 1): [ + ("namespaces", 1)], + } + + def getFeature(self, name): + xname = _name_xform(name) + try: + return getattr(self._options, xname) + except AttributeError: + if name == "infoset": + options = self._options + return (options.datatype_normalization + and options.whitespace_in_element_content + and options.comments + and options.charset_overrides_xml_encoding + and not (options.namespace_declarations + or options.validate_if_schema + or options.create_entity_ref_nodes + or options.entities + or options.cdata_sections)) + raise xml.dom.NotFoundErr("feature %s not known" % repr(name)) + + def parseURI(self, uri): + if self.entityResolver: + input = self.entityResolver.resolveEntity(None, uri) + else: + input = DOMEntityResolver().resolveEntity(None, uri) + return self.parse(input) + + def parse(self, input): + options = copy.copy(self._options) + options.filter = self.filter + options.errorHandler = self.errorHandler + fp = input.byteStream + if fp is None and options.systemId: + import urllib.request + fp = urllib.request.urlopen(input.systemId) + return self._parse_bytestream(fp, options) + + def parseWithContext(self, input, cnode, action): + if action not in self._legal_actions: + raise ValueError("not a legal action") + raise NotImplementedError("Haven't written this yet...") + + def _parse_bytestream(self, stream, options): + import xml.dom.expatbuilder + builder = xml.dom.expatbuilder.makeBuilder(options) + return builder.parseFile(stream) + + +def _name_xform(name): + return name.lower().replace('-', '_') + + +class DOMEntityResolver(object): + __slots__ = '_opener', + + def resolveEntity(self, publicId, systemId): + assert systemId is not None + source = DOMInputSource() + source.publicId = publicId + 
source.systemId = systemId + source.byteStream = self._get_opener().open(systemId) + + # determine the encoding if the transport provided it + source.encoding = self._guess_media_encoding(source) + + # determine the base URI is we can + import posixpath, urllib.parse + parts = urllib.parse.urlparse(systemId) + scheme, netloc, path, params, query, fragment = parts + # XXX should we check the scheme here as well? + if path and not path.endswith("/"): + path = posixpath.dirname(path) + "/" + parts = scheme, netloc, path, params, query, fragment + source.baseURI = urllib.parse.urlunparse(parts) + + return source + + def _get_opener(self): + try: + return self._opener + except AttributeError: + self._opener = self._create_opener() + return self._opener + + def _create_opener(self): + import urllib.request + return urllib.request.build_opener() + + def _guess_media_encoding(self, source): + info = source.byteStream.info() + if "Content-Type" in info: + for param in info.getplist(): + if param.startswith("charset="): + return param.split("=", 1)[1].lower() + + +class DOMInputSource(object): + __slots__ = ('byteStream', 'characterStream', 'stringData', + 'encoding', 'publicId', 'systemId', 'baseURI') + + def __init__(self): + self.byteStream = None + self.characterStream = None + self.stringData = None + self.encoding = None + self.publicId = None + self.systemId = None + self.baseURI = None + + def _get_byteStream(self): + return self.byteStream + def _set_byteStream(self, byteStream): + self.byteStream = byteStream + + def _get_characterStream(self): + return self.characterStream + def _set_characterStream(self, characterStream): + self.characterStream = characterStream + + def _get_stringData(self): + return self.stringData + def _set_stringData(self, data): + self.stringData = data + + def _get_encoding(self): + return self.encoding + def _set_encoding(self, encoding): + self.encoding = encoding + + def _get_publicId(self): + return self.publicId + def _set_publicId(self, publicId): + self.publicId = publicId + + def _get_systemId(self): + return self.systemId + def _set_systemId(self, systemId): + self.systemId = systemId + + def _get_baseURI(self): + return self.baseURI + def _set_baseURI(self, uri): + self.baseURI = uri + + +class DOMBuilderFilter: + """Element filter which can be used to tailor construction of + a DOM instance. + """ + + # There's really no need for this class; concrete implementations + # should just implement the endElement() and startElement() + # methods as appropriate. Using this makes it easy to only + # implement one of them. 
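A hedged sketch of what a concrete filter might look like (the class name and URI below are placeholders, not from this patch): the builder consults the filter while constructing the DOM, and acceptNode() can reject unwanted nodes.

    from xml.dom.xmlbuilder import DOMBuilder, DOMBuilderFilter

    class DropLegacy(DOMBuilderFilter):
        """Hypothetical filter: leave out every <legacy> element."""
        def acceptNode(self, node):
            if getattr(node, "tagName", None) == "legacy":
                return self.FILTER_REJECT
            return self.FILTER_ACCEPT

    builder = DOMBuilder()
    builder.filter = DropLegacy()
    # doc = builder.parseURI("http://example.invalid/data.xml")  # placeholder URI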
+ + FILTER_ACCEPT = 1 + FILTER_REJECT = 2 + FILTER_SKIP = 3 + FILTER_INTERRUPT = 4 + + whatToShow = NodeFilter.SHOW_ALL + + def _get_whatToShow(self): + return self.whatToShow + + def acceptNode(self, element): + return self.FILTER_ACCEPT + + def startContainer(self, element): + return self.FILTER_ACCEPT + +del NodeFilter + + +class _AsyncDeprecatedProperty: + def warn(self, cls): + clsname = cls.__name__ + warnings.warn( + "{cls}.async is deprecated; use {cls}.async_".format(cls=clsname), + DeprecationWarning) + + def __get__(self, instance, cls): + self.warn(cls) + if instance is not None: + return instance.async_ + return False + + def __set__(self, instance, value): + self.warn(type(instance)) + setattr(instance, 'async_', value) + + +class DocumentLS: + """Mixin to create documents that conform to the load/save spec.""" + + async_ = False + locals()['async'] = _AsyncDeprecatedProperty() # Avoid DeprecationWarning + + def _get_async(self): + return False + + def _set_async(self, flag): + if flag: + raise xml.dom.NotSupportedErr( + "asynchronous document loading is not supported") + + def abort(self): + # What does it mean to "clear" a document? Does the + # documentElement disappear? + raise NotImplementedError( + "haven't figured out what this means yet") + + def load(self, uri): + raise NotImplementedError("haven't written this yet") + + def loadXML(self, source): + raise NotImplementedError("haven't written this yet") + + def saveXML(self, snode): + if snode is None: + snode = self + elif snode.ownerDocument is not self: + raise xml.dom.WrongDocumentErr() + return snode.toxml() + + +del _AsyncDeprecatedProperty + + +class DOMImplementationLS: + MODE_SYNCHRONOUS = 1 + MODE_ASYNCHRONOUS = 2 + + def createDOMBuilder(self, mode, schemaType): + if schemaType is not None: + raise xml.dom.NotSupportedErr( + "schemaType not yet supported") + if mode == self.MODE_SYNCHRONOUS: + return DOMBuilder() + if mode == self.MODE_ASYNCHRONOUS: + raise xml.dom.NotSupportedErr( + "asynchronous builders are not supported") + raise ValueError("unknown value for mode") + + def createDOMWriter(self): + raise NotImplementedError( + "the writer interface hasn't been written yet!") + + def createDOMInputSource(self): + return DOMInputSource() diff --git a/modules/language/python/module/xml/etree.py~ b/modules/language/python/module/xml/etree.py~ new file mode 100644 index 0000000..27fd8f6 --- /dev/null +++ b/modules/language/python/module/xml/etree.py~ @@ -0,0 +1,33 @@ +# $Id: __init__.py 3375 2008-02-13 08:05:08Z fredrik $ +# elementtree package + +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2008 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. 
+# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/psf/license for licensing details. diff --git a/modules/language/python/module/xml/etree/ElementInclude.py~ b/modules/language/python/module/xml/etree/ElementInclude.py~ new file mode 100644 index 0000000..963470e --- /dev/null +++ b/modules/language/python/module/xml/etree/ElementInclude.py~ @@ -0,0 +1,143 @@ +# +# ElementTree +# $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $ +# +# limited xinclude support for element trees +# +# history: +# 2003-08-15 fl created +# 2003-11-14 fl fixed default loader +# +# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. +# +# fredrik@pythonware.com +# http://www.pythonware.com +# +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2008 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/psf/license for licensing details. + +## +# Limited XInclude support for the ElementTree package. +## + +import copy +from . import ElementTree + +XINCLUDE = "{http://www.w3.org/2001/XInclude}" + +XINCLUDE_INCLUDE = XINCLUDE + "include" +XINCLUDE_FALLBACK = XINCLUDE + "fallback" + +## +# Fatal include error. + +class FatalIncludeError(SyntaxError): + pass + +## +# Default loader. This loader reads an included resource from disk. +# +# @param href Resource reference. +# @param parse Parse mode. Either "xml" or "text". +# @param encoding Optional text encoding (UTF-8 by default for "text"). +# @return The expanded resource. If the parse mode is "xml", this +# is an ElementTree instance. 
If the parse mode is "text", this +# is a Unicode string. If the loader fails, it can return None +# or raise an OSError exception. +# @throws OSError If the loader fails to load the resource. + +def default_loader(href, parse, encoding=None): + if parse == "xml": + with open(href, 'rb') as file: + data = ElementTree.parse(file).getroot() + else: + if not encoding: + encoding = 'UTF-8' + with open(href, 'r', encoding=encoding) as file: + data = file.read() + return data + +## +# Expand XInclude directives. +# +# @param elem Root element. +# @param loader Optional resource loader. If omitted, it defaults +# to {@link default_loader}. If given, it should be a callable +# that implements the same interface as <b>default_loader</b>. +# @throws FatalIncludeError If the function fails to include a given +# resource, or if the tree contains malformed XInclude elements. +# @throws OSError If the function fails to load a given resource. + +def include(elem, loader=None): + if loader is None: + loader = default_loader + # look for xinclude elements + i = 0 + while i < len(elem): + e = elem[i] + if e.tag == XINCLUDE_INCLUDE: + # process xinclude directive + href = e.get("href") + parse = e.get("parse", "xml") + if parse == "xml": + node = loader(href, parse) + if node is None: + raise FatalIncludeError( + "cannot load %r as %r" % (href, parse) + ) + node = copy.copy(node) + if e.tail: + node.tail = (node.tail or "") + e.tail + elem[i] = node + elif parse == "text": + text = loader(href, parse, e.get("encoding")) + if text is None: + raise FatalIncludeError( + "cannot load %r as %r" % (href, parse) + ) + if i: + node = elem[i-1] + node.tail = (node.tail or "") + text + (e.tail or "") + else: + elem.text = (elem.text or "") + text + (e.tail or "") + del elem[i] + continue + else: + raise FatalIncludeError( + "unknown parse type in xi:include tag (%r)" % parse + ) + elif e.tag == XINCLUDE_FALLBACK: + raise FatalIncludeError( + "xi:fallback tag must be child of xi:include (%r)" % e.tag + ) + else: + include(e, loader) + i = i + 1 diff --git a/modules/language/python/module/xml/etree/ElementPath.py~ b/modules/language/python/module/xml/etree/ElementPath.py~ new file mode 100644 index 0000000..ab6b79a --- /dev/null +++ b/modules/language/python/module/xml/etree/ElementPath.py~ @@ -0,0 +1,314 @@ +# +# ElementTree +# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $ +# +# limited xpath support for element trees +# +# history: +# 2003-05-23 fl created +# 2003-05-28 fl added support for // etc +# 2003-08-27 fl fixed parsing of periods in element names +# 2007-09-10 fl new selection engine +# 2007-09-12 fl fixed parent selector +# 2007-09-13 fl added iterfind; changed findall to return a list +# 2007-11-30 fl added namespaces support +# 2009-10-30 fl added child element value filter +# +# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved. 
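An illustrative usage sketch for the include() helper defined above (the file names are hypothetical): the tree is parsed first, then xi:include directives are expanded in place by the default loader.

    import xml.etree.ElementTree as ET
    from xml.etree import ElementInclude

    # 'document.xml' is a placeholder file containing xi:include directives,
    # e.g. <doc xmlns:xi="http://www.w3.org/2001/XInclude">
    #        <xi:include href="part.xml"/>
    #      </doc>
    tree = ET.parse("document.xml")
    ElementInclude.include(tree.getroot())        # default_loader reads part.xml
    print(ET.tostring(tree.getroot(), encoding="unicode"))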
+# +# fredrik@pythonware.com +# http://www.pythonware.com +# +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2009 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/psf/license for licensing details. + +## +# Implementation module for XPath support. There's usually no reason +# to import this module directly; the <b>ElementTree</b> does this for +# you, if needed. +## + +import re + +xpath_tokenizer_re = re.compile( + r"(" + r"'[^']*'|\"[^\"]*\"|" + r"::|" + r"//?|" + r"\.\.|" + r"\(\)|" + r"[/.*:\[\]\(\)@=])|" + r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|" + r"\s+" + ) + +def xpath_tokenizer(pattern, namespaces=None): + for token in xpath_tokenizer_re.findall(pattern): + tag = token[1] + if tag and tag[0] != "{" and ":" in tag: + try: + prefix, uri = tag.split(":", 1) + if not namespaces: + raise KeyError + yield token[0], "{%s}%s" % (namespaces[prefix], uri) + except KeyError: + raise SyntaxError("prefix %r not found in prefix map" % prefix) + else: + yield token + +def get_parent_map(context): + parent_map = context.parent_map + if parent_map is None: + context.parent_map = parent_map = {} + for p in context.root.iter(): + for e in p: + parent_map[e] = p + return parent_map + +def prepare_child(next, token): + tag = token[1] + def select(context, result): + for elem in result: + for e in elem: + if e.tag == tag: + yield e + return select + +def prepare_star(next, token): + def select(context, result): + for elem in result: + yield from elem + return select + +def prepare_self(next, token): + def select(context, result): + yield from result + return select + +def prepare_descendant(next, token): + try: + token = next() + except StopIteration: + return + if token[0] == "*": + tag = "*" + elif not token[0]: + tag = token[1] + else: + raise SyntaxError("invalid descendant") + def select(context, result): + for elem in result: + for e in elem.iter(tag): + if e is not elem: + yield e + return select + +def prepare_parent(next, token): + def select(context, result): + # FIXME: raise error if .. is applied at toplevel? 
+ parent_map = get_parent_map(context) + result_map = {} + for elem in result: + if elem in parent_map: + parent = parent_map[elem] + if parent not in result_map: + result_map[parent] = None + yield parent + return select + +def prepare_predicate(next, token): + # FIXME: replace with real parser!!! refs: + # http://effbot.org/zone/simple-iterator-parser.htm + # http://javascript.crockford.com/tdop/tdop.html + signature = [] + predicate = [] + while 1: + try: + token = next() + except StopIteration: + return + if token[0] == "]": + break + if token[0] and token[0][:1] in "'\"": + token = "'", token[0][1:-1] + signature.append(token[0] or "-") + predicate.append(token[1]) + signature = "".join(signature) + # use signature to determine predicate type + if signature == "@-": + # [@attribute] predicate + key = predicate[1] + def select(context, result): + for elem in result: + if elem.get(key) is not None: + yield elem + return select + if signature == "@-='": + # [@attribute='value'] + key = predicate[1] + value = predicate[-1] + def select(context, result): + for elem in result: + if elem.get(key) == value: + yield elem + return select + if signature == "-" and not re.match(r"\-?\d+$", predicate[0]): + # [tag] + tag = predicate[0] + def select(context, result): + for elem in result: + if elem.find(tag) is not None: + yield elem + return select + if signature == "-='" and not re.match(r"\-?\d+$", predicate[0]): + # [tag='value'] + tag = predicate[0] + value = predicate[-1] + def select(context, result): + for elem in result: + for e in elem.findall(tag): + if "".join(e.itertext()) == value: + yield elem + break + return select + if signature == "-" or signature == "-()" or signature == "-()-": + # [index] or [last()] or [last()-index] + if signature == "-": + # [index] + index = int(predicate[0]) - 1 + if index < 0: + raise SyntaxError("XPath position >= 1 expected") + else: + if predicate[0] != "last": + raise SyntaxError("unsupported function") + if signature == "-()-": + try: + index = int(predicate[2]) - 1 + except ValueError: + raise SyntaxError("unsupported expression") + if index > -2: + raise SyntaxError("XPath offset from last() must be negative") + else: + index = -1 + def select(context, result): + parent_map = get_parent_map(context) + for elem in result: + try: + parent = parent_map[elem] + # FIXME: what if the selector is "*" ? + elems = list(parent.findall(elem.tag)) + if elems[index] is elem: + yield elem + except (IndexError, KeyError): + pass + return select + raise SyntaxError("invalid predicate") + +ops = { + "": prepare_child, + "*": prepare_star, + ".": prepare_self, + "..": prepare_parent, + "//": prepare_descendant, + "[": prepare_predicate, + } + +_cache = {} + +class _SelectorContext: + parent_map = None + def __init__(self, root): + self.root = root + +# -------------------------------------------------------------------- + +## +# Generate all matching objects. + +def iterfind(elem, path, namespaces=None): + # compile selector pattern + cache_key = (path, None if namespaces is None + else tuple(sorted(namespaces.items()))) + if path[-1:] == "/": + path = path + "*" # implicit all (FIXME: keep this?) 
+ try: + selector = _cache[cache_key] + except KeyError: + if len(_cache) > 100: + _cache.clear() + if path[:1] == "/": + raise SyntaxError("cannot use absolute path on element") + next = iter(xpath_tokenizer(path, namespaces)).__next__ + try: + token = next() + except StopIteration: + return + selector = [] + while 1: + try: + selector.append(ops[token[0]](next, token)) + except StopIteration: + raise SyntaxError("invalid path") + try: + token = next() + if token[0] == "/": + token = next() + except StopIteration: + break + _cache[cache_key] = selector + # execute selector pattern + result = [elem] + context = _SelectorContext(elem) + for select in selector: + result = select(context, result) + return result + +## +# Find first matching object. + +def find(elem, path, namespaces=None): + return next(iterfind(elem, path, namespaces), None) + +## +# Find all matching objects. + +def findall(elem, path, namespaces=None): + return list(iterfind(elem, path, namespaces)) + +## +# Find text for first matching object. + +def findtext(elem, path, default=None, namespaces=None): + try: + elem = next(iterfind(elem, path, namespaces)) + return elem.text or "" + except StopIteration: + return default diff --git a/modules/language/python/module/xml/etree/ElementTree.py~ b/modules/language/python/module/xml/etree/ElementTree.py~ new file mode 100644 index 0000000..cf4e1da --- /dev/null +++ b/modules/language/python/module/xml/etree/ElementTree.py~ @@ -0,0 +1,1656 @@ +"""Lightweight XML support for Python. + + XML is an inherently hierarchical data format, and the most natural way to + represent it is with a tree. This module has two classes for this purpose: + + 1. ElementTree represents the whole XML document as a tree and + + 2. Element represents a single node in this tree. + + Interactions with the whole document (reading and writing to/from files) are + usually done on the ElementTree level. Interactions with a single XML element + and its sub-elements are done on the Element level. + + Element is a flexible container object designed to store hierarchical data + structures in memory. It can be described as a cross between a list and a + dictionary. Each Element has a number of properties associated with it: + + 'tag' - a string containing the element's name. + + 'attributes' - a Python dictionary storing the element's attributes. + + 'text' - a string containing the element's text content. + + 'tail' - an optional string containing text after the element's end tag. + + And a number of child elements stored in a Python sequence. + + To create an element instance, use the Element constructor, + or the SubElement factory function. + + You can also use the ElementTree class to wrap an element structure + and convert it to and from XML. + +""" + +#--------------------------------------------------------------------- +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/psf/license for licensing details. +# +# ElementTree +# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved. 
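A small sketch (not part of the diff) of the ElementPath entry points defined above, as they are normally reached through the Element find*/iterfind methods; the XML literal and prefix map are invented for illustration.

    import xml.etree.ElementTree as ET

    root = ET.fromstring(
        '<data xmlns:x="urn:example">'
        '<x:item name="a"/><x:item name="b"/></data>')
    ns = {"x": "urn:example"}

    # Element.findall()/find() delegate to ElementPath.findall()/find().
    print([e.get("name") for e in root.findall("x:item", ns)])   # -> ['a', 'b']
    print(root.find(".//x:item[@name='b']", ns).get("name"))     # -> 'b'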
+# +# fredrik@pythonware.com +# http://www.pythonware.com +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2008 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + +__all__ = [ + # public symbols + "Comment", + "dump", + "Element", "ElementTree", + "fromstring", "fromstringlist", + "iselement", "iterparse", + "parse", "ParseError", + "PI", "ProcessingInstruction", + "QName", + "SubElement", + "tostring", "tostringlist", + "TreeBuilder", + "VERSION", + "XML", "XMLID", + "XMLParser", "XMLPullParser", + "register_namespace", + ] + +VERSION = "1.3.0" + +import sys +import re +import warnings +import io +import collections +import contextlib + +from . import ElementPath + + +class ParseError(SyntaxError): + """An error when parsing an XML document. + + In addition to its exception value, a ParseError contains + two extra attributes: + 'code' - the specific exception code + 'position' - the line and column of the error + + """ + pass + +# -------------------------------------------------------------------- + + +def iselement(element): + """Return True if *element* appears to be an Element.""" + return hasattr(element, 'tag') + + +class Element: + """An XML element. + + This class is the reference implementation of the Element interface. + + An element's length is its number of subelements. That means if you + want to check if an element is truly empty, you should check BOTH + its length AND its text attribute. + + The element tag, attribute names, and attribute values can be either + bytes or strings. + + *tag* is the element name. *attrib* is an optional dictionary containing + element attributes. *extra* are additional element attributes given as + keyword arguments. + + Example form: + <tag attrib>text<child/>...</tag>tail + + """ + + tag = None + """The element's name.""" + + attrib = None + """Dictionary of the element's attributes.""" + + text = None + """ + Text before first subelement. This is either a string or the value None. + Note that if there is no text, this attribute may be either + None or the empty string, depending on the parser. + + """ + + tail = None + """ + Text after this element's end tag, but before the next sibling element's + start tag. 
This is either a string or the value None. Note that if there + was no text, this attribute may be either None or an empty string, + depending on the parser. + + """ + + def __init__(self, tag, attrib={}, **extra): + if not isinstance(attrib, dict): + raise TypeError("attrib must be dict, not %s" % ( + attrib.__class__.__name__,)) + attrib = attrib.copy() + attrib.update(extra) + self.tag = tag + self.attrib = attrib + self._children = [] + + def __repr__(self): + return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self)) + + def makeelement(self, tag, attrib): + """Create a new element with the same type. + + *tag* is a string containing the element name. + *attrib* is a dictionary containing the element attributes. + + Do not call this method, use the SubElement factory function instead. + + """ + return self.__class__(tag, attrib) + + def copy(self): + """Return copy of current element. + + This creates a shallow copy. Subelements will be shared with the + original tree. + + """ + elem = self.makeelement(self.tag, self.attrib) + elem.text = self.text + elem.tail = self.tail + elem[:] = self + return elem + + def __len__(self): + return len(self._children) + + def __bool__(self): + warnings.warn( + "The behavior of this method will change in future versions. " + "Use specific 'len(elem)' or 'elem is not None' test instead.", + FutureWarning, stacklevel=2 + ) + return len(self._children) != 0 # emulate old behaviour, for now + + def __getitem__(self, index): + return self._children[index] + + def __setitem__(self, index, element): + # if isinstance(index, slice): + # for elt in element: + # assert iselement(elt) + # else: + # assert iselement(element) + self._children[index] = element + + def __delitem__(self, index): + del self._children[index] + + def append(self, subelement): + """Add *subelement* to the end of this element. + + The new element will appear in document order after the last existing + subelement (or directly after the text, if it's the first subelement), + but before the end tag for this element. + + """ + self._assert_is_element(subelement) + self._children.append(subelement) + + def extend(self, elements): + """Append subelements from a sequence. + + *elements* is a sequence with zero or more elements. + + """ + for element in elements: + self._assert_is_element(element) + self._children.extend(elements) + + def insert(self, index, subelement): + """Insert *subelement* at position *index*.""" + self._assert_is_element(subelement) + self._children.insert(index, subelement) + + def _assert_is_element(self, e): + # Need to refer to the actual Python implementation, not the + # shadowing C implementation. + if not isinstance(e, _Element_Py): + raise TypeError('expected an Element, not %s' % type(e).__name__) + + def remove(self, subelement): + """Remove matching subelement. + + Unlike the find methods, this method compares elements based on + identity, NOT ON tag value or contents. To remove subelements by + other means, the easiest way is to use a list comprehension to + select what elements to keep, and then use slice assignment to update + the parent element. + + ValueError is raised if a matching element could not be found. + + """ + # assert iselement(element) + self._children.remove(subelement) + + def getchildren(self): + """(Deprecated) Return all subelements. + + Elements are returned in document order. + + """ + warnings.warn( + "This method will be removed in future versions. 
" + "Use 'list(elem)' or iteration over elem instead.", + DeprecationWarning, stacklevel=2 + ) + return self._children + + def find(self, path, namespaces=None): + """Find first matching element by tag name or path. + + *path* is a string having either an element tag or an XPath, + *namespaces* is an optional mapping from namespace prefix to full name. + + Return the first matching element, or None if no element was found. + + """ + return ElementPath.find(self, path, namespaces) + + def findtext(self, path, default=None, namespaces=None): + """Find text for first matching element by tag name or path. + + *path* is a string having either an element tag or an XPath, + *default* is the value to return if the element was not found, + *namespaces* is an optional mapping from namespace prefix to full name. + + Return text content of first matching element, or default value if + none was found. Note that if an element is found having no text + content, the empty string is returned. + + """ + return ElementPath.findtext(self, path, default, namespaces) + + def findall(self, path, namespaces=None): + """Find all matching subelements by tag name or path. + + *path* is a string having either an element tag or an XPath, + *namespaces* is an optional mapping from namespace prefix to full name. + + Returns list containing all matching elements in document order. + + """ + return ElementPath.findall(self, path, namespaces) + + def iterfind(self, path, namespaces=None): + """Find all matching subelements by tag name or path. + + *path* is a string having either an element tag or an XPath, + *namespaces* is an optional mapping from namespace prefix to full name. + + Return an iterable yielding all matching elements in document order. + + """ + return ElementPath.iterfind(self, path, namespaces) + + def clear(self): + """Reset element. + + This function removes all subelements, clears all attributes, and sets + the text and tail attributes to None. + + """ + self.attrib.clear() + self._children = [] + self.text = self.tail = None + + def get(self, key, default=None): + """Get element attribute. + + Equivalent to attrib.get, but some implementations may handle this a + bit more efficiently. *key* is what attribute to look for, and + *default* is what to return if the attribute was not found. + + Returns a string containing the attribute value, or the default if + attribute was not found. + + """ + return self.attrib.get(key, default) + + def set(self, key, value): + """Set element attribute. + + Equivalent to attrib[key] = value, but some implementations may handle + this a bit more efficiently. *key* is what attribute to set, and + *value* is the attribute value to set it to. + + """ + self.attrib[key] = value + + def keys(self): + """Get list of attribute names. + + Names are returned in an arbitrary order, just like an ordinary + Python dict. Equivalent to attrib.keys() + + """ + return self.attrib.keys() + + def items(self): + """Get element attributes as a sequence. + + The attributes are returned in arbitrary order. Equivalent to + attrib.items(). + + Return a list of (name, value) tuples. + + """ + return self.attrib.items() + + def iter(self, tag=None): + """Create tree iterator. + + The iterator loops over the element and all subelements in document + order, returning all elements with a matching tag. + + If the tree structure is modified during iteration, new or removed + elements may or may not be included. 
To get a stable set, use the + list() function on the iterator, and loop over the resulting list. + + *tag* is what tags to look for (default is to return all elements) + + Return an iterator containing all the matching elements. + + """ + if tag == "*": + tag = None + if tag is None or self.tag == tag: + yield self + for e in self._children: + yield from e.iter(tag) + + # compatibility + def getiterator(self, tag=None): + # Change for a DeprecationWarning in 1.4 + warnings.warn( + "This method will be removed in future versions. " + "Use 'elem.iter()' or 'list(elem.iter())' instead.", + PendingDeprecationWarning, stacklevel=2 + ) + return list(self.iter(tag)) + + def itertext(self): + """Create text iterator. + + The iterator loops over the element and all subelements in document + order, returning all inner text. + + """ + tag = self.tag + if not isinstance(tag, str) and tag is not None: + return + t = self.text + if t: + yield t + for e in self: + yield from e.itertext() + t = e.tail + if t: + yield t + + +def SubElement(parent, tag, attrib={}, **extra): + """Subelement factory which creates an element instance, and appends it + to an existing parent. + + The element tag, attribute names, and attribute values can be either + bytes or Unicode strings. + + *parent* is the parent element, *tag* is the subelements name, *attrib* is + an optional directory containing element attributes, *extra* are + additional attributes given as keyword arguments. + + """ + attrib = attrib.copy() + attrib.update(extra) + element = parent.makeelement(tag, attrib) + parent.append(element) + return element + + +def Comment(text=None): + """Comment element factory. + + This function creates a special element which the standard serializer + serializes as an XML comment. + + *text* is a string containing the comment string. + + """ + element = Element(Comment) + element.text = text + return element + + +def ProcessingInstruction(target, text=None): + """Processing Instruction element factory. + + This function creates a special element which the standard serializer + serializes as an XML comment. + + *target* is a string containing the processing instruction, *text* is a + string containing the processing instruction contents, if any. + + """ + element = Element(ProcessingInstruction) + element.text = target + if text: + element.text = element.text + " " + text + return element + +PI = ProcessingInstruction + + +class QName: + """Qualified name wrapper. + + This class can be used to wrap a QName attribute value in order to get + proper namespace handing on output. + + *text_or_uri* is a string containing the QName value either in the form + {uri}local, or if the tag argument is given, the URI part of a QName. + + *tag* is an optional argument which if given, will make the first + argument (text_or_uri) be interpreted as a URI, and this argument (tag) + be interpreted as a local name. 
+ + """ + def __init__(self, text_or_uri, tag=None): + if tag: + text_or_uri = "{%s}%s" % (text_or_uri, tag) + self.text = text_or_uri + def __str__(self): + return self.text + def __repr__(self): + return '<%s %r>' % (self.__class__.__name__, self.text) + def __hash__(self): + return hash(self.text) + def __le__(self, other): + if isinstance(other, QName): + return self.text <= other.text + return self.text <= other + def __lt__(self, other): + if isinstance(other, QName): + return self.text < other.text + return self.text < other + def __ge__(self, other): + if isinstance(other, QName): + return self.text >= other.text + return self.text >= other + def __gt__(self, other): + if isinstance(other, QName): + return self.text > other.text + return self.text > other + def __eq__(self, other): + if isinstance(other, QName): + return self.text == other.text + return self.text == other + +# -------------------------------------------------------------------- + + +class ElementTree: + """An XML element hierarchy. + + This class also provides support for serialization to and from + standard XML. + + *element* is an optional root element node, + *file* is an optional file handle or file name of an XML file whose + contents will be used to initialize the tree with. + + """ + def __init__(self, element=None, file=None): + # assert element is None or iselement(element) + self._root = element # first node + if file: + self.parse(file) + + def getroot(self): + """Return root element of this tree.""" + return self._root + + def _setroot(self, element): + """Replace root element of this tree. + + This will discard the current contents of the tree and replace it + with the given element. Use with care! + + """ + # assert iselement(element) + self._root = element + + def parse(self, source, parser=None): + """Load external XML document into element tree. + + *source* is a file name or file object, *parser* is an optional parser + instance that defaults to XMLParser. + + ParseError is raised if the parser fails to parse the document. + + Returns the root element of the given source document. + + """ + close_source = False + if not hasattr(source, "read"): + source = open(source, "rb") + close_source = True + try: + if parser is None: + # If no parser was specified, create a default XMLParser + parser = XMLParser() + if hasattr(parser, '_parse_whole'): + # The default XMLParser, when it comes from an accelerator, + # can define an internal _parse_whole API for efficiency. + # It can be used to parse the whole source without feeding + # it with chunks. + self._root = parser._parse_whole(source) + return self._root + while True: + data = source.read(65536) + if not data: + break + parser.feed(data) + self._root = parser.close() + return self._root + finally: + if close_source: + source.close() + + def iter(self, tag=None): + """Create and return tree iterator for the root element. + + The iterator loops over all elements in this tree, in document order. + + *tag* is a string with the tag name to iterate over + (default is to return all elements). + + """ + # assert self._root is not None + return self._root.iter(tag) + + # compatibility + def getiterator(self, tag=None): + # Change for a DeprecationWarning in 1.4 + warnings.warn( + "This method will be removed in future versions. " + "Use 'tree.iter()' or 'list(tree.iter())' instead.", + PendingDeprecationWarning, stacklevel=2 + ) + return list(self.iter(tag)) + + def find(self, path, namespaces=None): + """Find first matching element by tag name or path. 
+ + Same as getroot().find(path), which is Element.find() + + *path* is a string having either an element tag or an XPath, + *namespaces* is an optional mapping from namespace prefix to full name. + + Return the first matching element, or None if no element was found. + + """ + # assert self._root is not None + if path[:1] == "/": + path = "." + path + warnings.warn( + "This search is broken in 1.3 and earlier, and will be " + "fixed in a future version. If you rely on the current " + "behaviour, change it to %r" % path, + FutureWarning, stacklevel=2 + ) + return self._root.find(path, namespaces) + + def findtext(self, path, default=None, namespaces=None): + """Find first matching element by tag name or path. + + Same as getroot().findtext(path), which is Element.findtext() + + *path* is a string having either an element tag or an XPath, + *namespaces* is an optional mapping from namespace prefix to full name. + + Return the first matching element, or None if no element was found. + + """ + # assert self._root is not None + if path[:1] == "/": + path = "." + path + warnings.warn( + "This search is broken in 1.3 and earlier, and will be " + "fixed in a future version. If you rely on the current " + "behaviour, change it to %r" % path, + FutureWarning, stacklevel=2 + ) + return self._root.findtext(path, default, namespaces) + + def findall(self, path, namespaces=None): + """Find all matching subelements by tag name or path. + + Same as getroot().findall(path), which is Element.findall(). + + *path* is a string having either an element tag or an XPath, + *namespaces* is an optional mapping from namespace prefix to full name. + + Return list containing all matching elements in document order. + + """ + # assert self._root is not None + if path[:1] == "/": + path = "." + path + warnings.warn( + "This search is broken in 1.3 and earlier, and will be " + "fixed in a future version. If you rely on the current " + "behaviour, change it to %r" % path, + FutureWarning, stacklevel=2 + ) + return self._root.findall(path, namespaces) + + def iterfind(self, path, namespaces=None): + """Find all matching subelements by tag name or path. + + Same as getroot().iterfind(path), which is element.iterfind() + + *path* is a string having either an element tag or an XPath, + *namespaces* is an optional mapping from namespace prefix to full name. + + Return an iterable yielding all matching elements in document order. + + """ + # assert self._root is not None + if path[:1] == "/": + path = "." + path + warnings.warn( + "This search is broken in 1.3 and earlier, and will be " + "fixed in a future version. If you rely on the current " + "behaviour, change it to %r" % path, + FutureWarning, stacklevel=2 + ) + return self._root.iterfind(path, namespaces) + + def write(self, file_or_filename, + encoding=None, + xml_declaration=None, + default_namespace=None, + method=None, *, + short_empty_elements=True): + """Write element tree to a file as XML. + + Arguments: + *file_or_filename* -- file name or a file object opened for writing + + *encoding* -- the output encoding (default: US-ASCII) + + *xml_declaration* -- bool indicating if an XML declaration should be + added to the output. If None, an XML declaration + is added if encoding IS NOT either of: + US-ASCII, UTF-8, or Unicode + + *default_namespace* -- sets the default XML namespace (for "xmlns") + + *method* -- either "xml" (default), "html, "text", or "c14n" + + *short_empty_elements* -- controls the formatting of elements + that contain no content. 
If True (default) + they are emitted as a single self-closed + tag, otherwise they are emitted as a pair + of start/end tags + + """ + if not method: + method = "xml" + elif method not in _serialize: + raise ValueError("unknown method %r" % method) + if not encoding: + if method == "c14n": + encoding = "utf-8" + else: + encoding = "us-ascii" + enc_lower = encoding.lower() + with _get_writer(file_or_filename, enc_lower) as write: + if method == "xml" and (xml_declaration or + (xml_declaration is None and + enc_lower not in ("utf-8", "us-ascii", "unicode"))): + declared_encoding = encoding + if enc_lower == "unicode": + # Retrieve the default encoding for the xml declaration + import locale + declared_encoding = locale.getpreferredencoding() + write("<?xml version='1.0' encoding='%s'?>\n" % ( + declared_encoding,)) + if method == "text": + _serialize_text(write, self._root) + else: + qnames, namespaces = _namespaces(self._root, default_namespace) + serialize = _serialize[method] + serialize(write, self._root, qnames, namespaces, + short_empty_elements=short_empty_elements) + + def write_c14n(self, file): + # lxml.etree compatibility. use output method instead + return self.write(file, method="c14n") + +# -------------------------------------------------------------------- +# serialization support + +@contextlib.contextmanager +def _get_writer(file_or_filename, encoding): + # returns text write method and release all resources after using + try: + write = file_or_filename.write + except AttributeError: + # file_or_filename is a file name + if encoding == "unicode": + file = open(file_or_filename, "w") + else: + file = open(file_or_filename, "w", encoding=encoding, + errors="xmlcharrefreplace") + with file: + yield file.write + else: + # file_or_filename is a file-like object + # encoding determines if it is a text or binary writer + if encoding == "unicode": + # use a text writer as is + yield write + else: + # wrap a binary writer with TextIOWrapper + with contextlib.ExitStack() as stack: + if isinstance(file_or_filename, io.BufferedIOBase): + file = file_or_filename + elif isinstance(file_or_filename, io.RawIOBase): + file = io.BufferedWriter(file_or_filename) + # Keep the original file open when the BufferedWriter is + # destroyed + stack.callback(file.detach) + else: + # This is to handle passed objects that aren't in the + # IOBase hierarchy, but just have a write method + file = io.BufferedIOBase() + file.writable = lambda: True + file.write = write + try: + # TextIOWrapper uses this methods to determine + # if BOM (for UTF-16, etc) should be added + file.seekable = file_or_filename.seekable + file.tell = file_or_filename.tell + except AttributeError: + pass + file = io.TextIOWrapper(file, + encoding=encoding, + errors="xmlcharrefreplace", + newline="\n") + # Keep the original file open when the TextIOWrapper is + # destroyed + stack.callback(file.detach) + yield file.write + +def _namespaces(elem, default_namespace=None): + # identify namespaces used in this tree + + # maps qnames to *encoded* prefix:local names + qnames = {None: None} + + # maps uri:s to prefixes + namespaces = {} + if default_namespace: + namespaces[default_namespace] = "" + + def add_qname(qname): + # calculate serialized qname representation + try: + if qname[:1] == "{": + uri, tag = qname[1:].rsplit("}", 1) + prefix = namespaces.get(uri) + if prefix is None: + prefix = _namespace_map.get(uri) + if prefix is None: + prefix = "ns%d" % len(namespaces) + if prefix != "xml": + namespaces[uri] = prefix + if prefix: + 
qnames[qname] = "%s:%s" % (prefix, tag) + else: + qnames[qname] = tag # default element + else: + if default_namespace: + # FIXME: can this be handled in XML 1.0? + raise ValueError( + "cannot use non-qualified names with " + "default_namespace option" + ) + qnames[qname] = qname + except TypeError: + _raise_serialization_error(qname) + + # populate qname and namespaces table + for elem in elem.iter(): + tag = elem.tag + if isinstance(tag, QName): + if tag.text not in qnames: + add_qname(tag.text) + elif isinstance(tag, str): + if tag not in qnames: + add_qname(tag) + elif tag is not None and tag is not Comment and tag is not PI: + _raise_serialization_error(tag) + for key, value in elem.items(): + if isinstance(key, QName): + key = key.text + if key not in qnames: + add_qname(key) + if isinstance(value, QName) and value.text not in qnames: + add_qname(value.text) + text = elem.text + if isinstance(text, QName) and text.text not in qnames: + add_qname(text.text) + return qnames, namespaces + +def _serialize_xml(write, elem, qnames, namespaces, + short_empty_elements, **kwargs): + tag = elem.tag + text = elem.text + if tag is Comment: + write("<!--%s-->" % text) + elif tag is ProcessingInstruction: + write("<?%s?>" % text) + else: + tag = qnames[tag] + if tag is None: + if text: + write(_escape_cdata(text)) + for e in elem: + _serialize_xml(write, e, qnames, None, + short_empty_elements=short_empty_elements) + else: + write("<" + tag) + items = list(elem.items()) + if items or namespaces: + if namespaces: + for v, k in sorted(namespaces.items(), + key=lambda x: x[1]): # sort on prefix + if k: + k = ":" + k + write(" xmlns%s=\"%s\"" % ( + k, + _escape_attrib(v) + )) + for k, v in sorted(items): # lexical order + if isinstance(k, QName): + k = k.text + if isinstance(v, QName): + v = qnames[v.text] + else: + v = _escape_attrib(v) + write(" %s=\"%s\"" % (qnames[k], v)) + if text or len(elem) or not short_empty_elements: + write(">") + if text: + write(_escape_cdata(text)) + for e in elem: + _serialize_xml(write, e, qnames, None, + short_empty_elements=short_empty_elements) + write("</" + tag + ">") + else: + write(" />") + if elem.tail: + write(_escape_cdata(elem.tail)) + +HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", + "img", "input", "isindex", "link", "meta", "param") + +try: + HTML_EMPTY = set(HTML_EMPTY) +except NameError: + pass + +def _serialize_html(write, elem, qnames, namespaces, **kwargs): + tag = elem.tag + text = elem.text + if tag is Comment: + write("<!--%s-->" % _escape_cdata(text)) + elif tag is ProcessingInstruction: + write("<?%s?>" % _escape_cdata(text)) + else: + tag = qnames[tag] + if tag is None: + if text: + write(_escape_cdata(text)) + for e in elem: + _serialize_html(write, e, qnames, None) + else: + write("<" + tag) + items = list(elem.items()) + if items or namespaces: + if namespaces: + for v, k in sorted(namespaces.items(), + key=lambda x: x[1]): # sort on prefix + if k: + k = ":" + k + write(" xmlns%s=\"%s\"" % ( + k, + _escape_attrib(v) + )) + for k, v in sorted(items): # lexical order + if isinstance(k, QName): + k = k.text + if isinstance(v, QName): + v = qnames[v.text] + else: + v = _escape_attrib_html(v) + # FIXME: handle boolean attributes + write(" %s=\"%s\"" % (qnames[k], v)) + write(">") + ltag = tag.lower() + if text: + if ltag == "script" or ltag == "style": + write(text) + else: + write(_escape_cdata(text)) + for e in elem: + _serialize_html(write, e, qnames, None) + if ltag not in HTML_EMPTY: + write("</" + tag + ">") + if 
elem.tail:
+        write(_escape_cdata(elem.tail))
+
+def _serialize_text(write, elem):
+    for part in elem.itertext():
+        write(part)
+    if elem.tail:
+        write(elem.tail)
+
+_serialize = {
+    "xml": _serialize_xml,
+    "html": _serialize_html,
+    "text": _serialize_text,
+# this optional method is imported at the end of the module
+#   "c14n": _serialize_c14n,
+}
+
+
+def register_namespace(prefix, uri):
+    """Register a namespace prefix.
+
+    The registry is global, and any existing mapping for either the
+    given prefix or the namespace URI will be removed.
+
+    *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
+    attributes in this namespace will be serialized with prefix if possible.
+
+    ValueError is raised if prefix is reserved or is invalid.
+
+    """
+    if re.match(r"ns\d+$", prefix):
+        raise ValueError("Prefix format reserved for internal use")
+    for k, v in list(_namespace_map.items()):
+        if k == uri or v == prefix:
+            del _namespace_map[k]
+    _namespace_map[uri] = prefix
+
+_namespace_map = {
+    # "well-known" namespace prefixes
+    "http://www.w3.org/XML/1998/namespace": "xml",
+    "http://www.w3.org/1999/xhtml": "html",
+    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
+    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
+    # xml schema
+    "http://www.w3.org/2001/XMLSchema": "xs",
+    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
+    # dublin core
+    "http://purl.org/dc/elements/1.1/": "dc",
+}
+# For tests and troubleshooting
+register_namespace._namespace_map = _namespace_map
+
+def _raise_serialization_error(text):
+    raise TypeError(
+        "cannot serialize %r (type %s)" % (text, type(text).__name__)
+        )
+
+def _escape_cdata(text):
+    # escape character data
+    try:
+        # it's worth avoiding do-nothing calls for strings that are
+        # shorter than 500 characters, or so. assume that's, by far,
+        # the most common case in most applications.
+        if "&" in text:
+            text = text.replace("&", "&amp;")
+        if "<" in text:
+            text = text.replace("<", "&lt;")
+        if ">" in text:
+            text = text.replace(">", "&gt;")
+        return text
+    except (TypeError, AttributeError):
+        _raise_serialization_error(text)
+
+def _escape_attrib(text):
+    # escape attribute value
+    try:
+        if "&" in text:
+            text = text.replace("&", "&amp;")
+        if "<" in text:
+            text = text.replace("<", "&lt;")
+        if ">" in text:
+            text = text.replace(">", "&gt;")
+        if "\"" in text:
+            text = text.replace("\"", "&quot;")
+        # The following business with carriage returns is to satisfy
+        # Section 2.11 of the XML specification, stating that
+        # CR or CR LN should be replaced with just LN
+        # http://www.w3.org/TR/REC-xml/#sec-line-ends
+        if "\r\n" in text:
+            text = text.replace("\r\n", "\n")
+        if "\r" in text:
+            text = text.replace("\r", "\n")
+        #The following four lines are issue 17582
+        if "\n" in text:
+            text = text.replace("\n", "&#10;")
+        if "\t" in text:
+            text = text.replace("\t", "&#09;")
+        return text
+    except (TypeError, AttributeError):
+        _raise_serialization_error(text)
+
+def _escape_attrib_html(text):
+    # escape attribute value
+    try:
+        if "&" in text:
+            text = text.replace("&", "&amp;")
+        if ">" in text:
+            text = text.replace(">", "&gt;")
+        if "\"" in text:
+            text = text.replace("\"", "&quot;")
+        return text
+    except (TypeError, AttributeError):
+        _raise_serialization_error(text)
+
+# --------------------------------------------------------------------
+
+def tostring(element, encoding=None, method=None, *,
+             short_empty_elements=True):
+    """Generate string representation of XML element.
+
+    All subelements are included. If encoding is "unicode", a string
+    is returned.
Otherwise a bytestring is returned. + + *element* is an Element instance, *encoding* is an optional output + encoding defaulting to US-ASCII, *method* is an optional output which can + be one of "xml" (default), "html", "text" or "c14n". + + Returns an (optionally) encoded string containing the XML data. + + """ + stream = io.StringIO() if encoding == 'unicode' else io.BytesIO() + ElementTree(element).write(stream, encoding, method=method, + short_empty_elements=short_empty_elements) + return stream.getvalue() + +class _ListDataStream(io.BufferedIOBase): + """An auxiliary stream accumulating into a list reference.""" + def __init__(self, lst): + self.lst = lst + + def writable(self): + return True + + def seekable(self): + return True + + def write(self, b): + self.lst.append(b) + + def tell(self): + return len(self.lst) + +def tostringlist(element, encoding=None, method=None, *, + short_empty_elements=True): + lst = [] + stream = _ListDataStream(lst) + ElementTree(element).write(stream, encoding, method=method, + short_empty_elements=short_empty_elements) + return lst + + +def dump(elem): + """Write element tree or element structure to sys.stdout. + + This function should be used for debugging only. + + *elem* is either an ElementTree, or a single Element. The exact output + format is implementation dependent. In this version, it's written as an + ordinary XML file. + + """ + # debugging + if not isinstance(elem, ElementTree): + elem = ElementTree(elem) + elem.write(sys.stdout, encoding="unicode") + tail = elem.getroot().tail + if not tail or tail[-1] != "\n": + sys.stdout.write("\n") + +# -------------------------------------------------------------------- +# parsing + + +def parse(source, parser=None): + """Parse XML document into element tree. + + *source* is a filename or file object containing XML data, + *parser* is an optional parser instance defaulting to XMLParser. + + Return an ElementTree instance. + + """ + tree = ElementTree() + tree.parse(source, parser) + return tree + + +def iterparse(source, events=None, parser=None): + """Incrementally parse XML document into ElementTree. + + This class also reports what's going on to the user based on the + *events* it is initialized with. The supported events are the strings + "start", "end", "start-ns" and "end-ns" (the "ns" events are used to get + detailed namespace information). If *events* is omitted, only + "end" events are reported. + + *source* is a filename or file object containing XML data, *events* is + a list of events to report back, *parser* is an optional parser instance. + + Returns an iterator providing (event, elem) pairs. + + """ + # Use the internal, undocumented _parser argument for now; When the + # parser argument of iterparse is removed, this can be killed. 
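+    # A rough usage sketch (the file name and tag below are purely
+    # illustrative): consuming "end" events and clearing elements that have
+    # already been handled keeps memory bounded on large documents.
+    #
+    #     for event, elem in iterparse("data.xml", events=("start", "end")):
+    #         if event == "end" and elem.tag == "record":
+    #             ...           # the element and its subtree are complete here
+    #             elem.clear()  # drop processed children to reclaim memory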
+ pullparser = XMLPullParser(events=events, _parser=parser) + def iterator(): + try: + while True: + yield from pullparser.read_events() + # load event buffer + data = source.read(16 * 1024) + if not data: + break + pullparser.feed(data) + root = pullparser._close_and_return_root() + yield from pullparser.read_events() + it.root = root + finally: + if close_source: + source.close() + + class IterParseIterator(collections.Iterator): + __next__ = iterator().__next__ + it = IterParseIterator() + it.root = None + del iterator, IterParseIterator + + close_source = False + if not hasattr(source, "read"): + source = open(source, "rb") + close_source = True + + return it + + +class XMLPullParser: + + def __init__(self, events=None, *, _parser=None): + # The _parser argument is for internal use only and must not be relied + # upon in user code. It will be removed in a future release. + # See http://bugs.python.org/issue17741 for more details. + + self._events_queue = collections.deque() + self._parser = _parser or XMLParser(target=TreeBuilder()) + # wire up the parser for event reporting + if events is None: + events = ("end",) + self._parser._setevents(self._events_queue, events) + + def feed(self, data): + """Feed encoded data to parser.""" + if self._parser is None: + raise ValueError("feed() called after end of stream") + if data: + try: + self._parser.feed(data) + except SyntaxError as exc: + self._events_queue.append(exc) + + def _close_and_return_root(self): + # iterparse needs this to set its root attribute properly :( + root = self._parser.close() + self._parser = None + return root + + def close(self): + """Finish feeding data to parser. + + Unlike XMLParser, does not return the root element. Use + read_events() to consume elements from XMLPullParser. + """ + self._close_and_return_root() + + def read_events(self): + """Return an iterator over currently available (event, elem) pairs. + + Events are consumed from the internal event queue as they are + retrieved from the iterator. + """ + events = self._events_queue + while events: + event = events.popleft() + if isinstance(event, Exception): + raise event + else: + yield event + + +def XML(text, parser=None): + """Parse XML document from string constant. + + This function can be used to embed "XML Literals" in Python code. + + *text* is a string containing XML data, *parser* is an + optional parser instance, defaulting to the standard XMLParser. + + Returns an Element instance. + + """ + if not parser: + parser = XMLParser(target=TreeBuilder()) + parser.feed(text) + return parser.close() + + +def XMLID(text, parser=None): + """Parse XML document from string constant for its IDs. + + *text* is a string containing XML data, *parser* is an + optional parser instance, defaulting to the standard XMLParser. + + Returns an (Element, dict) tuple, in which the + dict maps element id:s to elements. + + """ + if not parser: + parser = XMLParser(target=TreeBuilder()) + parser.feed(text) + tree = parser.close() + ids = {} + for elem in tree.iter(): + id = elem.get("id") + if id: + ids[id] = elem + return tree, ids + +# Parse XML document from string constant. Alias for XML(). +fromstring = XML + +def fromstringlist(sequence, parser=None): + """Parse XML document from sequence of string fragments. + + *sequence* is a list of other sequence, *parser* is an optional parser + instance, defaulting to the standard XMLParser. + + Returns an Element instance. 
+ + """ + if not parser: + parser = XMLParser(target=TreeBuilder()) + for text in sequence: + parser.feed(text) + return parser.close() + +# -------------------------------------------------------------------- + + +class TreeBuilder: + """Generic element structure builder. + + This builder converts a sequence of start, data, and end method + calls to a well-formed element structure. + + You can use this class to build an element structure using a custom XML + parser, or a parser for some other XML-like format. + + *element_factory* is an optional element factory which is called + to create new Element instances, as necessary. + + """ + def __init__(self, element_factory=None): + self._data = [] # data collector + self._elem = [] # element stack + self._last = None # last element + self._tail = None # true if we're after an end tag + if element_factory is None: + element_factory = Element + self._factory = element_factory + + def close(self): + """Flush builder buffers and return toplevel document Element.""" + assert len(self._elem) == 0, "missing end tags" + assert self._last is not None, "missing toplevel element" + return self._last + + def _flush(self): + if self._data: + if self._last is not None: + text = "".join(self._data) + if self._tail: + assert self._last.tail is None, "internal error (tail)" + self._last.tail = text + else: + assert self._last.text is None, "internal error (text)" + self._last.text = text + self._data = [] + + def data(self, data): + """Add text to current element.""" + self._data.append(data) + + def start(self, tag, attrs): + """Open new element and return it. + + *tag* is the element name, *attrs* is a dict containing element + attributes. + + """ + self._flush() + self._last = elem = self._factory(tag, attrs) + if self._elem: + self._elem[-1].append(elem) + self._elem.append(elem) + self._tail = 0 + return elem + + def end(self, tag): + """Close and return current Element. + + *tag* is the element name. + + """ + self._flush() + self._last = self._elem.pop() + assert self._last.tag == tag,\ + "end tag mismatch (expected %s, got %s)" % ( + self._last.tag, tag) + self._tail = 1 + return self._last + + +# also see ElementTree and TreeBuilder +class XMLParser: + """Element structure builder for XML source data based on the expat parser. 
+ + *html* are predefined HTML entities (deprecated and not supported), + *target* is an optional target object which defaults to an instance of the + standard TreeBuilder class, *encoding* is an optional encoding string + which if given, overrides the encoding specified in the XML file: + http://www.iana.org/assignments/character-sets + + """ + + def __init__(self, html=0, target=None, encoding=None): + try: + from xml.parsers import expat + except ImportError: + try: + import pyexpat as expat + except ImportError: + raise ImportError( + "No module named expat; use SimpleXMLTreeBuilder instead" + ) + parser = expat.ParserCreate(encoding, "}") + if target is None: + target = TreeBuilder() + # underscored names are provided for compatibility only + self.parser = self._parser = parser + self.target = self._target = target + self._error = expat.error + self._names = {} # name memo cache + # main callbacks + parser.DefaultHandlerExpand = self._default + if hasattr(target, 'start'): + parser.StartElementHandler = self._start + if hasattr(target, 'end'): + parser.EndElementHandler = self._end + if hasattr(target, 'data'): + parser.CharacterDataHandler = target.data + # miscellaneous callbacks + if hasattr(target, 'comment'): + parser.CommentHandler = target.comment + if hasattr(target, 'pi'): + parser.ProcessingInstructionHandler = target.pi + # Configure pyexpat: buffering, new-style attribute handling. + parser.buffer_text = 1 + parser.ordered_attributes = 1 + parser.specified_attributes = 1 + self._doctype = None + self.entity = {} + try: + self.version = "Expat %d.%d.%d" % expat.version_info + except AttributeError: + pass # unknown + + def _setevents(self, events_queue, events_to_report): + # Internal API for XMLPullParser + # events_to_report: a list of events to report during parsing (same as + # the *events* of XMLPullParser's constructor. + # events_queue: a list of actual parsing events that will be populated + # by the underlying parser. + # + parser = self._parser + append = events_queue.append + for event_name in events_to_report: + if event_name == "start": + parser.ordered_attributes = 1 + parser.specified_attributes = 1 + def handler(tag, attrib_in, event=event_name, append=append, + start=self._start): + append((event, start(tag, attrib_in))) + parser.StartElementHandler = handler + elif event_name == "end": + def handler(tag, event=event_name, append=append, + end=self._end): + append((event, end(tag))) + parser.EndElementHandler = handler + elif event_name == "start-ns": + def handler(prefix, uri, event=event_name, append=append): + append((event, (prefix or "", uri or ""))) + parser.StartNamespaceDeclHandler = handler + elif event_name == "end-ns": + def handler(prefix, event=event_name, append=append): + append((event, None)) + parser.EndNamespaceDeclHandler = handler + else: + raise ValueError("unknown event %r" % event_name) + + def _raiseerror(self, value): + err = ParseError(value) + err.code = value.code + err.position = value.lineno, value.offset + raise err + + def _fixname(self, key): + # expand qname, and convert name string to ascii, if possible + try: + name = self._names[key] + except KeyError: + name = key + if "}" in name: + name = "{" + name + self._names[key] = name + return name + + def _start(self, tag, attr_list): + # Handler for expat's StartElementHandler. Since ordered_attributes + # is set, the attributes are reported as a list of alternating + # attribute name,value. 
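+        # For example (hypothetical input), expat would deliver
+        #     tag = "root", attr_list = ["id", "1", "class", "x"]
+        # and the loop below rebuilds the mapping {"id": "1", "class": "x"}.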
+ fixname = self._fixname + tag = fixname(tag) + attrib = {} + if attr_list: + for i in range(0, len(attr_list), 2): + attrib[fixname(attr_list[i])] = attr_list[i+1] + return self.target.start(tag, attrib) + + def _end(self, tag): + return self.target.end(self._fixname(tag)) + + def _default(self, text): + prefix = text[:1] + if prefix == "&": + # deal with undefined entities + try: + data_handler = self.target.data + except AttributeError: + return + try: + data_handler(self.entity[text[1:-1]]) + except KeyError: + from xml.parsers import expat + err = expat.error( + "undefined entity %s: line %d, column %d" % + (text, self.parser.ErrorLineNumber, + self.parser.ErrorColumnNumber) + ) + err.code = 11 # XML_ERROR_UNDEFINED_ENTITY + err.lineno = self.parser.ErrorLineNumber + err.offset = self.parser.ErrorColumnNumber + raise err + elif prefix == "<" and text[:9] == "<!DOCTYPE": + self._doctype = [] # inside a doctype declaration + elif self._doctype is not None: + # parse doctype contents + if prefix == ">": + self._doctype = None + return + text = text.strip() + if not text: + return + self._doctype.append(text) + n = len(self._doctype) + if n > 2: + type = self._doctype[1] + if type == "PUBLIC" and n == 4: + name, type, pubid, system = self._doctype + if pubid: + pubid = pubid[1:-1] + elif type == "SYSTEM" and n == 3: + name, type, system = self._doctype + pubid = None + else: + return + if hasattr(self.target, "doctype"): + self.target.doctype(name, pubid, system[1:-1]) + elif self.doctype != self._XMLParser__doctype: + # warn about deprecated call + self._XMLParser__doctype(name, pubid, system[1:-1]) + self.doctype(name, pubid, system[1:-1]) + self._doctype = None + + def doctype(self, name, pubid, system): + """(Deprecated) Handle doctype declaration + + *name* is the Doctype name, *pubid* is the public identifier, + and *system* is the system identifier. + + """ + warnings.warn( + "This method of XMLParser is deprecated. Define doctype() " + "method on the TreeBuilder target.", + DeprecationWarning, + ) + + # sentinel, if doctype is redefined in a subclass + __doctype = doctype + + def feed(self, data): + """Feed encoded data to parser.""" + try: + self.parser.Parse(data, 0) + except self._error as v: + self._raiseerror(v) + + def close(self): + """Finish feeding data to parser and return element structure.""" + try: + self.parser.Parse("", 1) # end of data + except self._error as v: + self._raiseerror(v) + try: + close_handler = self.target.close + except AttributeError: + pass + else: + return close_handler() + finally: + # get rid of circular references + del self.parser, self._parser + del self.target, self._target + + +# Import the C accelerators +try: + # Element is going to be shadowed by the C implementation. 
We need to keep + # the Python version of it accessible for some "creative" by external code + # (see tests) + _Element_Py = Element + + # Element, SubElement, ParseError, TreeBuilder, XMLParser + from _elementtree import * +except ImportError: + pass diff --git a/modules/language/python/module/xml/etree/__pycache__/ElementInclude.cpython-36.pyc b/modules/language/python/module/xml/etree/__pycache__/ElementInclude.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..3d32db0 --- /dev/null +++ b/modules/language/python/module/xml/etree/__pycache__/ElementInclude.cpython-36.pyc diff --git a/modules/language/python/module/xml/etree/__pycache__/ElementPath.cpython-36.pyc b/modules/language/python/module/xml/etree/__pycache__/ElementPath.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..cec585b --- /dev/null +++ b/modules/language/python/module/xml/etree/__pycache__/ElementPath.cpython-36.pyc diff --git a/modules/language/python/module/xml/etree/__pycache__/ElementTree.cpython-36.pyc b/modules/language/python/module/xml/etree/__pycache__/ElementTree.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..fadd600 --- /dev/null +++ b/modules/language/python/module/xml/etree/__pycache__/ElementTree.cpython-36.pyc diff --git a/modules/language/python/module/xml/etree/__pycache__/__init__.cpython-36.pyc b/modules/language/python/module/xml/etree/__pycache__/__init__.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..40671c9 --- /dev/null +++ b/modules/language/python/module/xml/etree/__pycache__/__init__.cpython-36.pyc diff --git a/modules/language/python/module/xml/etree/__pycache__/cElementTree.cpython-36.pyc b/modules/language/python/module/xml/etree/__pycache__/cElementTree.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..1a44430 --- /dev/null +++ b/modules/language/python/module/xml/etree/__pycache__/cElementTree.cpython-36.pyc diff --git a/modules/language/python/module/xml/parser.py b/modules/language/python/module/xml/parser.py new file mode 100644 index 0000000..eb314a3 --- /dev/null +++ b/modules/language/python/module/xml/parser.py @@ -0,0 +1,8 @@ +"""Python interfaces to XML parsers. + +This package contains one module: + +expat -- Python wrapper for James Clark's Expat parser, with namespace + support. + +""" diff --git a/modules/language/python/module/xml/parsers.py~ b/modules/language/python/module/xml/parsers.py~ new file mode 100644 index 0000000..eb314a3 --- /dev/null +++ b/modules/language/python/module/xml/parsers.py~ @@ -0,0 +1,8 @@ +"""Python interfaces to XML parsers. + +This package contains one module: + +expat -- Python wrapper for James Clark's Expat parser, with namespace + support. + +""" diff --git a/modules/language/python/module/xml/parsers/__init__.py b/modules/language/python/module/xml/parsers/__init__.py new file mode 100644 index 0000000..eb314a3 --- /dev/null +++ b/modules/language/python/module/xml/parsers/__init__.py @@ -0,0 +1,8 @@ +"""Python interfaces to XML parsers. + +This package contains one module: + +expat -- Python wrapper for James Clark's Expat parser, with namespace + support. 
+ +""" diff --git a/modules/language/python/module/xml/parsers/__pycache__/__init__.cpython-36.pyc b/modules/language/python/module/xml/parsers/__pycache__/__init__.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..638e180 --- /dev/null +++ b/modules/language/python/module/xml/parsers/__pycache__/__init__.cpython-36.pyc diff --git a/modules/language/python/module/xml/parsers/__pycache__/expat.cpython-36.pyc b/modules/language/python/module/xml/parsers/__pycache__/expat.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..b22e9aa --- /dev/null +++ b/modules/language/python/module/xml/parsers/__pycache__/expat.cpython-36.pyc diff --git a/modules/language/python/module/xml/parsers/expat.py b/modules/language/python/module/xml/parsers/expat.py new file mode 100644 index 0000000..90ba0f0 --- /dev/null +++ b/modules/language/python/module/xml/parsers/expat.py @@ -0,0 +1,11 @@ +module(xml,parsers,expat) +"""Interface to the Expat non-validating XML parser.""" +import sys + +# +#from pyexpat import * +# +## provide pyexpat submodules as xml.parsers.expat submodules +#sys.modules['xml.parsers.expat.model'] = model +#sys.modules['xml.parsers.expat.errors'] = errors +# diff --git a/modules/language/python/module/xml/parsers/expat.py~ b/modules/language/python/module/xml/parsers/expat.py~ new file mode 100644 index 0000000..bcbe9fb --- /dev/null +++ b/modules/language/python/module/xml/parsers/expat.py~ @@ -0,0 +1,8 @@ +"""Interface to the Expat non-validating XML parser.""" +import sys + +from pyexpat import * + +# provide pyexpat submodules as xml.parsers.expat submodules +sys.modules['xml.parsers.expat.model'] = model +sys.modules['xml.parsers.expat.errors'] = errors diff --git a/modules/language/python/module/xml/sax.py~ b/modules/language/python/module/xml/sax.py~ new file mode 100644 index 0000000..ef67ae6 --- /dev/null +++ b/modules/language/python/module/xml/sax.py~ @@ -0,0 +1,107 @@ +"""Simple API for XML (SAX) implementation for Python. + +This module provides an implementation of the SAX 2 interface; +information about the Java version of the interface can be found at +http://www.megginson.com/SAX/. The Python version of the interface is +documented at <...>. + +This package contains the following modules: + +handler -- Base classes and constants which define the SAX 2 API for + the 'client-side' of SAX for Python. + +saxutils -- Implementation of the convenience classes commonly used to + work with SAX. + +xmlreader -- Base classes and constants which define the SAX 2 API for + the parsers used with SAX for Python. + +expatreader -- Driver that allows use of the Expat parser with SAX. 
+""" + +from .xmlreader import InputSource +from .handler import ContentHandler, ErrorHandler +from ._exceptions import SAXException, SAXNotRecognizedException, \ + SAXParseException, SAXNotSupportedException, \ + SAXReaderNotAvailable + + +def parse(source, handler, errorHandler=ErrorHandler()): + parser = make_parser() + parser.setContentHandler(handler) + parser.setErrorHandler(errorHandler) + parser.parse(source) + +def parseString(string, handler, errorHandler=ErrorHandler()): + import io + if errorHandler is None: + errorHandler = ErrorHandler() + parser = make_parser() + parser.setContentHandler(handler) + parser.setErrorHandler(errorHandler) + + inpsrc = InputSource() + if isinstance(string, str): + inpsrc.setCharacterStream(io.StringIO(string)) + else: + inpsrc.setByteStream(io.BytesIO(string)) + parser.parse(inpsrc) + +# this is the parser list used by the make_parser function if no +# alternatives are given as parameters to the function + +default_parser_list = ["xml.sax.expatreader"] + +# tell modulefinder that importing sax potentially imports expatreader +_false = 0 +if _false: + import xml.sax.expatreader + +import os, sys +if "PY_SAX_PARSER" in os.environ: + default_parser_list = os.environ["PY_SAX_PARSER"].split(",") +del os + +_key = "python.xml.sax.parser" +if sys.platform[:4] == "java" and sys.registry.containsKey(_key): + default_parser_list = sys.registry.getProperty(_key).split(",") + + +def make_parser(parser_list = []): + """Creates and returns a SAX parser. + + Creates the first parser it is able to instantiate of the ones + given in the list created by doing parser_list + + default_parser_list. The lists must contain the names of Python + modules containing both a SAX parser and a create_parser function.""" + + for parser_name in parser_list + default_parser_list: + try: + return _create_parser(parser_name) + except ImportError as e: + import sys + if parser_name in sys.modules: + # The parser module was found, but importing it + # failed unexpectedly, pass this exception through + raise + except SAXReaderNotAvailable: + # The parser module detected that it won't work properly, + # so try the next one + pass + + raise SAXReaderNotAvailable("No parsers found", None) + +# --- Internal utility methods used by make_parser + +if sys.platform[ : 4] == "java": + def _create_parser(parser_name): + from org.python.core import imp + drv_module = imp.importName(parser_name, 0, globals()) + return drv_module.create_parser() + +else: + def _create_parser(parser_name): + drv_module = __import__(parser_name,{},{},['create_parser']) + return drv_module.create_parser() + +del sys diff --git a/modules/language/python/module/xml/sax/__pycache__/__init__.cpython-36.pyc b/modules/language/python/module/xml/sax/__pycache__/__init__.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..c8a6bcc --- /dev/null +++ b/modules/language/python/module/xml/sax/__pycache__/__init__.cpython-36.pyc diff --git a/modules/language/python/module/xml/sax/__pycache__/_exceptions.cpython-36.pyc b/modules/language/python/module/xml/sax/__pycache__/_exceptions.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..7f60012 --- /dev/null +++ b/modules/language/python/module/xml/sax/__pycache__/_exceptions.cpython-36.pyc diff --git a/modules/language/python/module/xml/sax/__pycache__/expatreader.cpython-36.pyc b/modules/language/python/module/xml/sax/__pycache__/expatreader.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..c449778 --- /dev/null +++ 
b/modules/language/python/module/xml/sax/__pycache__/expatreader.cpython-36.pyc diff --git a/modules/language/python/module/xml/sax/__pycache__/handler.cpython-36.pyc b/modules/language/python/module/xml/sax/__pycache__/handler.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..c167188 --- /dev/null +++ b/modules/language/python/module/xml/sax/__pycache__/handler.cpython-36.pyc diff --git a/modules/language/python/module/xml/sax/__pycache__/saxutils.cpython-36.pyc b/modules/language/python/module/xml/sax/__pycache__/saxutils.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..645cd61 --- /dev/null +++ b/modules/language/python/module/xml/sax/__pycache__/saxutils.cpython-36.pyc diff --git a/modules/language/python/module/xml/sax/__pycache__/xmlreader.cpython-36.pyc b/modules/language/python/module/xml/sax/__pycache__/xmlreader.cpython-36.pyc Binary files differnew file mode 100644 index 0000000..5f37c19 --- /dev/null +++ b/modules/language/python/module/xml/sax/__pycache__/xmlreader.cpython-36.pyc diff --git a/modules/language/python/module/xml/sax/_exceptions.py~ b/modules/language/python/module/xml/sax/_exceptions.py~ new file mode 100644 index 0000000..a9b2ba3 --- /dev/null +++ b/modules/language/python/module/xml/sax/_exceptions.py~ @@ -0,0 +1,131 @@ +"""Different kinds of SAX Exceptions""" +import sys +if sys.platform[:4] == "java": + from java.lang import Exception +del sys + +# ===== SAXEXCEPTION ===== + +class SAXException(Exception): + """Encapsulate an XML error or warning. This class can contain + basic error or warning information from either the XML parser or + the application: you can subclass it to provide additional + functionality, or to add localization. Note that although you will + receive a SAXException as the argument to the handlers in the + ErrorHandler interface, you are not actually required to raise + the exception; instead, you can simply read the information in + it.""" + + def __init__(self, msg, exception=None): + """Creates an exception. The message is required, but the exception + is optional.""" + self._msg = msg + self._exception = exception + Exception.__init__(self, msg) + + def getMessage(self): + "Return a message for this exception." + return self._msg + + def getException(self): + "Return the embedded exception, or None if there was none." + return self._exception + + def __str__(self): + "Create a string representation of the exception." + return self._msg + + def __getitem__(self, ix): + """Avoids weird error messages if someone does exception[ix] by + mistake, since Exception has __getitem__ defined.""" + raise AttributeError("__getitem__") + + +# ===== SAXPARSEEXCEPTION ===== + +class SAXParseException(SAXException): + """Encapsulate an XML parse error or warning. + + This exception will include information for locating the error in + the original XML document. Note that although the application will + receive a SAXParseException as the argument to the handlers in the + ErrorHandler interface, the application is not actually required + to raise the exception; instead, it can simply read the + information in it and take a different action. + + Since this exception is a subclass of SAXException, it inherits + the ability to wrap another exception.""" + + def __init__(self, msg, exception, locator): + "Creates the exception. The exception parameter is allowed to be None." + SAXException.__init__(self, msg, exception) + self._locator = locator + + # We need to cache this stuff at construction time. 
+ # If this exception is raised, the objects through which we must + # traverse to get this information may be deleted by the time + # it gets caught. + self._systemId = self._locator.getSystemId() + self._colnum = self._locator.getColumnNumber() + self._linenum = self._locator.getLineNumber() + + def getColumnNumber(self): + """The column number of the end of the text where the exception + occurred.""" + return self._colnum + + def getLineNumber(self): + "The line number of the end of the text where the exception occurred." + return self._linenum + + def getPublicId(self): + "Get the public identifier of the entity where the exception occurred." + return self._locator.getPublicId() + + def getSystemId(self): + "Get the system identifier of the entity where the exception occurred." + return self._systemId + + def __str__(self): + "Create a string representation of the exception." + sysid = self.getSystemId() + if sysid is None: + sysid = "<unknown>" + linenum = self.getLineNumber() + if linenum is None: + linenum = "?" + colnum = self.getColumnNumber() + if colnum is None: + colnum = "?" + return "%s:%s:%s: %s" % (sysid, linenum, colnum, self._msg) + + +# ===== SAXNOTRECOGNIZEDEXCEPTION ===== + +class SAXNotRecognizedException(SAXException): + """Exception class for an unrecognized identifier. + + An XMLReader will raise this exception when it is confronted with an + unrecognized feature or property. SAX applications and extensions may + use this class for similar purposes.""" + + +# ===== SAXNOTSUPPORTEDEXCEPTION ===== + +class SAXNotSupportedException(SAXException): + """Exception class for an unsupported operation. + + An XMLReader will raise this exception when a service it cannot + perform is requested (specifically setting a state or value). SAX + applications and extensions may use this class for similar + purposes.""" + +# ===== SAXNOTSUPPORTEDEXCEPTION ===== + +class SAXReaderNotAvailable(SAXNotSupportedException): + """Exception class for a missing driver. + + An XMLReader module (driver) should raise this exception when it + is first imported, e.g. when a support module cannot be imported. + It also may be raised during parsing, e.g. if executing an external + program is not permitted.""" diff --git a/modules/language/python/module/xml/sax/expatreader.py~ b/modules/language/python/module/xml/sax/expatreader.py~ new file mode 100644 index 0000000..421358f --- /dev/null +++ b/modules/language/python/module/xml/sax/expatreader.py~ @@ -0,0 +1,446 @@ +""" +SAX driver for the pyexpat C module. This driver works with +pyexpat.__version__ == '2.22'. 
+""" + +version = "0.20" + +from xml.sax._exceptions import * +from xml.sax.handler import feature_validation, feature_namespaces +from xml.sax.handler import feature_namespace_prefixes +from xml.sax.handler import feature_external_ges, feature_external_pes +from xml.sax.handler import feature_string_interning +from xml.sax.handler import property_xml_string, property_interning_dict + +# xml.parsers.expat does not raise ImportError in Jython +import sys +if sys.platform[:4] == "java": + raise SAXReaderNotAvailable("expat not available in Java", None) +del sys + +try: + from xml.parsers import expat +except ImportError: + raise SAXReaderNotAvailable("expat not supported", None) +else: + if not hasattr(expat, "ParserCreate"): + raise SAXReaderNotAvailable("expat not supported", None) +from xml.sax import xmlreader, saxutils, handler + +AttributesImpl = xmlreader.AttributesImpl +AttributesNSImpl = xmlreader.AttributesNSImpl + +# If we're using a sufficiently recent version of Python, we can use +# weak references to avoid cycles between the parser and content +# handler, otherwise we'll just have to pretend. +try: + import _weakref +except ImportError: + def _mkproxy(o): + return o +else: + import weakref + _mkproxy = weakref.proxy + del weakref, _weakref + +class _ClosedParser: + pass + +# --- ExpatLocator + +class ExpatLocator(xmlreader.Locator): + """Locator for use with the ExpatParser class. + + This uses a weak reference to the parser object to avoid creating + a circular reference between the parser and the content handler. + """ + def __init__(self, parser): + self._ref = _mkproxy(parser) + + def getColumnNumber(self): + parser = self._ref + if parser._parser is None: + return None + return parser._parser.ErrorColumnNumber + + def getLineNumber(self): + parser = self._ref + if parser._parser is None: + return 1 + return parser._parser.ErrorLineNumber + + def getPublicId(self): + parser = self._ref + if parser is None: + return None + return parser._source.getPublicId() + + def getSystemId(self): + parser = self._ref + if parser is None: + return None + return parser._source.getSystemId() + + +# --- ExpatParser + +class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): + """SAX driver for the pyexpat C module.""" + + def __init__(self, namespaceHandling=0, bufsize=2**16-20): + xmlreader.IncrementalParser.__init__(self, bufsize) + self._source = xmlreader.InputSource() + self._parser = None + self._namespaces = namespaceHandling + self._lex_handler_prop = None + self._parsing = 0 + self._entity_stack = [] + self._external_ges = 1 + self._interning = None + + # XMLReader methods + + def parse(self, source): + "Parse an XML document from a URL or an InputSource." 
+ source = saxutils.prepare_input_source(source) + + self._source = source + try: + self.reset() + self._cont_handler.setDocumentLocator(ExpatLocator(self)) + xmlreader.IncrementalParser.parse(self, source) + except: + # bpo-30264: Close the source on error to not leak resources: + # xml.sax.parse() doesn't give access to the underlying parser + # to the caller + self._close_source() + raise + + def prepareParser(self, source): + if source.getSystemId() is not None: + self._parser.SetBase(source.getSystemId()) + + # Redefined setContentHandler to allow changing handlers during parsing + + def setContentHandler(self, handler): + xmlreader.IncrementalParser.setContentHandler(self, handler) + if self._parsing: + self._reset_cont_handler() + + def getFeature(self, name): + if name == feature_namespaces: + return self._namespaces + elif name == feature_string_interning: + return self._interning is not None + elif name in (feature_validation, feature_external_pes, + feature_namespace_prefixes): + return 0 + elif name == feature_external_ges: + return self._external_ges + raise SAXNotRecognizedException("Feature '%s' not recognized" % name) + + def setFeature(self, name, state): + if self._parsing: + raise SAXNotSupportedException("Cannot set features while parsing") + + if name == feature_namespaces: + self._namespaces = state + elif name == feature_external_ges: + self._external_ges = state + elif name == feature_string_interning: + if state: + if self._interning is None: + self._interning = {} + else: + self._interning = None + elif name == feature_validation: + if state: + raise SAXNotSupportedException( + "expat does not support validation") + elif name == feature_external_pes: + if state: + raise SAXNotSupportedException( + "expat does not read external parameter entities") + elif name == feature_namespace_prefixes: + if state: + raise SAXNotSupportedException( + "expat does not report namespace prefixes") + else: + raise SAXNotRecognizedException( + "Feature '%s' not recognized" % name) + + def getProperty(self, name): + if name == handler.property_lexical_handler: + return self._lex_handler_prop + elif name == property_interning_dict: + return self._interning + elif name == property_xml_string: + if self._parser: + if hasattr(self._parser, "GetInputContext"): + return self._parser.GetInputContext() + else: + raise SAXNotRecognizedException( + "This version of expat does not support getting" + " the XML string") + else: + raise SAXNotSupportedException( + "XML string cannot be returned when not parsing") + raise SAXNotRecognizedException("Property '%s' not recognized" % name) + + def setProperty(self, name, value): + if name == handler.property_lexical_handler: + self._lex_handler_prop = value + if self._parsing: + self._reset_lex_handler_prop() + elif name == property_interning_dict: + self._interning = value + elif name == property_xml_string: + raise SAXNotSupportedException("Property '%s' cannot be set" % + name) + else: + raise SAXNotRecognizedException("Property '%s' not recognized" % + name) + + # IncrementalParser methods + + def feed(self, data, isFinal = 0): + if not self._parsing: + self.reset() + self._parsing = 1 + self._cont_handler.startDocument() + + try: + # The isFinal parameter is internal to the expat reader. + # If it is set to true, expat will check validity of the entire + # document. When feeding chunks, they are not normally final - + # except when invoked from close. 
+ self._parser.Parse(data, isFinal) + except expat.error as e: + exc = SAXParseException(expat.ErrorString(e.code), e, self) + # FIXME: when to invoke error()? + self._err_handler.fatalError(exc) + + def _close_source(self): + source = self._source + try: + file = source.getCharacterStream() + if file is not None: + file.close() + finally: + file = source.getByteStream() + if file is not None: + file.close() + + def close(self): + if (self._entity_stack or self._parser is None or + isinstance(self._parser, _ClosedParser)): + # If we are completing an external entity, do nothing here + return + try: + self.feed("", isFinal = 1) + self._cont_handler.endDocument() + self._parsing = 0 + # break cycle created by expat handlers pointing to our methods + self._parser = None + finally: + self._parsing = 0 + if self._parser is not None: + # Keep ErrorColumnNumber and ErrorLineNumber after closing. + parser = _ClosedParser() + parser.ErrorColumnNumber = self._parser.ErrorColumnNumber + parser.ErrorLineNumber = self._parser.ErrorLineNumber + self._parser = parser + self._close_source() + + def _reset_cont_handler(self): + self._parser.ProcessingInstructionHandler = \ + self._cont_handler.processingInstruction + self._parser.CharacterDataHandler = self._cont_handler.characters + + def _reset_lex_handler_prop(self): + lex = self._lex_handler_prop + parser = self._parser + if lex is None: + parser.CommentHandler = None + parser.StartCdataSectionHandler = None + parser.EndCdataSectionHandler = None + parser.StartDoctypeDeclHandler = None + parser.EndDoctypeDeclHandler = None + else: + parser.CommentHandler = lex.comment + parser.StartCdataSectionHandler = lex.startCDATA + parser.EndCdataSectionHandler = lex.endCDATA + parser.StartDoctypeDeclHandler = self.start_doctype_decl + parser.EndDoctypeDeclHandler = lex.endDTD + + def reset(self): + if self._namespaces: + self._parser = expat.ParserCreate(self._source.getEncoding(), " ", + intern=self._interning) + self._parser.namespace_prefixes = 1 + self._parser.StartElementHandler = self.start_element_ns + self._parser.EndElementHandler = self.end_element_ns + else: + self._parser = expat.ParserCreate(self._source.getEncoding(), + intern = self._interning) + self._parser.StartElementHandler = self.start_element + self._parser.EndElementHandler = self.end_element + + self._reset_cont_handler() + self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl + self._parser.NotationDeclHandler = self.notation_decl + self._parser.StartNamespaceDeclHandler = self.start_namespace_decl + self._parser.EndNamespaceDeclHandler = self.end_namespace_decl + + self._decl_handler_prop = None + if self._lex_handler_prop: + self._reset_lex_handler_prop() +# self._parser.DefaultHandler = +# self._parser.DefaultHandlerExpand = +# self._parser.NotStandaloneHandler = + self._parser.ExternalEntityRefHandler = self.external_entity_ref + try: + self._parser.SkippedEntityHandler = self.skipped_entity_handler + except AttributeError: + # This pyexpat does not support SkippedEntity + pass + self._parser.SetParamEntityParsing( + expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) + + self._parsing = 0 + self._entity_stack = [] + + # Locator methods + + def getColumnNumber(self): + if self._parser is None: + return None + return self._parser.ErrorColumnNumber + + def getLineNumber(self): + if self._parser is None: + return 1 + return self._parser.ErrorLineNumber + + def getPublicId(self): + return self._source.getPublicId() + + def getSystemId(self): + return self._source.getSystemId() 
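+    # Note on the handlers below: because reset() creates the expat parser
+    # with " " as the namespace separator and sets namespace_prefixes, element
+    # and attribute names arrive as space-separated strings, e.g. (URI is
+    # illustrative) "http://example.org/ns item p" for a prefixed name,
+    # "http://example.org/ns item" under a default namespace, or just "item"
+    # when no namespace applies; start_element_ns/end_element_ns split these
+    # back into (uri, localname) pairs for the ContentHandler.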
+ + # event handlers + def start_element(self, name, attrs): + self._cont_handler.startElement(name, AttributesImpl(attrs)) + + def end_element(self, name): + self._cont_handler.endElement(name) + + def start_element_ns(self, name, attrs): + pair = name.split() + if len(pair) == 1: + # no namespace + pair = (None, name) + elif len(pair) == 3: + pair = pair[0], pair[1] + else: + # default namespace + pair = tuple(pair) + + newattrs = {} + qnames = {} + for (aname, value) in attrs.items(): + parts = aname.split() + length = len(parts) + if length == 1: + # no namespace + qname = aname + apair = (None, aname) + elif length == 3: + qname = "%s:%s" % (parts[2], parts[1]) + apair = parts[0], parts[1] + else: + # default namespace + qname = parts[1] + apair = tuple(parts) + + newattrs[apair] = value + qnames[apair] = qname + + self._cont_handler.startElementNS(pair, None, + AttributesNSImpl(newattrs, qnames)) + + def end_element_ns(self, name): + pair = name.split() + if len(pair) == 1: + pair = (None, name) + elif len(pair) == 3: + pair = pair[0], pair[1] + else: + pair = tuple(pair) + + self._cont_handler.endElementNS(pair, None) + + # this is not used (call directly to ContentHandler) + def processing_instruction(self, target, data): + self._cont_handler.processingInstruction(target, data) + + # this is not used (call directly to ContentHandler) + def character_data(self, data): + self._cont_handler.characters(data) + + def start_namespace_decl(self, prefix, uri): + self._cont_handler.startPrefixMapping(prefix, uri) + + def end_namespace_decl(self, prefix): + self._cont_handler.endPrefixMapping(prefix) + + def start_doctype_decl(self, name, sysid, pubid, has_internal_subset): + self._lex_handler_prop.startDTD(name, pubid, sysid) + + def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name): + self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name) + + def notation_decl(self, name, base, sysid, pubid): + self._dtd_handler.notationDecl(name, pubid, sysid) + + def external_entity_ref(self, context, base, sysid, pubid): + if not self._external_ges: + return 1 + + source = self._ent_handler.resolveEntity(pubid, sysid) + source = saxutils.prepare_input_source(source, + self._source.getSystemId() or + "") + + self._entity_stack.append((self._parser, self._source)) + self._parser = self._parser.ExternalEntityParserCreate(context) + self._source = source + + try: + xmlreader.IncrementalParser.parse(self, source) + except: + return 0 # FIXME: save error info here? + + (self._parser, self._source) = self._entity_stack[-1] + del self._entity_stack[-1] + return 1 + + def skipped_entity_handler(self, name, is_pe): + if is_pe: + # The SAX spec requires to report skipped PEs with a '%' + name = '%'+name + self._cont_handler.skippedEntity(name) + +# --- + +def create_parser(*args, **kwargs): + return ExpatParser(*args, **kwargs) + +# --- + +if __name__ == "__main__": + import xml.sax.saxutils + p = create_parser() + p.setContentHandler(xml.sax.saxutils.XMLGenerator()) + p.setErrorHandler(xml.sax.ErrorHandler()) + p.parse("http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml") diff --git a/modules/language/python/module/xml/sax/handler.py~ b/modules/language/python/module/xml/sax/handler.py~ new file mode 100644 index 0000000..481733d --- /dev/null +++ b/modules/language/python/module/xml/sax/handler.py~ @@ -0,0 +1,342 @@ +""" +This module contains the core classes of version 2.0 of SAX for Python. 
+This file provides only default classes with absolutely minimum +functionality, from which drivers and applications can be subclassed. + +Many of these classes are empty and are included only as documentation +of the interfaces. + +$Id$ +""" + +version = '2.0beta' + +#============================================================================ +# +# HANDLER INTERFACES +# +#============================================================================ + +# ===== ERRORHANDLER ===== + +class ErrorHandler: + """Basic interface for SAX error handlers. + + If you create an object that implements this interface, then + register the object with your XMLReader, the parser will call the + methods in your object to report all warnings and errors. There + are three levels of errors available: warnings, (possibly) + recoverable errors, and unrecoverable errors. All methods take a + SAXParseException as the only parameter.""" + + def error(self, exception): + "Handle a recoverable error." + raise exception + + def fatalError(self, exception): + "Handle a non-recoverable error." + raise exception + + def warning(self, exception): + "Handle a warning." + print(exception) + + +# ===== CONTENTHANDLER ===== + +class ContentHandler: + """Interface for receiving logical document content events. + + This is the main callback interface in SAX, and the one most + important to applications. The order of events in this interface + mirrors the order of the information in the document.""" + + def __init__(self): + self._locator = None + + def setDocumentLocator(self, locator): + """Called by the parser to give the application a locator for + locating the origin of document events. + + SAX parsers are strongly encouraged (though not absolutely + required) to supply a locator: if it does so, it must supply + the locator to the application by invoking this method before + invoking any of the other methods in the DocumentHandler + interface. + + The locator allows the application to determine the end + position of any document-related event, even if the parser is + not reporting an error. Typically, the application will use + this information for reporting its own errors (such as + character content that does not match an application's + business rules). The information returned by the locator is + probably not sufficient for use with a search engine. + + Note that the locator will return correct information only + during the invocation of the events in this interface. The + application should not attempt to use it at any other time.""" + self._locator = locator + + def startDocument(self): + """Receive notification of the beginning of a document. + + The SAX parser will invoke this method only once, before any + other methods in this interface or in DTDHandler (except for + setDocumentLocator).""" + + def endDocument(self): + """Receive notification of the end of a document. + + The SAX parser will invoke this method only once, and it will + be the last method invoked during the parse. The parser shall + not invoke this method until it has either abandoned parsing + (because of an unrecoverable error) or reached the end of + input.""" + + def startPrefixMapping(self, prefix, uri): + """Begin the scope of a prefix-URI Namespace mapping. + + The information from this event is not necessary for normal + Namespace processing: the SAX XML reader will automatically + replace prefixes for element and attribute names when the + http://xml.org/sax/features/namespaces feature is true (the + default). 
+ + There are cases, however, when applications need to use + prefixes in character data or in attribute values, where they + cannot safely be expanded automatically; the + start/endPrefixMapping event supplies the information to the + application to expand prefixes in those contexts itself, if + necessary. + + Note that start/endPrefixMapping events are not guaranteed to + be properly nested relative to each-other: all + startPrefixMapping events will occur before the corresponding + startElement event, and all endPrefixMapping events will occur + after the corresponding endElement event, but their order is + not guaranteed.""" + + def endPrefixMapping(self, prefix): + """End the scope of a prefix-URI mapping. + + See startPrefixMapping for details. This event will always + occur after the corresponding endElement event, but the order + of endPrefixMapping events is not otherwise guaranteed.""" + + def startElement(self, name, attrs): + """Signals the start of an element in non-namespace mode. + + The name parameter contains the raw XML 1.0 name of the + element type as a string and the attrs parameter holds an + instance of the Attributes class containing the attributes of + the element.""" + + def endElement(self, name): + """Signals the end of an element in non-namespace mode. + + The name parameter contains the name of the element type, just + as with the startElement event.""" + + def startElementNS(self, name, qname, attrs): + """Signals the start of an element in namespace mode. + + The name parameter contains the name of the element type as a + (uri, localname) tuple, the qname parameter the raw XML 1.0 + name used in the source document, and the attrs parameter + holds an instance of the Attributes class containing the + attributes of the element. + + The uri part of the name tuple is None for elements which have + no namespace.""" + + def endElementNS(self, name, qname): + """Signals the end of an element in namespace mode. + + The name parameter contains the name of the element type, just + as with the startElementNS event.""" + + def characters(self, content): + """Receive notification of character data. + + The Parser will call this method to report each chunk of + character data. SAX parsers may return all contiguous + character data in a single chunk, or they may split it into + several chunks; however, all of the characters in any single + event must come from the same external entity so that the + Locator provides useful information.""" + + def ignorableWhitespace(self, whitespace): + """Receive notification of ignorable whitespace in element content. + + Validating Parsers must use this method to report each chunk + of ignorable whitespace (see the W3C XML 1.0 recommendation, + section 2.10): non-validating parsers may also use this method + if they are capable of parsing and using content models. + + SAX parsers may return all contiguous whitespace in a single + chunk, or they may split it into several chunks; however, all + of the characters in any single event must come from the same + external entity, so that the Locator provides useful + information.""" + + def processingInstruction(self, target, data): + """Receive notification of a processing instruction. + + The Parser will invoke this method once for each processing + instruction found: note that processing instructions may occur + before or after the main document element. 
+ + A SAX parser should never report an XML declaration (XML 1.0, + section 2.8) or a text declaration (XML 1.0, section 4.3.1) + using this method.""" + + def skippedEntity(self, name): + """Receive notification of a skipped entity. + + The Parser will invoke this method once for each entity + skipped. Non-validating processors may skip entities if they + have not seen the declarations (because, for example, the + entity was declared in an external DTD subset). All processors + may skip external entities, depending on the values of the + http://xml.org/sax/features/external-general-entities and the + http://xml.org/sax/features/external-parameter-entities + properties.""" + + +# ===== DTDHandler ===== + +class DTDHandler: + """Handle DTD events. + + This interface specifies only those DTD events required for basic + parsing (unparsed entities and attributes).""" + + def notationDecl(self, name, publicId, systemId): + "Handle a notation declaration event." + + def unparsedEntityDecl(self, name, publicId, systemId, ndata): + "Handle an unparsed entity declaration event." + + +# ===== ENTITYRESOLVER ===== + +class EntityResolver: + """Basic interface for resolving entities. If you create an object + implementing this interface, then register the object with your + Parser, the parser will call the method in your object to + resolve all external entities. Note that DefaultHandler implements + this interface with the default behaviour.""" + + def resolveEntity(self, publicId, systemId): + """Resolve the system identifier of an entity and return either + the system identifier to read from as a string, or an InputSource + to read from.""" + return systemId + + +#============================================================================ +# +# CORE FEATURES +# +#============================================================================ + +feature_namespaces = "http://xml.org/sax/features/namespaces" +# true: Perform Namespace processing (default). +# false: Optionally do not perform Namespace processing +# (implies namespace-prefixes). +# access: (parsing) read-only; (not parsing) read/write + +feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes" +# true: Report the original prefixed names and attributes used for Namespace +# declarations. +# false: Do not report attributes used for Namespace declarations, and +# optionally do not report original prefixed names (default). +# access: (parsing) read-only; (not parsing) read/write + +feature_string_interning = "http://xml.org/sax/features/string-interning" +# true: All element names, prefixes, attribute names, Namespace URIs, and +# local names are interned using the built-in intern function. +# false: Names are not necessarily interned, although they may be (default). +# access: (parsing) read-only; (not parsing) read/write + +feature_validation = "http://xml.org/sax/features/validation" +# true: Report all validation errors (implies external-general-entities and +# external-parameter-entities). +# false: Do not report validation errors. +# access: (parsing) read-only; (not parsing) read/write + +feature_external_ges = "http://xml.org/sax/features/external-general-entities" +# true: Include all external general (text) entities. +# false: Do not include external general entities. +# access: (parsing) read-only; (not parsing) read/write + +feature_external_pes = "http://xml.org/sax/features/external-parameter-entities" +# true: Include all external parameter entities, including the external +# DTD subset. 
+# false: Do not include any external parameter entities, even the external +# DTD subset. +# access: (parsing) read-only; (not parsing) read/write + +all_features = [feature_namespaces, + feature_namespace_prefixes, + feature_string_interning, + feature_validation, + feature_external_ges, + feature_external_pes] + + +#============================================================================ +# +# CORE PROPERTIES +# +#============================================================================ + +property_lexical_handler = "http://xml.org/sax/properties/lexical-handler" +# data type: xml.sax.sax2lib.LexicalHandler +# description: An optional extension handler for lexical events like comments. +# access: read/write + +property_declaration_handler = "http://xml.org/sax/properties/declaration-handler" +# data type: xml.sax.sax2lib.DeclHandler +# description: An optional extension handler for DTD-related events other +# than notations and unparsed entities. +# access: read/write + +property_dom_node = "http://xml.org/sax/properties/dom-node" +# data type: org.w3c.dom.Node +# description: When parsing, the current DOM node being visited if this is +# a DOM iterator; when not parsing, the root DOM node for +# iteration. +# access: (parsing) read-only; (not parsing) read/write + +property_xml_string = "http://xml.org/sax/properties/xml-string" +# data type: String +# description: The literal string of characters that was the source for +# the current event. +# access: read-only + +property_encoding = "http://www.python.org/sax/properties/encoding" +# data type: String +# description: The name of the encoding to assume for input data. +# access: write: set the encoding, e.g. established by a higher-level +# protocol. May change during parsing (e.g. after +# processing a META tag) +# read: return the current encoding (possibly established through +# auto-detection. +# initial value: UTF-8 +# + +property_interning_dict = "http://www.python.org/sax/properties/interning-dict" +# data type: Dictionary +# description: The dictionary used to intern common strings in the document +# access: write: Request that the parser uses a specific dictionary, to +# allow interning across different documents +# read: return the current interning dictionary, or None +# + +all_properties = [property_lexical_handler, + property_dom_node, + property_declaration_handler, + property_xml_string, + property_encoding, + property_interning_dict] diff --git a/modules/language/python/module/xml/sax/saxutils.py~ b/modules/language/python/module/xml/sax/saxutils.py~ new file mode 100644 index 0000000..a69c7f7 --- /dev/null +++ b/modules/language/python/module/xml/sax/saxutils.py~ @@ -0,0 +1,368 @@ +"""\ +A library of useful helper classes to the SAX classes, for the +convenience of application and driver writers. +""" + +import os, urllib.parse, urllib.request +import io +import codecs +from . import handler +from . import xmlreader + +def __dict_replace(s, d): + """Replace substrings of a string using a dictionary.""" + for key, value in d.items(): + s = s.replace(key, value) + return s + +def escape(data, entities={}): + """Escape &, <, and > in a string of data. + + You can escape other strings of data by passing a dictionary as + the optional entities parameter. The keys and values must all be + strings; each key will be replaced with its corresponding value. 
+ """ + + # must do ampersand first + data = data.replace("&", "&") + data = data.replace(">", ">") + data = data.replace("<", "<") + if entities: + data = __dict_replace(data, entities) + return data + +def unescape(data, entities={}): + """Unescape &, <, and > in a string of data. + + You can unescape other strings of data by passing a dictionary as + the optional entities parameter. The keys and values must all be + strings; each key will be replaced with its corresponding value. + """ + data = data.replace("<", "<") + data = data.replace(">", ">") + if entities: + data = __dict_replace(data, entities) + # must do ampersand last + return data.replace("&", "&") + +def quoteattr(data, entities={}): + """Escape and quote an attribute value. + + Escape &, <, and > in a string of data, then quote it for use as + an attribute value. The \" character will be escaped as well, if + necessary. + + You can escape other strings of data by passing a dictionary as + the optional entities parameter. The keys and values must all be + strings; each key will be replaced with its corresponding value. + """ + entities = entities.copy() + entities.update({'\n': ' ', '\r': ' ', '\t':'	'}) + data = escape(data, entities) + if '"' in data: + if "'" in data: + data = '"%s"' % data.replace('"', """) + else: + data = "'%s'" % data + else: + data = '"%s"' % data + return data + + +def _gettextwriter(out, encoding): + if out is None: + import sys + return sys.stdout + + if isinstance(out, io.TextIOBase): + # use a text writer as is + return out + + if isinstance(out, (codecs.StreamWriter, codecs.StreamReaderWriter)): + # use a codecs stream writer as is + return out + + # wrap a binary writer with TextIOWrapper + if isinstance(out, io.RawIOBase): + # Keep the original file open when the TextIOWrapper is + # destroyed + class _wrapper: + __class__ = out.__class__ + def __getattr__(self, name): + return getattr(out, name) + buffer = _wrapper() + buffer.close = lambda: None + else: + # This is to handle passed objects that aren't in the + # IOBase hierarchy, but just have a write method + buffer = io.BufferedIOBase() + buffer.writable = lambda: True + buffer.write = out.write + try: + # TextIOWrapper uses this methods to determine + # if BOM (for UTF-16, etc) should be added + buffer.seekable = out.seekable + buffer.tell = out.tell + except AttributeError: + pass + return io.TextIOWrapper(buffer, encoding=encoding, + errors='xmlcharrefreplace', + newline='\n', + write_through=True) + +class XMLGenerator(handler.ContentHandler): + + def __init__(self, out=None, encoding="iso-8859-1", short_empty_elements=False): + handler.ContentHandler.__init__(self) + out = _gettextwriter(out, encoding) + self._write = out.write + self._flush = out.flush + self._ns_contexts = [{}] # contains uri -> prefix dicts + self._current_context = self._ns_contexts[-1] + self._undeclared_ns_maps = [] + self._encoding = encoding + self._short_empty_elements = short_empty_elements + self._pending_start_element = False + + def _qname(self, name): + """Builds a qualified name from a (ns_url, localname) pair""" + if name[0]: + # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is + # bound by definition to http://www.w3.org/XML/1998/namespace. It + # does not need to be declared and will not usually be found in + # self._current_context. 
+ if 'http://www.w3.org/XML/1998/namespace' == name[0]: + return 'xml:' + name[1] + # The name is in a non-empty namespace + prefix = self._current_context[name[0]] + if prefix: + # If it is not the default namespace, prepend the prefix + return prefix + ":" + name[1] + # Return the unqualified name + return name[1] + + def _finish_pending_start_element(self,endElement=False): + if self._pending_start_element: + self._write('>') + self._pending_start_element = False + + # ContentHandler methods + + def startDocument(self): + self._write('<?xml version="1.0" encoding="%s"?>\n' % + self._encoding) + + def endDocument(self): + self._flush() + + def startPrefixMapping(self, prefix, uri): + self._ns_contexts.append(self._current_context.copy()) + self._current_context[uri] = prefix + self._undeclared_ns_maps.append((prefix, uri)) + + def endPrefixMapping(self, prefix): + self._current_context = self._ns_contexts[-1] + del self._ns_contexts[-1] + + def startElement(self, name, attrs): + self._finish_pending_start_element() + self._write('<' + name) + for (name, value) in attrs.items(): + self._write(' %s=%s' % (name, quoteattr(value))) + if self._short_empty_elements: + self._pending_start_element = True + else: + self._write(">") + + def endElement(self, name): + if self._pending_start_element: + self._write('/>') + self._pending_start_element = False + else: + self._write('</%s>' % name) + + def startElementNS(self, name, qname, attrs): + self._finish_pending_start_element() + self._write('<' + self._qname(name)) + + for prefix, uri in self._undeclared_ns_maps: + if prefix: + self._write(' xmlns:%s="%s"' % (prefix, uri)) + else: + self._write(' xmlns="%s"' % uri) + self._undeclared_ns_maps = [] + + for (name, value) in attrs.items(): + self._write(' %s=%s' % (self._qname(name), quoteattr(value))) + if self._short_empty_elements: + self._pending_start_element = True + else: + self._write(">") + + def endElementNS(self, name, qname): + if self._pending_start_element: + self._write('/>') + self._pending_start_element = False + else: + self._write('</%s>' % self._qname(name)) + + def characters(self, content): + if content: + self._finish_pending_start_element() + if not isinstance(content, str): + content = str(content, self._encoding) + self._write(escape(content)) + + def ignorableWhitespace(self, content): + if content: + self._finish_pending_start_element() + if not isinstance(content, str): + content = str(content, self._encoding) + self._write(content) + + def processingInstruction(self, target, data): + self._finish_pending_start_element() + self._write('<?%s %s?>' % (target, data)) + + +class XMLFilterBase(xmlreader.XMLReader): + """This class is designed to sit between an XMLReader and the + client application's event handlers. 
By default, it does nothing + but pass requests up to the reader and events on to the handlers + unmodified, but subclasses can override specific methods to modify + the event stream or the configuration requests as they pass + through.""" + + def __init__(self, parent = None): + xmlreader.XMLReader.__init__(self) + self._parent = parent + + # ErrorHandler methods + + def error(self, exception): + self._err_handler.error(exception) + + def fatalError(self, exception): + self._err_handler.fatalError(exception) + + def warning(self, exception): + self._err_handler.warning(exception) + + # ContentHandler methods + + def setDocumentLocator(self, locator): + self._cont_handler.setDocumentLocator(locator) + + def startDocument(self): + self._cont_handler.startDocument() + + def endDocument(self): + self._cont_handler.endDocument() + + def startPrefixMapping(self, prefix, uri): + self._cont_handler.startPrefixMapping(prefix, uri) + + def endPrefixMapping(self, prefix): + self._cont_handler.endPrefixMapping(prefix) + + def startElement(self, name, attrs): + self._cont_handler.startElement(name, attrs) + + def endElement(self, name): + self._cont_handler.endElement(name) + + def startElementNS(self, name, qname, attrs): + self._cont_handler.startElementNS(name, qname, attrs) + + def endElementNS(self, name, qname): + self._cont_handler.endElementNS(name, qname) + + def characters(self, content): + self._cont_handler.characters(content) + + def ignorableWhitespace(self, chars): + self._cont_handler.ignorableWhitespace(chars) + + def processingInstruction(self, target, data): + self._cont_handler.processingInstruction(target, data) + + def skippedEntity(self, name): + self._cont_handler.skippedEntity(name) + + # DTDHandler methods + + def notationDecl(self, name, publicId, systemId): + self._dtd_handler.notationDecl(name, publicId, systemId) + + def unparsedEntityDecl(self, name, publicId, systemId, ndata): + self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata) + + # EntityResolver methods + + def resolveEntity(self, publicId, systemId): + return self._ent_handler.resolveEntity(publicId, systemId) + + # XMLReader methods + + def parse(self, source): + self._parent.setContentHandler(self) + self._parent.setErrorHandler(self) + self._parent.setEntityResolver(self) + self._parent.setDTDHandler(self) + self._parent.parse(source) + + def setLocale(self, locale): + self._parent.setLocale(locale) + + def getFeature(self, name): + return self._parent.getFeature(name) + + def setFeature(self, name, state): + self._parent.setFeature(name, state) + + def getProperty(self, name): + return self._parent.getProperty(name) + + def setProperty(self, name, value): + self._parent.setProperty(name, value) + + # XMLFilter methods + + def getParent(self): + return self._parent + + def setParent(self, parent): + self._parent = parent + +# --- Utility functions + +def prepare_input_source(source, base=""): + """This function takes an InputSource and an optional base URL and + returns a fully resolved InputSource object ready for reading.""" + + if isinstance(source, str): + source = xmlreader.InputSource(source) + elif hasattr(source, "read"): + f = source + source = xmlreader.InputSource() + if isinstance(f.read(0), str): + source.setCharacterStream(f) + else: + source.setByteStream(f) + if hasattr(f, "name") and isinstance(f.name, str): + source.setSystemId(f.name) + + if source.getCharacterStream() is None and source.getByteStream() is None: + sysid = source.getSystemId() + basehead = 
os.path.dirname(os.path.normpath(base)) + sysidfilename = os.path.join(basehead, sysid) + if os.path.isfile(sysidfilename): + source.setSystemId(sysidfilename) + f = open(sysidfilename, "rb") + else: + source.setSystemId(urllib.parse.urljoin(base, sysid)) + f = urllib.request.urlopen(source.getSystemId()) + + source.setByteStream(f) + + return source diff --git a/modules/language/python/module/xml/sax/xmlreader.py~ b/modules/language/python/module/xml/sax/xmlreader.py~ new file mode 100644 index 0000000..716f228 --- /dev/null +++ b/modules/language/python/module/xml/sax/xmlreader.py~ @@ -0,0 +1,380 @@ +"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers +should be based on this code. """ + +from . import handler + +from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException + + +# ===== XMLREADER ===== + +class XMLReader: + """Interface for reading an XML document using callbacks. + + XMLReader is the interface that an XML parser's SAX2 driver must + implement. This interface allows an application to set and query + features and properties in the parser, to register event handlers + for document processing, and to initiate a document parse. + + All SAX interfaces are assumed to be synchronous: the parse + methods must not return until parsing is complete, and readers + must wait for an event-handler callback to return before reporting + the next event.""" + + def __init__(self): + self._cont_handler = handler.ContentHandler() + self._dtd_handler = handler.DTDHandler() + self._ent_handler = handler.EntityResolver() + self._err_handler = handler.ErrorHandler() + + def parse(self, source): + "Parse an XML document from a system identifier or an InputSource." + raise NotImplementedError("This method must be implemented!") + + def getContentHandler(self): + "Returns the current ContentHandler." + return self._cont_handler + + def setContentHandler(self, handler): + "Registers a new object to receive document content events." + self._cont_handler = handler + + def getDTDHandler(self): + "Returns the current DTD handler." + return self._dtd_handler + + def setDTDHandler(self, handler): + "Register an object to receive basic DTD-related events." + self._dtd_handler = handler + + def getEntityResolver(self): + "Returns the current EntityResolver." + return self._ent_handler + + def setEntityResolver(self, resolver): + "Register an object to resolve external entities." + self._ent_handler = resolver + + def getErrorHandler(self): + "Returns the current ErrorHandler." + return self._err_handler + + def setErrorHandler(self, handler): + "Register an object to receive error-message events." + self._err_handler = handler + + def setLocale(self, locale): + """Allow an application to set the locale for errors and warnings. + + SAX parsers are not required to provide localization for errors + and warnings; if they cannot support the requested locale, + however, they must raise a SAX exception. Applications may + request a locale change in the middle of a parse.""" + raise SAXNotSupportedException("Locale support not implemented") + + def getFeature(self, name): + "Looks up and returns the state of a SAX2 feature." + raise SAXNotRecognizedException("Feature '%s' not recognized" % name) + + def setFeature(self, name, state): + "Sets the state of a SAX2 feature." + raise SAXNotRecognizedException("Feature '%s' not recognized" % name) + + def getProperty(self, name): + "Looks up and returns the value of a SAX2 property." 
+ raise SAXNotRecognizedException("Property '%s' not recognized" % name) + + def setProperty(self, name, value): + "Sets the value of a SAX2 property." + raise SAXNotRecognizedException("Property '%s' not recognized" % name) + +class IncrementalParser(XMLReader): + """This interface adds three extra methods to the XMLReader + interface that allow XML parsers to support incremental + parsing. Support for this interface is optional, since not all + underlying XML parsers support this functionality. + + When the parser is instantiated it is ready to begin accepting + data from the feed method immediately. After parsing has been + finished with a call to close the reset method must be called to + make the parser ready to accept new data, either from feed or + using the parse method. + + Note that these methods must _not_ be called during parsing, that + is, after parse has been called and before it returns. + + By default, the class also implements the parse method of the XMLReader + interface using the feed, close and reset methods of the + IncrementalParser interface as a convenience to SAX 2.0 driver + writers.""" + + def __init__(self, bufsize=2**16): + self._bufsize = bufsize + XMLReader.__init__(self) + + def parse(self, source): + from . import saxutils + source = saxutils.prepare_input_source(source) + + self.prepareParser(source) + file = source.getCharacterStream() + if file is None: + file = source.getByteStream() + buffer = file.read(self._bufsize) + while buffer: + self.feed(buffer) + buffer = file.read(self._bufsize) + self.close() + + def feed(self, data): + """This method gives the raw XML data in the data parameter to + the parser and makes it parse the data, emitting the + corresponding events. It is allowed for XML constructs to be + split across several calls to feed. + + feed may raise SAXException.""" + raise NotImplementedError("This method must be implemented!") + + def prepareParser(self, source): + """This method is called by the parse implementation to allow + the SAX 2.0 driver to prepare itself for parsing.""" + raise NotImplementedError("prepareParser must be overridden!") + + def close(self): + """This method is called when the entire XML document has been + passed to the parser through the feed method, to notify the + parser that there are no more data. This allows the parser to + do the final checks on the document and empty the internal + data buffer. + + The parser will not be ready to parse another document until + the reset method has been called. + + close may raise SAXException.""" + raise NotImplementedError("This method must be implemented!") + + def reset(self): + """This method is called after close has been called to reset + the parser so that it is ready to parse new documents. The + results of calling parse or feed after close without calling + reset are undefined.""" + raise NotImplementedError("This method must be implemented!") + +# ===== LOCATOR ===== + +class Locator: + """Interface for associating a SAX event with a document + location. A locator object will return valid results only during + calls to DocumentHandler methods; at any other time, the + results are unpredictable.""" + + def getColumnNumber(self): + "Return the column number where the current event ends." + return -1 + + def getLineNumber(self): + "Return the line number where the current event ends." + return -1 + + def getPublicId(self): + "Return the public identifier for the current event." 
+ return None + + def getSystemId(self): + "Return the system identifier for the current event." + return None + +# ===== INPUTSOURCE ===== + +class InputSource: + """Encapsulation of the information needed by the XMLReader to + read entities. + + This class may include information about the public identifier, + system identifier, byte stream (possibly with character encoding + information) and/or the character stream of an entity. + + Applications will create objects of this class for use in the + XMLReader.parse method and for returning from + EntityResolver.resolveEntity. + + An InputSource belongs to the application, the XMLReader is not + allowed to modify InputSource objects passed to it from the + application, although it may make copies and modify those.""" + + def __init__(self, system_id = None): + self.__system_id = system_id + self.__public_id = None + self.__encoding = None + self.__bytefile = None + self.__charfile = None + + def setPublicId(self, public_id): + "Sets the public identifier of this InputSource." + self.__public_id = public_id + + def getPublicId(self): + "Returns the public identifier of this InputSource." + return self.__public_id + + def setSystemId(self, system_id): + "Sets the system identifier of this InputSource." + self.__system_id = system_id + + def getSystemId(self): + "Returns the system identifier of this InputSource." + return self.__system_id + + def setEncoding(self, encoding): + """Sets the character encoding of this InputSource. + + The encoding must be a string acceptable for an XML encoding + declaration (see section 4.3.3 of the XML recommendation). + + The encoding attribute of the InputSource is ignored if the + InputSource also contains a character stream.""" + self.__encoding = encoding + + def getEncoding(self): + "Get the character encoding of this InputSource." + return self.__encoding + + def setByteStream(self, bytefile): + """Set the byte stream (a Python file-like object which does + not perform byte-to-character conversion) for this input + source. + + The SAX parser will ignore this if there is also a character + stream specified, but it will use a byte stream in preference + to opening a URI connection itself. + + If the application knows the character encoding of the byte + stream, it should set it with the setEncoding method.""" + self.__bytefile = bytefile + + def getByteStream(self): + """Get the byte stream for this input source. + + The getEncoding method will return the character encoding for + this byte stream, or None if unknown.""" + return self.__bytefile + + def setCharacterStream(self, charfile): + """Set the character stream for this input source. (The stream + must be a Python 2.0 Unicode-wrapped file-like that performs + conversion to Unicode strings.) + + If there is a character stream specified, the SAX parser will + ignore any byte stream and will not attempt to open a URI + connection to the system identifier.""" + self.__charfile = charfile + + def getCharacterStream(self): + "Get the character stream for this input source." + return self.__charfile + +# ===== ATTRIBUTESIMPL ===== + +class AttributesImpl: + + def __init__(self, attrs): + """Non-NS-aware implementation. 
+ + attrs should be of the form {name : value}.""" + self._attrs = attrs + + def getLength(self): + return len(self._attrs) + + def getType(self, name): + return "CDATA" + + def getValue(self, name): + return self._attrs[name] + + def getValueByQName(self, name): + return self._attrs[name] + + def getNameByQName(self, name): + if name not in self._attrs: + raise KeyError(name) + return name + + def getQNameByName(self, name): + if name not in self._attrs: + raise KeyError(name) + return name + + def getNames(self): + return list(self._attrs.keys()) + + def getQNames(self): + return list(self._attrs.keys()) + + def __len__(self): + return len(self._attrs) + + def __getitem__(self, name): + return self._attrs[name] + + def keys(self): + return list(self._attrs.keys()) + + def __contains__(self, name): + return name in self._attrs + + def get(self, name, alternative=None): + return self._attrs.get(name, alternative) + + def copy(self): + return self.__class__(self._attrs) + + def items(self): + return list(self._attrs.items()) + + def values(self): + return list(self._attrs.values()) + +# ===== ATTRIBUTESNSIMPL ===== + +class AttributesNSImpl(AttributesImpl): + + def __init__(self, attrs, qnames): + """NS-aware implementation. + + attrs should be of the form {(ns_uri, lname): value, ...}. + qnames of the form {(ns_uri, lname): qname, ...}.""" + self._attrs = attrs + self._qnames = qnames + + def getValueByQName(self, name): + for (nsname, qname) in self._qnames.items(): + if qname == name: + return self._attrs[nsname] + + raise KeyError(name) + + def getNameByQName(self, name): + for (nsname, qname) in self._qnames.items(): + if qname == name: + return nsname + + raise KeyError(name) + + def getQNameByName(self, name): + return self._qnames[name] + + def getQNames(self): + return list(self._qnames.values()) + + def copy(self): + return self.__class__(self._attrs, self._qnames) + + +def _test(): + XMLReader() + IncrementalParser() + Locator() + +if __name__ == "__main__": + _test() diff --git a/modules/language/python/persist.go b/modules/language/python/persist.go Binary files differnew file mode 100644 index 0000000..736e41b --- /dev/null +++ b/modules/language/python/persist.go diff --git a/modules/language/python/set.scm b/modules/language/python/set.scm index 23fb552..340d70a 100644 --- a/modules/language/python/set.scm +++ b/modules/language/python/set.scm @@ -1,6 +1,7 @@ (define-module (language python set) #:use-module (oop pf-objects) #:use-module (oop goops) + #:use-module (ice-9 format) #:use-module (language python exceptions) #:use-module (language python dict) #:use-module (language python for) diff --git a/modules/language/python/spec.scm b/modules/language/python/spec.scm index 8451222..8291a14 100644 --- a/modules/language/python/spec.scm +++ b/modules/language/python/spec.scm @@ -1,4 +1,5 @@ (define-module (language python spec) + #:use-module (language python guilemod) #:use-module (parser stis-parser lang python3-parser) #:use-module ((language python module python) #:select ()) #:use-module (language python compile) @@ -31,7 +32,11 @@ (define (e x) (eval (c x) (current-module))) -(set! (@@ (ice-9 readline) *readline-completion-function*) (complete-fkn e)) +(catch #t + (lambda () + (set! 
(@@ (ice-9 readline) *readline-completion-function*) + (complete-fkn e))) + (lambda x #f)) (define-language python #:title "python" diff --git a/modules/language/python/string b/modules/language/python/string new file mode 100644 index 0000000..7d275bf --- /dev/null +++ b/modules/language/python/string @@ -0,0 +1,147 @@ +compile.scm: ((append) (L 'pylist-append!)) +compile.scm: ((count) (L 'pylist-count)) +compile.scm: ((extend) (L 'pylist-extend!)) +compile.scm: ((index) (L 'pylist-index)) +compile.scm: ((pop) (L 'pylist-pop!)) +compile.scm: ((insert) (L 'pylist-insert!)) +compile.scm: ((remove) (L 'pylist-remove!)) +compile.scm: ((reverse) (L 'pylist-reverse!)) +compile.scm: ((sort) (L 'pylist-sort!)) +compile.scm: ((__delitem__) (L 'pylist-delete!)) +compile.scm: ((__delslice__) (L 'pylist-delslice)) +compile.scm: ((__setitem__) (L 'pylist-set!)) +compile.scm: `(,(L 'pylist-ref) ,e ,(exp vs n))) +compile.scm: `(,(L 'pylist-slice) ,e +compile.scm: ,(gen-sel vs e `(,(L 'pylist-append!) ,l ,(exp vs x))) +compile.scm: ,(gen-sel vs e `(,(L 'pylist-set!) ,dict ,(exp vs k) ,(exp vs v))) +compile.scm: `(,(L 'pylist-set!) ,dict ,(exp vs k) ,(exp vs v))) +compile.scm: (ref-x (pylist-ref v x) . l)) +compile.scm: (ref-x (pylist-slice v . x) . l)))) +compile.scm: (pylist-delete! v x)) +compile.scm: (pylist-subset! v x ... pylist-null)))) +compile.scm: (pylist-set! v n val)) +compile.scm: (pylist-subset! v x ... val)))) +dict.scm:(define-method (pylist-ref (o <hashtable>) x) +dict.scm:(define-method (pylist-ref (o <py-hashtable>) x) +dict.scm:(define-method (pylist-delete! (o <hashtable>) k) +dict.scm:(define-method (pylist-delete! (o <py-hashtable>) k) +dict.scm:(define-method (pylist-pop! (o <hashtable>) k . l) +dict.scm:(define-method (pylist-pop! (o <py-hashtable>) k . l) +dict.scm:(define-method (pylist-set! (o <hashtable>) key val) +dict.scm:(define-method (pylist-set! (o <py-hashtable>) key val) +dict.scm: (pylist-set! h k newval) +dict.scm: (pylist-set! o k (apply py-get o k l))) +dict.scm: (pylist-set! o k (apply py-get o k l)))) +dict.scm: (pylist-set! o k v))) +dict.scm: (pylist-set! o k v)))) +dir.scm: (pylist-append! l k)) +dir.scm: (f (lambda (k v) (pylist-append! h k #t)))) +dir.scm: (hash-for-each (lambda (k v) (pylist-append! l k)) h) +dir.scm: (pylist-sort! l) +dir.scm: (f (lambda (k v) (pylist-append! h k #t)))) +dir.scm: (hash-for-each (lambda (k v) (pylist-append! l k)) h) +dir.scm: (pylist-sort! l) +dir.scm: (pylist-append! l k)) +dir.scm: (f (lambda (k.v) (pylist-append! h (car k.v) #t)))) +dir.scm: (vhash-for-each (lambda (k v) (pylist-append! l k)) h) +dir.scm: (pylist-sort! l) +dir.scm: (f (lambda (k.v) (pylist-append! h (car k.v) #t)))) +dir.scm: (vhash-for-each (lambda (k v) (pylist-append! l k)) h) +dir.scm: (pylist-sort! l) +dir.scm:(define-method (dir (o <pylist> )) pylist-listing) +expr.scm: (list-in x y)) +expr.scm: (to-py (list-in x y))) +list.scm: pylist-ref pylist-set! pylist-append! +list.scm: pylist-slice pylist-subset! pylist-reverse! +list.scm: pylist-pop! pylist-count pylist-extend! len in +list.scm: pylist-insert! pylist-remove! pylist-sort! +list.scm: pylist-index pylist-null pylist-delete! +list.scm: pylist pylist-listing)) +list.scm:(define-method (pylist-delete! (o <py-list>) k) +list.scm: (pylist-subset! o k (+ k 1) None pylist-null))) +list.scm:(define-method (pylist-delete! 
(o <p>) k) +list.scm:(define pylist-null +list.scm:(define-method (pylist-ref (o <py-list>) nin) +list.scm:(define-method (pylist-ref (o <pair>) n) +list.scm: (list-ref o n)) +list.scm:(define-method (pylist-ref (o <vector>) n) +list.scm:(define-method (pylist-ref (o <p>) n) +list.scm:(define-method (pylist-set! (o <py-list>) nin val) +list.scm:(define-method (pylist-set! (o <pair>) n val) +list.scm: (list-set! o n val)) +list.scm:(define-method (pylist-set! (o <vector>) n val) +list.scm:(define-method (pylist-set! (o <p>) n val) +list.scm:(define-method (pylist-slice (o <p>) n1 n2 n3) +list.scm:(define-method (pylist-slice (o <py-list>) n1 n2 n3) +list.scm:(define-method (pylist-slice (o <string>) n1 n2 n3) +list.scm: (list->string +list.scm: (pylist-slice (to-pylist o) n1 n2 n3)))) +list.scm:(define-method (pylist-slice (o <pair>) n1 n2 n3) +list.scm: (pylist-slice (to-pylist o) n1 n2 n3))) +list.scm:(define-method (pylist-slice (o <vector>) n1 n2 n3) +list.scm: (list->vector +list.scm: (pylist-slice (to-pylist o) n1 n2 n3)))) +list.scm:(define-method (pylist-subset! (o <p>) n1 n2 n3 val) +list.scm:(define-method (pylist-subset! (o <py-list>) n1 n2 n3 val) +list.scm:(define-method (pylist-append! (o <py-list>) val) +list.scm:(define-method (pylist-append! o n) +list.scm:(define-method (pylist-append! (o <p>) n . l) +list.scm:(define-method (pylist-reverse! (o <py-list>)) +list.scm:(define-method (pylist-reverse! (o <p>) . l) +list.scm:(define-method (pylist-pop! (o <py-list>)) +list.scm:(define-method (pylist-pop! (o <p>) . l) +list.scm:(define-method (pylist-count (o <py-list>) q) +list.scm:(define-method (pylist-count (s <string>) q) +list.scm:(define-method (pylist-count (l <pair>) q) +list.scm:(define-method (pylist-count (o <p>) . l) +list.scm:(define-method (pylist-extend! (o <py-list>) iter) +list.scm: (pylist-append! o x))) +list.scm:(define-method (pylist-extend! (o <p>) . l) +list.scm:(define-class <py-list-iter> (<py-list>) i) +list.scm: (let ((out (make <py-list-iter>))) +list.scm:(define-method (wrap-in (o <py-list-iter>)) o) +list.scm: (list->string (list ret))) +list.scm:(define-method (next (o <py-list-iter>)) +list.scm:(define-method (pylist-insert! (o <py-list>) i val) +list.scm: (pylist-append! o v))) +list.scm:(define-method (pylist-insert! (o <p>) . l) (apply (ref o 'insert) l)) +list.scm:(define-method (pylist-remove! (o <py-list>) val) +list.scm: (pylist-subset! o i (+ i 1) 1 '()) +list.scm:(define-method (pylist-remove! (o <p>) . l) (apply (ref o 'remove) l)) +list.scm:(define-method (pylist-sort! (o <py-list>) ) +list.scm: (pylist-set! o i (car l)) +list.scm:(define-method (pylist-sort! (o <p>) . l) (apply (ref o 'sort) l)) +list.scm:(define-method (pylist-index (o <py-list>) val . l) +list.scm:(define-method (pylist-index (o <string>) val . l) +list.scm:(define-method (pylist-index (o <pair>) val . l) +list.scm:(define-method (pylist-index (o <p>) . l) (apply (ref o 'index) l)) +list.scm: (for-each (lambda (x) (pylist-append! self x)) x)))) +list.scm:(define pylist-listing +list.scm:;(pylist-sort! pylist-listing) +set.scm: (pylist-set! d y #t))) +set.scm: (call-with-values (lambda () (pylist-pop! (slot-ref self 'dict))) +set.scm: (pylist-set! (slot-ref self 'dict) k #t))) +set.scm: (pylist-delete! r x))) +set.scm: (pylist-delete! r x))) +set.scm: (pylist-delete! r (car l)) +set.scm: (pylist-delete! r (car dels)) +set.scm: (pylist-delete! r (car dels)) +set.scm: (pylist-delete! r x))))) +set.scm: (pylist-set! r k #t)) +set.scm: (pylist-set! 
r k #t)) +string.scm: (list->string (reverse r)))))) +string.scm: (lp l (cons "~a" r) (cons (list-ref args (string->number x)) u) i)) +string.scm: (lp l (cons "~a" r) (cons (list-ref args i) u) (+ i 1))) +string.scm: (pylist-slice s 0 width) +string.scm: (pylist-slice s (- width) (len s)) +string.scm: (list (pylist-slice s 0 i) sep (pylist-slice s (+ i m) n)) +string.scm: (pylist-slice s (+ i m) n)) +string.scm: (pylist-slice s 0 i))) +string.scm: (lp (list-ref l 2) (cons (car l) r)))))) +string.scm: (- n (apply pylist-index (string-reverse s) l) 1))) +string.scm: (pylist-slice s old (+ i 1) 1) +string.scm: (pylist-slice s old i 1) +string.scm: (list->string +string.scm: (if (string-contains d (list->string (list ch))) +string.scm: (pylist-slice w 0 k 1)))))) +string.scm: (w (pk (pylist-slice s 0 n 1)))) diff --git a/modules/language/python/test.go b/modules/language/python/test.go Binary files differnew file mode 100644 index 0000000..a28a0a9 --- /dev/null +++ b/modules/language/python/test.go diff --git a/modules/language/python/test.py b/modules/language/python/test.py new file mode 100644 index 0000000..104d0b5 --- /dev/null +++ b/modules/language/python/test.py @@ -0,0 +1,10 @@ +def f(x): + def h(q): + return q + y + z + global y + y=x + z=1 + return y + x + +def g(): + return y,y diff --git a/modules/language/python/try.go b/modules/language/python/try.go Binary files differnew file mode 100644 index 0000000..24f385f --- /dev/null +++ b/modules/language/python/try.go diff --git a/modules/language/python/tuple.go b/modules/language/python/tuple.go Binary files differnew file mode 100644 index 0000000..deb3ce7 --- /dev/null +++ b/modules/language/python/tuple.go diff --git a/modules/language/python/with.go b/modules/language/python/with.go Binary files differnew file mode 100644 index 0000000..3525896 --- /dev/null +++ b/modules/language/python/with.go diff --git a/modules/language/python/yield.go b/modules/language/python/yield.go Binary files differnew file mode 100644 index 0000000..16fe75f --- /dev/null +++ b/modules/language/python/yield.go |
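
For orientation, here is a small usage sketch (not part of the diff) exercising the SAX helpers ported above. It assumes the stock CPython package layout (xml.sax.saxutils); whether the same import paths resolve when these modules are compiled under the Guile Python front end is an assumption, so treat it as illustrative only.

# Illustrative sketch: escape()/quoteattr() and XMLGenerator from the
# saxutils module added in this diff. Import paths assume CPython's
# xml.sax layout; module names under the Guile port may differ.
import io
from xml.sax.saxutils import escape, quoteattr, XMLGenerator

buf = io.StringIO()
gen = XMLGenerator(buf, encoding="utf-8")

gen.startDocument()
gen.startElement("note", {"lang": "en"})
gen.characters('5 < 6 & more')      # characters() escapes &, < and > via escape()
gen.endElement("note")
gen.endDocument()

print(buf.getvalue())                # <?xml ...?> followed by <note lang="en">5 &lt; 6 &amp; more</note>
print(quoteattr('5 < 6'))            # -> "5 &lt; 6" (quoted for use as an attribute value)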