diff options
-rw-r--r-- | modules/language/python/bytes.scm | 48 | ||||
-rw-r--r-- | modules/language/python/compile.scm | 6 | ||||
-rw-r--r-- | modules/language/python/string.scm | 35 |
3 files changed, 76 insertions, 13 deletions
diff --git a/modules/language/python/bytes.scm b/modules/language/python/bytes.scm index daf7adc..4ba7d2b 100644 --- a/modules/language/python/bytes.scm +++ b/modules/language/python/bytes.scm @@ -2,10 +2,12 @@ #:use-module (oop goops) #:use-module (oop pf-objects) #:use-module (ice-9 match) + #:use-module (ice-9 iconv) #:use-module (rnrs bytevectors) #:use-module (system foreign) #:use-module (language python string) #:use-module (language python for) + #:use-module (language python def) #:use-module (language python try) #:use-module (language python exceptions) #:use-module (language python list) @@ -596,12 +598,46 @@ (lp (+ i 1) (+ j 1))))) (bytes s))) -(define-py** decode (py-decode bytes o n . l) - (let lp ((i 0) (r '())) - (if (< i n) - (lp (+ i 1) (cons (b-ref o i) r)) - (utf8->string (list->b (reverse r)))))) +(define-python-class UnicodeDecodeError (Exception)) +(define-py** decode (py-decode bytes o n . l) + (apply + (lam ((= encoding "UTF-8") (= errors "strict")) + (set! errors (py-lower (scm-str errors))) + (set! errors (cond + ((equal? errors "strict") + 'error) + ((equal? errors "replace") + 'substitute) + ((equal? errors "ignore") + (warn + (string-append + "not possible to use ignore " + "encodong error strategy " + "using replace in stead")) + 'substitute) + (else + (warn + "not a correct encodong error strategy") + 'error))) + (set! encoding (py-upper (scm-str encoding))) + + (let lp ((i 0) (r '())) + (if (< i n) + (lp (+ i 1) (cons (b-ref o i) r)) + (catch #t + (lambda () + (bytevector->string + (list->b (reverse r)) + encoding + errors)) + (lambda x + (raise (UnicodeDecodeError + (+ + "failed to decode " + encoding)))))))) + l)) + ;;;py-encode (define-py* endswith (py-endswith bytes o n suff . l) @@ -1330,3 +1366,5 @@ zfill))))) (pylist-sort! l) l)) + +(set! 
(@@ (language python string) bytes) bytes) diff --git a/modules/language/python/compile.scm b/modules/language/python/compile.scm index 36182a4..42da128 100644 --- a/modules/language/python/compile.scm +++ b/modules/language/python/compile.scm @@ -419,9 +419,11 @@ ((swapcase) (S 'py-swapcase)) ((translate) (S 'py-translate)) ((zfill) (S 'py-zfill)) - - ;;bytevectors + ((encode) (S 'py-encode)) + + ;;Bytevectors ((decode) (B 'py-decode)) + ;;DICTS ((copy) (Di 'py-copy)) ((fromkeys) (Di 'py-fromkeys)) diff --git a/modules/language/python/string.scm b/modules/language/python/string.scm index a560801..c1d7d1b 100644 --- a/modules/language/python/string.scm +++ b/modules/language/python/string.scm @@ -4,16 +4,19 @@ #:use-module (oop pf-objects) #:use-module (language python hash) #:use-module (ice-9 match) + #:use-module (ice-9 iconv) #:use-module (language python list) #:use-module (language python exceptions) #:use-module (language python for) + #:use-module (language python def) #:use-module (language python bool) #:use-module (language python persist) + #:use-module (rnrs bytevectors) #:export (py-format py-capitalize py-center py-endswith py-expandtabs py-find py-rfind py-isalnum py-isalpha py-isdigit py-islower py-isspace py-isupper py-istitle py-join py-ljust - py-rjust py-format-map + py-rjust py-format-map py-encode py-rljust py-lower py-upper py-lstrip py-rstrip py-partition py-replace py-strip py-title py-rpartitio py-rindex py-split py-rsplit py-splitlines @@ -100,10 +103,6 @@ -;;;py-decode -;;;py-encode - - (define-py (py-endswith endswith o (suff <string>) . l) (let* ((n (string-length o)) (ns (string-length suff)) @@ -376,7 +375,31 @@ (let ((n (len s))) (- n (apply pylist-index (string-reverse s) l) 1))) - +(define bytes #f) + +(define-py (py-encode encode s . l) + (apply (lam ((= encoding "UTF-8") (= errors "strict")) + (set! errors (py-lower (scm-str errors))) + (set! errors (cond + ((equal? errors "strict") + 'error) + ((equal? 
errors "replace") + 'substitute) + ((equal? errors "ignore") + (warn + (string-append + "not possible to use ignore " + "encodong error strategy " + "using replace in stead")) + 'substitute) + (else + (warn + "not a correct encodong error strategy") + 'error))) + (set! encoding (py-upper (scm-str encoding))) + + (bytes (string->bytevector (scm-str s) encoding errors))) + l)) (define-py (py-split split s . l) (define ws (f+ (f-reg "[ \t\n]"))) |