summaryrefslogtreecommitdiff
path: root/modules/language/python
diff options
context:
space:
mode:
authorStefan Israelsson Tampe <stefan.itampe@gmail.com>2018-08-14 12:56:05 +0200
committerStefan Israelsson Tampe <stefan.itampe@gmail.com>2018-08-14 12:56:05 +0200
commit20286224a1567c4caf15e20f47aaede9281e347b (patch)
treef70a348e89e8803d56707b7f0522de92622d2172 /modules/language/python
parent1ff54d35de490b997c498721902dfdb6fe917089 (diff)
better encoding and decoding
Diffstat (limited to 'modules/language/python')
-rw-r--r--modules/language/python/bytes.scm48
-rw-r--r--modules/language/python/compile.scm6
-rw-r--r--modules/language/python/string.scm35
3 files changed, 76 insertions, 13 deletions
diff --git a/modules/language/python/bytes.scm b/modules/language/python/bytes.scm
index daf7adc..4ba7d2b 100644
--- a/modules/language/python/bytes.scm
+++ b/modules/language/python/bytes.scm
@@ -2,10 +2,12 @@
#:use-module (oop goops)
#:use-module (oop pf-objects)
#:use-module (ice-9 match)
+ #:use-module (ice-9 iconv)
#:use-module (rnrs bytevectors)
#:use-module (system foreign)
#:use-module (language python string)
#:use-module (language python for)
+ #:use-module (language python def)
#:use-module (language python try)
#:use-module (language python exceptions)
#:use-module (language python list)
@@ -596,12 +598,46 @@
(lp (+ i 1) (+ j 1)))))
(bytes s)))
-(define-py** decode (py-decode bytes o n . l)
- (let lp ((i 0) (r '()))
- (if (< i n)
- (lp (+ i 1) (cons (b-ref o i) r))
- (utf8->string (list->b (reverse r))))))
;; Python-level exception class deriving from Exception. py-decode raises it
;; when converting a byte sequence to a string fails.
+(define-python-class UnicodeDecodeError (Exception))
;; decode: convert the first n bytes of o (read with b-ref) into a string.
;;
;; The rest arguments l are applied to a `lam` that binds two optional values:
;;   encoding  defaults to "UTF-8"; upper-cased before use.
;;   errors    defaults to "strict"; lower-cased, then mapped to the symbol
;;             handed to bytevector->string as its conversion strategy:
;;               "strict"  -> 'error
;;               "replace" -> 'substitute
;;               "ignore"  -> warns, then falls back to 'substitute
;;               other     -> warns, then falls back to 'error
;;
;; Any error thrown by the conversion is caught and re-raised as
;; UnicodeDecodeError carrying "failed to decode " followed by the encoding.
;; NOTE(review): presumably `lam` provides Python-style default/keyword
;; arguments and n is the length of o -- confirm against their definitions.
;; NOTE(review): the warning texts contain typos ("encodong", "in stead").
+(define-py** decode (py-decode bytes o n . l)
+ (apply
+ (lam ((= encoding "UTF-8") (= errors "strict"))
+ (set! errors (py-lower (scm-str errors))) ; normalise case before matching the names
+ (set! errors (cond ; from here on errors holds a strategy symbol
+ ((equal? errors "strict")
+ 'error)
+ ((equal? errors "replace")
+ 'substitute)
+ ((equal? errors "ignore")
+ (warn ; "ignore" is not supported here; replace is used in its place
+ (string-append
+ "not possible to use ignore "
+ "encodong error strategy "
+ "using replace in stead"))
+ 'substitute)
+ (else
+ (warn ; unknown strategy name: warn and behave like "strict"
+ "not a correct encodong error strategy")
+ 'error)))
+ (set! encoding (py-upper (scm-str encoding))) ; encoding name is passed on upper-cased
+
+ (let lp ((i 0) (r '())) ; r accumulates the bytes in reverse order
+ (if (< i n)
+ (lp (+ i 1) (cons (b-ref o i) r))
+ (catch #t ; #t: every kind of thrown error is handled below
+ (lambda ()
+ (bytevector->string
+ (list->b (reverse r)) ; restore byte order before converting
+ encoding
+ errors))
+ (lambda x ; the details of the original error in x are not used
+ (raise (UnicodeDecodeError
+ (+ ; NOTE(review): relies on + accepting strings here -- confirm
+ "failed to decode "
+ encoding))))))))
+ l))
+
;;;py-encode
(define-py* endswith (py-endswith bytes o n suff . l)
@@ -1330,3 +1366,5 @@
zfill)))))
(pylist-sort! l)
l))
+
;; Store this module's `bytes` in the `bytes` variable of
;; (language python string). It is reached with @@ because that variable is
;; not in the module's export list. py-encode there calls it to wrap its result.
+(set! (@@ (language python string) bytes) bytes)
diff --git a/modules/language/python/compile.scm b/modules/language/python/compile.scm
index 36182a4..42da128 100644
--- a/modules/language/python/compile.scm
+++ b/modules/language/python/compile.scm
@@ -419,9 +419,11 @@
((swapcase) (S 'py-swapcase))
((translate) (S 'py-translate))
((zfill) (S 'py-zfill))
-
- ;;bytevectors
+ ((encode) (S 'py-encode))
+
+ ;;Bytevectors
((decode) (B 'py-decode))
+
;;DICTS
((copy) (Di 'py-copy))
((fromkeys) (Di 'py-fromkeys))
diff --git a/modules/language/python/string.scm b/modules/language/python/string.scm
index a560801..c1d7d1b 100644
--- a/modules/language/python/string.scm
+++ b/modules/language/python/string.scm
@@ -4,16 +4,19 @@
#:use-module (oop pf-objects)
#:use-module (language python hash)
#:use-module (ice-9 match)
+ #:use-module (ice-9 iconv)
#:use-module (language python list)
#:use-module (language python exceptions)
#:use-module (language python for)
+ #:use-module (language python def)
#:use-module (language python bool)
#:use-module (language python persist)
+ #:use-module (rnrs bytevectors)
#:export (py-format py-capitalize py-center py-endswith
py-expandtabs py-find py-rfind
py-isalnum py-isalpha py-isdigit py-islower
py-isspace py-isupper py-istitle py-join py-ljust
- py-rjust py-format-map
+ py-rjust py-format-map py-encode
py-rljust py-lower py-upper py-lstrip py-rstrip
py-partition py-replace py-strip py-title
py-rpartitio py-rindex py-split py-rsplit py-splitlines
@@ -100,10 +103,6 @@
-;;;py-decode
-;;;py-encode
-
-
(define-py (py-endswith endswith o (suff <string>) . l)
(let* ((n (string-length o))
(ns (string-length suff))
@@ -376,7 +375,31 @@
(let ((n (len s)))
(- n (apply pylist-index (string-reverse s) l) 1)))
-
;; Placeholder for the bytes constructor that py-encode calls. It starts as #f
;; and is assigned from bytes.scm via (set! (@@ (language python string) bytes) ...).
;; NOTE(review): presumably done this way because bytes.scm already imports
;; this module, so importing it back would be circular -- confirm.
+(define bytes #f)
+
;; encode: convert the string s to a bytevector in the requested encoding and
;; pass that bytevector to `bytes`.
;;
;; The rest arguments l are applied to a `lam` that binds two optional values:
;;   encoding  defaults to "UTF-8"; upper-cased before use.
;;   errors    defaults to "strict"; lower-cased, then mapped to the symbol
;;             handed to string->bytevector as its conversion strategy:
;;               "strict"  -> 'error
;;               "replace" -> 'substitute
;;               "ignore"  -> warns, then falls back to 'substitute
;;               other     -> warns, then falls back to 'error
;;
;; Unlike decode, conversion errors are not caught here; they propagate as
;; thrown by string->bytevector.
;; NOTE(review): `bytes` is #f until bytes.scm assigns it, so this presumably
;; requires (language python bytes) to have been loaded first -- confirm.
;; NOTE(review): the warning texts contain typos ("encodong", "in stead").
+(define-py (py-encode encode s . l)
+ (apply (lam ((= encoding "UTF-8") (= errors "strict"))
+ (set! errors (py-lower (scm-str errors))) ; normalise case before matching the names
+ (set! errors (cond ; from here on errors holds a strategy symbol
+ ((equal? errors "strict")
+ 'error)
+ ((equal? errors "replace")
+ 'substitute)
+ ((equal? errors "ignore")
+ (warn ; "ignore" is not supported here; replace is used in its place
+ (string-append
+ "not possible to use ignore "
+ "encodong error strategy "
+ "using replace in stead"))
+ 'substitute)
+ (else
+ (warn ; unknown strategy name: warn and behave like "strict"
+ "not a correct encodong error strategy")
+ 'error)))
+ (set! encoding (py-upper (scm-str encoding))) ; encoding name is passed on upper-cased
+
+ (bytes (string->bytevector (scm-str s) encoding errors))) ; wrap the raw bytevector with bytes
+ l))
(define-py (py-split split s . l)
(define ws (f+ (f-reg "[ \t\n]")))