bytevectors as str
[software/python-on-guile.git] / modules / language / python / str.scm
1 (define-module (language python str)
2 #:use-module (oop goops)
3 #:use-module (oop pf-objects)
4 #:use-module (ice-9 match)
5 #:use-module (rnrs bytevectors)
6 #:use-module (system foreign)
7 #:use-module (language python string)
8 #:export (<py-str> chf ch-find str))
9
10
11 (define-syntax-rule (aif it p x y) (let ((it p)) (if it x y)))
12
13 (define b-ref bytevector-u8-ref)
14 (define b-set! bytevector-u8-set!)
15 (define b-make make-bytevector)
16 (define b-len bytevector-length)
17
18 (define-class <py-str> () str)
19
20 (define-python-class str (<py-str>)
21 (define __init__
22 (case-lambda
23 ((self s)
24 (cond
25 ((is-a? s <py-str>)
26 (slot-set! self 'str (slot-ref s 'str)))
27 ((is-a? s <bytevector>)
28 (slot-set! self 'str s)))))))
29
30 (define-syntax-rule (define-py (f o . u) code ...)
31 (begin
32 (define-method (f (o <bytevector>) . u) code ...)
33 (define-method (f (o <py-str>) . l) (apply f (slot-ref o 'str) l))))
34
35 (define-method (write (b <py-str>) . l)
36 (apply write (b->string (slot-ref b 'str)) l))
37
38 (define dynlink (dynamic-link))
39
40 (define stringn
41 (pointer->procedure
42 '*
43 (dynamic-func "scm_from_locale_stringn" dynlink)
44 (list '* size_t)))
45
46 (define ch->i (make-hash-table))
47
48 (define (re-eval ch)
49 (let lp ((i 0))
50 (if (< i 256)
51 (if (eq? ch (chf i))
52 (begin
53 (hash-set! ch->i ch i)
54 (lp (+ i 1)))
55 (lp (+ i 1)))
56 (hash-ref ch->i ch))))
57
58 (define (ch-find ch)
59 (aif it (hash-ref ch->i ch #f)
60 (if (eq? ch (chf it))
61 it
62 (re-eval ch))
63 (re-eval ch)))
64
65 (define (chf ch)
66 (let ((str (pointer->scm
67 (stringn
68 (bytevector->pointer
69 (b-make 1 ch))
70 1))))
71 (if (= (string-length str) 1)
72 (string-ref str 0)
73 (chf 0))))
74
75 (define (b->string b)
76 (pointer->scm
77 (stringn (bytevector->pointer b) (b-len b))))
78
79 (define-py (py-capitalize s)
80 (let* ((n (b-len s))
81 (w (b-make n)))
82 (let lp ((i 0) (first? #t))
83 (if (< i n)
84 (let* ((x (b-ref s i))
85 (ch (chf x)))
86 (define (f first?)
87 (b-set! w i x)
88 (lp (+ i 1) first?))
89
90 (if (and first? (char-alphabetic? ch))
91 (aif it (ch-find (char-upcase ch))
92 (begin
93 (b-set! w i it)
94 (lp (+ i 1) #f))
95 (f #t))
96 (f #f)))
97 (str w)))))
98
99 (define-py (py-center o w . l)
100 (let* ((ws (if (pair? l)
101 (ch-find (b-ref (car l) 0))
102 (ch-find #\space)))
103 (n (b-len o))
104 (w (if (< w n) n w))
105 (d (- w n))
106 (e (floor-quotient (- w n) 2))
107 (s (b-make w (ch-find #\space))))
108 (let lp ((i 0) (j e))
109 (if (< i n)
110 (begin
111 (b-set! s j (b-ref o i))
112 (lp (+ i 1) (+ j 1)))))
113 (str s)))
114
115
116
117 ;;;py-decode
118 ;;;py-encode
119
120
121 (define-py (py-endswith o (suff <bytevector>) . l)
122 (let* ((n (b-len o))
123 (ns (b-len suff))
124 (f (lambda (x) (< x 0) (+ n x) x)))
125 (call-with-values (lambda ()
126 (match l
127 (() (values 0 n ))
128 ((x) (values (f x) n ))
129 ((x y) (values (f x) (f y)))))
130 (lambda (start end)
131 (let lp ((i (- n ns)) (j 0))
132 (if (< i start)
133 (lp (+ i 1) (+ j 1))
134 (if (>= i end)
135 #t
136 (and
137 (eq? (b-ref o i) (b-ref suff j))
138 (lp (+ i 1) (+ j 1))))))))))
139
140 (define-py (py-startswith o (suff <bytevector>) . l)
141 (let* ((n (b-len o))
142 (ns (b-len suff))
143 (f (lambda (x) (< x 0) (+ n x) x)))
144 (call-with-values (lambda ()
145 (match l
146 (() (values 0 n ))
147 ((x) (values (f x) n ))
148 ((x y) (values (f x) (f y)))))
149 (lambda (start end)
150 (let lp ((i 0))
151 (cond
152 ((or (>= i end)
153 (>= i ns))
154 #t)
155 ((< i start)
156 (lp (+ i 1)))
157 (else
158 (and
159 (eq? (b-ref o i) (b-ref suff i))
160 (lp (+ i 1))))))))))
161
162 #|
163 (define-py (py-expandtabs expandtabs s . l)
164 (let* ((tabsize (match l (() 8) ((x) x)))
165 (u (string->list (make-string tabsize #\space)))
166 (n (string-length s)))
167 (let lp ((l (string->list s)) (r '()))
168 (if (pair? l)
169 (let ((x (car l)))
170 (if (eq? x #\tab)
171 (lp (cdr l) (append u r))
172 (lp (cdr l) (cons x r))))
173 (list->string (reverse r))))))
174
175 (define-py (py-find find s sub . l)
176 (let* ((n (string-length s))
177 (f (lambda (x) (< x 0) (+ n x) x)))
178 (call-with-values (lambda ()
179 (match l
180 (() (values 0 n ))
181 ((x) (values (f x) n ))
182 ((x y) (values (f x) (f y)))))
183 (lambda (start end)
184 (aif it (string-contains s sub start end)
185 it
186 -1)))))
187
188 (define-py (py-rfind rfind s sub . l)
189 (let* ((n (string-length s))
190 (s (string-reverse s))
191 (sub (string-reverse sub))
192 (f (lambda (x) (< x 0) (+ n x) x)))
193 (call-with-values (lambda ()
194 (match l
195 (() (values 0 n ))
196 ((x) (values (f x) n ))
197 ((x y) (values (f x) (f y)))))
198 (lambda (start end)
199 (aif it (string-contains s sub start end)
200 (- n it (len sub))
201 -1)))))
202
203 (define i (f-list #:i (mk-token (f+ (f-reg! "[0-9]")))))
204 (define s (f-list #:s (mk-token (f+ (f-not! (f-tag "}"))))))
205 (define e (f-list #:e (f-and (f-tag "}") f-true)))
206 (define tagbody (f-or! e i s))
207
208 (define tag (f-seq "{" tagbody "}"))
209 (define nontag (f-list #:str (mk-token (f+ (f-or! (f-tag "{{")
210 (f-not! tag))))))
211 (define e (ff* (f-or! tag nontag)))
212
213 (define (compile x args kwargs)
214 (let lp ((l x) (r '()) (u '()) (i 0))
215 (match l
216 (((#:str x) . l)
217 (lp l (cons x r) u i))
218 (((#:i x) . l)
219 (lp l (cons "~a" r) (cons (list-ref args (string->number x)) u) i))
220 (((#:s x) . l)
221 (lp l (cons "~a" r) (cons (hash-ref kwargs x None) u) i))
222 (((#:e) . l)
223 (lp l (cons "~a" r) (cons (list-ref args i) u) (+ i 1)))
224 (()
225 (apply format #f (string-join (reverse r) "") (reverse u))))))
226
227 (define-py (py-format format s . l)
228 (call-with-values
229 (lambda ()
230 (let lp ((l l) (args '()) (kwargs (make-hash-table)))
231 (match l
232 (((? keyword? key) x . l)
233 (hash-set! kwargs (symbol->string (keyword->symbol key)) x)
234 (lp l args kwargs))
235 ((x . l)
236 (lp l (cons x args) kwargs))
237 (()
238 (values (reverse args) kwargs)))))
239 (lambda (args kwargs)
240 (compile (parse s e) args kwargs))))
241
242 (define-syntax-rule (mk-is py-isalnum isalnum x ...)
243 (define-py (py-isalnum isalnum s)
244 (and (> (len s) 0)
245 (string-fold
246 (lambda (ch s)
247 (if (or (x ch) ...)
248 s
249 #f))
250 #t s))))
251
252 (mk-is py-isalnum isalnum char-alphabetic? char-numeric?)
253 (mk-is py-isalpha isalpha char-alphabetic?)
254 (mk-is py-isdigit isdigit char-numeric?)
255 (mk-is py-islower islower char-lower-case?)
256 (mk-is py-isspace isspace char-whitespace?)
257 (mk-is py-isupper isupper char-upper-case?)
258
259 (define-py (py-istitle istitle s)
260 (let ((n (len s)))
261 (if ((> n 0))
262 (let lp ((i 0) (space? #t))
263 (if (< i n)
264 (let ((ch (string-ref s i)))
265 (if space?
266 (cond
267 ((char-whitespace? ch)
268 (lp (+ i 1) #t))
269 ((char-upper-case? ch)
270 (lp (+ i 1) #f))
271 (else
272 #f))
273 (cond
274 ((char-whitespace? ch)
275 (lp (+ i 1) #t))
276 ((char-upper-case? ch)
277 #f)
278 ((char-lower-case? ch)
279 (lp (+ i 1) #f))
280 (else
281 #f))))
282 #t))
283 #f)))
284
285
286 (define-py (py-join join s iterator)
287 (string-join (to-list iterator) s))
288
289 (define-py (py-ljust ljust s width . l)
290 (let* ((n (len s))
291 (ch (match l
292 ((x . l)
293 (if (string? x)
294 (string-ref x 0)
295 x))
296 (()
297 #\space))))
298 (if (< width n)
299 (pylist-slice s 0 width)
300 (let ((ret (make-string width ch)))
301 (let lp ((i 0))
302 (if (< i n)
303 (string-set! ret i (string-ref s i))
304 ret))))))
305
306 (define-py (py-rjust rjust s width . l)
307 (let* ((n (len s))
308 (ch (match l
309 ((x . l)
310 (if (string? x)
311 (string-ref x 0)
312 x))
313 (()
314 #\space))))
315 (if (< width n)
316 (pylist-slice s (- width) (len s))
317 (let ((ret (make-string width ch)))
318 (let lp ((i 0) (j (- width n)))
319 (if (< i n)
320 (string-set! ret j (string-ref s i))
321 ret))))))
322
323 (define-py (py-lower lower s)
324 (string-downcase s))
325
326 (define-py (py-upper upper s)
327 (string-upcase s))
328
329 (define-py (py-lstrip lstrip s . l)
330 (match l
331 (()
332 (string-trim s))
333 ((x . _)
334 (let ((l (map (lambda (x) (if (string? x) (string-ref x 0) x)) x)))
335 (string-trim s (lambda (ch) (member ch l)))))))
336
337 (define-py (py-rstrip rstrip s . l)
338 (match l
339 (()
340 (string-trim-right s))
341 ((x . _)
342 (let ((l (map (lambda (x) (if (string? x) (string-ref x 0) x)) x)))
343 (string-trim-right s (lambda (ch) (member ch l)))))))
344
345 (define-py (py-partition partition s (sep <string>))
346 (let ((n (len s))
347 (m (len sep)))
348 (define (test i)
349 (let lp ((i i) (j 0))
350 (if (< i n)
351 (if (< j m)
352 (if (eq? (string-ref s i) (string-ref sep j))
353 (lp (+ i 1) (+ j 1))
354 #f)
355 #t)
356 #f)))
357 (let lp ((i 0))
358 (if (< i n)
359 (if (test i)
360 (list (pylist-slice s 0 i) sep (pylist-slice s (+ i m) n))
361 (lp (+ i 1)))
362 (list s "" "")))))
363
364 (define-py (py-rpartition rpartition ss (ssep <string>))
365 (let* ((s (string-reverse ss))
366 (sep (string-reverse ssep))
367 (n (len s))
368 (m (len sep)))
369 (define (test i)
370 (let lp ((i i) (j 0))
371 (if (< i n)
372 (if (< j m)
373 (if (eq? (string-ref s i) (string-ref sep j))
374 (lp (+ i 1) (+ j 1))
375 #f)
376 #t)
377 #f)))
378 (let lp ((i 0))
379 (if (< i n)
380 (if (test i)
381 (list (string-reverse
382 (pylist-slice s (+ i m) n))
383 ssep
384 (string-reverse
385 (pylist-slice s 0 i)))
386 (lp (+ i 1)))
387 (list "" "" s)))))
388
389 (define-py (py-replace replace s old new . l)
390 (let ((n (match l (() #f) ((n . _) n))))
391 (string-join
392 (reverse
393 (let lp ((s s) (r '()))
394 (let ((l (py-partition s old)))
395 (if (equal? (cadr l) "")
396 (cons s r)
397 (lp (list-ref l 2) (cons (car l) r))))))
398 new)))
399
400 (define-py (py-strip strip s . l)
401 (apply py-rstrip (apply py-lstrip s l) l))
402
403 (define-py (py-title title s)
404 (string-titlecase s))
405
406 (define-py (py-rindex rindex s . l)
407 (let ((n (len s)))
408 (- n (apply pylist-index (string-reverse s) l) 1)))
409
410
411
412 (define-py (py-split split s . l)
413 (define ws (f+ (f-reg "[ \t\n]")))
414 (define r
415 (f-or! (f-seq f-eof (f-out '()))
416 (f-cons (f-seq (mk-token (f* (f-reg! "."))) f-eof) (f-out '()))))
417 (define (u ws) (mk-token (f+ (f-not! ws))))
418 (define (tok ws i)
419 (if (= i 0)
420 (f-list (mk-token (f* (f-reg! "."))))
421 (let ((e (mk-token (f* (f-not! ws)))))
422 (f-seq (f? ws)
423 (f-cons e
424 (let lp ((i i))
425 (if (> (- i 1) 0)
426 (f-or! (f-seq (f? ws) f-eof (f-out '()))
427 (f-cons (f-seq ws e) (Ds (lp (- i 1)))))
428 r)))))))
429
430 (define N 1000000000000)
431 (let ((e (call-with-values
432 (lambda ()
433 (match l
434 (() (values ws N))
435 ((sep) (values (f-tag sep) N))
436 ((sep n) (values (f-tag sep) n))))
437 tok)))
438 (parse s e)))
439
440 (define-py (py-rsplit rsplit s . l)
441 (reverse
442 (map string-reverse
443 (apply py-split
444 (string-reverse s)
445 (match l
446 (() '())
447 ((sep . l) (cons (string-reverse sep) l)))))))
448
449
450 (define-py (py-splitlines splitlines s . l)
451 (let ((n (len s))
452 (keep? (match l
453 ((#:keepends v)
454 v)
455 ((v)
456 v)
457 (_ #f))))
458 (let lp ((i 0) (r '()) (old 0))
459 (if (< i n)
460 (let ((ch (string-ref s i)))
461 (if (eq? ch #\newline)
462 (if keep?
463 (lp (+ i 1)
464 (cons
465 (pylist-slice s old (+ i 1) 1)
466 r)
467 (+ i 1))
468 (lp (+ i 1)
469 (cons
470 (pylist-slice s old i 1)
471 r)
472 (+ i 1)))
473 (lp (+ i 1) r old)))
474 (reverse r)))))
475
476 (define-py (py-swapcase swapcase s)
477 (list->string
478 (string-fold
479 (lambda (ch s)
480 (cons
481 (cond
482 ((char-upper-case? ch)
483 (char-downcase ch))
484 ((char-lower-case? ch)
485 (char-upcase ch))
486 (else ch))
487 s))
488 '()
489 s)))
490
491 (define-py (py-translate translate s table . l)
492 (let* ((n (len s))
493 (w (make-string n))
494 (t (if (eq? table None) #f table))
495 (d (match l (() #f) ((x) x))))
496 (define (tr ch)
497 (define (e)
498 (if t
499 (let ((i (char->integer ch)))
500 (if (< i (string-length t))
501 (string-ref t i)
502 ch))
503 ch))
504
505 (if d
506 (if (string-contains d (list->string (list ch)))
507 #f
508 (e))
509 (e)))
510
511 (let lp ((i 0) (k 0))
512 (if (< i n)
513 (let ((ch (tr (string-ref s i))))
514 (if ch
515 (begin
516 (string-set! w k ch)
517 (lp (+ i 1) (+ k 1)))
518 (lp (+ i 1) k)))
519 (if (= k n)
520 w
521 (pylist-slice w 0 k 1))))))
522
523 (define-syntax-rule (a b x y) (b (symbol->string x) (symbol->string y)))
524
525 (define-syntax-rule (mkop op)
526 (begin
527 (define-method (op (s1 <string>) (s2 <py-string>))
528 (op s1 (slot-ref s2 'str)))
529 (define-method (op (s2 <py-string>) (s1 <string>))
530 (op s1 (slot-ref s2 'str)))))
531
532 (mkop <)
533 (mkop <=)
534 (mkop >)
535 (mkop >=)
536 (mkop +)
537 (mkop *)
538
539 (define-method (< (s1 <string>) (s2 <string>)) (string-ci< s1 s2))
540 (define-method (<= (s1 <string>) (s2 <string>)) (string-ci<= s1 s2))
541 (define-method (> (s1 <string>) (s2 <string>)) (string-ci> s1 s2))
542 (define-method (>= (s1 <string>) (s2 <string>)) (string-ci>= s1 s2))
543
544 (define-method (< (s1 <symbol>) (s2 <symbol>)) (a string-ci< s1 s2))
545 (define-method (<= (s1 <symbol>) (s2 <symbol>)) (a string-ci<= s1 s2))
546 (define-method (> (s1 <symbol>) (s2 <symbol>)) (a string-ci> s1 s2))
547 (define-method (>= (s1 <symbol>) (s2 <symbol>)) (a string-ci>= s1 s2))
548
549
550 (define-py (py-zfill zfill s width)
551 (let* ((n (len s))
552 (w (pk (pylist-slice s 0 n 1))))
553 (let lp ((i 0))
554 (if (< i n)
555 (let ((ch (string-ref s i)))
556 (if (char-numeric? ch)
557 (let lp ((j (max 0 (- i width))))
558 (pk i j)
559 (if (< j i)
560 (begin
561 (string-set! w j #\0)
562 (lp (+ j 1)))
563 w))
564 (lp (+ i 1))))
565 s))))
566
567 (define-python-class string (<py-string>)
568 (define __init__
569 (case-lambda
570 ((self s)
571 (cond
572 ((is-a? s <py-string>)
573 (slot-set! self 'str (slot-ref s 'src)))
574 ((is-a? s <string>)
575 (slot-set! self 'str s)))))))
576
577 (define pystring string)
578
579 (define-method (py-class (o <string>)) string)
580 (define-method (py-class (o <py-string>)) string)
581
582 (define-method (pyhash (o <py-string>)) (hash (slot-ref o 'str) pyhash-N))
583
584 (define-method (py-equal? (o <py-string>) x)
585 (equal? (slot-ref o 'str) x))
586 (define-method (py-equal? x (o <py-string>))
587 (equal? (slot-ref o 'str) x))
588
589 (define-class <string-iter> (<py-string>) str i d)
590
591 (define-method (write (o <string-iter>) . l)
592 (define port (if (null? l) #t (car l)))
593 (for ((x : o)) ((l '()))
594 (cons (string-ref x 0) l)
595 #:final
596 (format port "iter(~s)" (list->string (reverse l)))))
597
598 (define-method (wrap-in (o <string-iter> ))
599 (let ((out (make <string-iter>)))
600 (slot-set! out 'str (slot-ref o 'str))
601 (slot-set! out 'i (slot-ref o 'i))
602 (slot-set! out 'd (slot-ref o 'd))
603 out))
604
605 (define-method (wrap-in (s <string>))
606 (let ((out (make <string-iter>)))
607 (slot-set! out 'str s)
608 (slot-set! out 'i 0)
609 (slot-set! out 'd 1)
610 out))
611
612 (define-method (py-reversed (s <string>))
613 (let ((out (make <string-iter>)))
614 (slot-set! out 'str s)
615 (slot-set! out 'i (- (string-length s) 1))
616 (slot-set! out 'd -1)
617 out))
618
619 (define-method (next (o <string-iter>))
620 (let ((i (slot-ref o 'i ))
621 (d (slot-ref o 'd))
622 (str (slot-ref o 'str)))
623 (if (> d 0)
624 (if (< i (string-length str))
625 (let ((ret (string-ref str i)))
626 (slot-set! o 'i (+ i d))
627 (list->string (list ret)))
628 (throw StopIteration))
629 (if (>= i 0)
630 (let ((ret (string-ref str i)))
631 (slot-set! o 'i (+ i d))
632 (list->string (list ret)))
633 (throw StopIteration)))))
634
635 (define (pystring-listing)
636 (let ((l (to-pylist
637 (map symbol->string
638 '(__add__ __class__ __contains__ __delattr__ __doc__
639 __eq__ __format__ __ge__ __getattribute__
640 __getitem__ __getnewargs__ __getslice__ __gt__
641 __hash__ __init__ __le__ __len__ __lt__ __mod__
642 __mul__ __ne__ __new__ __reduce__ __reduce_ex__
643 __repr__ __rmod__ __rmul__ __setattr__ __sizeof__
644 __str__ __subclasshook__
645 _formatter_field_name_split _formatter_parser
646 capitalize center count decode encode endswith
647 expandtabs find format index isalnum isalpha
648 isdigit islower isspace istitle isupper join
649 ljust lower lstrip partition replace rfind rindex
650 rjust rpartition rsplit rstrip split splitlines
651 startswith strip swapcase
652 title translate upper zfill)))))
653 (pylist-sort! l)
654 l))
655
656
657 |#