bytevectors as str
[software/python-on-guile.git] / modules / language / python / string.scm
1 (define-module (language python string)
2 #:use-module (oop goops)
3 #:use-module (oop pf-objects)
4 #:use-module (language python hash)
5 #:use-module (ice-9 match)
6 #:use-module (language python list)
7 #:use-module (language python exceptions)
8 #:use-module (language python for)
9 #:use-module (parser stis-parser)
10 #:export (py-format py-capitalize py-center py-endswith
11 py-expandtabs py-find py-rfind
12 py-isalnum py-isalpha py-isdigit py-islower
13 py-isspace py-isupper py-istitle py-join py-ljust
14 py-rljust py-lower py-upper py-lstrip py-rstrip
15 py-partition py-replace py-strip py-title
16 py-rpartitio py-rindex py-split py-rsplit py-splitlines
17 py-startswith py-swapcase py-translate py-zfill
18 pystring-listing <py-string> pystring))
19
20 (define-syntax-rule (aif it p x y) (let ((it p)) (if it x y)))
21
22
23 (define-class <py-string> () str)
24
25 (define-syntax-rule (define-py (f n o . u) code ...)
26 (begin
27 (define-method (f (o <string>) . u) code ...)
28 (define-method (f (o <py-string>) . l) (apply f (slot-ref o 'str) l))
29 (define-method (f (o <p>) . l)
30 (aif it (ref o 'n)
31 (apply it l)
32 (next-method)))))
33
34 (define-py (py-capitalize capitalize s)
35 (let* ((n (len s))
36 (w (make-string n)))
37 (let lp ((i 0) (first? #t))
38 (if (< i n)
39 (let ((ch (string-ref s i)))
40 (if (and first? (char-alphabetic? ch))
41 (begin
42 (string-set! w i (char-upcase ch))
43 (lp (+ i 1) #f))
44 (begin
45 (string-set! w i ch)
46 (lp (+ i 1) first?))))
47 w))))
48
49 (define-py (py-center center o w . l)
50 (let* ((ws (if (pair? l)
51 (car (string->list (car l)))
52 #\space))
53 (n (string-length o))
54 (w (if (< w n) n w))
55 (d (- w n))
56 (e (floor-quotient (- w n) 2))
57 (s (make-string w #\space)))
58 (let lp ((i 0) (j e))
59 (if (< i n)
60 (begin
61 (string-set! s j (string-ref o i))
62 (lp (+ i 1) (+ j 1)))))
63 s))
64
65
66
67 ;;;py-decode
68 ;;;py-encode
69
70
71 (define-py (py-endswith endswith o (suff <string>) . l)
72 (let* ((n (string-length o))
73 (ns (string-length suff))
74 (f (lambda (x) (< x 0) (+ n x) x)))
75 (call-with-values (lambda ()
76 (match l
77 (() (values 0 n ))
78 ((x) (values (f x) n ))
79 ((x y) (values (f x) (f y)))))
80 (lambda (start end)
81 (string-suffix? suff o 0 ns start end)))))
82
83 (define-py (py-startswith startswith o (suff <string>) . l)
84 (let* ((n (string-length o))
85 (ns (string-length suff))
86 (f (lambda (x) (< x 0) (+ n x) x)))
87 (call-with-values (lambda ()
88 (match l
89 (() (values 0 n ))
90 ((x) (values (f x) n ))
91 ((x y) (values (f x) (f y)))))
92 (lambda (start end)
93 (string-prefix? suff o 0 ns start end)))))
94
95 (define-py (py-expandtabs expandtabs s . l)
96 (let* ((tabsize (match l (() 8) ((x) x)))
97 (u (string->list (make-string tabsize #\space)))
98 (n (string-length s)))
99 (let lp ((l (string->list s)) (r '()))
100 (if (pair? l)
101 (let ((x (car l)))
102 (if (eq? x #\tab)
103 (lp (cdr l) (append u r))
104 (lp (cdr l) (cons x r))))
105 (list->string (reverse r))))))
106
107 (define-py (py-find find s sub . l)
108 (let* ((n (string-length s))
109 (f (lambda (x) (< x 0) (+ n x) x)))
110 (call-with-values (lambda ()
111 (match l
112 (() (values 0 n ))
113 ((x) (values (f x) n ))
114 ((x y) (values (f x) (f y)))))
115 (lambda (start end)
116 (aif it (string-contains s sub start end)
117 it
118 -1)))))
119
120 (define-py (py-rfind rfind s sub . l)
121 (let* ((n (string-length s))
122 (s (string-reverse s))
123 (sub (string-reverse sub))
124 (f (lambda (x) (< x 0) (+ n x) x)))
125 (call-with-values (lambda ()
126 (match l
127 (() (values 0 n ))
128 ((x) (values (f x) n ))
129 ((x y) (values (f x) (f y)))))
130 (lambda (start end)
131 (aif it (string-contains s sub start end)
132 (- n it (len sub))
133 -1)))))
134
135 (define i (f-list #:i (mk-token (f+ (f-reg! "[0-9]")))))
136 (define s (f-list #:s (mk-token (f+ (f-not! (f-tag "}"))))))
137 (define e (f-list #:e (f-and (f-tag "}") f-true)))
138 (define tagbody (f-or! e i s))
139
140 (define tag (f-seq "{" tagbody "}"))
141 (define nontag (f-list #:str (mk-token (f+ (f-or! (f-tag "{{")
142 (f-not! tag))))))
143 (define e (ff* (f-or! tag nontag)))
144
145 (define (compile x args kwargs)
146 (let lp ((l x) (r '()) (u '()) (i 0))
147 (match l
148 (((#:str x) . l)
149 (lp l (cons x r) u i))
150 (((#:i x) . l)
151 (lp l (cons "~a" r) (cons (list-ref args (string->number x)) u) i))
152 (((#:s x) . l)
153 (lp l (cons "~a" r) (cons (hash-ref kwargs x None) u) i))
154 (((#:e) . l)
155 (lp l (cons "~a" r) (cons (list-ref args i) u) (+ i 1)))
156 (()
157 (apply format #f (string-join (reverse r) "") (reverse u))))))
158
159 (define-py (py-format format s . l)
160 (call-with-values
161 (lambda ()
162 (let lp ((l l) (args '()) (kwargs (make-hash-table)))
163 (match l
164 (((? keyword? key) x . l)
165 (hash-set! kwargs (symbol->string (keyword->symbol key)) x)
166 (lp l args kwargs))
167 ((x . l)
168 (lp l (cons x args) kwargs))
169 (()
170 (values (reverse args) kwargs)))))
171 (lambda (args kwargs)
172 (compile (parse s e) args kwargs))))
173
174 (define-syntax-rule (mk-is py-isalnum isalnum x ...)
175 (define-py (py-isalnum isalnum s)
176 (and (> (len s) 0)
177 (string-fold
178 (lambda (ch s)
179 (if (or (x ch) ...)
180 s
181 #f))
182 #t s))))
183
184 (mk-is py-isalnum isalnum char-alphabetic? char-numeric?)
185 (mk-is py-isalpha isalpha char-alphabetic?)
186 (mk-is py-isdigit isdigit char-numeric?)
187 (mk-is py-islower islower char-lower-case?)
188 (mk-is py-isspace isspace char-whitespace?)
189 (mk-is py-isupper isupper char-upper-case?)
190
191 (define-py (py-istitle istitle s)
192 (let ((n (len s)))
193 (if ((> n 0))
194 (let lp ((i 0) (space? #t))
195 (if (< i n)
196 (let ((ch (string-ref s i)))
197 (if space?
198 (cond
199 ((char-whitespace? ch)
200 (lp (+ i 1) #t))
201 ((char-upper-case? ch)
202 (lp (+ i 1) #f))
203 (else
204 #f))
205 (cond
206 ((char-whitespace? ch)
207 (lp (+ i 1) #t))
208 ((char-upper-case? ch)
209 #f)
210 ((char-lower-case? ch)
211 (lp (+ i 1) #f))
212 (else
213 #f))))
214 #t))
215 #f)))
216
217
218 (define-py (py-join join s iterator)
219 (string-join (to-list iterator) s))
220
221 (define-py (py-ljust ljust s width . l)
222 (let* ((n (len s))
223 (ch (match l
224 ((x . l)
225 (if (string? x)
226 (string-ref x 0)
227 x))
228 (()
229 #\space))))
230 (if (< width n)
231 (pylist-slice s 0 width)
232 (let ((ret (make-string width ch)))
233 (let lp ((i 0))
234 (if (< i n)
235 (string-set! ret i (string-ref s i))
236 ret))))))
237
238 (define-py (py-rjust rjust s width . l)
239 (let* ((n (len s))
240 (ch (match l
241 ((x . l)
242 (if (string? x)
243 (string-ref x 0)
244 x))
245 (()
246 #\space))))
247 (if (< width n)
248 (pylist-slice s (- width) (len s))
249 (let ((ret (make-string width ch)))
250 (let lp ((i 0) (j (- width n)))
251 (if (< i n)
252 (string-set! ret j (string-ref s i))
253 ret))))))
254
255 (define-py (py-lower lower s)
256 (string-downcase s))
257
258 (define-py (py-upper upper s)
259 (string-upcase s))
260
261 (define-py (py-lstrip lstrip s . l)
262 (match l
263 (()
264 (string-trim s))
265 ((x . _)
266 (let ((l (map (lambda (x) (if (string? x) (string-ref x 0) x)) x)))
267 (string-trim s (lambda (ch) (member ch l)))))))
268
269 (define-py (py-rstrip rstrip s . l)
270 (match l
271 (()
272 (string-trim-right s))
273 ((x . _)
274 (let ((l (map (lambda (x) (if (string? x) (string-ref x 0) x)) x)))
275 (string-trim-right s (lambda (ch) (member ch l)))))))
276
277 (define-py (py-partition partition s (sep <string>))
278 (let ((n (len s))
279 (m (len sep)))
280 (define (test i)
281 (let lp ((i i) (j 0))
282 (if (< i n)
283 (if (< j m)
284 (if (eq? (string-ref s i) (string-ref sep j))
285 (lp (+ i 1) (+ j 1))
286 #f)
287 #t)
288 #f)))
289 (let lp ((i 0))
290 (if (< i n)
291 (if (test i)
292 (list (pylist-slice s 0 i) sep (pylist-slice s (+ i m) n))
293 (lp (+ i 1)))
294 (list s "" "")))))
295
296 (define-py (py-rpartition rpartition ss (ssep <string>))
297 (let* ((s (string-reverse ss))
298 (sep (string-reverse ssep))
299 (n (len s))
300 (m (len sep)))
301 (define (test i)
302 (let lp ((i i) (j 0))
303 (if (< i n)
304 (if (< j m)
305 (if (eq? (string-ref s i) (string-ref sep j))
306 (lp (+ i 1) (+ j 1))
307 #f)
308 #t)
309 #f)))
310 (let lp ((i 0))
311 (if (< i n)
312 (if (test i)
313 (list (string-reverse
314 (pylist-slice s (+ i m) n))
315 ssep
316 (string-reverse
317 (pylist-slice s 0 i)))
318 (lp (+ i 1)))
319 (list "" "" s)))))
320
321 (define-py (py-replace replace s old new . l)
322 (let ((n (match l (() #f) ((n . _) n))))
323 (string-join
324 (reverse
325 (let lp ((s s) (r '()))
326 (let ((l (py-partition s old)))
327 (if (equal? (cadr l) "")
328 (cons s r)
329 (lp (list-ref l 2) (cons (car l) r))))))
330 new)))
331
332 (define-py (py-strip strip s . l)
333 (apply py-rstrip (apply py-lstrip s l) l))
334
335 (define-py (py-title title s)
336 (string-titlecase s))
337
338 (define-py (py-rindex rindex s . l)
339 (let ((n (len s)))
340 (- n (apply pylist-index (string-reverse s) l) 1)))
341
342
343
344 (define-py (py-split split s . l)
345 (define ws (f+ (f-reg "[ \t\n]")))
346 (define r
347 (f-or! (f-seq f-eof (f-out '()))
348 (f-cons (f-seq (mk-token (f* (f-reg! "."))) f-eof) (f-out '()))))
349 (define (u ws) (mk-token (f+ (f-not! ws))))
350 (define (tok ws i)
351 (if (= i 0)
352 (f-list (mk-token (f* (f-reg! "."))))
353 (let ((e (mk-token (f* (f-not! ws)))))
354 (f-seq (f? ws)
355 (f-cons e
356 (let lp ((i i))
357 (if (> (- i 1) 0)
358 (f-or! (f-seq (f? ws) f-eof (f-out '()))
359 (f-cons (f-seq ws e) (Ds (lp (- i 1)))))
360 r)))))))
361
362 (define N 1000000000000)
363 (let ((e (call-with-values
364 (lambda ()
365 (match l
366 (() (values ws N))
367 ((sep) (values (f-tag sep) N))
368 ((sep n) (values (f-tag sep) n))))
369 tok)))
370 (parse s e)))
371
372 (define-py (py-rsplit rsplit s . l)
373 (reverse
374 (map string-reverse
375 (apply py-split
376 (string-reverse s)
377 (match l
378 (() '())
379 ((sep . l) (cons (string-reverse sep) l)))))))
380
381
382 (define-py (py-splitlines splitlines s . l)
383 (let ((n (len s))
384 (keep? (match l
385 ((#:keepends v)
386 v)
387 ((v)
388 v)
389 (_ #f))))
390 (let lp ((i 0) (r '()) (old 0))
391 (if (< i n)
392 (let ((ch (string-ref s i)))
393 (if (eq? ch #\newline)
394 (if keep?
395 (lp (+ i 1)
396 (cons
397 (pylist-slice s old (+ i 1) 1)
398 r)
399 (+ i 1))
400 (lp (+ i 1)
401 (cons
402 (pylist-slice s old i 1)
403 r)
404 (+ i 1)))
405 (lp (+ i 1) r old)))
406 (reverse r)))))
407
408 (define-py (py-swapcase swapcase s)
409 (list->string
410 (string-fold
411 (lambda (ch s)
412 (cons
413 (cond
414 ((char-upper-case? ch)
415 (char-downcase ch))
416 ((char-lower-case? ch)
417 (char-upcase ch))
418 (else ch))
419 s))
420 '()
421 s)))
422
423 (define-py (py-translate translate s table . l)
424 (let* ((n (len s))
425 (w (make-string n))
426 (t (if (eq? table None) #f table))
427 (d (match l (() #f) ((x) x))))
428 (define (tr ch)
429 (define (e)
430 (if t
431 (let ((i (char->integer ch)))
432 (if (< i (string-length t))
433 (string-ref t i)
434 ch))
435 ch))
436
437 (if d
438 (if (string-contains d (list->string (list ch)))
439 #f
440 (e))
441 (e)))
442
443 (let lp ((i 0) (k 0))
444 (if (< i n)
445 (let ((ch (tr (string-ref s i))))
446 (if ch
447 (begin
448 (string-set! w k ch)
449 (lp (+ i 1) (+ k 1)))
450 (lp (+ i 1) k)))
451 (if (= k n)
452 w
453 (pylist-slice w 0 k 1))))))
454
455 (define-syntax-rule (a b x y) (b (symbol->string x) (symbol->string y)))
456
457 (define-syntax-rule (mkop op)
458 (begin
459 (define-method (op (s1 <string>) (s2 <py-string>))
460 (op s1 (slot-ref s2 'str)))
461 (define-method (op (s2 <py-string>) (s1 <string>))
462 (op s1 (slot-ref s2 'str)))))
463
464 (mkop <)
465 (mkop <=)
466 (mkop >)
467 (mkop >=)
468 (mkop +)
469 (mkop *)
470
471 (define-method (< (s1 <string>) (s2 <string>)) (string-ci< s1 s2))
472 (define-method (<= (s1 <string>) (s2 <string>)) (string-ci<= s1 s2))
473 (define-method (> (s1 <string>) (s2 <string>)) (string-ci> s1 s2))
474 (define-method (>= (s1 <string>) (s2 <string>)) (string-ci>= s1 s2))
475
476 (define-method (< (s1 <symbol>) (s2 <symbol>)) (a string-ci< s1 s2))
477 (define-method (<= (s1 <symbol>) (s2 <symbol>)) (a string-ci<= s1 s2))
478 (define-method (> (s1 <symbol>) (s2 <symbol>)) (a string-ci> s1 s2))
479 (define-method (>= (s1 <symbol>) (s2 <symbol>)) (a string-ci>= s1 s2))
480
481
482 (define-py (py-zfill zfill s width)
483 (let* ((n (len s))
484 (w (pk (pylist-slice s 0 n 1))))
485 (let lp ((i 0))
486 (if (< i n)
487 (let ((ch (string-ref s i)))
488 (if (char-numeric? ch)
489 (let lp ((j (max 0 (- i width))))
490 (pk i j)
491 (if (< j i)
492 (begin
493 (string-set! w j #\0)
494 (lp (+ j 1)))
495 w))
496 (lp (+ i 1))))
497 s))))
498
499 (define-python-class string (<py-string>)
500 (define __init__
501 (case-lambda
502 ((self s)
503 (cond
504 ((is-a? s <py-string>)
505 (slot-set! self 'str (slot-ref s 'src)))
506 ((is-a? s <string>)
507 (slot-set! self 'str s)))))))
508
509 (define pystring string)
510
511 (define-method (py-class (o <string>)) string)
512 (define-method (py-class (o <py-string>)) string)
513
514 (define-method (pyhash (o <py-string>)) (hash (slot-ref o 'str) pyhash-N))
515
516 (define-method (py-equal? (o <py-string>) x)
517 (equal? (slot-ref o 'str) x))
518 (define-method (py-equal? x (o <py-string>))
519 (equal? (slot-ref o 'str) x))
520
521 (define-class <string-iter> (<py-string>) str i d)
522
523 (define-method (write (o <string-iter>) . l)
524 (define port (if (null? l) #t (car l)))
525 (for ((x : o)) ((l '()))
526 (cons (string-ref x 0) l)
527 #:final
528 (format port "iter(~s)" (list->string (reverse l)))))
529
530 (define-method (wrap-in (o <string-iter> ))
531 (let ((out (make <string-iter>)))
532 (slot-set! out 'str (slot-ref o 'str))
533 (slot-set! out 'i (slot-ref o 'i))
534 (slot-set! out 'd (slot-ref o 'd))
535 out))
536
537 (define-method (wrap-in (s <string>))
538 (let ((out (make <string-iter>)))
539 (slot-set! out 'str s)
540 (slot-set! out 'i 0)
541 (slot-set! out 'd 1)
542 out))
543
544 (define-method (py-reversed (s <string>))
545 (let ((out (make <string-iter>)))
546 (slot-set! out 'str s)
547 (slot-set! out 'i (- (string-length s) 1))
548 (slot-set! out 'd -1)
549 out))
550
551 (define-method (next (o <string-iter>))
552 (let ((i (slot-ref o 'i ))
553 (d (slot-ref o 'd))
554 (str (slot-ref o 'str)))
555 (if (> d 0)
556 (if (< i (string-length str))
557 (let ((ret (string-ref str i)))
558 (slot-set! o 'i (+ i d))
559 (list->string (list ret)))
560 (throw StopIteration))
561 (if (>= i 0)
562 (let ((ret (string-ref str i)))
563 (slot-set! o 'i (+ i d))
564 (list->string (list ret)))
565 (throw StopIteration)))))
566
567 (define (pystring-listing)
568 (let ((l (to-pylist
569 (map symbol->string
570 '(__add__ __class__ __contains__ __delattr__ __doc__
571 __eq__ __format__ __ge__ __getattribute__
572 __getitem__ __getnewargs__ __getslice__ __gt__
573 __hash__ __init__ __le__ __len__ __lt__ __mod__
574 __mul__ __ne__ __new__ __reduce__ __reduce_ex__
575 __repr__ __rmod__ __rmul__ __setattr__ __sizeof__
576 __str__ __subclasshook__
577 _formatter_field_name_split _formatter_parser
578 capitalize center count decode encode endswith
579 expandtabs find format index isalnum isalpha
580 isdigit islower isspace istitle isupper join
581 ljust lower lstrip partition replace rfind rindex
582 rjust rpartition rsplit rstrip split splitlines
583 startswith strip swapcase
584 title translate upper zfill)))))
585 (pylist-sort! l)
586 l))
587
588