(define-module (language python module re) #:use-module (language python module re compile) #:use-module (language python module re flags) #:use-module (language python list) #:use-module (language python dict) #:use-module (language python try) #:use-module (language python bool) #:use-module (language python exceptions) #:use-module ((language python module _python) #:select (str isinstance range)) #:use-module (language python string) #:use-module (language python for) #:use-module (language python def) #:use-module (language python yield) #:use-module (oop pf-objects) #:use-module ((parser stis-parser) #:select (f-checkr f-eof f-ftr f-pos f-pk f-seq! f-and! f-nl f-reg f-seq Ds f-or! ff* f-ftr)) #:use-module ((ice-9 match) #:select ((match . ice-match))) #:re-export (A ASCII DEBUG I IGNORECASE L LOCALE M MULTILINE X VERBOSE S DOTALL) #:export (compile match fullmatch search split splitm splitmm finditer findall sub subn purge escape)) (define start (make-fluid 0)) (define end (make-fluid 0)) (define e-start (f-pos (lambda (x n m) (fluid-set! start x)))) (define e-end (f-pos (lambda (x n m) (fluid-set! end x)))) (define (ge) (fluid-ref end)) (define (gs) (fluid-ref start)) (define maxsplit (make-fluid 0)) (define splitn (make-fluid 0)) (define (set-maxsplit x) (fluid-set! maxsplit x) (fluid-set! splitn 0)) (define e-maxsplit (f-checkr (lambda (c) (let ((m (fluid-ref maxsplit)) (i (fluid-ref splitn))) (if (> m 0) (if (< i m) (begin (fluid-set! splitn (+ i 1)) #t) #f) #t))))) (define (e-fullmatch e) (f-seq e-start (f-and! e) e-end f-eof)) (define (e-search e) (let lp () (f-or! (f-seq! e-start (f-and! e) e-end) (f-seq (f-or! (f-reg ".") f-nl) (Ds (lp)))))) (define-python-class Regexp () (define __init__ (lam (self s (= flags 0)) (set self 'flags flags) (set self 'pattern s) (let lp ((l s)) (if (string? l) (lp (compile-reg l)) (if (list? l) (begin (set self '__re (list-ref l 0)) (set self 'groups (- (list-ref l 1) 1)) (set self 'groupindex (list-ref l 2))) (begin (set self '__re l) (set self 'groups 0) (set self 'groupindex '()))))))) (define __repr__ (lambda (self) (format #f "re.compile('~a')" (ref self 'pattern)))) (define __str__ __repr__) (define findall (lam (self str (= flags 0)) (findall- self str flags))) (define finditer (lam (self str (= flags None)) (if (eq? flags None) (set! flags (ref self 'flags))) (finditer- self str flags))) (define flags 0) (define fullmatch (lam (self str (= flags None)) (if (eq? flags None) (set! flags (ref self 'flags))) (fullmatch- self str flags))) (define groupindex '()) (define groups '()) (define match (lam (self str (= flags None)) (if (eq? flags None) (set! flags (ref self 'flags))) (match- self str flags))) (define search (lam (self str (= flags None)) (if (eq? flags None) (set! flags (ref self 'flags))) (search- self str flags))) (define split (lam (self str (= maxsplit 0) (= flags None)) (if (eq? flags None) (set! flags (ref self 'flags))) (split- self str maxsplit flags))) (define sub (lam (self repl str (= count 0) (= flags None)) (if (eq? flags None) (set! flags (ref self 'flags))) (sub- self repl str count flags))) (define subn (lam (self repl str (= count 0) (= flags None)) (if (eq? flags None) (set! flags (ref self 'flags))) (subn- self repl str count flags)))) (def (match- re s (= flags 0)) (if (isinstance re str) (match- (Regexp re) s flags) (begin (set-flags flags) (let ((m (parse s (f-seq e-start (ref re '__re) e-end)))) (if m (ReMatch m re (gs) (ge) s) None))))) (define match match-) (def (search- re s (= flags 0)) (if (isinstance re str) (search- (Regexp re) s flags) (begin (set-flags flags) (let ((m (parse s (e-search (ref re '__re))))) (if m (ReMatch m re (gs) (ge) s) None))))) (define search search-) (def (fullmatch- re ss (= flags 0)) (if (isinstance re str) (fullmatch- (Regexp re) ss flags) (begin (set-flags flags) (let ((m (parse ss (e-fullmatch (ref re '__re))))) (if m (ReMatch m re (gs) (ge) ss) None))))) (define fullmatch fullmatch-) (define (grps m) (reverse ((ref m 'groups)))) (define (mku re ss) (lambda (m) (ReMatch (caddr m) re (car m) (cadr m) ss))) (define (mkid re ss) id) (define-syntax-rule (mk split- grps mku) (def (split- re ss (= maxsplit 0) (= flags 0)) (if (not (isinstance re Regexp)) (split- (Regexp re) ss maxsplit flags) (begin (set-flags flags) (set-maxsplit maxsplit) (let ((m (parse ss (ff* (f-seq! e-maxsplit (e-search (ref re '__re)) (f-ftr (lambda (c) (list (gs) (ge) c)))))))) (let lp ((s 0) (l (map (mku re ss) m)) (r '())) (if (pair? l) (let* ((m (car l)) (i ((ref m 'start))) (j ((ref m 'end)))) (lp j (cdr l) (append (grps m) (list (pylist-slice ss s i None)) r))) (reverse (cons (pylist-slice ss s None None) r))))))))) (mk split- grps mku) (define split split-) (define (id x) (list x)) (mk splitm- id mku) (define splitm splitm-) (define (id2 x) (list (ref x '__m))) (mk splitmm- id2 mku) (define splitmm splitmm-) (def (finditer- re s (= flags 0)) (if (isinstance re str) (finditer- (Regexp re) s flags) (let ((e (ref re '__re))) (set-flags flags) ((make-generator () (lambda (yield) (parse s (let lp () (f-seq (f-or! (f-seq! e-start (f-and! e) e-end (f-ftr (lambda (c) (yield (ReMatch c re (gs) (ge) s)) '()))) (f-seq (f-reg "."))) (Ds (lp))))))))))) (define finditer finditer-) (def (findall- re s (= flags 0)) (for ((m : (finditer re s flags))) ((l '())) (let ((grps ((ref m 'groups)))) (cons (cond ((null? grps) ((ref m 'group))) ((= (length grps) 1) (car grps)) (else grps)) l)) #:final (reverse l))) (define findall findall-) (define (mk-repl-str repl) (define (take n) (lambda (m) (let ((s (pylist-ref m n))) (if (eq? s None) (set! s "")) (reverse (string->list s))))) (define (comp r) (let lp ((r r) (l '())) (if (pair? r) (let ((x (car r))) (if (char? x) (lp (cdr r) (cons x l)) (if (null? l) r (let ((l (reverse l))) (cons (lambda (m) l) r))))) (if (null? l) '() (let ((l (reverse l))) (cons (lambda (m) l) r)))))) (define num? char-numeric?) (let* ((l (string->list repl)) (c (let lp ((l l) (r '())) (ice-match l ((#\\ (? num? x) (? num? y) . l) (lp l (cons (take (string->number (list->string (list x y)))) (comp r)))) ((#\\ (? num? x) . l) (lp l (cons (take (string->number (list->string (list x)))) (comp r)))) ((#\\ #\n . l) (lp l (cons #\newline r))) ((x . l) (lp l (cons x r))) (() (reverse (comp r))))))) (lambda (m) (let lp ((c c) (r '())) (if (pair? c) (let ((x (car c))) (if (char? x) (lp (cdr c) (cons x r)) (lp (cdr c) (append (x m) r)))) (list->string (reverse r))))))) (define (mk-repl repl) (if (isinstance repl str) (mk-repl-str repl) repl)) (def (subn- re repl s (= count 0) (= flags 0)) (let ((l (splitm re s count flags))) (define f (mk-repl repl)) (let lp ((l l) (r '()) (i 0)) (ice-match l ((x m . l) (lp l (cons* (f m) x r) (+ i 1))) ((x) (list (py-join "" (reverse (cons x r))) i)) (() (list (py-join "" (reverse r)) i)))))) (define subn subn-) (def (sub- re repl s (= count 0) (= flags 0)) (car (subn re repl s count flags))) (define sub sub-) (define (val x) (if (eq? x None) x (car x))) (define (vali x) (if (not (bool x)) (list -1 -1) (cdr x))) (define-python-class ReMatch () (define __init__ (lambda (self m re s e ss) (set self 're re) (set self '__start s) (set self '__end e) (set self 'string ss) (let ((t (make-hash-table))) (for ((k v : (ref re 'groupindex))) () (hash-set! t k None)) (for ((i : (range (ref re 'groups)))) () (hash-set! t i None)) (if (string? (ref re 'pattern)) (let lp ((l (list-ref m 0))) (ice-match l ((((n . i) . v) . l) (hash-set! t n v) (hash-set! t i v) (lp l)) (((i . v) . l) (hash-set! t i v) (lp l)) (() #t)))) (set self '__m m) (set self '__t t) (set self 'regs (__regs self))))) (define er (list 'er)) (define group (case-lambda ((self nm) (if (and (number? nm) (= nm 0)) (group self) (let ((x (py-get (ref self '__t) nm er))) (if (eq? x er) (raise (IndexError "no souch group in regexp match")) (val x))))) ((self) (pylist-slice (ref self 'string) (start self) (end self) None)))) (define __getitem__ (lambda (self k) (group self k))) (define groups (lam (self (= default None)) (let ((t (ref self '__t)) (n (ref (ref self 're) 'groups))) (let lp ((i 1)) (if (<= i n) (let ((w (val (hash-ref t i)))) (cons (if (eq? w None) default w) (lp (+ i 1)))) '()))))) (define __regs (lambda (self) (let ((t (ref self '__t)) (n (ref (ref self 're) 'groups))) (cons (span self) (let lp ((i 1)) (if (<= i n) (cons (vali (hash-ref t i)) (lp (+ i 1))) '())))))) (define groupdict (lam (self (= default None)) (let ((t (make-hash-table))) (for ((k v : (ref self '__t))) () (if (not (number? k)) (let ((w (val v))) (hash-set! t k (if (eq? w None) default w)))) #:final t)))) (define start (lambda (self) (ref self '__start))) (define end (lambda (self) (ref self '__end))) (define span (lambda (self) (list (ref self '__start) (ref self '__end)))) (define expand (lambda (self template) ((mk-repl template) self))) (define __repr__ (lambda (self) (format #f "ReMatch<~s>" (group self))))) (def (compile s (= flags 0)) (Regexp s flags)) (define (purge) (values)) (define escape (let ((m (string->list "()[]#$*+-.^|\\"))) (set! m (cons #\newline m)) (lambda (x) (let lp ((l (string->list x)) (r '())) (ice-match l ((x . l) (lp l (if (member x m) (if (eq? x #\newline) (cons* #\n #\\ r) (cons* x #\\ r)) (cons x r)))) (() (list->string (reverse r))))))))