(define-module (language python module re) #:use-module (language python module re compile) #:use-module (language python module re flags) #:use-module (language python list) #:use-module (language python dict) #:use-module (language python try) #:use-module (language python exceptions) #:use-module ((language python module python) #:select (str isinstance range)) #:use-module (language python string) #:use-module (language python for) #:use-module (language python def) #:use-module (language python yield) #:use-module (oop pf-objects) #:use-module ((parser stis-parser) #:select (f-checkr f-eof f-ftr f-pos f-pk f-seq! f-and! f-nl f-reg f-seq Ds f-or! ff* f-ftr)) #:use-module ((ice-9 match) #:select ((match . ice-match))) #:re-export (A ASCII DEBUG I IGNORECASE L LOCALE M MULTILINE X VERBOSE S DOTALL) #:export (compile match fullmatch search split finditer findall sub subn purge escape)) (define start (make-fluid 0)) (define end (make-fluid 0)) (define e-start (f-pos (lambda (x n m) (fluid-set! start x)))) (define e-end (f-pos (lambda (x n m) (fluid-set! end x)))) (define (ge) (fluid-ref end)) (define (gs) (fluid-ref start)) (define maxsplit (make-fluid 0)) (define splitn (make-fluid 0)) (define (set-maxsplit x) (fluid-set! maxsplit x) (fluid-set! splitn 0)) (define e-maxsplit (f-checkr (lambda (c) (let ((m (fluid-ref maxsplit)) (i (fluid-ref splitn))) (if (> m 0) (if (< i m) (begin (fluid-set! splitn (+ i 1)) #t) #f) #t))))) (define (e-fullmatch e) (f-seq e-start (f-and! e) e-end f-eof)) (define (e-search e) (let lp () (f-or! (f-seq! e-start (f-and! e) e-end) (f-seq (f-or! (f-reg ".") f-nl) (Ds (lp)))))) (define-python-class Regexp () (define __init__ (lam (self s (= flags 0)) (set self 'flags flags) (set self 'pattern s) (let ((l (compile-reg s))) (set self '__re (list-ref l 0)) (set self 'groups (- (list-ref l 1) 1)) (set self 'groupindex (list-ref l 2))))) (define __repr__ (lambda (self) (format #f "re.compile('~a')" (ref self 'pattern)))) (define __str__ __repr__) (define findall (lam (self str (= flags 0)) (findall- self str flags))) (define finditer (lam (self str (= flags None)) (if (eq? flags None) (set! flags (ref self 'flags))) (finditer- self str flags))) (define flags 0) (define fullmatch (lam (self str (= flags None)) (if (eq? flags None) (set! flags (ref self 'flags))) (fullmatch- self str flags))) (define groupindex '()) (define groups '()) (define match (lam (self str (= flags None)) (if (eq? flags None) (set! flags (ref self 'flags))) (match- self str flags))) (define search (lam (self str (= flags None)) (if (eq? flags None) (set! flags (ref self 'flags))) (search- self str flags))) (define split (lam (self str (= maxsplit 0) (= flags None)) (if (eq? flags None) (set! flags (ref self 'flags))) (split- self str maxsplit flags))) (define sub (lam (self repl str (= count 0) (= flags None)) (if (eq? flags None) (set! flags (ref self 'flags))) (sub- self repl str count flags))) (define subn (lam (self repl str (= count 0) (= flags None)) (if (eq? flags None) (set! flags (ref self 'flags))) (subn- self repl str count flags)))) (def (match- re s (= flags 0)) (if (isinstance re str) (match- (Regexp re) s flags) (begin (set-flags flags) (let ((m (parse s (f-seq e-start (ref re '__re) e-end)))) (if m (ReMatch m re (gs) (ge) s) None))))) (define match match-) (def (search- re s (= flags 0)) (if (isinstance re str) (search- (Regexp re) s flags) (begin (set-flags flags) (let ((m (parse s (e-search (ref re '__re))))) (if m (ReMatch m re (gs) (ge) s) None))))) (define search search-) (def (fullmatch- re ss (= flags 0)) (if (isinstance re str) (fullmatch- (Regexp re) ss flags) (begin (set-flags flags) (let ((m (parse ss (e-fullmatch (ref re '__re))))) (if m (ReMatch m re (gs) (ge) ss) None))))) (define fullmatch fullmatch-) (define (grps m) (reverse ((ref m 'groups)))) (define-syntax-rule (mk split- grps) (def (split- re ss (= maxsplit 0) (= flags 0)) (if (isinstance re str) (split- (Regexp re) ss maxsplit flags) (begin (set-flags flags) (set-maxsplit maxsplit) (let ((m (parse ss (ff* (f-seq! e-maxsplit (e-search (ref re '__re)) (f-ftr (lambda (c) (list (gs) (ge) c)))))))) (let lp ((s 0) (l (map (lambda (m) (ReMatch (caddr m) re (car m) (cadr m) ss)) m)) (r '())) (if (pair? l) (let* ((m (car l)) (i ((ref m 'start))) (j ((ref m 'end)))) (lp j (cdr l) (append (grps m) (list (pylist-slice ss s i None)) r))) (reverse (cons (pylist-slice ss s None None) r))))))))) (mk split- grps) (define split split-) (define (id x) (list x)) (mk splitm- id) (define splitm splitm-) (def (finditer- re s (= flags 0)) (if (isinstance re str) (finditer- (Regexp re) s flags) (let ((e (ref re '__re))) (set-flags flags) ((make-generator () (lambda (yield) (parse s (let lp () (f-seq (f-or! (f-seq! e-start (f-and! e) e-end (f-ftr (lambda (c) (yield (ReMatch c re (gs) (ge) s)) '()))) (f-seq (f-reg "."))) (Ds (lp))))))))))) (define finditer finditer-) (def (findall- re s (= flags 0)) (for ((m : (finditer re s flags))) ((l '())) (let ((grps ((ref m 'groups)))) (cons (cond ((null? grps) ((ref m 'group))) ((= (length grps) 1) (car grps)) (else grps)) l)) #:final (reverse l))) (define findall findall-) (define (mk-repl-str repl) (define (take n) (lambda (m) (let ((s (pylist-ref m n))) (if (eq? s None) (set! s "")) (reverse (string->list s))))) (define (comp r) (let lp ((r r) (l '())) (if (pair? r) (let ((x (car r))) (if (char? x) (lp (cdr r) (cons x l)) (if (null? l) r (let ((l (reverse l))) (cons (lambda (m) l) r))))) (if (null? l) '() (let ((l (reverse l))) (cons (lambda (m) l) r)))))) (define num? char-numeric?) (let* ((l (string->list repl)) (c (let lp ((l l) (r '())) (ice-match l ((#\\ (? num? x) (? num? y) . l) (lp l (cons (take (string->number (list->string (list x y)))) (comp r)))) ((#\\ (? num? x) . l) (lp l (cons (take (string->number (list->string (list x)))) (comp r)))) ((#\\ #\n . l) (lp l (cons #\newline r))) ((x . l) (lp l (cons x r))) (() (reverse (comp r))))))) (lambda (m) (let lp ((c c) (r '())) (if (pair? c) (let ((x (car c))) (if (char? x) (lp (cdr c) (cons x r)) (lp (cdr c) (append (x m) r)))) (list->string (reverse r))))))) (define (mk-repl repl) (if (isinstance repl str) (mk-repl-str repl) repl)) (def (subn- re repl s (= count 0) (= flags 0)) (let ((l (splitm re s count flags))) (define f (mk-repl repl)) (let lp ((l l) (r '()) (i 0)) (ice-match l ((x m . l) (lp l (cons* (f m) x r) (+ i 1))) ((x) (list (py-join "" (reverse (cons x r))) i)) (() (list (py-join "" (reverse r)) i)))))) (define subn subn-) (def (sub- re repl s (= count 0) (= flags 0)) (car (subn re repl s count flags))) (define sub sub-) (define (val x) (if (eq? x None) x (car x))) (define (vali x) (if (eq? x None) (list -1 -1) (cdr x))) (define-python-class ReMatch () (define __init__ (lambda (self m re s e ss) (set self 're re) (set self '__start s) (set self '__end e) (set self 'string ss) (let ((t (make-hash-table))) (for ((k v : (ref re 'groupindex))) () (hash-set! t k None)) (for ((i : (range (ref re 'groups)))) () (hash-set! t i None)) (let lp ((l (list-ref m 0))) (ice-match l ((((n . i) . v) . l) (hash-set! t n v) (hash-set! t i v) (lp l)) (((i . v) . l) (hash-set! t i v) (lp l)) (() #t))) (set self '__t t) (set self 'regs (__regs self))))) (define er (list 'er)) (define group (case-lambda ((self nm) (if (= nm 0) (group self) (let ((x (py-get (ref self '__t) nm er))) (if (eq? x er) (raise (IndexError "no souch group in regexp match")) (val x))))) ((self) (pylist-slice (ref self 'string) (start self) (end self) None)))) (define __getitem__ (lambda (self k) (group self k))) (define groups (lam (self (= default None)) (let ((t (ref self '__t)) (n (ref (ref self 're) 'groups))) (let lp ((i 1)) (if (<= i n) (let ((w (val (hash-ref t i)))) (cons (if (eq? w None) default w) (lp (+ i 1)))) '()))))) (define __regs (lambda (self) (let ((t (ref self '__t)) (n (ref (ref self 're) 'groups))) (cons (span self) (let lp ((i 1)) (if (<= i n) (cons (vali (hash-ref t i)) (lp (+ i 1))) '())))))) (define groupdict (lam (self (= default None)) (let ((t (make-hash-table))) (for ((k v : (ref self '__t))) () (if (not (number? k)) (let ((w (val v))) (hash-set! t k (if (eq? w None) default w)))) #:final t)))) (define start (lambda (self) (ref self '__start))) (define end (lambda (self) (ref self '__end))) (define span (lambda (self) (list (ref self '__start) (ref self '__end)))) (define expand (lambda (self template) ((mk-repl template) self))) (define __repr__ (lambda (self) (format #f "ReMatch<~s>" (group self))))) (define (compile s) (Regexp s)) (define (purge) (values)) (define escape (let ((m (string->list "()[]#$*+-.^|\\"))) (lambda (x) (let lp ((l (string->list x)) (r '())) (ice-match l ((x . l) (lp l (if (member x m) (cons* #\\ x r) x))) (() (list->string (reverse r))))))))