(define-module (language python module _csv) #:use-module (ice-9 control) #:use-module (oop pf-objects) #:use-module (language python list) #:use-module (language python def) #:use-module (language python yield) #:use-module (language python for) #:use-module (language python string) #:use-module (language python exceptions) #:export (QUOTE_ALL QUOTE_MINIMAL QUOTE_NONNUMERIC QUOTE_NONE reader writer Error field_size_limit get_dialect register_dialect unregister_dialect list_dialects __doc__ Dialect __version__)) (define __version__ "1.0") (define-syntax-rule (aif it p . l) (let ((it p)) (if it . l))) (define-python-class Error (Exception)) (define-python-class Dialect () (define __init__ (lambda (self . x) #f))) (define *field-size* (make-fluid 131072)) (define field_size_limit (case-lambda (() (fluid-ref *field-size*)) ((x) (fluid-set! *field-size* x)))) (define *dialects* (make-hash-table)) (define register_dialect (lam (nm (= val None) (** keyw)) (let ((newval (Dialect))) (define-syntax-rule (set- x y z key default) (set x 'key (hash-ref z (symbol->string 'key) (if (eq? y None) default (ref y 'key default))))) (define-syntax-rule (setter x y z ((k def) ...)) (begin (set- x y z k def) ...)) (setter newval val keyw ((delimiter ",") (doublequote #t) (escapechar None) (lineterminator "\r\n") (quotechar "\"") (quoting 'minimal) (skipinitialspace #f) (strict #f))) (hash-set! *dialects* nm newval)))) (define (get_dialect nm) (hash-ref *dialects* nm None)) (define (unregister_dialect nm) (hash-remove! *dialects* nm)) (define (list_dialects) (let ((ret '())) (hash-for-each (lambda (k v) (set! ret (cons k ret))) *dialects*) (py-list ret))) (define __doc__ "CSV parsing and writing. This module provides classes that assist in the reading and writing of Comma Separated Value (CSV) files, and implements the interface described by PEP 305. Although many CSV files are simple to parse, the format is not formally defined by a stable specification and is subtle enough that parsing lines of a CSV file with something like line.split(\",\") is bound to fail. The module supports three\n basic APIs: reading, writing, and registration of dialects. DIALECT REGISTRATION: Readers and writers support a dialect argument, which is a convenient handle on a group of settings. When the dialect argument is a string, it identifies one of the dialects previously registered with the module. If it is a class or instance, the attributes of the argument are used as the settings for the reader or writer: class excel: delimiter = ',' quotechar = '\"' escapechar = None doublequote = True skipinitialspace = False lineterminator = '\\r\\n' quoting = QUOTE_MINIMAL SETTINGS: * quotechar - specifies a one-character string to use as the quoting character. It defaults to '\"'. * delimiter - specifies a one-character string to use as the field separator. It defaults to ','. * skipinitialspace - specifies how to interpret whitespace which immediately follows a delimiter. It defaults to False, which means that whitespace immediately following a delimiter is part of the following field. * lineterminator - specifies the character sequence which should terminate rows. * quoting - controls when quotes should be generated by the writer. It can take on any of the following module constants: csv.QUOTE_MINIMAL means only when required, for example, when a field contains either the quotechar or the delimiter csv.QUOTE_ALL means that quotes are always placed around fields. csv.QUOTE_NONNUMERIC means that quotes are always placed around fields which do not parse as integers or floating point numbers. csv.QUOTE_NONE means that quotes are never placed around fields. * escapechar - specifies a one-character string used to escape the delimiter when quoting is set to QUOTE_NONE. * doublequote - controls the handling of quotes inside fields. When True, two consecutive quotes are interpreted as one during read, and when writing, each quote character embedded in the data is written as two quotes") (define QUOTE_ALL 'all) (define QUOTE_MINIMAL 'minimal) (define QUOTE_NONNUMERIC 'nonnumeric) (define QUOTE_NONE 'none) (define e (list 'fail)) (define-syntax-rule (bif it p a b) (let ((it p)) (if (eq? it e) b a))) (define-syntax-rule (chr a b c) (let ((x (bif it a it (bif it b it c)))) (if (string? x) (string-ref x 0) #f))) (define-syntax-rule (oor a b c) (bif it a it (bif it b it c))) (define-syntax-rule (str a b c) (oor a b c)) (def (reader csvfile (= dialect "excel") (** fmtparams)) (let* ((dialect (get_dialect dialect)) (delimiter (chr (py-get fmtparams "delimiter" e) (ref dialect 'Delimiter e) ",")) (doublequote (oor (py-get fmtparams "doublequote" e) (ref dialect 'doublequote e) #t)) (escapechar (chr (py-get fmtparams "escapechar" e) (ref dialect 'escapechar e) None)) (lineterminator (str (py-get fmtparams "lineterminator" e) (ref dialect 'lineterminator e) "\r\n")) (quotechar (chr (py-get fmtparams "quotechar" e) (ref dialect 'quotechar e) "\"")) (quoting (oor (py-get fmtparams "quoting" e) (ref dialect 'quoting e) QUOTE_MINIMAL)) (skipispace (oor (py-get fmtparams "skipinitialspace" e) (ref dialect 'skipinitialspace e) #t)) (strict (oor (py-get fmtparams "strict" e) (ref dialect 'strict e) #f))) ((make-generator () (lambda (yield) (let/ec ret (let ((iter (wrap-in csvfile))) (let lp0 ((state 'start) (r '()) (l '())) (define-syntax-rule (raise- s) (if strict (raise s))) (define-syntax-rule (mk-wrap wrap state) (define (wrap r) (let* ((x (list->string (reverse r))) (x (if (eq? state 'numeric) (string->number x) x))) x))) (mk-wrap wrap1 state) (let ((s (catch #t (lambda () (next iter)) (lambda x (cond ((or (eq? state 'line-end) (eq? state 'start)) (ret (reverse l))) ((eq? state 'field-end) (ret (reverse (cons (wrap1 r) l)))) ((or (eq? state 'numeric) (eq? state 'normal)) (ret (reverse (cons (wrap1 r) l)))) (else (raise- (Error "missing quote")) (ret (reverse (cons (wrap1 r) l))))))))) (let* ((n (len s)) (? (= n 1))) (let lp ((i 0) (state state) (r r) (l l)) (mk-wrap wrap state) (define-syntax-rule (raise- s) (if strict (raise s) (lp (+ i 1) state r l))) (define (end j ch) (if (eq? ch #\newline) (cond ((eq? state 'start) (begin (yield (py-list (reverse l))) (if (= n j) (lp0 'start '() '()) (lp (+ j 1) 'line-end '() '())))) ((eq? state 'quote) (if strict (raise (Error "newline in quote")) (lp j 'normal r l))) ((eq? state 'line-end) (if (= n j) (lp0 'start r l) (lp (+ j 1) state r l))) (else (yield (reverse (cons (wrap r) l))) (if (= n j) (lp0 'start '() '()) (lp (+ j 1) 'line-end '() '())))) (if (= j n) (begin (yield (reverse (cons* "" (wrap r) l))) (lp0 'start '() '())) (lp (+ j 1) 'start '() (cons (wrap r) l))))) (define (do-quotechar ch) (cond ((eq? state 'doublequote) (lp (+ i 1) 'quote (cons ch r) l)) ((and (pair? state) (eq? (car state) 'escape)) (lp (+ i 1) (cdr state) (cons ch r) l)) ((eq? state 'quote) (if doublequote (if (and (< (+ i 1) n) (equal? quotechar (string-ref s (+ i 1)))) (lp (+ i 2) state (cons quotechar r) l) (lp (+ i 1) 'doublequote r l)) (lp (+ i 1) 'field-end r l))) ((eq? state 'start) (if (or (eq? quoting 'minimal) (eq? quoting 'all) (eq? quoting 'nonnumeric)) (lp (+ i 1) 'quote r l) (raise- (Error "QOUTE_NONE supports no quoteing")))) (else (raise- (Error "wrong quoting found"))))) (define (do-whitespace ch) (cond ((eq? state 'doublequote) (raise- (Error "whitespace after end of quote"))) ((pair? state) (lp (+ i 1) (cdr state) r l)) ((eq? state 'start) (if skipispace (lp (+ i 1) state r l) (if (or (eq? quoting 'minimal) (eq? quoting 'none) (eq? quoting 'nonnumeric)) (lp i 'normal r l) (raise- (Error "whitespace outside quote"))))) ((or (eq? state 'normal) (eq? state 'quote)) (lp (+ i 1) state (cons ch r) l)) ((eq? state 'numeric) (raise- (Error "whitespace in numeric field"))))) (define (do-esc-quote) (if (< (+ i 1) n) (let ((ch2 (string-ref s (+ i 1)))) (cond ((and (eq? state 'quoting) (eq? ch2 quotechar)) (lp (+ i 2) state (cons quotechar r) l)) ((eq? ch2 delimiter) (lp (+ i 2) state (cons delimiter r) l)) ((eq? ch2 escapechar) (lp (+ i 2) state (cons escapechar r) l)) (else (lp (+ i 2) state r l)))) (lp (+ i 1) (cons 'escape state) r l))) (define (do-escape ch) (cond ((pair? state) (lp (+ i 1) (cdr state) (cons ch r) l)) ((eq? state 'doublequote) (raise (Error "no field or line end after quote, found esc"))) ((eq? state 'start) (if (eq? quoting 'none) (lp i 'normal r l) (raise- (Error "escapecharacter in nonquote")))) ((eq? state 'normal) (if (eq? quoting 'none) (do-esc-quote) (raise- (Error "escapecharacter in nonequote")))) ((eq? state 'numeric) (raise- (Error "escacpechar in numeric field"))) ((eq? state 'quote) (do-esc-quote)) ((eq? state 'end) (raise- (Error "escapechar after quote"))))) (define (do-delim ch) (cond ((eq? state 'start) (end i ch)) ((eq? state 'quote) (lp (+ i 1) state (cons ch r) l)) ((eq? state 'normal) (end i ch)) ((eq? state 'doublequote) (lp (+ i 1) 'start '() (cons (wrap r) l))) ((eq? state 'numeric) (end i ch)))) (define (do-line-end ch) (cond ((eq? state 'quote) (lp (+ i 1) state (cons ch r) l)) ((pair? state) (lp (+ i 1) (cdr state) (cons ch r) l)) ((eq? state 'field-end) (end i ch)) ((eq? state 'line-end) (lp (+ i 1) 'line-end r l)) (else (end i #\newline)))) (if (< i n) (let ((ch (string-ref s i))) (cond ((or (eq? ch #\newline) (eq? ch #\return)) (do-line-end ch)) ((eq? state 'line-end) (lp i 'start r l)) ((eq? ch delimiter) (do-delim ch)) ((eq? state 'field-end) (raise- (Error "no ending char after field-end"))) ((or (eq? ch #\space) (eq? ch #\tab)) (do-whitespace ch)) ((eq? ch quotechar) (do-quotechar ch)) ((eq? ch escapechar) (do-escape ch)) ((eq? state 'numeric) (if (or (eq? ch #\.) (eq? ch #\-) (eq? ch #\e) (eq? ch #\E) (char-numeric? ch)) (lp (+ i 1) state (cons ch r) l) (raise- (Error "nonumeric in numeric field")))) ((eq? state 'start) (cond ((eq? quoting 'all) (raise- (Error "nonquoted field when all should be quoted"))) ((eq? quoting 'nonnumeric) (lp i 'numeric r l)) (else (lp i 'normal r l)))) ((or (eq? state 'quote) (eq? state 'normal)) (lp (+ i 1) state (cons ch r) l)) ((eq? state 'doublequote) (raise- (Error "spur char after end of quote"))) ((pair? state) (lp (+ i 1) (cdr state) r l)) (else (error "Bug in csv reader")))) (if ? (lp0 state r l) (end i #\newline)))))))))))))) (define-python-class writer () (define __init__ (lam (self csvfile (= dialect "excel") (** fmtparams)) (set! dialect (if (string? dialect) (get_dialect dialect) dialect)) (let* ((hash (make-hash-table))) (hash-set! hash 'delimiter (chr (py-get fmtparams "delimiter" e) (ref dialect 'Delimiter e) ",")) (hash-set! hash 'doublequote (oor (py-get fmtparams "doublequote" e) (ref dialect 'doublequote e) #t)) (hash-set! hash 'escapechar (chr (py-get fmtparams "escapechar" e) (ref dialect 'escapechar e) None)) (hash-set! hash 'lineterminator (str (py-get fmtparams "lineterminator" e) (ref dialect 'lineterminator e) "\r\n")) (hash-set! hash 'quotechar (chr (py-get fmtparams "quotechar" e) (ref dialect 'quotechar e) "\"")) (hash-set! hash 'quoting (oor (py-get fmtparams "quoting" e) (ref dialect 'quoting e) QUOTE_MINIMAL)) (hash-set! hash 'skipispace (oor (py-get fmtparams "skipinitialspace" e) (ref dialect 'skipinitialspace e) #t)) (hash-set! hash 'strict (oor (py-get fmtparams "strict" e) (ref dialect 'strict e) #f)) (set self 'csvfile csvfile) (set self '_hash csvfile)))) (define writerow (lambda (self l) (define (write x) ((ref (ref self 'csvfile) 'write) x)) (define (get-str x) (if (string? x) x (scm-str x))) (let* ((hash (ref self '_hash)) (delimiter (hash-ref hash 'delimiter)) (doublequote (hash-ref hash 'doublequote)) (escapechar (hash-ref hash 'escapechar)) (lineterminator (hash-ref hash 'lineterminator)) (quotechar (hash-ref hash 'quotechar)) (quoting (hash-ref hash 'quoting)) (skipispace (hash-ref hash 'skipinitialspace)) (strict (hash-ref hash 'strict)) (terms (string->list lineterminator))) (define (has-escape-1 x) (let ((n (len x))) (let lp ((i 0)) (if (< i n) (let ((a (string-ref x i))) (if (or (eq? a #\,) (member a terms)) #t (lp (+ i 1)))) #f)))) (define (has-escape-2 x) (let ((n (len x))) (let lp ((i 0)) (if (< i n) (let ((a (string-ref x i))) (if (or (eq? a quotechar) (eq? a delimiter) (member a terms)) #t (lp (+ i 1)))) #f)))) (define (quote-it x) (let ((n (len x))) (let lp ((i 0) (r '())) (if (< i n) (let ((a (string-ref x i))) (cond ((eq? a quotechar) (if doublequote (lp (+ i 1) (cons* a a r)) (if escapechar (lp (+ i 1) (cons* a escapechar r)) (raise (Error "no escapechar defined"))))) ((eq? a escapechar) (lp (+ i 1) (cons* a a r))) (else (lp (+ i 1) (cons a r))))) (list->string (reverse! r)))))) (define (is-numeric x) (catch #t (lambda () (string->number x)) (lambda x #f))) (for ((x : l)) ((r '())) (let/ec ret (cons (cond ((eq? quoting 'none) (let ((x (get-str x))) (if (has-escape-1 x) (if strict (raise (Error "None quoting and nonspecial chars")) (ret r)) x))) ((eq? quoting 'nonnumeric) (let ((x (get-str x))) (aif it (is-numeric x) (number->string it) (quote-it x)))) ((eq? quoting 'none) (get-str x)) ((eq? quoting 'minimal) (let ((x (get-str x))) (if (has-escape-2 x) (quote-it x) x)))) r) #:final (write (string-join (reverse (cons lineterminator r)) (string-ref delimiter 0)))))))))