diff options
author | Stefan Israelsson Tampe <stefan.itampe@gmail.com> | 2018-08-22 21:35:43 +0200 |
---|---|---|
committer | Stefan Israelsson Tampe <stefan.itampe@gmail.com> | 2018-08-22 21:35:43 +0200 |
commit | 43792510bfeb15e8416a5782ab64126ee8950950 (patch) | |
tree | 7de7499a1a5a3f841d24ad2e0ea50964fa0e9e84 /modules/language/python/module/_csv.py | |
parent | a41eeb67b1aa32199501db6d013e259ccb7484e6 (diff) |
csv.py
Diffstat (limited to 'modules/language/python/module/_csv.py')
-rw-r--r-- | modules/language/python/module/_csv.py | 413 |
1 files changed, 413 insertions, 0 deletions
diff --git a/modules/language/python/module/_csv.py b/modules/language/python/module/_csv.py new file mode 100644 index 0000000..6db2d05 --- /dev/null +++ b/modules/language/python/module/_csv.py @@ -0,0 +1,413 @@ +(define-module (language python module _csv) + #:use-module (oop pf-objects) + #:use-module (langauge python list) + #:use-module (langauge python def) + #:use-module (langauge python yield) + #:use-module (langauge python for) + #:use-module (langauge python exceptions) + #:export (QUOTE_ALL QUOTE_MINIMAL QUOTE_NONNUMERIC QUOTE_NONE + reader writer Error field_size_limit + get_dialect register_dialect unregister_dialect + list_dialects __doc__ Dialect)) + +(define-python-class Error (Exception)) + +(define-python-class Dialect () + (define __init__ + (lambda (self . x) + #f))) + +(define *field-size* (make-fluid 131072)) +(define field_size_limit + (case-lambda + (() (fluid-ref *field-size*)) + ((x) (fluid-set! *field-size* x)))) + +(define *dialects* (make-hash-table)) +(def (register_dialect nm (= val None) (** keyw)) + (let ((newval (Dialect))) + (define-syntax-rule (set- x y z key default) + (set x 'key (hash-ref z (symbol->string 'key) + (if (eq? y None) + default + (ref y 'key default))))) + (define-syntax-rule (setter x y z ((k def) ...)) + (begin (set- x y z k def) ...)) + + (setter newval val keyw + ((delimiter ",") + (doublequote #t) + (escapechar None) + (lineterminator "\r\n") + (quotechar "\"") + (quoting 'minimal) + (skipinitialspace #f) + (strict #f))) + + (hash-set! *dialects* nm newval))) + + + + (hash-set! *dialects* nm val)) +(define (get_dialect nm val) + (hash-ref *dialects* nm val None)) +(define (unregister_dialect nm) + (hash-delete! *dialects nm)) + +(define (list_dialects) + (let ((ret '())) + (hash-for-each + (lambda (k v) + (set! ret (cons k ret))) + *dialects*) + (py-ist ret))) + +(define __doc__ +"CSV parsing and writing. + +This module provides classes that assist in the reading and writing +of Comma Separated Value (CSV) files, and implements the interface +described by PEP 305. Although many CSV files are simple to parse, +the format is not formally defined by a stable specification and +is subtle enough that parsing lines of a CSV file with something +like line.split(\",\") is bound to fail. The module supports three\n +basic APIs: reading, writing, and registration of dialects. + + +DIALECT REGISTRATION: + +Readers and writers support a dialect argument, which is a convenient +handle on a group of settings. When the dialect argument is a string, +it identifies one of the dialects previously registered with the module. +If it is a class or instance, the attributes of the argument are used as +the settings for the reader or writer: + + class excel: + delimiter = ',' + quotechar = '\"' + escapechar = None + doublequote = True + skipinitialspace = False + lineterminator = '\\r\\n' + quoting = QUOTE_MINIMAL + +SETTINGS: + + * quotechar - specifies a one-character string to use as the + quoting character. It defaults to '\"'. + * delimiter - specifies a one-character string to use as the + field separator. It defaults to ','. + * skipinitialspace - specifies how to interpret whitespace which + immediately follows a delimiter. It defaults to False, which + means that whitespace immediately following a delimiter is part + of the following field. + * lineterminator - specifies the character sequence which should + terminate rows. + * quoting - controls when quotes should be generated by the writer. + It can take on any of the following module constants: + + csv.QUOTE_MINIMAL means only when required, for example, when a + field contains either the quotechar or the delimiter + csv.QUOTE_ALL means that quotes are always placed around fields. + csv.QUOTE_NONNUMERIC means that quotes are always placed around + fields which do not parse as integers or floating point + numbers. + csv.QUOTE_NONE means that quotes are never placed around fields. + * escapechar - specifies a one-character string used to escape + the delimiter when quoting is set to QUOTE_NONE. + * doublequote - controls the handling of quotes inside fields. When + True, two consecutive quotes are interpreted as one during read, + and when writing, each quote character embedded in the data is + written as two quotes") + +(define QUOTE_ALL 'all) +(define QUOTE_MINIMAL 'minimal) +(define QUOTE_NONNUMERIC 'nonumeric) +(define QUOTE_NONE 'none) + +(def (reader csvfile (= dialect "excel") (** fmtparams)) + (let* + ((dialect (get-dialect dialect)) + + (delimiter (chr (py-get fmtparams "delimiter" e) + (ref dialect 'Delimiter e) + ",")) + + (doublequote (oor (py-get fmtparams "doublequote" e) + (ref dialect 'doublequote e) + #t)) + + (escapechar (chr (py-get fmtparams "escapechar" e) + (ref dialect 'escapechar e) + None)) + + (lineterminator (str (py-get fmtparams "lineterminator" e) + (ref dialect 'lineterminator e) + "\r\n")) + + (quotechar (chr (py-get fmtparams "quotechar" e) + (ref dialect 'quotechar e) + "\"")) + + (quoting (oor (py-get fmtparams "quoting" e) + (ref dialect 'quoting e) + QUOTE_MINIMAL)) + + (skipispace (oor (py-get fmtparams "skipinitialspace" e) + (ref dialect 'skipinitialspace e) + #t)) + + (strict (oor (py-get fmtparams "strict" e) + (ref dialect 'strict e) + #f))) + (make-generator () + (lambda (yield) + (for ((s : cvsfile)) () + (let ((n (len s))) + (let lp ((i 0) (state #f) (l '())) + (let lp2 ((j i) (r '())) + (define-syntax-rule (raise- s) + (if strict + (raise s) + (lp (+ j 1) r))) + (define (end j ch) + (if (and (eq? state 'start) + (eq? ch #\newline)) + (yield (py-list (reverse l))) + (let* ((x (list->string (reverse r))) + (x (if (eq? state 'numeric) + (string->number x) + x))) + (if (eq? ch #\newline) + (yield (py-list (reverse (cons x l)))) + (lp (+ j 1) 'start (cons x l)))))) + + (define (do-quotechar) + (cond + ((eq? state 'quote) + (if doublequote + (if (and (< (+ i 1) n) + (equal? quotechar + (string-ref s (+ i 1)))) + (lp2 (+ j 2) (cons quotechar r)) + (end (+ j 1))) + (end (+ j 1)))) + + ((eq? state 'start) + (if (or (eq? quoting 'minimal) + (eq? quoting 'all) + (eq? quoting 'nonnumeric)) + (lp (+ j 1) 'quote l) + (raise- (Error "QOUTE_NONE supports no quoteing")))) + + (else + (raise- (Error "wrong quoting found"))))) + + (define (do-whitespace ch) + (cond + ((eq? state 'start) + (if skipispace + (lp2 (+ i 1) r) + (if (or (eq? quoting 'minimal) + (eq? quoting 'none)) + (lp i 'normal l) + (raise- (Error "whitespace outside quote"))))) + + ((or (eq? state 'normal) + (eq? state 'quote)) + (lp2 (+ i 1) (cons ch r))) + + ((eq? state 'nnumeric) + (raise- (Error "whitespace in numeric field"))) + + ((eq? state 'end) + (raise- (Error "whitespace after quote"))))) + + (define (do-esc-qupote) + (if (< (+ j 1) n) + (let ((ch2 (string-ref s (+ j 1)))) + (cond + ((and (eq? state 'quoting) + (eq? ch2 quotechar)) + (lp2 (+ j 2) + (cons quotechar r))) + + ((eq? ch2 delimiter) + (lp2 (+ j 2) + (cons delimiter r))) + + ((eq? ch2 escapechar) + (lp2 (+ j 2) + (cons escapechar r))) + + (else + (lp2 (+ j 2) r)))) + (raise- (Error "single escape ends line")))) + + (define (do-escape) + (cond + ((eq? state 'start) + (if (eq? quoting 'none) + (lp j 'normal l) + (raise- (Error "escapecharacter in nonquote")))) + + ((eq? state 'normal) + (if (eq? quoting 'none) + (do-esc-quote) + (raise- (Error "escapecharacter in nonequote")))) + + ((eq? state 'numeric) + (raise- (Error "escacpechar in numeric field"))) + + ((eq? state 'quote) + (do-esc-quote)) + + ((eq? state 'end) + (raise- (Error "escapechar after quote"))))) + + (define (do-delim ch) + (cond + ((or (eq? state 'start) + (eq? state 'end)) + (end ch)) + + ((eq? state 'quote) + (if (eq? quoteing 'minimal) + (raise- + (Error "minimal quoting must quote delimiter")) + (end ch))) + + ((eq? state 'normal) + (end ch)) + + ((eq? state 'numeric) + (end ch)))) + + (if (< j n) + (let ((ch (string-ref s i))) + (cond + ((or (eq? ch #\newline) + (eq? ch #\return)) + (if (eq? state 'quote) + (raise- (Error "missing end quote character")) + (end #\newline))) + + ((or (eq? ch #\space) (eq? ch #\tab)) + (do-whitespace ch)) + + ((eq? ch quotechar) + (do-quotechar)) + + ((eq? ch escapechar) + (do-escape)) + + ((eq? ch delimiter) + (do-delim ch)) + + ((eq? state 'numeric) + (if (or (eq? ch #\.) + (eq? ch #\-) + (eq? ch #\e) + (eq? ch #\E) + (char-numeric? ch)) + (lp2 (+ j 1) (cons ch r)) + (raise- (Error "nonumeric in numeric field")))) + + ((eq? state 'start) + (cond + ((eq? quoting 'all) + (raise- + (Error + "nonquoted field when all should be quoted"))) + ((eq? quoting 'nonnumeric) + (lp j 'numeric l)) + (else + (lp j 'normal l)))) + + ((or (eq? state 'quote) (eq? state 'normal)) + (lp2 (+ j 1) (cons ch r))) + + ((eq? state 'end) + (raise- + (Error + "non delimeter after qouted field"))))) + (do-delim #\newline)))))))))) + +(define-python-class writer () + (define __init__ + (lam (csvfile (= dialect "excel") (** fmt)) + (set! dialect (if (string? dialect) + (get_dialect dialect) + dialect)) + (set self 'csvfile csvfile) + (set self 'dialect dialect))) + + (define writerow + (lambda (self l) + (let* + ((dialect (ref self 'dialect)) + + (delimiter (chr (py-get fmtparams "delimiter" e) + (ref dialect 'Delimiter e) + ",")) + + (doublequote (oor (py-get fmtparams "doublequote" e) + (ref dialect 'doublequote e) + #t)) + + (escapechar (chr (py-get fmtparams "escapechar" e) + (ref dialect 'escapechar e) + None)) + + (lineterminator (str (py-get fmtparams "lineterminator" e) + (ref dialect 'lineterminator e) + "\r\n")) + + (quotechar (chr (py-get fmtparams "quotechar" e) + (ref dialect 'quotechar e) + "\"")) + + (quoting (oor (py-get fmtparams "quoting" e) + (ref dialect 'quoting e) + QUOTE_MINIMAL)) + + (skipispace (oor (py-get fmtparams "skipinitialspace" e) + (ref dialect 'skipinitialspace e) + #t)) + + (strict (oor (py-get fmtparams "strict" e) + (ref dialect 'strict e) + #f))) + + (for ((x : l)) (r '()) + (let/ec ret + (cons + (cond + ((eq? quoting 'none) + (let ((x (if (string? x) x (str x)))) + (if (has-escape-1 x) + (if strict + (raise (Error "None quoting and nonspecial chars")) + (ret r)) + x))) + + ((eq? quoting 'nonnumeric) + (let ((x (if (string? x) x (str x)))) + (if (is-numeric x) + (number->string x) + (quote-it x)))) + + ((eq? quoting 'none) + (if (string? x) + x + (str x))) + + ((eq? quoting 'minimal) + (let ((x (if (string? x) x (str x)))) + (if (has-escape-2 x) (quote-it x) x)))) + r) + #:final + (write + (string-join + (reverse + (cons lineterminator r)) delim)))))))) |