summaryrefslogtreecommitdiff
path: root/modules/language/python/module/_csv.py
diff options
context:
space:
mode:
authorStefan Israelsson Tampe <stefan.itampe@gmail.com>2018-08-22 21:35:43 +0200
committerStefan Israelsson Tampe <stefan.itampe@gmail.com>2018-08-22 21:35:43 +0200
commit43792510bfeb15e8416a5782ab64126ee8950950 (patch)
tree7de7499a1a5a3f841d24ad2e0ea50964fa0e9e84 /modules/language/python/module/_csv.py
parenta41eeb67b1aa32199501db6d013e259ccb7484e6 (diff)
csv.py
Diffstat (limited to 'modules/language/python/module/_csv.py')
-rw-r--r--modules/language/python/module/_csv.py413
1 files changed, 413 insertions, 0 deletions
diff --git a/modules/language/python/module/_csv.py b/modules/language/python/module/_csv.py
new file mode 100644
index 0000000..6db2d05
--- /dev/null
+++ b/modules/language/python/module/_csv.py
@@ -0,0 +1,413 @@
+;; Guile module providing the Python-level `_csv` API: the dialect registry,
+;; the field-size limit, the QUOTE_* constants and the reader / writer
+;; entry points listed in #:export below.
+(define-module (language python module _csv)
+ #:use-module (oop pf-objects)
+ ;; The runtime modules are spelled `language`, matching this module's own
+ ;; name above.
+ #:use-module (language python list)
+ #:use-module (language python def)
+ #:use-module (language python yield)
+ #:use-module (language python for)
+ #:use-module (language python exceptions)
+ #:export (QUOTE_ALL QUOTE_MINIMAL QUOTE_NONNUMERIC QUOTE_NONE
+ reader writer Error field_size_limit
+ get_dialect register_dialect unregister_dialect
+ list_dialects __doc__ Dialect))
+
+;; Exception class exported as `Error`, declared with `Exception` as its
+;; parent.  The reader and writer in this file raise it for malformed input.
+(define-python-class Error (Exception))
+
+;; Dialect: a bare attribute container with no parent classes.  Its
+;; constructor takes any number of arguments and ignores all of them;
+;; attributes are attached afterwards with `set` (see register_dialect).
+(define-python-class Dialect ()
+ (define __init__
+ ;; Variadic on purpose: everything after `self` is discarded.
+ (lambda (self . ignored-args)
+ #f)))
+
+;; Fluid holding the value reported by field_size_limit (initially 131072).
+(define *field-size* (make-fluid 131072))
+
+;; field_size_limit([new-limit])
+;; With no argument, return the current limit.
+;; With one argument, store it as the new limit and return the limit that
+;; was in effect before the call, as Python's csv.field_size_limit does.
+(define field_size_limit
+ (case-lambda
+ (() (fluid-ref *field-size*))
+ ((new-limit)
+ ;; Read the old value first; fluid-set! has no useful return value.
+ (let ((old-limit (fluid-ref *field-size*)))
+ (fluid-set! *field-size* new-limit)
+ old-limit))))
+
+;; Registry of dialects, keyed by the name passed to register_dialect.
+(define *dialects* (make-hash-table))
+
+;; register_dialect(name, dialect=None, **fmtparams)
+;; Build a fresh Dialect instance and store it in *dialects* under `nm`.
+;; Each attribute is resolved in this order: the keyword argument of the
+;; same name, then that attribute on `val` (when `val` is not None), then
+;; the built-in default listed below.
+(def (register_dialect nm (= val None) (** keyw))
+ (let ((newval (Dialect)))
+ ;; Set attribute `key` on x, looking it up in keyword table z first and
+ ;; falling back to y's attribute, then to `default`.
+ ;; NOTE(review): assumes `keyw` is a hash table keyed by strings --
+ ;; confirm against how the `def` macro delivers `**` arguments.
+ (define-syntax-rule (set- x y z key default)
+ (set x 'key (hash-ref z (symbol->string 'key)
+ (if (eq? y None)
+ default
+ (ref y 'key default)))))
+ ;; Apply set- to every (attribute default) pair.
+ (define-syntax-rule (setter x y z ((k dflt) ...))
+ (begin (set- x y z k dflt) ...))
+
+ (setter newval val keyw
+ ((delimiter ",")
+ (doublequote #t)
+ (escapechar None)
+ (lineterminator "\r\n")
+ (quotechar "\"")
+ (quoting 'minimal)
+ (skipinitialspace #f)
+ (strict #f)))
+
+ ;; The registry stores the normalised copy, never `val` itself.
+ (hash-set! *dialects* nm newval)))
+
+;; get_dialect(name [, default])
+;; Return the dialect registered under `nm`.  When nothing is registered
+;; under that name, return `val`, which defaults to None.  `val` is optional
+;; so that the one-argument call made by `writer` works.
+(define* (get_dialect nm #:optional (val None))
+ (hash-ref *dialects* nm val))
+;; unregister_dialect(name)
+;; Remove the entry for `nm` from the dialect registry.  hash-remove! does
+;; nothing when the key is absent, so unknown names are silently ignored.
+(define (unregister_dialect nm)
+ (hash-remove! *dialects* nm))
+
+;; list_dialects()
+;; Return a py-list holding the name of every registered dialect.  The
+;; order follows hash-table traversal and is therefore unspecified.
+(define (list_dialects)
+ (py-list
+ (hash-fold (lambda (name dialect names)
+ (cons name names))
+ '()
+ *dialects*)))
+
+;; Module documentation string, exported as __doc__.  Lines are separated by
+;; the literal line breaks inside the string; no `\n` escapes are used, so
+;; paragraphs are not split by accidental blank lines.
+(define __doc__
+"CSV parsing and writing.
+
+This module provides classes that assist in the reading and writing
+of Comma Separated Value (CSV) files, and implements the interface
+described by PEP 305. Although many CSV files are simple to parse,
+the format is not formally defined by a stable specification and
+is subtle enough that parsing lines of a CSV file with something
+like line.split(\",\") is bound to fail. The module supports three
+basic APIs: reading, writing, and registration of dialects.
+
+
+DIALECT REGISTRATION:
+
+Readers and writers support a dialect argument, which is a convenient
+handle on a group of settings. When the dialect argument is a string,
+it identifies one of the dialects previously registered with the module.
+If it is a class or instance, the attributes of the argument are used as
+the settings for the reader or writer:
+
+ class excel:
+ delimiter = ','
+ quotechar = '\"'
+ escapechar = None
+ doublequote = True
+ skipinitialspace = False
+ lineterminator = '\\r\\n'
+ quoting = QUOTE_MINIMAL
+
+SETTINGS:
+
+ * quotechar - specifies a one-character string to use as the
+ quoting character. It defaults to '\"'.
+ * delimiter - specifies a one-character string to use as the
+ field separator. It defaults to ','.
+ * skipinitialspace - specifies how to interpret whitespace which
+ immediately follows a delimiter. It defaults to False, which
+ means that whitespace immediately following a delimiter is part
+ of the following field.
+ * lineterminator - specifies the character sequence which should
+ terminate rows.
+ * quoting - controls when quotes should be generated by the writer.
+ It can take on any of the following module constants:
+
+ csv.QUOTE_MINIMAL means only when required, for example, when a
+ field contains either the quotechar or the delimiter
+ csv.QUOTE_ALL means that quotes are always placed around fields.
+ csv.QUOTE_NONNUMERIC means that quotes are always placed around
+ fields which do not parse as integers or floating point
+ numbers.
+ csv.QUOTE_NONE means that quotes are never placed around fields.
+ * escapechar - specifies a one-character string used to escape
+ the delimiter when quoting is set to QUOTE_NONE.
+ * doublequote - controls the handling of quotes inside fields. When
+ True, two consecutive quotes are interpreted as one during read,
+ and when writing, each quote character embedded in the data is
+ written as two quotes")
+
+;; Quoting modes, represented as symbols.  The spellings must match the
+;; symbols that reader, writer and register_dialect compare against with
+;; eq? ('all, 'minimal, 'nonnumeric, 'none).
+(define QUOTE_ALL 'all)
+(define QUOTE_MINIMAL 'minimal)
+(define QUOTE_NONNUMERIC 'nonnumeric)
+(define QUOTE_NONE 'none)
+
+;; reader(csvfile, dialect="excel", **fmtparams)
+;; Binds the parsing options (each from a fmtparams lookup, the dialect
+;; attribute and a literal default, combined by chr / oor / str), then
+;; builds a generator with make-generator that walks `csvfile` with `for`,
+;; scans each item `s` character by character and yields a py-list of the
+;; fields collected for it.
+;; Loop variables: `lp` carries (i state l), where `l` is the reversed list
+;; of finished fields; `lp2` carries (j r), where `r` is the reversed list
+;; of characters of the field being read.  States tested below: start,
+;; normal, quote, numeric, end.
+;; NOTE(review): `e`, `chr`, `oor` and `py-get` have no definition in the
+;; visible part of this file -- confirm where they are meant to come from.
+(def (reader csvfile (= dialect "excel") (** fmtparams))
+ (let*
+ ;; NOTE(review): the registry lookup defined in this file is spelled
+ ;; `get_dialect` and takes two parameters; `get-dialect` is not defined
+ ;; in the visible source.
+ ((dialect (get-dialect dialect))
+
+ ;; NOTE(review): register_dialect stores this attribute as `delimiter`
+ ;; (lower case); 'Delimiter is a different symbol.
+ (delimiter (chr (py-get fmtparams "delimiter" e)
+ (ref dialect 'Delimiter e)
+ ","))
+
+ (doublequote (oor (py-get fmtparams "doublequote" e)
+ (ref dialect 'doublequote e)
+ #t))
+
+ (escapechar (chr (py-get fmtparams "escapechar" e)
+ (ref dialect 'escapechar e)
+ None))
+
+ (lineterminator (str (py-get fmtparams "lineterminator" e)
+ (ref dialect 'lineterminator e)
+ "\r\n"))
+
+ (quotechar (chr (py-get fmtparams "quotechar" e)
+ (ref dialect 'quotechar e)
+ "\""))
+
+ (quoting (oor (py-get fmtparams "quoting" e)
+ (ref dialect 'quoting e)
+ QUOTE_MINIMAL))
+
+ ;; NOTE(review): the fallback here is #t, while register_dialect and the
+ ;; module docstring both give False as the skipinitialspace default.
+ (skipispace (oor (py-get fmtparams "skipinitialspace" e)
+ (ref dialect 'skipinitialspace e)
+ #t))
+
+ (strict (oor (py-get fmtparams "strict" e)
+ (ref dialect 'strict e)
+ #f)))
+ (make-generator ()
+ (lambda (yield)
+ ;; NOTE(review): the parameter is named `csvfile`; `cvsfile` is not bound.
+ (for ((s : cvsfile)) ()
+ (let ((n (len s)))
+ ;; NOTE(review): the initial state is #f, but no branch below tests for
+ ;; #f; the first field is only handled once the state is 'start.
+ (let lp ((i 0) (state #f) (l '()))
+ (let lp2 ((j i) (r '()))
+ ;; In strict mode raise the error; otherwise keep scanning.
+ ;; NOTE(review): `lp` has three loop variables but is called here with
+ ;; two; the (index, chars) shape matches `lp2` -- confirm intent.
+ (define-syntax-rule (raise- s)
+ (if strict
+ (raise s)
+ (lp (+ j 1) r)))
+ ;; Finish the current field.  A newline in state 'start yields the
+ ;; fields gathered so far; otherwise the characters in `r` become a
+ ;; field (converted with string->number in state 'numeric) and the row
+ ;; is either yielded (newline) or scanning restarts in state 'start.
+ ;; NOTE(review): declared with two parameters (j ch) but every call
+ ;; below passes a single argument.
+ (define (end j ch)
+ (if (and (eq? state 'start)
+ (eq? ch #\newline))
+ (yield (py-list (reverse l)))
+ (let* ((x (list->string (reverse r)))
+ (x (if (eq? state 'numeric)
+ (string->number x)
+ x)))
+ (if (eq? ch #\newline)
+ (yield (py-list (reverse (cons x l))))
+ (lp (+ j 1) 'start (cons x l))))))
+
+ ;; Quote character: closes or doubles a quote inside a quoted field,
+ ;; opens one at the start of a field, and is an error elsewhere.
+ ;; NOTE(review): the look-ahead uses `i` (start of the field) while the
+ ;; current position is `j` -- confirm which index is intended.
+ (define (do-quotechar)
+ (cond
+ ((eq? state 'quote)
+ (if doublequote
+ (if (and (< (+ i 1) n)
+ (equal? quotechar
+ (string-ref s (+ i 1))))
+ (lp2 (+ j 2) (cons quotechar r))
+ (end (+ j 1)))
+ (end (+ j 1))))
+
+ ((eq? state 'start)
+ (if (or (eq? quoting 'minimal)
+ (eq? quoting 'all)
+ (eq? quoting 'nonnumeric))
+ (lp (+ j 1) 'quote l)
+ (raise- (Error "QOUTE_NONE supports no quoteing"))))
+
+ (else
+ (raise- (Error "wrong quoting found")))))
+
+ ;; Space or tab: skipped or starts a plain field at field start, kept
+ ;; verbatim inside plain and quoted fields, an error otherwise.
+ ;; NOTE(review): the recursive calls advance from `i`, not `j`; and
+ ;; 'nnumeric differs from the 'numeric state used elsewhere.
+ (define (do-whitespace ch)
+ (cond
+ ((eq? state 'start)
+ (if skipispace
+ (lp2 (+ i 1) r)
+ (if (or (eq? quoting 'minimal)
+ (eq? quoting 'none))
+ (lp i 'normal l)
+ (raise- (Error "whitespace outside quote")))))
+
+ ((or (eq? state 'normal)
+ (eq? state 'quote))
+ (lp2 (+ i 1) (cons ch r)))
+
+ ((eq? state 'nnumeric)
+ (raise- (Error "whitespace in numeric field")))
+
+ ((eq? state 'end)
+ (raise- (Error "whitespace after quote")))))
+
+ ;; Escape sequence: look at the character after the escape character
+ ;; and keep it when it is the quote, delimiter or escape character.
+ ;; NOTE(review): defined as `do-esc-qupote` but called below as
+ ;; `do-esc-quote`; and the state tested is 'quoting whereas the quoted
+ ;; state is named 'quote elsewhere.
+ (define (do-esc-qupote)
+ (if (< (+ j 1) n)
+ (let ((ch2 (string-ref s (+ j 1))))
+ (cond
+ ((and (eq? state 'quoting)
+ (eq? ch2 quotechar))
+ (lp2 (+ j 2)
+ (cons quotechar r)))
+
+ ((eq? ch2 delimiter)
+ (lp2 (+ j 2)
+ (cons delimiter r)))
+
+ ((eq? ch2 escapechar)
+ (lp2 (+ j 2)
+ (cons escapechar r)))
+
+ (else
+ (lp2 (+ j 2) r))))
+ (raise- (Error "single escape ends line"))))
+
+ ;; Escape character: dispatch on the current state.
+ (define (do-escape)
+ (cond
+ ((eq? state 'start)
+ (if (eq? quoting 'none)
+ (lp j 'normal l)
+ (raise- (Error "escapecharacter in nonquote"))))
+
+ ((eq? state 'normal)
+ (if (eq? quoting 'none)
+ (do-esc-quote)
+ (raise- (Error "escapecharacter in nonequote"))))
+
+ ((eq? state 'numeric)
+ (raise- (Error "escacpechar in numeric field")))
+
+ ((eq? state 'quote)
+ (do-esc-quote))
+
+ ((eq? state 'end)
+ (raise- (Error "escapechar after quote")))))
+
+ ;; Delimiter (or end of input, passed as #\newline): finish the field.
+ ;; NOTE(review): `quoteing` is not bound; the option is named `quoting`.
+ (define (do-delim ch)
+ (cond
+ ((or (eq? state 'start)
+ (eq? state 'end))
+ (end ch))
+
+ ((eq? state 'quote)
+ (if (eq? quoteing 'minimal)
+ (raise-
+ (Error "minimal quoting must quote delimiter"))
+ (end ch)))
+
+ ((eq? state 'normal)
+ (end ch))
+
+ ((eq? state 'numeric)
+ (end ch))))
+
+ ;; Main dispatch on the current character.
+ ;; NOTE(review): the bound check uses `j` but the character is read at
+ ;; index `i` -- confirm which index is intended.
+ (if (< j n)
+ (let ((ch (string-ref s i)))
+ (cond
+ ((or (eq? ch #\newline)
+ (eq? ch #\return))
+ (if (eq? state 'quote)
+ (raise- (Error "missing end quote character"))
+ (end #\newline)))
+
+ ((or (eq? ch #\space) (eq? ch #\tab))
+ (do-whitespace ch))
+
+ ((eq? ch quotechar)
+ (do-quotechar))
+
+ ((eq? ch escapechar)
+ (do-escape))
+
+ ((eq? ch delimiter)
+ (do-delim ch))
+
+ ;; Only these characters are accepted inside a numeric field.
+ ((eq? state 'numeric)
+ (if (or (eq? ch #\.)
+ (eq? ch #\-)
+ (eq? ch #\e)
+ (eq? ch #\E)
+ (char-numeric? ch))
+ (lp2 (+ j 1) (cons ch r))
+ (raise- (Error "nonumeric in numeric field"))))
+
+ ;; First ordinary character of a field: pick the field state and
+ ;; rescan from the same position.
+ ;; NOTE(review): QUOTE_NONNUMERIC is defined in this file as 'nonumeric,
+ ;; which is not eq? to the 'nonnumeric tested here.
+ ((eq? state 'start)
+ (cond
+ ((eq? quoting 'all)
+ (raise-
+ (Error
+ "nonquoted field when all should be quoted")))
+ ((eq? quoting 'nonnumeric)
+ (lp j 'numeric l))
+ (else
+ (lp j 'normal l))))
+
+ ((or (eq? state 'quote) (eq? state 'normal))
+ (lp2 (+ j 1) (cons ch r)))
+
+ ;; NOTE(review): no visible code ever switches to state 'end.
+ ((eq? state 'end)
+ (raise-
+ (Error
+ "non delimeter after qouted field")))))
+ ;; Ran off the end of `s`: treat it like a terminating newline.
+ (do-delim #\newline))))))))))
+
+;; writer class.  __init__ resolves a dialect given by name through
+;; get_dialect and stores `csvfile` and the dialect on the instance;
+;; writerow binds the formatting options, converts each element of the row
+;; according to the quoting mode and passes the joined text to `write`.
+;; NOTE(review): `has-escape-1`, `has-escape-2`, `is-numeric`, `quote-it`,
+;; `delim`, `write`, `e`, `chr` and `oor` have no definition in the visible
+;; part of this file -- confirm where they are meant to come from.
+(define-python-class writer ()
+ (define __init__
+ ;; NOTE(review): the body uses `self`, which is not in this parameter
+ ;; list, and `fmt` is accepted but never stored or used -- confirm.
+ ;; NOTE(review): get_dialect is defined in this file with two required
+ ;; parameters but is called here with one.
+ (lam (csvfile (= dialect "excel") (** fmt))
+ (set! dialect (if (string? dialect)
+ (get_dialect dialect)
+ dialect))
+ (set self 'csvfile csvfile)
+ (set self 'dialect dialect)))
+
+ (define writerow
+ (lambda (self l)
+ (let*
+ ((dialect (ref self 'dialect))
+
+ ;; NOTE(review): `fmtparams` is not bound in this method (the constructor
+ ;; calls its keyword table `fmt`), and register_dialect stores the
+ ;; attribute as `delimiter`, not 'Delimiter.
+ (delimiter (chr (py-get fmtparams "delimiter" e)
+ (ref dialect 'Delimiter e)
+ ","))
+
+ (doublequote (oor (py-get fmtparams "doublequote" e)
+ (ref dialect 'doublequote e)
+ #t))
+
+ (escapechar (chr (py-get fmtparams "escapechar" e)
+ (ref dialect 'escapechar e)
+ None))
+
+ (lineterminator (str (py-get fmtparams "lineterminator" e)
+ (ref dialect 'lineterminator e)
+ "\r\n"))
+
+ (quotechar (chr (py-get fmtparams "quotechar" e)
+ (ref dialect 'quotechar e)
+ "\""))
+
+ (quoting (oor (py-get fmtparams "quoting" e)
+ (ref dialect 'quoting e)
+ QUOTE_MINIMAL))
+
+ ;; NOTE(review): fallback is #t, while register_dialect and the module
+ ;; docstring give False as the skipinitialspace default.
+ (skipispace (oor (py-get fmtparams "skipinitialspace" e)
+ (ref dialect 'skipinitialspace e)
+ #t))
+
+ (strict (oor (py-get fmtparams "strict" e)
+ (ref dialect 'strict e)
+ #f)))
+
+ ;; Accumulate the converted fields in `r`, newest first.  `ret` is the
+ ;; escape continuation used to skip a field in non-strict mode.
+ ;; NOTE(review): #:final sits inside the let/ec form rather than directly
+ ;; in the `for` body -- confirm against the `for` macro's syntax.
+ (for ((x : l)) (r '())
+ (let/ec ret
+ (cons
+ (cond
+ ;; No quoting: a field for which has-escape-1 is true is an error in
+ ;; strict mode and is dropped otherwise.
+ ((eq? quoting 'none)
+ (let ((x (if (string? x) x (str x))))
+ (if (has-escape-1 x)
+ (if strict
+ (raise (Error "None quoting and nonspecial chars"))
+ (ret r))
+ x)))
+
+ ;; NOTE(review): `x` has already been converted to a string here, yet
+ ;; number->string is applied to it; also QUOTE_NONNUMERIC is defined in
+ ;; this file as 'nonumeric, which is not eq? to 'nonnumeric.
+ ((eq? quoting 'nonnumeric)
+ (let ((x (if (string? x) x (str x))))
+ (if (is-numeric x)
+ (number->string x)
+ (quote-it x))))
+
+ ;; NOTE(review): unreachable -- the first clause already matches 'none;
+ ;; no clause handles 'all.
+ ((eq? quoting 'none)
+ (if (string? x)
+ x
+ (str x)))
+
+ ((eq? quoting 'minimal)
+ (let ((x (if (string? x) x (str x))))
+ (if (has-escape-2 x) (quote-it x) x))))
+ r)
+ #:final
+ ;; NOTE(review): lineterminator is joined as one more element, so the
+ ;; separator also appears between the last field and the terminator.
+ (write
+ (string-join
+ (reverse
+ (cons lineterminator r)) delim))))))))