From 7d47faa9e658b14d67697856b3980b2ffa9d6776 Mon Sep 17 00:00:00 2001
From: Stefan Israelsson Tampe
Date: Fri, 13 Jul 2018 15:10:02 +0200
Subject: python re module now implemented and works partially

---
 modules/language/python/module/decimal.scm | 391 ++++++++++++++++-------------
 1 file changed, 223 insertions(+), 168 deletions(-)

(limited to 'modules/language/python/module/decimal.scm')

diff --git a/modules/language/python/module/decimal.scm b/modules/language/python/module/decimal.scm
index a71b990..0dc207e 100644
--- a/modules/language/python/module/decimal.scm
+++ b/modules/language/python/module/decimal.scm
@@ -1,7 +1,31 @@
 (define-module (language python module decimal)
   #:use-module ((language python module collections) #:select (namedtuple))
+  #:use-module ((language python module itertools) #:select (chain repeat))
+  #:use-module ((language python module sys) #:select (maxsize hash_info))
+  #:use-module (language python Module)
+  #:use-module (language python list)
+  #:use-module (language python dict)
+  #:use-module (language python module re)
   #:export ())
 
+#|
+This is the copyright information of the file ported over to scheme
+# Copyright (c) 2004 Python Software Foundation.
+# All rights reserved.
+
+# Written by Eric Price
+# and Facundo Batista
+# and Raymond Hettinger
+# and Aahz
+# and Tim Peters
+
+# This module should be kept in sync with the latest updates of the
+# IBM specification as it evolves. Those updates will be treated
+# as bug fixes (deviation from the spec is a compatibility, usability
+# bug) and will be backported. At this point the spec is stabilizing
+# and the updates are becoming fewer, smaller, and less significant.
+|#
+
 (define guile:modulo (@ (guile) moduolo))
 
 (define __name__ "decimal")
@@ -28,7 +52,7 @@
 (define MAX_EMAX 425000000)
 (define MIN_EMIN -425000000)
 
-(if (= sys:maxsize (- (ash 1 63) 1))
+(if (= maxsize (- (ash 1 63) 1))
     (begin
       (set! MAX_PREC 999999999999999999)
       (set! MAX_EMAX 999999999999999999)
@@ -37,12 +61,21 @@
 (define MIN_ETINY (- MIN_EMIN (- MAX_PREC 1)))
 
 ;; Context
-(define (cx-prec x) (vector-ref x 0))
-(define (cx-emax x) (vector-ref x 1))
-(define (cx-raise x) (vector-ref x 2))
-(define (cx-error x) (vector-ref x 3))
-(define (cx-capitals x) (vector-ref x 4))
-(define (cx-rounding x) (vector-ref x 5))
+(define-inlinable (cx-prec x) (rawref x 'prec))
+(define-inlinable (cx-emax x) (rawref x 'Emax))
+(define-inlinable (cx-emin x) (rawref x 'Emin))
+(define-inlinable (cx-etiny x) ((ref x 'Etiny)))
+(define-inlinable (cx-etop x) ((ref x 'Etop)))
+(define-inlinable (cx-copy x) ((ref x 'copy)))
+(define-inlinable (cx-clear_flags x) ((ref x 'clear_flags)))
+(define-inlinable (cx-raise x) (ref x '_raise_error))
+(define-inlinable (cx-error x) (ref x '_raise_error))
+(define-inlinable (cx-capitals x) (rawref x 'capitals))
+(define-inlinable (cx-rounding x) (rawref x 'rounding))
+(define-inlinable (cx-clamp x) (rawref x 'clamp))
+(define-inlinable (cx-traps x) (rawref x 'traps))
+(define-inlinable (cx-flags x) (rawref x 'flags))
+
 
 ;; Errors
 (define-python-class DecimalException (ArithmeticError)
@@ -6596,7 +6629,6 @@
 ;;# at least one decimal digit, possibly after the decimal point. The
 ;;# lookahead expression '(?=\d|\.\d)' checks this.
 
-(use-modules (languge python module re))
 (define _parser
   (ref (compile "
     # A numeric string consists of:
@@ -6650,10 +6682,11 @@
 ;; The locale module is only needed for the 'n' format specifier. The
 ;; rest of the PEP 3101 code functions quite happily without it, so we
 ;; don't care too much if locale isn't present.
-(define _locale (import "locale"))
+(define _locale (try-module "locale"))
 
-def _parse_format_specifier(format_spec, _localeconv=None):
-    """Parse and validate a format specifier.
+(define _parse_format_specifier
+  (lam (format_spec (= _localeconv None))
+    "Parse and validate a format specifier.
 
     Turns a standard numeric format specifier into a dict, with the
     following entries:
@@ -6670,120 +6703,133 @@ def _parse_format_specifier(format_spec, _localeconv=None):
       precision: nonnegative integer giving precision, or None
       type: one of the characters 'eEfFgG%', or None
 
-    """
-    m = _parse_format_specifier_regex.match(format_spec)
-    if m is None:
-        raise ValueError("Invalid format specifier: " + format_spec)
-
-    # get the dictionary
-    format_dict = m.groupdict()
-
-    # zeropad; defaults for fill and alignment. If zero padding
-    # is requested, the fill and align fields should be absent.
-    fill = format_dict['fill']
-    align = format_dict['align']
-    format_dict['zeropad'] = (format_dict['zeropad'] is not None)
-    if format_dict['zeropad']:
-        if fill is not None:
-            raise ValueError("Fill character conflicts with '0'"
-                             " in format specifier: " + format_spec)
-        if align is not None:
-            raise ValueError("Alignment conflicts with '0' in "
-                             "format specifier: " + format_spec)
-    format_dict['fill'] = fill or ' '
-    # PEP 3101 originally specified that the default alignment should
-    # be left; it was later agreed that right-aligned makes more sense
-    # for numeric types. See http://bugs.python.org/issue6857.
-    format_dict['align'] = align or '>'
-
-    # default sign handling: '-' for negative, '' for positive
-    if format_dict['sign'] is None:
-        format_dict['sign'] = '-'
-
-    # minimumwidth defaults to 0; precision remains None if not given
-    format_dict['minimumwidth'] = int(format_dict['minimumwidth'] or '0')
-    if format_dict['precision'] is not None:
-        format_dict['precision'] = int(format_dict['precision'])
-
-    # if format type is 'g' or 'G' then a precision of 0 makes little
-    # sense; convert it to 1. Same if format type is unspecified.
-    if format_dict['precision'] == 0:
-        if format_dict['type'] is None or format_dict['type'] in 'gGn':
-            format_dict['precision'] = 1
-
-    # determine thousands separator, grouping, and decimal separator, and
-    # add appropriate entries to format_dict
-    if format_dict['type'] == 'n':
-        # apart from separators, 'n' behaves just like 'g'
-        format_dict['type'] = 'g'
-        if _localeconv is None:
-            _localeconv = _locale.localeconv()
-        if format_dict['thousands_sep'] is not None:
-            raise ValueError("Explicit thousands separator conflicts with "
-                             "'n' type in format specifier: " + format_spec)
-        format_dict['thousands_sep'] = _localeconv['thousands_sep']
-        format_dict['grouping'] = _localeconv['grouping']
-        format_dict['decimal_point'] = _localeconv['decimal_point']
-    else:
-        if format_dict['thousands_sep'] is None:
-            format_dict['thousands_sep'] = ''
-        format_dict['grouping'] = [3, 0]
-        format_dict['decimal_point'] = '.'
-
-    return format_dict
-
-def _format_align(sign, body, spec):
-    """Given an unpadded, non-aligned numeric string 'body' and sign
+    "
+    (let* ((m (let ((m ((ref _parse_format_specifier_regex 'match) format_spec)))
+                (if (eq? m None)
+                    (raise (ValueError (+ "Invalid format specifier: " format_spec))))
+                m))
+
+           ;; get the dictionary
+           (format_dict ((ref m 'groupdict)))
+
+           ;; zeropad; defaults for fill and alignment. If zero padding
+           ;; is requested, the fill and align fields should be absent.
+           (fill (pylist-ref format_dict "fill"))
+           (minw (pylist-ref format_dict "minimumwidth"))
+           (sign (pylist-ref format_dict "sign"))
+           (prec (pylist-ref format_dict "precision"))
+           (sepM (pylist-ref format_dict "thousands_sep"))
+           (type (pylist-ref format_dict "type"))
+           (align (pylist-ref format_dict "align")))
+
+      (pylist-set! format_dict "zeropad" (not (eq? (pylist-ref format_dict "zeropad") None)))
+      (when (pylist-ref format_dict "zeropad")
+        (if (not (eq? fill None))
+            (raise (ValueError (+ "Fill character conflicts with '0'"
+                                  " in format specifier: " format_spec))))
+        (if (not (eq? align None))
+            (raise (ValueError (+ "Alignment conflicts with '0' in "
+                                  "format specifier: " format_spec)))))
+
+      (pylist-set! format_dict "fill" (or (bool fill) " "))
+
+      ;; PEP 3101 originally specified that the default alignment should
+      ;; be left; it was later agreed that right-aligned makes more sense
+      ;; for numeric types. See http://bugs.python.org/issue6857.
+
+      (pylist-set! format_dict "align" (or (bool align) ">"))
+
+      ;; default sign handling: '-' for negative, '' for positive
+      (pylist-set! format_dict "sign" (or (bool sign) "-"))
+
+      ;; minimumwidth defaults to 0; precision remains None if not given
+      (pylist-set! format_dict "minimumwidth" (int (if (eq? minw None) "0" minw)))
+      (if (not (eq? prec None))
+          (pylist-set! format_dict "precision" (let ((w (int prec))) (set! prec w) w)))
+
+      ;; if format type is 'g' or 'G' then a precision of 0 makes little
+      ;; sense; convert it to 1. Same if format type is unspecified.
+      (if (equal? prec 0)
+          (if (or (eq? type None) (in type "gGn"))
+              (pylist-set! format_dict "precision" 1)))
+
+      ;; determine thousands separator, grouping, and decimal separator, and
+      ;; add appropriate entries to format_dict
+      (if (equal? type "n")
+          (begin
+            ;; apart from separators, 'n' behaves just like 'g'
+            (pylist-set! format_dict "type" "g")
+            (if (eq? _localeconv None)
+                (set! _localeconv ((ref _locale 'localeconv))))
+            (if (not (eq? sepM None))
+                (raise (ValueError (+ "Explicit thousands separator conflicts with "
+                                      "'n' type in format specifier: " format_spec))))
+            (pylist-set! format_dict "thousands_sep" (pylist-ref _localeconv "thousands_sep"))
+            (pylist-set! format_dict "grouping" (pylist-ref _localeconv "grouping"))
+            (pylist-set! format_dict "decimal_point" (pylist-ref _localeconv "decimal_point")))
+          (begin
+            (if (eq? sepM None)
+                (pylist-set! format_dict "thousands_sep" ""))
+            (pylist-set! format_dict "grouping" (list 3 0))
+            (pylist-set! format_dict "decimal_point" ".")))
+
+      format_dict)))
+
+(define _format_align
+  (lambda (sign body spec)
+    "Given an unpadded, non-aligned numeric string 'body' and sign
     string 'sign', add padding and alignment conforming to the given
     format specifier dictionary 'spec' (as produced by
     parse_format_specifier).
 
-    """
-    # how much extra space do we have to play with?
-    minimumwidth = spec['minimumwidth']
-    fill = spec['fill']
-    padding = fill*(minimumwidth - len(sign) - len(body))
-
-    align = spec['align']
-    if align == '<':
-        result = sign + body + padding
-    elif align == '>':
-        result = padding + sign + body
-    elif align == '=':
-        result = sign + padding + body
-    elif align == '^':
-        half = len(padding)//2
-        result = padding[:half] + sign + body + padding[half:]
-    else:
-        raise ValueError('Unrecognised alignment field')
-
-    return result
-
-def _group_lengths(grouping):
-    """Convert a localeconv-style grouping into a (possibly infinite)
+    "
+    ;; how much extra space do we have to play with?
+    (let* ((minimumwidth (pylist-ref spec "minimumwidth")) ; let*: padding uses the two bindings above it
+           (fill (pylist-ref spec "fill"))
+           (padding (* fill (- minimumwidth (len sign) (len body))))
+           (align (pylist-ref spec "align")))
+      (cond
+       ((equal? align "<")
+        (+ sign body padding))
+       ((equal? align ">")
+        (+ padding sign body))
+       ((equal? align "=")
+        (+ sign padding body))
+       ((equal? align "^")
+        (let* ((half (floor-quotient (len padding) 2))
+               (pad1 (pylist-slice padding None half None))
+               (pad2 (pylist-slice padding half None None)))
+          (+ pad1 sign body pad2)))
+       (else
+        (raise (ValueError "Unrecognised alignment field")))))))
+
+(define _group_lengths
+  (lambda (grouping)
+    "Convert a localeconv-style grouping into a (possibly infinite)
     iterable of integers representing group lengths.
 
-    """
-    # The result from localeconv()['grouping'], and the input to this
-    # function, should be a list of integers in one of the
-    # following three forms:
-    #
-    # (1) an empty list, or
-    # (2) nonempty list of positive integers + [0]
-    # (3) list of positive integers + [locale.CHAR_MAX], or
-
-    from itertools import chain, repeat
-    if not grouping:
-        return []
-    elif grouping[-1] == 0 and len(grouping) >= 2:
-        return chain(grouping[:-1], repeat(grouping[-2]))
-    elif grouping[-1] == _locale.CHAR_MAX:
-        return grouping[:-1]
-    else:
-        raise ValueError('unrecognised format for grouping')
-
-def _insert_thousands_sep(digits, spec, min_width=1):
-    """Insert thousands separators into a digit string.
+    "
+    ;; The result from localeconv()['grouping'], and the input to this
+    ;; function, should be a list of integers in one of the
+    ;; following three forms:
+    ;;
+    ;; (1) an empty list, or
+    ;; (2) nonempty list of positive integers + [0]
+    ;; (3) list of positive integers + [locale.CHAR_MAX], or
+
+    (cond
+     ((not (bool grouping))
+      '())
+     ((and (= (pylist-ref grouping -1) 0) (>= (len grouping) 2))
+      (chain (pylist-slice grouping None -1 None) (repeat (pylist-ref grouping -2))))
+     ((= (pylist-ref grouping -1) (ref _locale 'CHAR_MAX))
+      (pylist-slice grouping None -1 None))
+     (else
+      (raise (ValueError "unrecognised format for grouping"))))))
+
+(define _insert_thousands_sep
+  (lam (digits spec (= min_width 1))
+    "Insert thousands separators into a digit string.
 
     spec is a dictionary whose keys should include 'thousands_sep' and
     'grouping'; typically it's the result of parsing the format
@@ -6797,40 +6843,47 @@ def _insert_thousands_sep(digits, spec, min_width=1):
     commas every three digits in '123456', with min_width=8, gives
     '0,123,456', even though that has length 9.
 
-    """
-
-    sep = spec['thousands_sep']
-    grouping = spec['grouping']
-
-    groups = []
-    for l in _group_lengths(grouping):
-        if l <= 0:
-            raise ValueError("group length should be positive")
-        # max(..., 1) forces at least 1 digit to the left of a separator
-        l = min(max(len(digits), min_width, 1), l)
-        groups.append('0'*(l - len(digits)) + digits[-l:])
-        digits = digits[:-l]
-        min_width -= l
-        if not digits and min_width <= 0:
-            break
-        min_width -= len(sep)
-    else:
-        l = max(len(digits), min_width, 1)
-        groups.append('0'*(l - len(digits)) + digits[-l:])
-    return sep.join(reversed(groups))
-
-def _format_sign(is_negative, spec):
-    """Determine sign character."""
-
-    if is_negative:
-        return '-'
-    elif spec['sign'] in ' +':
-        return spec['sign']
-    else:
-        return ''
-
-def _format_number(is_negative, intpart, fracpart, exp, spec):
-    """Format a number, given the following data:
+    "
+
+    (let ((sep (pylist-ref spec "thousands_sep"))
+          (grouping (pylist-ref spec "grouping"))
+          (groups (pylist)))
+
+      (for ((l :: (_group_lengths grouping))) ()
+        (if (<= l 0)
+            (raise (ValueError "group length should be positive")))
+        ;; max(..., 1) forces at least 1 digit to the left of a separator
+        (let ((l (min (max (len digits) min_width 1) l)))
+          ((ref groups 'append) (+ (* "0" (- l (len digits)))
+                                   (pylist-slice digits (- l) None None)))
+          (set! digits (pylist-slice digits None (- l) None))
+          (set! min_width (- min_width l))
+          (if (and (= 0 (len digits)) (<= min_width 0)) ; stop once every digit is consumed
+              (break))
+          (set! min_width (- min_width (len sep))))
+        #:final
+        (let ((l (max (len digits) min_width 1)))
+          ((ref groups 'append) (+ (* "0" (- l (len digits)))
+                                   (pylist-slice digits (- l) None None)))))
+      ((ref sep 'join) (reversed groups)))))
+
+(define _format_sign
+  (lam (is_negative spec)
+    "Determine sign character."
+
+    (cond
+     ((bool is_negative)
+      "-")
+     ((in (pylist-ref spec "sign") " +")
+      (pylist-ref spec "sign"))
+     (else
+      ""))))
+
+(define typed (dict '(("E" . "E") ("e" . "e") ("G" . "E") ("g" . "e"))))
+
+(define _format_number
+  (lambda (is_negative intpart fracpart exp spec)
+    "Format a number, given the following data:
 
     is_negative: true if the number is negative, else false
     intpart: string of digits that must appear before the decimal point
@@ -6845,26 +6898,28 @@ def _format_number(is_negative, intpart, fracpart, exp, spec):
     add trailing '%' for the '%' type
     zero-pad if necessary
     fill and align if necessary
-    """
+    "
+
+    (let ((sign (_format_sign is_negative spec)))
 
-    sign = _format_sign(is_negative, spec)
+      (if (or (bool fracpart) (bool (pylist-ref spec "alt")))
+          (set! fracpart (+ (pylist-ref spec "decimal_point") fracpart)))
 
-    if fracpart or spec['alt']:
-        fracpart = spec['decimal_point'] + fracpart
+      (if (or (not (= exp 0)) (in (pylist-ref spec "type") "eE")) ; only e/E force an exponent, as in the Python original
+          (let ((echar (pylist-ref typed (pylist-ref spec "type"))))
+            (set! fracpart (+ fracpart (str-format "{0}{1:+}" echar exp)))))
+
+      (if (equal? (pylist-ref spec "type") "%")
+          (set! fracpart (+ fracpart "%")))
 
-    if exp != 0 or spec['type'] in 'eE':
-        echar = {'E': 'E', 'e': 'e', 'G': 'E', 'g': 'e'}[spec['type']]
-        fracpart += "{0}{1:+}".format(echar, exp)
-    if spec['type'] == '%':
-        fracpart += '%'
+      (let* ((min_width
+              (if (bool (pylist-ref spec "zeropad"))
+                  (- (pylist-ref spec "minimumwidth") (len fracpart) (len sign))
+                  0))
+             (intpart (_insert_thousands_sep intpart spec min_width)))
 
-    if spec['zeropad']:
-        min_width = spec['minimumwidth'] - len(fracpart) - len(sign)
-    else:
-        min_width = 0
-    intpart = _insert_thousands_sep(intpart, spec, min_width)
 
-    return _format_align(sign, intpart+fracpart, spec)
+        (_format_align sign (+ intpart fracpart) spec)))))
 
 
 ;;##### Useful Constants (internal use only) ################################
-- 
cgit v1.2.3