diff options
Diffstat (limited to 'modules/language/python/module')
-rw-r--r-- | modules/language/python/module/decimal.scm | 391 | ||||
-rw-r--r-- | modules/language/python/module/locale.scm | 1707 | ||||
-rw-r--r-- | modules/language/python/module/re.scm | 645 |
3 files changed, 2336 insertions, 407 deletions
diff --git a/modules/language/python/module/decimal.scm b/modules/language/python/module/decimal.scm index a71b990..0dc207e 100644 --- a/modules/language/python/module/decimal.scm +++ b/modules/language/python/module/decimal.scm @@ -1,7 +1,31 @@ (define-module (language python module decimal) #:use-module ((language python module collections) #:select (namedtuple)) + #:use-module ((language python module itertools) #:select (chain repeat)) + #:use-module ((language python module sys) #:select (maxsize hash_info)) + #:use-module (language python Module) + #:use-module (language python list) + #:use-module (language python dict) + #:use-module (language python module re) #:export ()) +#| +This is the copyright information of the file ported over to scheme +# Copyright (c) 2004 Python Software Foundation. +# All rights reserved. + +# Written by Eric Price <eprice at tjhsst.edu> +# and Facundo Batista <facundo at taniquetil.com.ar> +# and Raymond Hettinger <python at rcn.com> +# and Aahz <aahz at pobox.com> +# and Tim Peters + +# This module should be kept in sync with the latest updates of the +# IBM specification as it evolves. Those updates will be treated +# as bug fixes (deviation from the spec is a compatibility, usability +# bug) and will be backported. At this point the spec is stabilizing +# and the updates are becoming fewer, smaller, and less significant. +|# + (define guile:modulo (@ (guile) modulo)) (define __name__ "decimal") @@ -28,7 +52,7 @@ (define MAX_EMAX 425000000) (define MIN_EMIN -425000000) -(if (= sys:maxsize (- (ash 1 63) 1)) +(if (= maxsize (- (ash 1 63) 1)) (begin (set! MAX_PREC 999999999999999999) (set! 
MAX_EMAX 999999999999999999) @@ -37,12 +61,21 @@ (define MIN_ETINY (- MIN_EMIN (- MAX_PREC 1))) ;; Context -(define (cx-prec x) (vector-ref x 0)) -(define (cx-emax x) (vector-ref x 1)) -(define (cx-raise x) (vector-ref x 2)) -(define (cx-error x) (vector-ref x 3)) -(define (cx-capitals x) (vector-ref x 4)) -(define (cx-rounding x) (vector-ref x 5)) +(define-inlinable (cx-prec x) (rawref x 'prec)) +(define-inlinable (cx-emax x) (rawref x 'Emax)) +(define-inlinable (cx-emin x) (rawref x 'Emin)) +(define-inlinable (cx-etiny x) ((ref x 'Etiny))) +(define-inlinable (cx-etop x) ((ref x 'Etop))) +(define-inlinable (cx-copy x) ((ref x 'copy))) +(define-inlinable (cx-clear_flags x) ((ref x 'clear_flags))) +(define-inlinable (cx-raise x) (ref x '_raise_error)) +(define-inlinable (cx-error x) (ref x '_raise_error)) +(define-inlinable (cx-capitals x) (rawref x 'capitals)) +(define-inlinable (cx-rounding x) (rawref x 'rounding)) +(define-inlinable (cx-clamp x) (rawref x 'clamp)) +(define-inlinable (cx-traps x) (rawref x 'traps)) +(define-inlinable (cx-flags x) (rawref x 'flags)) + ;; Errors (define-python-class DecimalException (ArithmeticError) @@ -6596,7 +6629,6 @@ ;;# at least one decimal digit, possibly after the decimal point. The ;;# lookahead expression '(?=\d|\.\d)' checks this. -(use-modules (languge python module re)) (define _parser (ref (compile " # A numeric string consists of: @@ -6650,10 +6682,11 @@ ;; The locale module is only needed for the 'n' format specifier. The ;; rest of the PEP 3101 code functions quite happily without it, so we ;; don't care too much if locale isn't present. -(define _locale (import "locale")) +(define _locale (try-module "locale")) -def _parse_format_specifier(format_spec, _localeconv=None): - """Parse and validate a format specifier. +(define _parse_format_specifier + (lam (format_spec (=_localeconv None)) + "Parse and validate a format specifier. 
Turns a standard numeric format specifier into a dict, with the following entries: @@ -6670,120 +6703,133 @@ def _parse_format_specifier(format_spec, _localeconv=None): precision: nonnegative integer giving precision, or None type: one of the characters 'eEfFgG%', or None - """ - m = _parse_format_specifier_regex.match(format_spec) - if m is None: - raise ValueError("Invalid format specifier: " + format_spec) - - # get the dictionary - format_dict = m.groupdict() - - # zeropad; defaults for fill and alignment. If zero padding - # is requested, the fill and align fields should be absent. - fill = format_dict['fill'] - align = format_dict['align'] - format_dict['zeropad'] = (format_dict['zeropad'] is not None) - if format_dict['zeropad']: - if fill is not None: - raise ValueError("Fill character conflicts with '0'" - " in format specifier: " + format_spec) - if align is not None: - raise ValueError("Alignment conflicts with '0' in " - "format specifier: " + format_spec) - format_dict['fill'] = fill or ' ' - # PEP 3101 originally specified that the default alignment should - # be left; it was later agreed that right-aligned makes more sense - # for numeric types. See http://bugs.python.org/issue6857. - format_dict['align'] = align or '>' - - # default sign handling: '-' for negative, '' for positive - if format_dict['sign'] is None: - format_dict['sign'] = '-' - - # minimumwidth defaults to 0; precision remains None if not given - format_dict['minimumwidth'] = int(format_dict['minimumwidth'] or '0') - if format_dict['precision'] is not None: - format_dict['precision'] = int(format_dict['precision']) - - # if format type is 'g' or 'G' then a precision of 0 makes little - # sense; convert it to 1. Same if format type is unspecified. 
- if format_dict['precision'] == 0: - if format_dict['type'] is None or format_dict['type'] in 'gGn': - format_dict['precision'] = 1 - - # determine thousands separator, grouping, and decimal separator, and - # add appropriate entries to format_dict - if format_dict['type'] == 'n': - # apart from separators, 'n' behaves just like 'g' - format_dict['type'] = 'g' - if _localeconv is None: - _localeconv = _locale.localeconv() - if format_dict['thousands_sep'] is not None: - raise ValueError("Explicit thousands separator conflicts with " - "'n' type in format specifier: " + format_spec) - format_dict['thousands_sep'] = _localeconv['thousands_sep'] - format_dict['grouping'] = _localeconv['grouping'] - format_dict['decimal_point'] = _localeconv['decimal_point'] - else: - if format_dict['thousands_sep'] is None: - format_dict['thousands_sep'] = '' - format_dict['grouping'] = [3, 0] - format_dict['decimal_point'] = '.' - - return format_dict - -def _format_align(sign, body, spec): - """Given an unpadded, non-aligned numeric string 'body' and sign + " + (let* ((m (let ((m ((ref _parse_format_specifier_regex 'match) format_spec))) + (if (eq? m None) + (raise (ValueError (+ "Invalid format specifier: " format_spec)))) + m)) + + ;; get the dictionary + (format_dict ((ref m 'groupdict))) + + ;; zeropad; defaults for fill and alignment. If zero padding + ;; is requested, the fill and align fields should be absent. + (fill (pylist-ref format_dict "fill")) + (minw (pylist-ref format_dict "minimumwidth")) + (sign (pylist-ref format_dict "sign")) + (prec (pylist-ref format_dict "precision")) + (sepM (pylist-ref format_dict "thousands_sep")) + (type (pylist-ref format_dict "type")) + (align (pylist-ref format_dict "align"))) + + (pylist-set! format_dict "zeropad" (not (eq? (pylist-ref format_dict "zeropad") None))) + (when (pylist-ref format_dict "zeropad") + (if (not (eq? 
fill None)) + (raise (ValueError (+ "Fill character conflicts with '0'" + " in format specifier: " format_spec)))) + (if (not (eq? align None)) + (raise (ValueError (+ "Alignment conflicts with '0' in " + "format specifier: " format_spec))))) + + (pylist-set! format_dict "fill" (or (bool fill) " ")) + + ;; PEP 3101 originally specified that the default alignment should + ;; be left; it was later agreed that right-aligned makes more sense + ;; for numeric types. See http://bugs.python.org/issue6857. + + (pylist-set! format_dict "align" (or (bool align) ">")) + + ;; default sign handling: '-' for negative, '' for positive + (pylist-set! format_dict "sign" (or (bool sign) "-")) + + ;; minimumwidth defaults to 0; precision remains None if not given + (pylist-set! format_dict "minimumwidth" (int (if (eq? minw None) "0" minw))) + (if (not (eq? prec None)) + (pylist-set! format_dict "precision" (let ((w (int prec))) (set! prec w) w))) + + ;; if format type is 'g' or 'G' then a precision of 0 makes little + ;; sense; convert it to 1. Same if format type is unspecified. + (if (equal? prec 0) + (if (or (eq? type None) (in type "gGn")) + (pylist-set! format_dict "precision" 1))) + + ;; determine thousands separator, grouping, and decimal separator, and + ;; add appropriate entries to format_dict + (if (equal? type "n") + (begin + ;; apart from separators, 'n' behaves just like 'g' + (pylist-set! format_dict "type" "g") + (if (eq? _localeconv None) + (set! _localeconv ((ref _locale 'localeconv)))) + (if (not (eq? sepM None)) + (raise (ValueError (+ "Explicit thousands separator conflicts with " + "'n' type in format specifier: " format_spec)))) + (pylist-set! format_dict "thousands_sep" (pylist-ref _localeconv "thousands_sep")) + (pylist-set! format_dict "grouping" (pylist-ref _localeconv "grouping")) + (pylist-set! format_dict "decimal_point" (pylist-ref _localeconv "decimal_point"))) + (begin + (if (eq? sepM None) + (pylist-set! 
format_dict "thousands_sep" "")) + (pylist-set! format_dict "grouping" (list 3 0)) + (pylist-set! format_dict "decimal_point" "."))) + + format_dict))) + +(define _format_align + (lambda (sign body spec) + "Given an unpadded, non-aligned numeric string 'body' and sign string 'sign', add padding and alignment conforming to the given format specifier dictionary 'spec' (as produced by parse_format_specifier). - """ - # how much extra space do we have to play with? - minimumwidth = spec['minimumwidth'] - fill = spec['fill'] - padding = fill*(minimumwidth - len(sign) - len(body)) - - align = spec['align'] - if align == '<': - result = sign + body + padding - elif align == '>': - result = padding + sign + body - elif align == '=': - result = sign + padding + body - elif align == '^': - half = len(padding)//2 - result = padding[:half] + sign + body + padding[half:] - else: - raise ValueError('Unrecognised alignment field') - - return result - -def _group_lengths(grouping): - """Convert a localeconv-style grouping into a (possibly infinite) + " + ;; how much extra space do we have to play with? + (let* ((minimumwidth (pylist-ref spec "minimumwidth")) + (fill (pylist-ref spec "fill")) + (padding (* fill (- minimumwidth (len sign) (len body)))) + (align (pylist-ref spec "align"))) + (cond + ((equal? align "<") + (+ sign body padding)) + ((equal? align ">") + (+ padding sign body)) + ((equal? align "=") + (+ sign padding body)) + ((equal? align "^") + (let* ((half (floor-quotient (len padding) 2)) + (pad1 (pylist-slice padding None half None)) + (pad2 (pylist-slice padding half None None))) + (+ pad1 sign body pad2))) + (else + (raise (ValueError "Unrecognised alignment field"))))))) + +(define _group_lengths + (lambda (grouping) + "Convert a localeconv-style grouping into a (possibly infinite) iterable of integers representing group lengths. 
- """ - # The result from localeconv()['grouping'], and the input to this - # function, should be a list of integers in one of the - # following three forms: - # - # (1) an empty list, or - # (2) nonempty list of positive integers + [0] - # (3) list of positive integers + [locale.CHAR_MAX], or - - from itertools import chain, repeat - if not grouping: - return [] - elif grouping[-1] == 0 and len(grouping) >= 2: - return chain(grouping[:-1], repeat(grouping[-2])) - elif grouping[-1] == _locale.CHAR_MAX: - return grouping[:-1] - else: - raise ValueError('unrecognised format for grouping') - -def _insert_thousands_sep(digits, spec, min_width=1): - """Insert thousands separators into a digit string. + " + ;; The result from localeconv()['grouping'], and the input to this + ;; function, should be a list of integers in one of the + ;; following three forms: + ;; + ;; (1) an empty list, or + ;; (2) nonempty list of positive integers + [0] + ;; (3) list of positive integers + [locale.CHAR_MAX], or + + (cond + ((not (bool grouping)) + '()) + ((and (= (pylist-ref grouping -1) 0) (>= (len grouping) 2)) + (chain (pylist-slice grouping None -1 None) (repeat (pylist-ref grouping -2)))) + ((= (pylist-ref grouping -1) (ref _locale 'CHAR_MAX)) + (pylist-slice grouping None -1 None)) + (else + (raise (ValueError "unrecognised format for grouping")))))) + +(define _insert_thousands_sep + (lam (digits spec (= min_width 1)) + "Insert thousands separators into a digit string. spec is a dictionary whose keys should include 'thousands_sep' and 'grouping'; typically it's the result of parsing the format @@ -6797,40 +6843,47 @@ def _insert_thousands_sep(digits, spec, min_width=1): commas every three digits in '123456', with min_width=8, gives '0,123,456', even though that has length 9. 
- """ - - sep = spec['thousands_sep'] - grouping = spec['grouping'] - - groups = [] - for l in _group_lengths(grouping): - if l <= 0: - raise ValueError("group length should be positive") - # max(..., 1) forces at least 1 digit to the left of a separator - l = min(max(len(digits), min_width, 1), l) - groups.append('0'*(l - len(digits)) + digits[-l:]) - digits = digits[:-l] - min_width -= l - if not digits and min_width <= 0: - break - min_width -= len(sep) - else: - l = max(len(digits), min_width, 1) - groups.append('0'*(l - len(digits)) + digits[-l:]) - return sep.join(reversed(groups)) - -def _format_sign(is_negative, spec): - """Determine sign character.""" - - if is_negative: - return '-' - elif spec['sign'] in ' +': - return spec['sign'] - else: - return '' - -def _format_number(is_negative, intpart, fracpart, exp, spec): - """Format a number, given the following data: + " + + (let ((sep (pylist-ref spec "thousands_sep")) + (grouping (pylist-ref spec "grouping")) + (groups (pylist))) + + (for ((l : (_group_lengths grouping))) () + (if (<= l 0) + (raise (ValueError "group length should be positive"))) + ;; max(..., 1) forces at least 1 digit to the left of a separator + (let ((l (min (max (len digits) min_width 1) l))) + ((ref groups 'append) (+ (* "0" (- l (len digits))) + (pylist-slice digits (- l) None None))) + (set! digits (pylist-slice digits None (- l) None)) + (set! min_width (- min_width l)) + (if (and (not (bool digits)) (<= min_width 0)) + (break)) + (set! min_width (- min_width (len sep)))) + #:final + (let ((l (max (len digits) min_width 1))) + ((ref groups 'append) (+ (* "0" (- l (len digits))) + (pylist-slice digits (- l) None None))))) + ((ref sep 'join) (reversed groups))))) + +(define _format_sign + (lam (is_negative spec) + "Determine sign character." + + (cond + ((bool is_negative) + "-") + ((in (pylist-ref spec "sign") " +") + (pylist-ref spec "sign")) + (else + "")))) + +(define typed (dict '(("E" . "E") ("e" . "e") ("G" . "E") ("g" . 
"e")))) + +(define _format_number + (lambda (is_negative intpart fracpart exp spec) + "Format a number, given the following data: is_negative: true if the number is negative, else false intpart: string of digits that must appear before the decimal point @@ -6845,26 +6898,28 @@ def _format_number(is_negative, intpart, fracpart, exp, spec): add trailing '%' for the '%' type zero-pad if necessary fill and align if necessary - """ + " + + (let ((sign (_format_sign is_negative spec))) - sign = _format_sign(is_negative, spec) + (if (or (bool fracpart) (bool (pylist-ref spec "alt"))) + (set! fracpart (+ (pylist-ref spec "decimal_point") fracpart))) - if fracpart or spec['alt']: - fracpart = spec['decimal_point'] + fracpart + (if (or (not (= exp 0)) (in (pylist-ref spec "type") "eE")) + (let ((echar (pylist-ref typed (pylist-ref spec "type")))) + (set! fracpart (+ fracpart (str-format "{0}{1:+}" echar exp))))) + + (if (equal? (pylist-ref spec "type") "%") + (set! fracpart (+ fracpart "%"))) - if exp != 0 or spec['type'] in 'eE': - echar = {'E': 'E', 'e': 'e', 'G': 'E', 'g': 'e'}[spec['type']] - fracpart += "{0}{1:+}".format(echar, exp) - if spec['type'] == '%': - fracpart += '%' + (let* ((min_width + (if (bool (pylist-ref spec "zeropad")) + (- (pylist-ref spec "minimumwidth") (len fracpart) (len sign)) + 0)) + (intpart (_insert_thousands_sep intpart spec min_width))) - if spec['zeropad']: - min_width = spec['minimumwidth'] - len(fracpart) - len(sign) - else: - min_width = 0 - intpart = _insert_thousands_sep(intpart, spec, min_width) - return _format_align(sign, intpart+fracpart, spec) + (_format_align sign (+ intpart fracpart) spec))))) ;;##### Useful Constants (internal use only) ################################ diff --git a/modules/language/python/module/locale.scm b/modules/language/python/module/locale.scm new file mode 100644 index 0000000..5ed77f1 --- /dev/null +++ b/modules/language/python/module/locale.scm @@ -0,0 +1,1707 @@ +(define-module (language python module 
locale) + ;#:use-module (language python module re) + #:use-module (language python module sys) + #:use-module (language python module os) + #:use-module (language python list) + #:use-module (language python string) + #:use-module (language python dict) + #:use-module (language python exceptions) + #:use-module (language python for) + #:use-module (language python try) + #:use-module (language python def) + #:use-module (language python yield) + #:use-module (language python bool) + #:use-module (language python number) + #:use-module (language python tuple) + #:use-module (oop pf-objects) + #:use-module ((language python module python) #:select ((str . builtin_str) sorted isinstance print)) + #:use-module (language python module collections) + #:use-module (system foreign) + #:use-module (ice-9 control) + #:use-module (rnrs bytevectors) + + #:export (getlocale getdefaultlocale getpreferredencoding Error + setlocale resetlocale localeconv strcoll strxfrm + str atof atoi format format_string currency + normalize LC_CTYPE LC_COLLATE LC_TIME LC_MONETARY LC_MESSAGES + LC_NUMERIC LC_ALL LC_ADDRESS LC_IDENTIFICATION LC_MEASUREMENT + LC_NAME LC_PAPER LC_TELEPHONE CHAR_MAX)) + +"""Locale support module. + +The module provides low-level access to the C lib's locale APIs and adds high +level number formatting APIs as well as a locale aliasing engine to complement +these. + +The aliasing engine includes support for many commonly used locale names and +maps them to values suitable for passing to the C lib's setlocale() function. +""" + +(define gformat (@ (guile) format)) + +;; Try importing the _locale module. +;; +;; If this fails, fall back on a basic 'C' locale emulation. + +;; Yuck: LC_MESSAGES is non-standard: can't tell whether it exists before +;; trying the import. So __all__ is also fiddled at the end of the file. + +(define (_strcoll a b) + " strcoll(string,string) -> int. + Compares two strings according to the locale. 
+ " + (- (if (> a b) 1 0) (if (< a b) 1 0))) + +(define (_strxfrm s) + " strxfrm(string) -> string. + Returns a string that behaves for cmp locale-aware. + " + s) + +;; Locale emulation + +(define CHAR_MAX 127) +(define LC_ALL 6) +(define LC_COLLATE 3) +(define LC_CTYPE 0) +(define LC_MESSAGES 5) +(define LC_MONETARY 4) +(define LC_NUMERIC 1) +(define LC_TIME 2) +(define LC_ADDRESS 9) +(define LC_IDENTIFICATION 12) +(define LC_MEASUREMENT 11) +(define LC_NAME 8) +(define LC_PAPER 7) +(define LC_TELEPHONE 10) + +(define Error ValueError) + +(define (_localeconv) + " localeconv() -> dict. + Returns numeric and monetary locale-specific parameters. + " + ;; 'C' locale default values + (dict '(("grouping" . (127)) + ("currency_symbol" . "") + ("n_sign_posn" . 127) + ("p_cs_precedes" . 127) + ("n_cs_precedes" . 127) + ("mon_grouping" . ()) + ("n_sep_by_space" . 127) + ("decimal_point" . ".") + ("negative_sign" . "-") + ("positive_sign" . "+") + ("p_sep_by_space" . 127) + ("int_curr_symbol" . "") + ("p_sign_posn" . 127) + ("thousands_sep" . "") + ("mon_thousands_sep" . "") + ("frac_digits" . 127) + ("mon_decimal_point" . ".") + ("int_frac_digits" . 127)))) + +(define clocaleconv + (let* ((f (pointer->procedure + '* + (dynamic-func "localeconv" (dynamic-link)) + '()))) + + (lambda () + (let* ((pt (f)) + (a (pointer->bytevector pt 94)) + (i 0)) + + (define char + (case-lambda + (() (let ((x (bytevector-s8-ref a i))) + (set! i (+ i 1)) + x)) + ((n) + (set! i (+ i n)) + (char)))) + + (define char* + (case-lambda + (() (let ((x (bytevector-u64-ref a i (native-endianness)))) + (set! i (+ i 8)) + (pointer->string + (make-pointer x)))) + ((n) + (set! i (+ i n)) + (char*)))) + (define (gr x) (map char->integer (string->list x))) + (dict `(("decimal_point" . ,(char*)) + ("thousands_sep" . ,(char*)) + ("grouping" . ,(gr (char*))) + ("int_curr_symbol" . ,(char*)) + ("currency_symbol" . ,(char*)) + ("mon_decimal_point" . ,(char*)) + ("mon_thousands_sep" . ,(char*)) + ("mon_grouping" . 
,(gr (char*))) + ("positive_sign" . ,(char*)) + ("negative_sign" . ,(char*)) + ("int_frac_digits" . ,(char)) + ("frac_digits" . ,(char)) + ("p_cs_precedes" . ,(char)) + ("p_sep_by_space" . ,(char)) + ("n_cs_precedes" . ,(char)) + ("n_sep_by_space" . ,(char)) + ("p_sign_posn" . ,(char)) + ("n_sign_posn" . ,(char)))))))) + + + + + +(define setlocale + (lam (category (= value None)) + " setlocale(integer,string=None) -> string. + Activates/queries locale processing. + " + ((@ (guile) setlocale) category (if (bool value) value "")))) + +;; These may or may not exist in _locale, so be sure to set them. +(define strxfrm _strxfrm) +(define strcoll _strcoll) + +;; With this dict, you can override some items of localeconv's return value. +;; This is useful for testing purposes. +(define _override_localeconv (dict)) + + +(define (localeconv) + (let ((d (clocaleconv))) + (if (bool _override_localeconv) + (py-update d _override_localeconv)) + d)) + + +;;### Number formatting APIs + +;; Author: Martin von Loewis +;; improved by Georg Brandl + +;; Iterate over grouping intervals +(define (_grouping_intervals grouping) + ((make-generator () + (lambda (yield) + (let ((last_interval None)) + (for ((interval : grouping)) ((last_interval None)) + ;; if grouping is -1, we are done + (when (not (= interval CHAR_MAX)) + ;; 0: re-use last group ad infinitum + (if (= interval 0) + (begin + (if (eq? last_interval None) + (raise (ValueError "invalid grouping"))) + (let lp () + (yield last_interval) + (lp)))) + + (yield interval) + interval))))))) + +;;perform the grouping from right to left +(define _group + (lam (s (= monetary #f)) + (let* ((conv (localeconv)) + (thousands_sep (pylist-ref conv + (or (and monetary "mon_thousands_sep") + "thousands_sep"))) + (grouping (pylist-ref conv + (or (and monetary "mon_grouping") + "grouping")))) + (if (not (bool grouping)) + (values s 0) + (let ((right_spaces + (if (equal? 
(pylist-ref s -1) " ") + (let* ((stripped (py-rstrip s)) + (ret (pylist-slice s (len stripped) None None))) + (set! s stripped) + ret) + "")) + (left_spaces "") + (groups (pylist))) + (for ((interval : (_grouping_intervals grouping))) () + (when (or (not (bool s)) (not (in (pylist-ref s -1) "0123456789"))) + ;; only non-digit characters remain (sign, spaces) + (set! left_spaces s ) + (set! s "") + (break)) + (pylist-append! groups (pylist-slice s (- interval) None None)) + (set! s (pylist-slice s None (- interval) None))) + + (if (bool s) + (pylist-append! groups s)) + (pylist-reverse! groups) + (values + (+ left_spaces (py-join thousands_sep groups) right_spaces) + (* (len thousands_sep) (- (len groups) 1)))))))) + +;; Strip a given amount of excess padding from the given string +(define _strip_padding + (lambda (s amount) + (call-with-values + (lambda () + (let lp ((lpos 0) (amount amount)) + (if (and (bool amount) (equal? (pylist-ref s lpos) " ")) + (lp (+ lpos 1) (- amount 1)) + (values lpos amount)))) + (lambda (lpos amount) + (call-with-values + (lambda () + (let lp ((rpos (- (len s) 1)) (amount amount)) + (if (and (bool amount) (equal? (pylist-ref s rpos) " ")) + (lp (- rpos 1) (- amount 1)) + (values rpos amount)))) + (lambda (rpos amount) + (pylist-slice s lpos (+ rpos 1) None))))))) + +#; +(define _percent_re (compile (+ "%(?:\\((?P<key>.*?)\\))?" + "(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]"))) + +(define format + (lam (percent value (= grouping #f) (= monetary #f) (* additional)) + "Returns the locale-aware substitution of a %? specifier + (percent). + + additional is for format strings which contain one or more + '*' modifiers." 
+ ;; this is only for one-percent-specifier strings and this should be checked + (let ((match ((ref _percent_re 'match) percent))) + (if (or (not (bool match)) (not (= (len ((ref match 'group))) (len percent)))) + (raise (ValueError (+ "format() must be given exactly one %%char " + (gformat #f "format specifier, ~a not valid" percent))))) + (py-apply _format percent value grouping monetary (* additional))))) + +(define _format + (lam (percent value (= grouping #f) (= monetary #f) (* additional)) + (let ((formated (py-apply py-mod percent + (* (if additional + (+ (list value) additional) + (list value)))))) + ;; floats and decimal ints need special action! + (when (in (pylist-ref percent -1) "eEfFgG") + (let ((seps 0) + (parts (py-split formated "."))) + (when grouping + (call-with-values + (lambda () (_group (pylist-ref parts 0) #:monetary monetary)) + (lambda (p s) + (pylist-set! parts 0 p) + (set! seps s)))) + + (let ((decimal_point (pylist-ref (localeconv) + (or (and (bool monetary) "mon_decimal_point") + "decimal_point")))) + (set! formated (py-join decimal_point parts)) + (when (bool seps) + (set! formated (_strip_padding formated seps)))))) + + (when (in (pylist-ref percent -1) "diu") + (let ((seps 0)) + (when grouping + (call-with-values + (lambda () (_group formated #:monetary monetary)) + (lambda (p s) + (set! formated p) + (set! seps s)))) + + (when (bool seps) + (set! formated (_strip_padding formated seps))))) + + formated))) + +(define format_string + (lam (f val (= grouping #f)) + "Formats a string in the same way that the % formatting would use, + but takes the current locale into account. + Grouping is applied if the third parameter is true." + (let* ((percents (pylist ((ref _percent_re 'finditer) f))) + (new_f ((ref _percent_re 'sub) "%s" f)) + (new_val + (if (isinstance val Mapping) + (for ((perc : percents)) ((r '())) + (cons + (if (equal? 
(pylist-ref ((ref perc 'group)) -1) "%") + "%" + (format ((ref perc 'group)) val grouping)) + r) + #:final (reverse r)) + (begin + (if (not (isinstance val tuple)) + (set! val (list val))) + (for ((perc : percents)) ((i 0) (r '())) + (if (equal? (pylist-ref ((ref perc 'group)) -1) "%") + (values i (cons "%" r)) + (let ((starcount (pylist-count ((ref perc 'group) "modifiers") "*"))) + (values (+ i 1 starcount) + (cons + (py-apply _format ((ref perc 'group)) + (pylist-ref val i) + grouping + #f + (* (pylist-slice val (+ i 1) (+ i 1 starcount) None))) + r)))) + #:final (reverse r)))))) + + (set! val (tuple new_val)) + (py-mod new_f val)))) + +(define currency + (lam (val (= symbol #t) (= grouping #f) (= international #f)) + "Formats val according to the currency settings + in the current locale." + + (let* ((conv (localeconv)) + ;; check for illegal values + (digits (pylist-ref conv (or (and (bool international) "int_frac_digits") + "frac_digits")))) + (if (= digits 127) + (raise (ValueError (+ "Currency formatting is not possible using " + "the 'C' locale.")))) + + (let ((s (format (py-mod "%%.%if" digits) (abs val) grouping #:monetary #t))) + ;; '<' and '>' are markers if the sign must be inserted between symbol and value + (set! s (+ "<" s ">")) + + (if (bool symbol) + (let ((smb + (pylist-ref conv (or (and (bool international) "int_curr_symbol") + "currency_symbol"))) + (precedes + (pylist-ref conv (or (and (< val 0) "n_cs_precedes") + "p_cs_precedes"))) + (separated + (pylist-ref conv (or (and (< val 0) "n_sep_by_space") + "p_sep_by_space")))) + + (if (bool precedes) + (set! s (+ smb (or (and (bool separated) " ") "") s)) + (set! s (+ s (or (and (bool separated) " ") "") smb))))) + + (let ((sign_pos + (pylist-ref conv (or (and (< val 0) "n_sign_posn") + "p_sign_posn"))) + (sign + (pylist-ref conv (or (and (< val 0) "negative_sign") + "positive_sign")))) + + (set! 
s + (cond + ((= sign_pos 0) + (+ "(" s ")")) + ((= sign_pos 1) + (+ sign s)) + ((= sign_pos 2) + (+ s sign)) + ((= sign_pos 3) + (py-replace s "<" sign)) + ((= sign_pos 4) + (py-replace s ">" sign)) + (else (+ sign s)))) + + (py-replace (py-replace s "<" "") ">" "")))))) + + +(define (str val) + "Convert float to string, taking the locale into account." + (format "%.12g" val)) + +(define (delocalize string) + "Parses a string as a normalized number according to the locale settings." + + (let* ((conv (localeconv)) + ;;First, get rid of the grouping + (ts (pylist-ref conv "thousands_sep")) + ;;next, replace the decimal point with a dot + (dd (pylist-ref conv "decimal_point"))) + + + (if (bool ts) + (set! string (py-replace string ts ""))) + + (if (bool dd) + (set! string (py-replace string dd "."))) + + string)) + +(define atof + (lam (string (= func float)) + "Parses a string as a float according to the locale settings." + (func (delocalize string)))) + +(define (atoi string) + "Converts a string to an integer according to the locale settings." + (atof string int)) + +(define (_test) + (setlocale LC_ALL "") + ;;do grouping + (let ((s1 (format "%d" 123456789 1))) + (print s1 "is" (atoi s1)) + ;;standard formatting + (set! s1 (str 3.14)) + (print s1 "is" (atof s1)))) + +;;### Locale name aliasing engine + +;; Author: Marc-Andre Lemburg, mal@lemburg.com +;; Various tweaks by Fredrik Lundh <fredrik@pythonware.com> + +;; store away the low-level version of setlocale (it's +;; overridden below) + +(define _setlocale setlocale) + +(define (_replace_encoding code encoding) + (let* ((langname + (if (in "." code) + (pylist-slice code None (py-index code ".") None) + code)) + ;;Convert the encoding to a C lib compatible encoding string + (norm_encoding encoding) + (norm_encoding (py-lower norm_encoding))) + + (if (in norm_encoding locale_encoding_alias) + (set! 
encoding (pylist-ref locale_encoding_alias norm_encoding)) + (let* ((norm_encoding (py-replace norm_encoding "_" "")) + (norm_encoding (py-replace norm_encoding "-" ""))) + (if (in norm_encoding locale_encoding_alias) + (set! encoding (pylist-ref locale_encoding_alias norm_encoding))))) + ;;print('found encoding %r' % encoding) + (+ langname "." encoding))) + +(define (_append_modifier code modifier) + (define (f) (+ code "@" modifier)) + (if (equal? modifier "euro") + (if (not (in "." code)) + (+ code + ".ISO8859-15") + (call-with-values + (lambda () (py-partition code ".")) + (lambda (a b encoding) + (if (in encoding (list "ISO8859-15" "UTF-8")) + code + (if (equal? encoding "ISO8859-1") + (_replace_encoding code "ISO8859-15") + (f)))))) + (f))) + +(define (normalize localname) + + " Returns a normalized locale code for the given locale + name. + + The returned locale code is formatted for use with + setlocale(). + + If normalization fails, the original name is returned + unchanged. + + If the given encoding is not known, the function defaults to + the default encoding for the locale code just like setlocale() + does. + + " + (define (apply-l thunk f) (apply f (thunk))) + + ;; Normalize the locale name and extract the encoding and modifier + (let/ec return + (let ((code (py-lower localname))) + (apply-l + (lambda () + (if (in ":" code) + ;; ':' is sometimes used as encoding delimiter. + (set! code (py-replace code ":" "."))) + (if (in "@" code) + (py-split code "@" 1) + (list code ""))) + (lambda (code modifier) + (apply-l + (lambda () + (if (in "." code) + (pylist-slice (py-split code ".") None 2 None) + (list code ""))) + (lambda (langname encoding) + ;; First lookup: fullname (possibly with encoding and modifier) + (let ((lang_enc langname)) + (if (bool encoding) + (let* ((norm_encoding (py-replace encoding "-" "")) + (norm_encoding (py-replace norm_encoding "_" ""))) + (set! lang_enc (+ lang_enc "." 
norm_encoding))) + (let ((lookup_name lang_enc)) + (if (bool modifier) + (set! lookup_name (+ lookup_name "@" modifier))) + + (set! code (py-get locale_alias lookup_name None)))) + + (if (not (eq? code None)) (return code)) + + ;;print('first lookup failed') + + (when (bool modifier) + ;; Second try: fullname without modifier (possibly with encoding) + (set! code (py-get locale_alias lang_enc None)) + (if (not (eq? code None)) + ;;print('lookup without modifier succeeded') + (if (not (in "@" code)) + (return (_append_modifier code modifier))) + (if (equal? (py-lower (pylist-ref (py-split code "@" 1) 1)) modifier) + (return code)))) + + ;;print('second lookup failed') + + (when (bool encoding) + ;; Third try: langname (without encoding, possibly with modifier) + (let ((lookup_name langname)) + (if (bool modifier) + (set! lookup_name (+ lookup_name "@" modifier))) + (set! code (py-get locale_alias lookup_name None)) + (when (not (eq? code None)) + ;;print('lookup without encoding succeeded') + (if (not (in "@" code)) + (return (_replace_encoding code encoding))) + (apply-l + (lambda () (py-split code "@" 1)) + (lambda (code modifier) + (return (+ (_replace_encoding code encoding) "@" modifier))))) + + (when (bool modifier) + ;; Fourth try: langname (without encoding and modifier) + (set! code (py-get locale_alias langname None)) + (when (not (eq? code None)) + ;;print('lookup without modifier and encoding succeeded') + (when (not (in "@" code)) + (set! code (_replace_encoding code encoding)) + (return (_append_modifier code modifier))) + (apply-l + (lambda () (py-split code "@" 1)) + (lambda (code defmod) + (if (equal? (py-lower defmod) modifier) + (return (+ (_replace_encoding code encoding) "@" defmod))))))))) + + localname)))))))) + +(define (_parse_localname localname) + + " Parses the locale code for localename and returns the + result as tuple (language code, encoding). + + The localename is normalized and passed through the locale + alias engine. 
A ValueError is raised in case the locale name
    cannot be parsed.

    The language code corresponds to RFC 1766.  code and encoding
    can be None in case the values cannot be determined or are
    unknown to this implementation.

  "
  ;; The result is delivered as two values (language code, encoding).
  (let/ec return
    (let ((code (normalize localname)))
      ;; Call F with the elements of the list produced by T.
      (define (apply-l t f) (apply f (t)))
      (if (in "@" code)
          ;;Deal with locale modifiers
          (apply-l
           (lambda () (py-split code "@" 1))
           (lambda (base modifier)
             (if (and (equal? modifier "euro") (not (in "." base)))
                 ;; Assume Latin-9 for @euro locales. This is bogus,
                 ;; since some systems may use other encodings for these
                 ;; locales. Also, we ignore other modifiers.
                 (return base "iso-8859-15"))
             ;; Drop the modifier so it cannot end up inside the
             ;; encoding when the code is split on "." below.
             (set! code base))))

      (cond
       ((in "." code)
        (apply values (pylist-slice (py-split code ".") None 2 None)))
       ((equal? code "C")
        (values None None))
       (else
        ;; The exception has to be raised, not merely constructed.
        (raise (ValueError (py-mod "unknown locale: %s" localname))))))))

(define (_build_localename localetuple)

  " Builds a locale code from the given tuple (language code,
    encoding).

    No aliasing or normalizing takes place.

  "
  ;; Spread the two elements of the tuple over R's parameters, the same
  ;; way the apply-l helper in normalize does.
  ;; NOTE(review): assumes localetuple is a Scheme list, as the results
  ;; of py-split are treated elsewhere in this file -- confirm for
  ;; other iterables.
  (define (apply-l f r) (apply r (f)))

  (try
   (lambda ()
     (apply-l
      (lambda () localetuple)
      (lambda (language encoding)
        ;; A missing language means the portable "C" locale.
        (if (eq? language None)
            (set! language "C"))
        (if (eq? encoding None)
            language
            (+ language "." encoding)))))
   (#:except (or TypeError ValueError)
     (raise (TypeError "Locale must be None, a string, or an iterable of two strings -- language code, encoding.")))))


(define getdefaultlocale
  (lam ((= envvars (list "LC_ALL" "LC_CTYPE" "LANG" "LANGUAGE")))

    " Tries to determine the default locale settings and returns
      them as tuple (language code, encoding).

      According to POSIX, a program which has not called
      setlocale(LC_ALL, \"\") runs using the portable 'C' locale.
      Calling setlocale(LC_ALL, \"\") lets it use the default locale as
      defined by the LANG variable.
Since we don't want to interfere
      with the current locale setting we thus emulate the behavior
      in the way described above.

      To maintain compatibility with other platforms, not only the
      LANG variable is tested, but a list of variables given as
      envvars parameter.  The first found to be defined will be
      used.  envvars defaults to the search path used in GNU gettext;
      it must always contain the variable name 'LANG'.

      Except for the code 'C', the language code corresponds to RFC
      1766.  code and encoding can be None in case the values cannot
      be determined.

    "

    ;; fall back on POSIX behaviour
    (let ()
      (define lookup (lambda (x y) (py-get environ x y)))
      (_parse_localname
       (for ((variable : envvars)) ()
            (let ((localename (lookup variable #f)))
              ;; A variable that is set but empty counts as unset: ""
              ;; is a true value in Scheme, so it is tested with bool.
              (if (and localename (bool localename))
                  (begin
                    ;; LANGUAGE may hold a ':'-separated priority list;
                    ;; only its first entry is used.
                    (if (equal? variable "LANGUAGE")
                        (set! localename (car (py-split localename ":"))))
                    (break localename))))
            ;; None of the variables was usable.
            #:final "C")))))

(define getlocale
  (lam ((= category LC_CTYPE))

    " Returns the current setting for the given locale category as
      tuple (language code, encoding).

      category may be one of the LC_* value except LC_ALL.  It
      defaults to LC_CTYPE.

      Except for the code 'C', the language code corresponds to RFC
      1766.  code and encoding can be None in case the values cannot
      be determined.

    "
    ;; Calling the low-level setlocale with the category alone queries
    ;; the current setting.
    (let ((localename (_setlocale category)))
      ;; A ';' in the LC_ALL answer means it cannot be expressed as a
      ;; single (language, encoding) pair.
      (if (and (= category LC_ALL) (in ";" localename))
          (raise (TypeError "category LC_ALL is not supported")))
      (_parse_localname localename))))

(define setlocale
  (lam (category (= locale None))

    " Set the locale for the given category.  The locale can be
      a string, an iterable of two strings (language code and encoding),
      or None.

      Iterables are converted to strings using the locale aliasing
      engine.  Locale strings are passed directly to the C lib.

      category may be given as one of the LC_* values.

    "
    (if (and (bool locale) (not (isinstance locale builtin_str)))
        ;; convert to string
        (set! locale (normalize (_build_localename locale))))
    ;; None asks for the current setting.  That is the one-argument
    ;; form of the low-level call, the same form getlocale uses, so
    ;; None itself is never handed down.
    (if (eq? locale None)
        (_setlocale category)
        (_setlocale category locale))))

(define resetlocale
  (lam ((= category LC_ALL))
    " Sets the locale for category to the default setting.

      The default setting is determined by calling
      getdefaultlocale(). category defaults to LC_ALL.

    "
    ;; getdefaultlocale has to be called, and its two results
    ;; (language code, encoding) collected into the list that
    ;; _build_localename takes.
    (_setlocale category
                (_build_localename
                 (call-with-values (lambda () (getdefaultlocale)) list)))))

(define getpreferredencoding
  (lam ((= do_setlocale #t))
    "Return the charset that the user is likely using,
     according to the system configuration."
    ;; do_setlocale is accepted but not used here; the answer is the
    ;; value of Guile's %default-port-encoding fluid.
    (fluid-ref %default-port-encoding)))

#|
### Database
#
# The following data was extracted from the locale.alias file which
# comes with X11 and then hand edited removing the explicit encoding
# definitions and adding some more aliases. The file is usually
# available as /usr/lib/X11/locale/locale.alias.
#

#
# The locale_encoding_alias table maps lowercase encoding alias names
# to C locale encoding names (case-sensitive). Note that normalize()
# first looks up the encoding in the encodings.aliases dictionary and
# then applies this mapping to find the correct C lib name for the
# encoding.
#
|#

(define locale_encoding_alias (dict '(
    ;; Mappings for non-standard encoding names used in locale names
    ("437"              . "C")
    ("c"                . "C")
    ("en"               . "ISO8859-1")
    ("jis"              . "JIS7")
    ("jis7"             . "JIS7")
    ("ajec"             . "eucJP")
    ("koi8c"            . "KOI8-C")
    ("microsoftcp1251"  . "CP1251")
    ("microsoftcp1255"  . "CP1255")
    ("microsoftcp1256"  . "CP1256")
    ("88591"            . "ISO8859-1")
    ("88592"            . "ISO8859-2")
    ("88595"            . "ISO8859-5")
    ("885915"           . "ISO8859-15")

    ;; Mappings from Python codec names to C lib encoding names
    ("ascii"            . "ISO8859-1")
    ("latin_1"          . "ISO8859-1")
    ("iso8859_1"        . "ISO8859-1")
    ("iso8859_10"       . "ISO8859-10")
    ("iso8859_11"       . "ISO8859-11")
    ("iso8859_13"       . "ISO8859-13")
    ("iso8859_14"       . "ISO8859-14")
    ("iso8859_15"       . "ISO8859-15")
    ("iso8859_16"       . "ISO8859-16")
    ("iso8859_2"        . "ISO8859-2")
    ("iso8859_3"        . "ISO8859-3")
    ("iso8859_4"        . "ISO8859-4")
    ("iso8859_5"        . "ISO8859-5")
    ("iso8859_6"        . "ISO8859-6")
    ("iso8859_7"        . "ISO8859-7")
    ("iso8859_8"        . "ISO8859-8")
    ("iso8859_9"        . "ISO8859-9")
    ("iso2022_jp"       . "JIS7")
    ("shift_jis"        . "SJIS")
    ("tactis"           . "TACTIS")
    ("euc_jp"           . "eucJP")
    ("euc_kr"           . "eucKR")
    ("utf_8"            . "UTF-8")
    ("koi8_r"           . "KOI8-R")
    ("koi8_t"           . "KOI8-T")
    ("koi8_u"           . "KOI8-U")
    ("kz1048"           . "RK1048")
    ("cp1251"           . "CP1251")
    ("cp1255"           . "CP1255")
    ("cp1256"           . "CP1256")

    ;; XXX This list is still incomplete. If you know more
    ;; mappings, please file a bug report. Thanks.
    )))

;; Register every key a second time without its underscores, so that
;; e.g. "iso8859_1" is also found as "iso88591".  setdefault keeps any
;; entry that already exists under the shortened key.
(for ((k v : (sorted (py-items locale_encoding_alias)))) ()
     (let ((kk (py-replace k "_" "")))
       (py-setdefault locale_encoding_alias kk v)))

#|
#
# The locale_alias table maps lowercase alias names to C locale names
# (case-sensitive). Encodings are always separated from the locale
# name using a dot ('.'); they should only be given in case the
# language name is needed to interpret the given encoding alias
# correctly (CJK codes often have this need).
#
# Note that the normalize() function which uses this table
# removes '_' and '-' characters from the encoding part of the
# locale name before doing the lookup. This saves a lot of
# space in the table.
#
# MAL 2004-12-10:
# Updated alias mapping to most recent locale.alias file
# from X.org distribution using makelocalealias.py.
+# +# These are the differences compared to the old mapping (Python 2.4 +# and older): +# +# updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251' +# updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251' +# updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251' +# updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2' +# updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2' +# updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2' +# updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1' +# updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15' +# updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15' +# updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15' +# updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15' +# updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8' +# updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8' +# updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP' +# updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13' +# updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13' +# updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2' +# updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2' +# updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11' +# updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312' +# updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5' +# updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5' +# +# MAL 2008-05-30: +# Updated alias mapping to most recent locale.alias file +# from X.org distribution using makelocalealias.py. 
+# +# These are the differences compared to the old mapping (Python 2.5 +# and older): +# +# updated 'cs_cs.iso88592' -> 'cs_CZ.ISO8859-2' to 'cs_CS.ISO8859-2' +# updated 'serbocroatian' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2' +# updated 'sh' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2' +# updated 'sh_hr.iso88592' -> 'sh_HR.ISO8859-2' to 'hr_HR.ISO8859-2' +# updated 'sh_sp' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2' +# updated 'sh_yu' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2' +# updated 'sp' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5' +# updated 'sp_yu' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5' +# updated 'sr' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' +# updated 'sr@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' +# updated 'sr_sp' -> 'sr_SP.ISO8859-2' to 'sr_CS.ISO8859-2' +# updated 'sr_yu' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' +# updated 'sr_yu.cp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251' +# updated 'sr_yu.iso88592' -> 'sr_YU.ISO8859-2' to 'sr_CS.ISO8859-2' +# updated 'sr_yu.iso88595' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' +# updated 'sr_yu.iso88595@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' +# updated 'sr_yu.microsoftcp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251' +# updated 'sr_yu.utf8@cyrillic' -> 'sr_YU.UTF-8' to 'sr_CS.UTF-8' +# updated 'sr_yu@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' +# +# AP 2010-04-12: +# Updated alias mapping to most recent locale.alias file +# from X.org distribution using makelocalealias.py. 
+# +# These are the differences compared to the old mapping (Python 2.6.5 +# and older): +# +# updated 'ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8' +# updated 'ru_ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8' +# updated 'serbocroatian' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' +# updated 'sh' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' +# updated 'sh_yu' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' +# updated 'sr' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8' +# updated 'sr@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8' +# updated 'sr@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' +# updated 'sr_cs.utf8@latn' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8@latin' +# updated 'sr_cs@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' +# updated 'sr_yu' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8@latin' +# updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8' +# updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8' +# +# SS 2013-12-20: +# Updated alias mapping to most recent locale.alias file +# from X.org distribution using makelocalealias.py. +# +# These are the differences compared to the old mapping (Python 3.3.3 +# and older): +# +# updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C' +# updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C' +# updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C' +# updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2' +# updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8' +# updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8' +# updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8' +# updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin' +# updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8' +# updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin' +# updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin' +# +# SS 2014-10-01: +# Updated alias mapping with glibc 2.19 supported locales. +|# + +(define locale_alias (dict '( + ("a3" . "az_AZ.KOI8-C") + ("a3_az" . 
"az_AZ.KOI8-C") + ("a3_az.koic" . "az_AZ.KOI8-C") + ("aa_dj" . "aa_DJ.ISO8859-1") + ("aa_er" . "aa_ER.UTF-8") + ("aa_et" . "aa_ET.UTF-8") + ("af" . "af_ZA.ISO8859-1") + ("af_za" . "af_ZA.ISO8859-1") + ("am" . "am_ET.UTF-8") + ("am_et" . "am_ET.UTF-8") + ("american" . "en_US.ISO8859-1") + ("an_es" . "an_ES.ISO8859-15") + ("ar" . "ar_AA.ISO8859-6") + ("ar_aa" . "ar_AA.ISO8859-6") + ("ar_ae" . "ar_AE.ISO8859-6") + ("ar_bh" . "ar_BH.ISO8859-6") + ("ar_dz" . "ar_DZ.ISO8859-6") + ("ar_eg" . "ar_EG.ISO8859-6") + ("ar_in" . "ar_IN.UTF-8") + ("ar_iq" . "ar_IQ.ISO8859-6") + ("ar_jo" . "ar_JO.ISO8859-6") + ("ar_kw" . "ar_KW.ISO8859-6") + ("ar_lb" . "ar_LB.ISO8859-6") + ("ar_ly" . "ar_LY.ISO8859-6") + ("ar_ma" . "ar_MA.ISO8859-6") + ("ar_om" . "ar_OM.ISO8859-6") + ("ar_qa" . "ar_QA.ISO8859-6") + ("ar_sa" . "ar_SA.ISO8859-6") + ("ar_sd" . "ar_SD.ISO8859-6") + ("ar_sy" . "ar_SY.ISO8859-6") + ("ar_tn" . "ar_TN.ISO8859-6") + ("ar_ye" . "ar_YE.ISO8859-6") + ("arabic" . "ar_AA.ISO8859-6") + ("as" . "as_IN.UTF-8") + ("as_in" . "as_IN.UTF-8") + ("ast_es" . "ast_ES.ISO8859-15") + ("ayc_pe" . "ayc_PE.UTF-8") + ("az" . "az_AZ.ISO8859-9E") + ("az_az" . "az_AZ.ISO8859-9E") + ("az_az.iso88599e" . "az_AZ.ISO8859-9E") + ("be" . "be_BY.CP1251") + ("be@latin" . "be_BY.UTF-8@latin") + ("be_bg.utf8" . "bg_BG.UTF-8") + ("be_by" . "be_BY.CP1251") + ("be_by@latin" . "be_BY.UTF-8@latin") + ("bem_zm" . "bem_ZM.UTF-8") + ("ber_dz" . "ber_DZ.UTF-8") + ("ber_ma" . "ber_MA.UTF-8") + ("bg" . "bg_BG.CP1251") + ("bg_bg" . "bg_BG.CP1251") + ("bho_in" . "bho_IN.UTF-8") + ("bn_bd" . "bn_BD.UTF-8") + ("bn_in" . "bn_IN.UTF-8") + ("bo_cn" . "bo_CN.UTF-8") + ("bo_in" . "bo_IN.UTF-8") + ("bokmal" . "nb_NO.ISO8859-1") + ("bokm\xe5l" . "nb_NO.ISO8859-1") + ("br" . "br_FR.ISO8859-1") + ("br_fr" . "br_FR.ISO8859-1") + ("brx_in" . "brx_IN.UTF-8") + ("bs" . "bs_BA.ISO8859-2") + ("bs_ba" . "bs_BA.ISO8859-2") + ("bulgarian" . "bg_BG.CP1251") + ("byn_er" . "byn_ER.UTF-8") + ("c" . "C") + ("c-french" . 
"fr_CA.ISO8859-1") + ("c.ascii" . "C") + ("c.en" . "C") + ("c.iso88591" . "en_US.ISO8859-1") + ("c.utf8" . "en_US.UTF-8") + ("c_c" . "C") + ("c_c.c" . "C") + ("ca" . "ca_ES.ISO8859-1") + ("ca_ad" . "ca_AD.ISO8859-1") + ("ca_es" . "ca_ES.ISO8859-1") + ("ca_es@valencia" . "ca_ES.ISO8859-15@valencia") + ("ca_fr" . "ca_FR.ISO8859-1") + ("ca_it" . "ca_IT.ISO8859-1") + ("catalan" . "ca_ES.ISO8859-1") + ("cextend" . "en_US.ISO8859-1") + ("chinese-s" . "zh_CN.eucCN") + ("chinese-t" . "zh_TW.eucTW") + ("crh_ua" . "crh_UA.UTF-8") + ("croatian" . "hr_HR.ISO8859-2") + ("cs" . "cs_CZ.ISO8859-2") + ("cs_cs" . "cs_CZ.ISO8859-2") + ("cs_cz" . "cs_CZ.ISO8859-2") + ("csb_pl" . "csb_PL.UTF-8") + ("cv_ru" . "cv_RU.UTF-8") + ("cy" . "cy_GB.ISO8859-1") + ("cy_gb" . "cy_GB.ISO8859-1") + ("cz" . "cs_CZ.ISO8859-2") + ("cz_cz" . "cs_CZ.ISO8859-2") + ("czech" . "cs_CZ.ISO8859-2") + ("da" . "da_DK.ISO8859-1") + ("da_dk" . "da_DK.ISO8859-1") + ("danish" . "da_DK.ISO8859-1") + ("dansk" . "da_DK.ISO8859-1") + ("de" . "de_DE.ISO8859-1") + ("de_at" . "de_AT.ISO8859-1") + ("de_be" . "de_BE.ISO8859-1") + ("de_ch" . "de_CH.ISO8859-1") + ("de_de" . "de_DE.ISO8859-1") + ("de_li.utf8" . "de_LI.UTF-8") + ("de_lu" . "de_LU.ISO8859-1") + ("deutsch" . "de_DE.ISO8859-1") + ("doi_in" . "doi_IN.UTF-8") + ("dutch" . "nl_NL.ISO8859-1") + ("dutch.iso88591" . "nl_BE.ISO8859-1") + ("dv_mv" . "dv_MV.UTF-8") + ("dz_bt" . "dz_BT.UTF-8") + ("ee" . "ee_EE.ISO8859-4") + ("ee_ee" . "ee_EE.ISO8859-4") + ("eesti" . "et_EE.ISO8859-1") + ("el" . "el_GR.ISO8859-7") + ("el_cy" . "el_CY.ISO8859-7") + ("el_gr" . "el_GR.ISO8859-7") + ("el_gr@euro" . "el_GR.ISO8859-15") + ("en" . "en_US.ISO8859-1") + ("en_ag" . "en_AG.UTF-8") + ("en_au" . "en_AU.ISO8859-1") + ("en_be" . "en_BE.ISO8859-1") + ("en_bw" . "en_BW.ISO8859-1") + ("en_ca" . "en_CA.ISO8859-1") + ("en_dk" . "en_DK.ISO8859-1") + ("en_dl.utf8" . "en_DL.UTF-8") + ("en_gb" . "en_GB.ISO8859-1") + ("en_hk" . "en_HK.ISO8859-1") + ("en_ie" . "en_IE.ISO8859-1") + ("en_in" . 
"en_IN.ISO8859-1") + ("en_ng" . "en_NG.UTF-8") + ("en_nz" . "en_NZ.ISO8859-1") + ("en_ph" . "en_PH.ISO8859-1") + ("en_sg" . "en_SG.ISO8859-1") + ("en_uk" . "en_GB.ISO8859-1") + ("en_us" . "en_US.ISO8859-1") + ("en_us@euro@euro" . "en_US.ISO8859-15") + ("en_za" . "en_ZA.ISO8859-1") + ("en_zm" . "en_ZM.UTF-8") + ("en_zw" . "en_ZW.ISO8859-1") + ("en_zw.utf8" . "en_ZS.UTF-8") + ("eng_gb" . "en_GB.ISO8859-1") + ("english" . "en_EN.ISO8859-1") + ("english_uk" . "en_GB.ISO8859-1") + ("english_united-states" . "en_US.ISO8859-1") + ("english_united-states.437" . "C") + ("english_us" . "en_US.ISO8859-1") + ("eo" . "eo_XX.ISO8859-3") + ("eo.utf8" . "eo.UTF-8") + ("eo_eo" . "eo_EO.ISO8859-3") + ("eo_us.utf8" . "eo_US.UTF-8") + ("eo_xx" . "eo_XX.ISO8859-3") + ("es" . "es_ES.ISO8859-1") + ("es_ar" . "es_AR.ISO8859-1") + ("es_bo" . "es_BO.ISO8859-1") + ("es_cl" . "es_CL.ISO8859-1") + ("es_co" . "es_CO.ISO8859-1") + ("es_cr" . "es_CR.ISO8859-1") + ("es_cu" . "es_CU.UTF-8") + ("es_do" . "es_DO.ISO8859-1") + ("es_ec" . "es_EC.ISO8859-1") + ("es_es" . "es_ES.ISO8859-1") + ("es_gt" . "es_GT.ISO8859-1") + ("es_hn" . "es_HN.ISO8859-1") + ("es_mx" . "es_MX.ISO8859-1") + ("es_ni" . "es_NI.ISO8859-1") + ("es_pa" . "es_PA.ISO8859-1") + ("es_pe" . "es_PE.ISO8859-1") + ("es_pr" . "es_PR.ISO8859-1") + ("es_py" . "es_PY.ISO8859-1") + ("es_sv" . "es_SV.ISO8859-1") + ("es_us" . "es_US.ISO8859-1") + ("es_uy" . "es_UY.ISO8859-1") + ("es_ve" . "es_VE.ISO8859-1") + ("estonian" . "et_EE.ISO8859-1") + ("et" . "et_EE.ISO8859-15") + ("et_ee" . "et_EE.ISO8859-15") + ("eu" . "eu_ES.ISO8859-1") + ("eu_es" . "eu_ES.ISO8859-1") + ("eu_fr" . "eu_FR.ISO8859-1") + ("fa" . "fa_IR.UTF-8") + ("fa_ir" . "fa_IR.UTF-8") + ("fa_ir.isiri3342" . "fa_IR.ISIRI-3342") + ("ff_sn" . "ff_SN.UTF-8") + ("fi" . "fi_FI.ISO8859-15") + ("fi_fi" . "fi_FI.ISO8859-15") + ("fil_ph" . "fil_PH.UTF-8") + ("finnish" . "fi_FI.ISO8859-1") + ("fo" . "fo_FO.ISO8859-1") + ("fo_fo" . "fo_FO.ISO8859-1") + ("fr" . "fr_FR.ISO8859-1") + ("fr_be" . 
"fr_BE.ISO8859-1") + ("fr_ca" . "fr_CA.ISO8859-1") + ("fr_ch" . "fr_CH.ISO8859-1") + ("fr_fr" . "fr_FR.ISO8859-1") + ("fr_lu" . "fr_LU.ISO8859-1") + ("fran\xe7ais" . "fr_FR.ISO8859-1") + ("fre_fr" . "fr_FR.ISO8859-1") + ("french" . "fr_FR.ISO8859-1") + ("french.iso88591" . "fr_CH.ISO8859-1") + ("french_france" . "fr_FR.ISO8859-1") + ("fur_it" . "fur_IT.UTF-8") + ("fy_de" . "fy_DE.UTF-8") + ("fy_nl" . "fy_NL.UTF-8") + ("ga" . "ga_IE.ISO8859-1") + ("ga_ie" . "ga_IE.ISO8859-1") + ("galego" . "gl_ES.ISO8859-1") + ("galician" . "gl_ES.ISO8859-1") + ("gd" . "gd_GB.ISO8859-1") + ("gd_gb" . "gd_GB.ISO8859-1") + ("ger_de" . "de_DE.ISO8859-1") + ("german" . "de_DE.ISO8859-1") + ("german.iso88591" . "de_CH.ISO8859-1") + ("german_germany" . "de_DE.ISO8859-1") + ("gez_er" . "gez_ER.UTF-8") + ("gez_et" . "gez_ET.UTF-8") + ("gl" . "gl_ES.ISO8859-1") + ("gl_es" . "gl_ES.ISO8859-1") + ("greek" . "el_GR.ISO8859-7") + ("gu_in" . "gu_IN.UTF-8") + ("gv" . "gv_GB.ISO8859-1") + ("gv_gb" . "gv_GB.ISO8859-1") + ("ha_ng" . "ha_NG.UTF-8") + ("he" . "he_IL.ISO8859-8") + ("he_il" . "he_IL.ISO8859-8") + ("hebrew" . "he_IL.ISO8859-8") + ("hi" . "hi_IN.ISCII-DEV") + ("hi_in" . "hi_IN.ISCII-DEV") + ("hi_in.isciidev" . "hi_IN.ISCII-DEV") + ("hne" . "hne_IN.UTF-8") + ("hne_in" . "hne_IN.UTF-8") + ("hr" . "hr_HR.ISO8859-2") + ("hr_hr" . "hr_HR.ISO8859-2") + ("hrvatski" . "hr_HR.ISO8859-2") + ("hsb_de" . "hsb_DE.ISO8859-2") + ("ht_ht" . "ht_HT.UTF-8") + ("hu" . "hu_HU.ISO8859-2") + ("hu_hu" . "hu_HU.ISO8859-2") + ("hungarian" . "hu_HU.ISO8859-2") + ("hy_am" . "hy_AM.UTF-8") + ("hy_am.armscii8" . "hy_AM.ARMSCII_8") + ("ia" . "ia.UTF-8") + ("ia_fr" . "ia_FR.UTF-8") + ("icelandic" . "is_IS.ISO8859-1") + ("id" . "id_ID.ISO8859-1") + ("id_id" . "id_ID.ISO8859-1") + ("ig_ng" . "ig_NG.UTF-8") + ("ik_ca" . "ik_CA.UTF-8") + ("in" . "id_ID.ISO8859-1") + ("in_id" . "id_ID.ISO8859-1") + ("is" . "is_IS.ISO8859-1") + ("is_is" . "is_IS.ISO8859-1") + ("iso-8859-1" . "en_US.ISO8859-1") + ("iso-8859-15" . 
"en_US.ISO8859-15") + ("iso8859-1" . "en_US.ISO8859-1") + ("iso8859-15" . "en_US.ISO8859-15") + ("iso_8859_1" . "en_US.ISO8859-1") + ("iso_8859_15" . "en_US.ISO8859-15") + ("it" . "it_IT.ISO8859-1") + ("it_ch" . "it_CH.ISO8859-1") + ("it_it" . "it_IT.ISO8859-1") + ("italian" . "it_IT.ISO8859-1") + ("iu" . "iu_CA.NUNACOM-8") + ("iu_ca" . "iu_CA.NUNACOM-8") + ("iu_ca.nunacom8" . "iu_CA.NUNACOM-8") + ("iw" . "he_IL.ISO8859-8") + ("iw_il" . "he_IL.ISO8859-8") + ("iw_il.utf8" . "iw_IL.UTF-8") + ("ja" . "ja_JP.eucJP") + ("ja_jp" . "ja_JP.eucJP") + ("ja_jp.euc" . "ja_JP.eucJP") + ("ja_jp.mscode" . "ja_JP.SJIS") + ("ja_jp.pck" . "ja_JP.SJIS") + ("japan" . "ja_JP.eucJP") + ("japanese" . "ja_JP.eucJP") + ("japanese-euc" . "ja_JP.eucJP") + ("japanese.euc" . "ja_JP.eucJP") + ("jp_jp" . "ja_JP.eucJP") + ("ka" . "ka_GE.GEORGIAN-ACADEMY") + ("ka_ge" . "ka_GE.GEORGIAN-ACADEMY") + ("ka_ge.georgianacademy" . "ka_GE.GEORGIAN-ACADEMY") + ("ka_ge.georgianps" . "ka_GE.GEORGIAN-PS") + ("ka_ge.georgianrs" . "ka_GE.GEORGIAN-ACADEMY") + ("kk_kz" . "kk_KZ.RK1048") + ("kl" . "kl_GL.ISO8859-1") + ("kl_gl" . "kl_GL.ISO8859-1") + ("km_kh" . "km_KH.UTF-8") + ("kn" . "kn_IN.UTF-8") + ("kn_in" . "kn_IN.UTF-8") + ("ko" . "ko_KR.eucKR") + ("ko_kr" . "ko_KR.eucKR") + ("ko_kr.euc" . "ko_KR.eucKR") + ("kok_in" . "kok_IN.UTF-8") + ("korean" . "ko_KR.eucKR") + ("korean.euc" . "ko_KR.eucKR") + ("ks" . "ks_IN.UTF-8") + ("ks_in" . "ks_IN.UTF-8") + ("ks_in@devanagari.utf8" . "ks_IN.UTF-8@devanagari") + ("ku_tr" . "ku_TR.ISO8859-9") + ("kw" . "kw_GB.ISO8859-1") + ("kw_gb" . "kw_GB.ISO8859-1") + ("ky" . "ky_KG.UTF-8") + ("ky_kg" . "ky_KG.UTF-8") + ("lb_lu" . "lb_LU.UTF-8") + ("lg_ug" . "lg_UG.ISO8859-10") + ("li_be" . "li_BE.UTF-8") + ("li_nl" . "li_NL.UTF-8") + ("lij_it" . "lij_IT.UTF-8") + ("lithuanian" . "lt_LT.ISO8859-13") + ("lo" . "lo_LA.MULELAO-1") + ("lo_la" . "lo_LA.MULELAO-1") + ("lo_la.cp1133" . "lo_LA.IBM-CP1133") + ("lo_la.ibmcp1133" . "lo_LA.IBM-CP1133") + ("lo_la.mulelao1" . 
"lo_LA.MULELAO-1") + ("lt" . "lt_LT.ISO8859-13") + ("lt_lt" . "lt_LT.ISO8859-13") + ("lv" . "lv_LV.ISO8859-13") + ("lv_lv" . "lv_LV.ISO8859-13") + ("mag_in" . "mag_IN.UTF-8") + ("mai" . "mai_IN.UTF-8") + ("mai_in" . "mai_IN.UTF-8") + ("mg_mg" . "mg_MG.ISO8859-15") + ("mhr_ru" . "mhr_RU.UTF-8") + ("mi" . "mi_NZ.ISO8859-1") + ("mi_nz" . "mi_NZ.ISO8859-1") + ("mk" . "mk_MK.ISO8859-5") + ("mk_mk" . "mk_MK.ISO8859-5") + ("ml" . "ml_IN.UTF-8") + ("ml_in" . "ml_IN.UTF-8") + ("mn_mn" . "mn_MN.UTF-8") + ("mni_in" . "mni_IN.UTF-8") + ("mr" . "mr_IN.UTF-8") + ("mr_in" . "mr_IN.UTF-8") + ("ms" . "ms_MY.ISO8859-1") + ("ms_my" . "ms_MY.ISO8859-1") + ("mt" . "mt_MT.ISO8859-3") + ("mt_mt" . "mt_MT.ISO8859-3") + ("my_mm" . "my_MM.UTF-8") + ("nan_tw@latin" . "nan_TW.UTF-8@latin") + ("nb" . "nb_NO.ISO8859-1") + ("nb_no" . "nb_NO.ISO8859-1") + ("nds_de" . "nds_DE.UTF-8") + ("nds_nl" . "nds_NL.UTF-8") + ("ne_np" . "ne_NP.UTF-8") + ("nhn_mx" . "nhn_MX.UTF-8") + ("niu_nu" . "niu_NU.UTF-8") + ("niu_nz" . "niu_NZ.UTF-8") + ("nl" . "nl_NL.ISO8859-1") + ("nl_aw" . "nl_AW.UTF-8") + ("nl_be" . "nl_BE.ISO8859-1") + ("nl_nl" . "nl_NL.ISO8859-1") + ("nn" . "nn_NO.ISO8859-1") + ("nn_no" . "nn_NO.ISO8859-1") + ("no" . "no_NO.ISO8859-1") + ("no@nynorsk" . "ny_NO.ISO8859-1") + ("no_no" . "no_NO.ISO8859-1") + ("no_no.iso88591@bokmal" . "no_NO.ISO8859-1") + ("no_no.iso88591@nynorsk" . "no_NO.ISO8859-1") + ("norwegian" . "no_NO.ISO8859-1") + ("nr" . "nr_ZA.ISO8859-1") + ("nr_za" . "nr_ZA.ISO8859-1") + ("nso" . "nso_ZA.ISO8859-15") + ("nso_za" . "nso_ZA.ISO8859-15") + ("ny" . "ny_NO.ISO8859-1") + ("ny_no" . "ny_NO.ISO8859-1") + ("nynorsk" . "nn_NO.ISO8859-1") + ("oc" . "oc_FR.ISO8859-1") + ("oc_fr" . "oc_FR.ISO8859-1") + ("om_et" . "om_ET.UTF-8") + ("om_ke" . "om_KE.ISO8859-1") + ("or" . "or_IN.UTF-8") + ("or_in" . "or_IN.UTF-8") + ("os_ru" . "os_RU.UTF-8") + ("pa" . "pa_IN.UTF-8") + ("pa_in" . "pa_IN.UTF-8") + ("pa_pk" . "pa_PK.UTF-8") + ("pap_an" . "pap_AN.UTF-8") + ("pd" . 
"pd_US.ISO8859-1") + ("pd_de" . "pd_DE.ISO8859-1") + ("pd_us" . "pd_US.ISO8859-1") + ("ph" . "ph_PH.ISO8859-1") + ("ph_ph" . "ph_PH.ISO8859-1") + ("pl" . "pl_PL.ISO8859-2") + ("pl_pl" . "pl_PL.ISO8859-2") + ("polish" . "pl_PL.ISO8859-2") + ("portuguese" . "pt_PT.ISO8859-1") + ("portuguese_brazil" . "pt_BR.ISO8859-1") + ("posix" . "C") + ("posix-utf2" . "C") + ("pp" . "pp_AN.ISO8859-1") + ("pp_an" . "pp_AN.ISO8859-1") + ("ps_af" . "ps_AF.UTF-8") + ("pt" . "pt_PT.ISO8859-1") + ("pt_br" . "pt_BR.ISO8859-1") + ("pt_pt" . "pt_PT.ISO8859-1") + ("ro" . "ro_RO.ISO8859-2") + ("ro_ro" . "ro_RO.ISO8859-2") + ("romanian" . "ro_RO.ISO8859-2") + ("ru" . "ru_RU.UTF-8") + ("ru_ru" . "ru_RU.UTF-8") + ("ru_ua" . "ru_UA.KOI8-U") + ("rumanian" . "ro_RO.ISO8859-2") + ("russian" . "ru_RU.ISO8859-5") + ("rw" . "rw_RW.ISO8859-1") + ("rw_rw" . "rw_RW.ISO8859-1") + ("sa_in" . "sa_IN.UTF-8") + ("sat_in" . "sat_IN.UTF-8") + ("sc_it" . "sc_IT.UTF-8") + ("sd" . "sd_IN.UTF-8") + ("sd_in" . "sd_IN.UTF-8") + ("sd_in@devanagari.utf8" . "sd_IN.UTF-8@devanagari") + ("sd_pk" . "sd_PK.UTF-8") + ("se_no" . "se_NO.UTF-8") + ("serbocroatian" . "sr_RS.UTF-8@latin") + ("sh" . "sr_RS.UTF-8@latin") + ("sh_ba.iso88592@bosnia" . "sr_CS.ISO8859-2") + ("sh_hr" . "sh_HR.ISO8859-2") + ("sh_hr.iso88592" . "hr_HR.ISO8859-2") + ("sh_sp" . "sr_CS.ISO8859-2") + ("sh_yu" . "sr_RS.UTF-8@latin") + ("shs_ca" . "shs_CA.UTF-8") + ("si" . "si_LK.UTF-8") + ("si_lk" . "si_LK.UTF-8") + ("sid_et" . "sid_ET.UTF-8") + ("sinhala" . "si_LK.UTF-8") + ("sk" . "sk_SK.ISO8859-2") + ("sk_sk" . "sk_SK.ISO8859-2") + ("sl" . "sl_SI.ISO8859-2") + ("sl_cs" . "sl_CS.ISO8859-2") + ("sl_si" . "sl_SI.ISO8859-2") + ("slovak" . "sk_SK.ISO8859-2") + ("slovene" . "sl_SI.ISO8859-2") + ("slovenian" . "sl_SI.ISO8859-2") + ("so_dj" . "so_DJ.ISO8859-1") + ("so_et" . "so_ET.UTF-8") + ("so_ke" . "so_KE.ISO8859-1") + ("so_so" . "so_SO.ISO8859-1") + ("sp" . "sr_CS.ISO8859-5") + ("sp_yu" . "sr_CS.ISO8859-5") + ("spanish" . "es_ES.ISO8859-1") + ("spanish_spain" . 
"es_ES.ISO8859-1") + ("sq" . "sq_AL.ISO8859-2") + ("sq_al" . "sq_AL.ISO8859-2") + ("sq_mk" . "sq_MK.UTF-8") + ("sr" . "sr_RS.UTF-8") + ("sr@cyrillic" . "sr_RS.UTF-8") + ("sr@latn" . "sr_CS.UTF-8@latin") + ("sr_cs" . "sr_CS.UTF-8") + ("sr_cs.iso88592@latn" . "sr_CS.ISO8859-2") + ("sr_cs@latn" . "sr_CS.UTF-8@latin") + ("sr_me" . "sr_ME.UTF-8") + ("sr_rs" . "sr_RS.UTF-8") + ("sr_rs@latn" . "sr_RS.UTF-8@latin") + ("sr_sp" . "sr_CS.ISO8859-2") + ("sr_yu" . "sr_RS.UTF-8@latin") + ("sr_yu.cp1251@cyrillic" . "sr_CS.CP1251") + ("sr_yu.iso88592" . "sr_CS.ISO8859-2") + ("sr_yu.iso88595" . "sr_CS.ISO8859-5") + ("sr_yu.iso88595@cyrillic" . "sr_CS.ISO8859-5") + ("sr_yu.microsoftcp1251@cyrillic" . "sr_CS.CP1251") + ("sr_yu.utf8" . "sr_RS.UTF-8") + ("sr_yu.utf8@cyrillic" . "sr_RS.UTF-8") + ("sr_yu@cyrillic" . "sr_RS.UTF-8") + ("ss" . "ss_ZA.ISO8859-1") + ("ss_za" . "ss_ZA.ISO8859-1") + ("st" . "st_ZA.ISO8859-1") + ("st_za" . "st_ZA.ISO8859-1") + ("sv" . "sv_SE.ISO8859-1") + ("sv_fi" . "sv_FI.ISO8859-1") + ("sv_se" . "sv_SE.ISO8859-1") + ("sw_ke" . "sw_KE.UTF-8") + ("sw_tz" . "sw_TZ.UTF-8") + ("swedish" . "sv_SE.ISO8859-1") + ("szl_pl" . "szl_PL.UTF-8") + ("ta" . "ta_IN.TSCII-0") + ("ta_in" . "ta_IN.TSCII-0") + ("ta_in.tscii" . "ta_IN.TSCII-0") + ("ta_in.tscii0" . "ta_IN.TSCII-0") + ("ta_lk" . "ta_LK.UTF-8") + ("te" . "te_IN.UTF-8") + ("te_in" . "te_IN.UTF-8") + ("tg" . "tg_TJ.KOI8-C") + ("tg_tj" . "tg_TJ.KOI8-C") + ("th" . "th_TH.ISO8859-11") + ("th_th" . "th_TH.ISO8859-11") + ("th_th.tactis" . "th_TH.TIS620") + ("th_th.tis620" . "th_TH.TIS620") + ("thai" . "th_TH.ISO8859-11") + ("ti_er" . "ti_ER.UTF-8") + ("ti_et" . "ti_ET.UTF-8") + ("tig_er" . "tig_ER.UTF-8") + ("tk_tm" . "tk_TM.UTF-8") + ("tl" . "tl_PH.ISO8859-1") + ("tl_ph" . "tl_PH.ISO8859-1") + ("tn" . "tn_ZA.ISO8859-15") + ("tn_za" . "tn_ZA.ISO8859-15") + ("tr" . "tr_TR.ISO8859-9") + ("tr_cy" . "tr_CY.ISO8859-9") + ("tr_tr" . "tr_TR.ISO8859-9") + ("ts" . "ts_ZA.ISO8859-1") + ("ts_za" . "ts_ZA.ISO8859-1") + ("tt" . 
"tt_RU.TATAR-CYR") + ("tt_ru" . "tt_RU.TATAR-CYR") + ("tt_ru.tatarcyr" . "tt_RU.TATAR-CYR") + ("tt_ru@iqtelif" . "tt_RU.UTF-8@iqtelif") + ("turkish" . "tr_TR.ISO8859-9") + ("ug_cn" . "ug_CN.UTF-8") + ("uk" . "uk_UA.KOI8-U") + ("uk_ua" . "uk_UA.KOI8-U") + ("univ" . "en_US.UTF-8") + ("universal" . "en_US.UTF-8") + ("universal.utf8@ucs4" . "en_US.UTF-8") + ("unm_us" . "unm_US.UTF-8") + ("ur" . "ur_PK.CP1256") + ("ur_in" . "ur_IN.UTF-8") + ("ur_pk" . "ur_PK.CP1256") + ("uz" . "uz_UZ.UTF-8") + ("uz_uz" . "uz_UZ.UTF-8") + ("uz_uz@cyrillic" . "uz_UZ.UTF-8") + ("ve" . "ve_ZA.UTF-8") + ("ve_za" . "ve_ZA.UTF-8") + ("vi" . "vi_VN.TCVN") + ("vi_vn" . "vi_VN.TCVN") + ("vi_vn.tcvn" . "vi_VN.TCVN") + ("vi_vn.tcvn5712" . "vi_VN.TCVN") + ("vi_vn.viscii" . "vi_VN.VISCII") + ("vi_vn.viscii111" . "vi_VN.VISCII") + ("wa" . "wa_BE.ISO8859-1") + ("wa_be" . "wa_BE.ISO8859-1") + ("wae_ch" . "wae_CH.UTF-8") + ("wal_et" . "wal_ET.UTF-8") + ("wo_sn" . "wo_SN.UTF-8") + ("xh" . "xh_ZA.ISO8859-1") + ("xh_za" . "xh_ZA.ISO8859-1") + ("yi" . "yi_US.CP1255") + ("yi_us" . "yi_US.CP1255") + ("yo_ng" . "yo_NG.UTF-8") + ("yue_hk" . "yue_HK.UTF-8") + ("zh" . "zh_CN.eucCN") + ("zh_cn" . "zh_CN.gb2312") + ("zh_cn.big5" . "zh_TW.big5") + ("zh_cn.euc" . "zh_CN.eucCN") + ("zh_hk" . "zh_HK.big5hkscs") + ("zh_hk.big5hk" . "zh_HK.big5hkscs") + ("zh_sg" . "zh_SG.GB2312") + ("zh_sg.gbk" . "zh_SG.GBK") + ("zh_tw" . "zh_TW.big5") + ("zh_tw.euc" . "zh_TW.eucTW") + ("zh_tw.euctw" . "zh_TW.eucTW") + ("zu" . "zu_ZA.ISO8859-1") + ("zu_za" . "zu_ZA.ISO8859-1")))) + +#| +# +# This maps Windows language identifiers to locale strings. +# +# This list has been updated from +# http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp +# to include every locale up to Windows Vista. +# +# NOTE: this mapping is incomplete. 
If your language is missing, please +# submit a bug report to the Python bug tracker at http://bugs.python.org/ +# Make sure you include the missing language identifier and the suggested +# locale code. +# +|# + +(define windows_locale (dict '( + (#x0436 . "af_ZA") ;; # Afrikaans + (#x041c . "sq_AL") ;; # Albanian + (#x0484 . "gsw_FR") ;;# Alsatian - France + (#x045e . "am_ET") ;; # Amharic - Ethiopia + (#x0401 . "ar_SA") ;; # Arabic - Saudi Arabia + (#x0801 . "ar_IQ") ;; # Arabic - Iraq + (#x0c01 . "ar_EG") ;; # Arabic - Egypt + (#x1001 . "ar_LY") ;; # Arabic - Libya + (#x1401 . "ar_DZ") ;; # Arabic - Algeria + (#x1801 . "ar_MA") ;; # Arabic - Morocco + (#x1c01 . "ar_TN") ;; # Arabic - Tunisia + (#x2001 . "ar_OM") ;; # Arabic - Oman + (#x2401 . "ar_YE") ;; # Arabic - Yemen + (#x2801 . "ar_SY") ;; # Arabic - Syria + (#x2c01 . "ar_JO") ;; # Arabic - Jordan + (#x3001 . "ar_LB") ;; # Arabic - Lebanon + (#x3401 . "ar_KW") ;; # Arabic - Kuwait + (#x3801 . "ar_AE") ;; # Arabic - United Arab Emirates + (#x3c01 . "ar_BH") ;; # Arabic - Bahrain + (#x4001 . "ar_QA") ;; # Arabic - Qatar + (#x042b . "hy_AM") ;; # Armenian + (#x044d . "as_IN") ;; # Assamese - India + (#x042c . "az_AZ") ;; # Azeri - Latin + (#x082c . "az_AZ") ;; # Azeri - Cyrillic + (#x046d . "ba_RU") ;; # Bashkir + (#x042d . "eu_ES") ;; # Basque - Russia + (#x0423 . "be_BY") ;; # Belarusian + (#x0445 . "bn_IN") ;; # Begali + (#x201a . "bs_BA") ;; # Bosnian - Cyrillic + (#x141a . "bs_BA") ;; # Bosnian - Latin + (#x047e . "br_FR") ;; # Breton - France + (#x0402 . "bg_BG") ;; # Bulgarian +;; (#x0455 . "my_MM") ;; # Burmese - Not supported + (#x0403 . "ca_ES") ;; # Catalan + (#x0004 . "zh_CHS") ;;# Chinese - Simplified + (#x0404 . "zh_TW") ;; # Chinese - Taiwan + (#x0804 . "zh_CN") ;; # Chinese - PRC + (#x0c04 . "zh_HK") ;; # Chinese - Hong Kong S.A.R. + (#x1004 . "zh_SG") ;; # Chinese - Singapore + (#x1404 . "zh_MO") ;; # Chinese - Macao S.A.R. + (#x7c04 . "zh_CHT") ;;# Chinese - Traditional + (#x0483 . 
"co_FR") ;; # Corsican - France + (#x041a . "hr_HR") ;; # Croatian + (#x101a . "hr_BA") ;; # Croatian - Bosnia + (#x0405 . "cs_CZ") ;; # Czech + (#x0406 . "da_DK") ;; # Danish + (#x048c . "gbz_AF") ;;# Dari - Afghanistan + (#x0465 . "div_MV") ;;# Divehi - Maldives + (#x0413 . "nl_NL") ;; # Dutch - The Netherlands + (#x0813 . "nl_BE") ;; # Dutch - Belgium + (#x0409 . "en_US") ;; # English - United States + (#x0809 . "en_GB") ;; # English - United Kingdom + (#x0c09 . "en_AU") ;; # English - Australia + (#x1009 . "en_CA") ;; # English - Canada + (#x1409 . "en_NZ") ;; # English - New Zealand + (#x1809 . "en_IE") ;; # English - Ireland + (#x1c09 . "en_ZA") ;; # English - South Africa + (#x2009 . "en_JA") ;; # English - Jamaica + (#x2409 . "en_CB") ;; # English - Caribbean + (#x2809 . "en_BZ") ;; # English - Belize + (#x2c09 . "en_TT") ;; # English - Trinidad + (#x3009 . "en_ZW") ;; # English - Zimbabwe + (#x3409 . "en_PH") ;; # English - Philippines + (#x4009 . "en_IN") ;; # English - India + (#x4409 . "en_MY") ;; # English - Malaysia + (#x4809 . "en_IN") ;; # English - Singapore + (#x0425 . "et_EE") ;; # Estonian + (#x0438 . "fo_FO") ;; # Faroese + (#x0464 . "fil_PH") ;;# Filipino + (#x040b . "fi_FI") ;; # Finnish + (#x040c . "fr_FR") ;; # French - France + (#x080c . "fr_BE") ;; # French - Belgium + (#x0c0c . "fr_CA") ;; # French - Canada + (#x100c . "fr_CH") ;; # French - Switzerland + (#x140c . "fr_LU") ;; # French - Luxembourg + (#x180c . "fr_MC") ;; # French - Monaco + (#x0462 . "fy_NL") ;; # Frisian - Netherlands + (#x0456 . "gl_ES") ;; # Galician + (#x0437 . "ka_GE") ;; # Georgian + (#x0407 . "de_DE") ;; # German - Germany + (#x0807 . "de_CH") ;; # German - Switzerland + (#x0c07 . "de_AT") ;; # German - Austria + (#x1007 . "de_LU") ;; # German - Luxembourg + (#x1407 . "de_LI") ;; # German - Liechtenstein + (#x0408 . "el_GR") ;; # Greek + (#x046f . "kl_GL") ;; # Greenlandic - Greenland + (#x0447 . "gu_IN") ;; # Gujarati + (#x0468 . 
"ha_NG") ;; # Hausa - Latin + (#x040d . "he_IL") ;; # Hebrew + (#x0439 . "hi_IN") ;; # Hindi + (#x040e . "hu_HU") ;; # Hungarian + (#x040f . "is_IS") ;; # Icelandic + (#x0421 . "id_ID") ;; # Indonesian + (#x045d . "iu_CA") ;; # Inuktitut - Syllabics + (#x085d . "iu_CA") ;; # Inuktitut - Latin + (#x083c . "ga_IE") ;; # Irish - Ireland + (#x0410 . "it_IT") ;; # Italian - Italy + (#x0810 . "it_CH") ;; # Italian - Switzerland + (#x0411 . "ja_JP") ;; # Japanese + (#x044b . "kn_IN") ;; # Kannada - India + (#x043f . "kk_KZ") ;; # Kazakh + (#x0453 . "kh_KH") ;; # Khmer - Cambodia + (#x0486 . "qut_GT") ;;# K'iche - Guatemala + (#x0487 . "rw_RW") ;; # Kinyarwanda - Rwanda + (#x0457 . "kok_IN") ;;# Konkani + (#x0412 . "ko_KR") ;; # Korean + (#x0440 . "ky_KG") ;; # Kyrgyz + (#x0454 . "lo_LA") ;; # Lao - Lao PDR + (#x0426 . "lv_LV") ;; # Latvian + (#x0427 . "lt_LT") ;; # Lithuanian + (#x082e . "dsb_DE") ;;# Lower Sorbian - Germany + (#x046e . "lb_LU") ;; # Luxembourgish + (#x042f . "mk_MK") ;; # FYROM Macedonian + (#x043e . "ms_MY") ;; # Malay - Malaysia + (#x083e . "ms_BN") ;; # Malay - Brunei Darussalam + (#x044c . "ml_IN") ;; # Malayalam - India + (#x043a . "mt_MT") ;; # Maltese + (#x0481 . "mi_NZ") ;; # Maori + (#x047a . "arn_CL") ;;# Mapudungun + (#x044e . "mr_IN") ;; # Marathi + (#x047c . "moh_CA") ;;# Mohawk - Canada + (#x0450 . "mn_MN") ;; # Mongolian - Cyrillic + (#x0850 . "mn_CN") ;; # Mongolian - PRC + (#x0461 . "ne_NP") ;; # Nepali + (#x0414 . "nb_NO") ;; # Norwegian - Bokmal + (#x0814 . "nn_NO") ;; # Norwegian - Nynorsk + (#x0482 . "oc_FR") ;; # Occitan - France + (#x0448 . "or_IN") ;; # Oriya - India + (#x0463 . "ps_AF") ;; # Pashto - Afghanistan + (#x0429 . "fa_IR") ;; # Persian + (#x0415 . "pl_PL") ;; # Polish + (#x0416 . "pt_BR") ;; # Portuguese - Brazil + (#x0816 . "pt_PT") ;; # Portuguese - Portugal + (#x0446 . "pa_IN") ;; # Punjabi + (#x046b . "quz_BO") ;;# Quechua (Bolivia) + (#x086b . "quz_EC") ;;# Quechua (Ecuador) + (#x0c6b . 
"quz_PE") ;;# Quechua (Peru) + (#x0418 . "ro_RO") ;; # Romanian - Romania + (#x0417 . "rm_CH") ;; # Romansh + (#x0419 . "ru_RU") ;; # Russian + (#x243b . "smn_FI") ;;# Sami Finland + (#x103b . "smj_NO") ;;# Sami Norway + (#x143b . "smj_SE") ;;# Sami Sweden + (#x043b . "se_NO") ;; # Sami Northern Norway + (#x083b . "se_SE") ;; # Sami Northern Sweden + (#x0c3b . "se_FI") ;; # Sami Northern Finland + (#x203b . "sms_FI") ;;# Sami Skolt + (#x183b . "sma_NO") ;;# Sami Southern Norway + (#x1c3b . "sma_SE") ;;# Sami Southern Sweden + (#x044f . "sa_IN") ;; # Sanskrit + (#x0c1a . "sr_SP") ;; # Serbian - Cyrillic + (#x1c1a . "sr_BA") ;; # Serbian - Bosnia Cyrillic + (#x081a . "sr_SP") ;; # Serbian - Latin + (#x181a . "sr_BA") ;; # Serbian - Bosnia Latin + (#x045b . "si_LK") ;; # Sinhala - Sri Lanka + (#x046c . "ns_ZA") ;; # Northern Sotho + (#x0432 . "tn_ZA") ;; # Setswana - Southern Africa + (#x041b . "sk_SK") ;; # Slovak + (#x0424 . "sl_SI") ;; # Slovenian + (#x040a . "es_ES") ;; # Spanish - Spain + (#x080a . "es_MX") ;; # Spanish - Mexico + (#x0c0a . "es_ES") ;; # Spanish - Spain (Modern) + (#x100a . "es_GT") ;; # Spanish - Guatemala + (#x140a . "es_CR") ;; # Spanish - Costa Rica + (#x180a . "es_PA") ;; # Spanish - Panama + (#x1c0a . "es_DO") ;; # Spanish - Dominican Republic + (#x200a . "es_VE") ;; # Spanish - Venezuela + (#x240a . "es_CO") ;; # Spanish - Colombia + (#x280a . "es_PE") ;; # Spanish - Peru + (#x2c0a . "es_AR") ;; # Spanish - Argentina + (#x300a . "es_EC") ;; # Spanish - Ecuador + (#x340a . "es_CL") ;; # Spanish - Chile + (#x380a . "es_UR") ;; # Spanish - Uruguay + (#x3c0a . "es_PY") ;; # Spanish - Paraguay + (#x400a . "es_BO") ;; # Spanish - Bolivia + (#x440a . "es_SV") ;; # Spanish - El Salvador + (#x480a . "es_HN") ;; # Spanish - Honduras + (#x4c0a . "es_NI") ;; # Spanish - Nicaragua + (#x500a . "es_PR") ;; # Spanish - Puerto Rico + (#x540a . "es_US") ;; # Spanish - United States +;; (#x0430 . "") ;; # Sutu - Not supported + (#x0441 . 
"sw_KE") ;; # Swahili + (#x041d . "sv_SE") ;; # Swedish - Sweden + (#x081d . "sv_FI") ;; # Swedish - Finland + (#x045a . "syr_SY") ;;# Syriac + (#x0428 . "tg_TJ") ;; # Tajik - Cyrillic + (#x085f . "tmz_DZ") ;;# Tamazight - Latin + (#x0449 . "ta_IN") ;; # Tamil + (#x0444 . "tt_RU") ;; # Tatar + (#x044a . "te_IN") ;; # Telugu + (#x041e . "th_TH") ;; # Thai + (#x0851 . "bo_BT") ;; # Tibetan - Bhutan + (#x0451 . "bo_CN") ;; # Tibetan - PRC + (#x041f . "tr_TR") ;; # Turkish + (#x0442 . "tk_TM") ;; # Turkmen - Cyrillic + (#x0480 . "ug_CN") ;; # Uighur - Arabic + (#x0422 . "uk_UA") ;; # Ukrainian + (#x042e . "wen_DE") ;;# Upper Sorbian - Germany + (#x0420 . "ur_PK") ;; # Urdu + (#x0820 . "ur_IN") ;; # Urdu - India + (#x0443 . "uz_UZ") ;; # Uzbek - Latin + (#x0843 . "uz_UZ") ;; # Uzbek - Cyrillic + (#x042a . "vi_VN") ;; # Vietnamese + (#x0452 . "cy_GB") ;; # Welsh + (#x0488 . "wo_SN") ;; # Wolof - Senegal + (#x0434 . "xh_ZA") ;; # Xhosa - South Africa + (#x0485 . "sah_RU") ;;# Yakut - Cyrillic + (#x0478 . "ii_CN") ;; # Yi - PRC + (#x046a . "yo_NG") ;; # Yoruba - Nigeria + (#x0435 . "zu_ZA") ;; # Zulu + ))) + diff --git a/modules/language/python/module/re.scm b/modules/language/python/module/re.scm index 48259e8..1d7dbca 100644 --- a/modules/language/python/module/re.scm +++ b/modules/language/python/module/re.scm @@ -1,247 +1,414 @@ -(define-module (language python modules re) - #:use-module (parser stis-parser) - #:export(parse-reg)) - - -(define-syntax-rule (mk n tag str) (define n (f-seq tag (f-tag str)))) -(mk f-. #:. ".") -(mk f-^ #:^ "^") -(mk f-$ #:$ "$") - -(define subexpr (f-list #:sub - (f-seq (f-tag "(") (Ds ee) (f-tag ")")))) - -(define f-back - (f-or (f-list #:class (mk-token (f-reg! "[AZbBdDsSw]"))) - (mk-token (f-reg ".")))) - -(define (ch not) - (f-list #:ch - (f-or! (f-seq (f-char #\\) f-back) - (mk-token (f-not! 
not))))) - -(define bbody (f-cons (ch "[\\]") (ff* (ch "[]\\]")))) - -(define q (ch (f-reg "[][?+*.$^()\\]"))) - -(define choice - (f-cons #:bracket - (f-or! - (f-seq "[^]" (f-out (list #:ch "^"))) - (f-cons* - (f-tag "[") - (f? (f-seq (f-tag "^") (f-out #t))) - bbody)))) - -(define-syntax-rule (mk-post q* str tag) - (define q* - (<p-lambda> (c) - (.. c2 ((f-tag str) c)) - (<p-cc> (cons (list tag (car c)) (cdr c)))))) - -(mk-post q* "*" #:*) -(mk-post q? "?" #:?) -(mk-post q+ "+" #:+) -(mk-post q*? "*?" #:*?) -(mk-post q?? "??" #:??) -(mk-post q+? "+?" #:+?) - -(define q-or - (<p-lambda> (c) - (.. (c2) ((f-seq (f-tag "|") e) '())) - (<p-cc> (list #:or c c2)))) - -(define q? - (<p-lambda> (c) - (.. c2 ((f-tag "*") c)) - (<p-cc> (cons (list #:* (car c)) (cdr c))))) - -(define ee - (ff* (f-or! q+? q?? q*? q* q? q+ q-or choice subexpr f-. f-$ f-^ q))) - -#| -(define-syntax with - (syntax-rules () - ((_ a b c ((s v) . l) . code) - (let ((ss v)) - (syntax-parameterize - ((s (lambda (x) - (syntax-case x () - ((_ . l) - #'(ss . l)) - (_ - #'ss))))) - - (with a b c l . code)))) - ((_ a b c () . code) (<and> a b c . code)))) - -(define group - (lambda (f) - (<p-lambda> (c1) - (.. c2 (f '())) - (with ((L (cons (cons I c2) L)) - (I (+ I 1))) - (<p-cc> (list #:list (#:append c1 c2))))))) - -(define group-name - (lambda (f name) - (<p-lambda> (c1) - (.. c2 (f '())) - (with ((L (cons* (cons name c2) (cons I c2) L)) - (I (+ i 1))) - (<p-cc> (list #:list (#:append c1 c2))))))) - -(define (incant name) - (<p-lambda> (c) - (let ((r (assoc name L))) - (if r - (<and> (.. (f-tag (cdr r)))) - (<code> (error "group is not existing in the history")))))) - -(define (incant-rev name) - (<p-lambda> (c) - (let ((r (assoc name L))) - (if r - (<and> (.. 
(f-tag (reverse (cdr r))))) - (<code> (error "group is not existing in the history")))))) - -(define (reverse-form x) - (match x - ((#:or x y) - (list #:or (reverse-form x) (reverse-form y))) - ((#:group f) - (list #:group (reverse-form f))) - ((#:?P< f n) - (list #:?P< (reverse-form f) n)) - ((#:?: f) - (reverse-form f)) - ((#:?P= name) - (#:?P=-rev name)) - ((#:?P=-rev name) - (#:?P= name)) - ((#:?if name yes no) - (list #:?if-rev name (reverse-form yes) (reverse-form no))) - ((#:?if-rev name yes no) - (list #:?if name (reverse-form yes) (reverse-form no))) - ((#:?= f ) (list #:?= (reverse-form f))) - ((#:?! f ) (list #:?! (reverse-form f))) - ((#:?<= f ) (list #:?<= f)) - ((#:?<! f ) (list #:?<! f)) - ((#:* x ) (list #:* (reverse-form x))) - ((#:+ x ) (list #:+ (reverse-form x))) - ((#:mn x m n) (list #:mn (reverse-form x) m n)) - ((#:? x ) (list #:? (reverse-form x))) - ((#:*? x ) (list #:*? (reverse-form x))) - ((#:+? x ) (list #:+? (reverse-form x))) - ((#:?? x ) (list #:?? (reverse-form x))) - ((:mn? x m n) (list #:mn? (reverse-form x) m n)) - ((#:ch x ) (list #:ch x)) - ((#:bracket . l) (cons #:bracket l)) - ((x . l) (map reverse-form (cons x l))) - (x x))) - -(define (compile x) - (match x - ((#:or x y) - (f-or (compile x) (compile y))) - ((#:group f) - (group (compile f))) - ((#:?P< f n) - (group-name (compile f) n)) - ((#:?: f) - (compile f)) - ((#:?P= name) - (incant name)) - ((#:?P=-rev name) - (incant-rev name)) - ((#:?= f) (f-and (compile f) f-true)) - ((#:?! f) (f-and (f-not (compile f)) f-true)) - ((#:?<= f) (f-and (f-seq f-rev (compile (reverse-form f))) f-true)) - ((#:?<! f) (f-and (f-seq f-rev (f-not (compile (reverse-form f)))) f-true)) - ((#:?if name yes no) - (f-or (f-seq (incant name) yes) - no)) - ((#:?if-rev name yes no) - (f-or (f-seq yes (incant-rev name)) - no)) - ((#:* x ) (g* (compile x) )) - ((#:+ x ) (g+ (compile x) )) - ((#:mn x m n) (gmn (commile x) m n)) - ((#:? x ) (g? (compile x) )) - ((#:*? x ) (ng* (compile x) )) - ((#:+? 
x ) (ng+ (compile x) )) - ((#:?? x ) (ng? (compile x) )) - ((:mn? x m n) (ngmn (compile x) m n)) - ((#:ch (#:class x)) - (get-class ch)) - ((#:ch x) - (f-tag! x)) - ((#:bracket not ch ...) - (let ((f (apply f-or! - (map (lambda (x) - (match x - ((#:ch (:class ch)) - (get-class ch)) - ((#:ch ch) - (f-tag! ch)))) ch)))) - - (if not - (f-not f) - f))))) - -(define (id c) c) -(define (e-match e) - (f-seq (f-or! (f-mute e) (f-return #f)) - (f-retfkn id))) +(define-module (language python module re) + #:use-module (language python module re compile) + #:use-module (language python module re flags) + #:use-module (language python list) + #:use-module (language python dict) + #:use-module (language python try) + #:use-module (language python exceptions) + #:use-module ((language python module python) #:select (str isinstance range)) + #:use-module (language python string) + #:use-module (language python for) + #:use-module (language python def) + #:use-module (language python yield) + #:use-module (oop pf-objects) + #:use-module ((parser stis-parser) #:select (f-checkr f-eof f-ftr f-pos f-pk f-seq! f-and! + f-reg f-seq Ds f-or! ff* f-ftr)) + #:use-module ((ice-9 match) #:select ((match . ice-match))) + #:re-export (A ASCII DEBUG I IGNORECASE L LOCALE M MULTILINE X VERBOSE) + #:export (compile match fullmatch search split finditer findall sub subn purge escape)) + +(define start (make-fluid 0)) +(define end (make-fluid 0)) +(define e-start (f-pos (lambda (x n m) (fluid-set! start x)))) +(define e-end (f-pos (lambda (x n m) (fluid-set! end x)))) +(define (ge) (fluid-ref end)) +(define (gs) (fluid-ref start)) + +(define maxsplit (make-fluid 0)) +(define splitn (make-fluid 0)) +(define (set-maxsplit x) + (fluid-set! maxsplit x) + (fluid-set! splitn 0)) + +(define e-maxsplit (f-checkr + (lambda (c) + (let ((m (fluid-ref maxsplit)) + (i (fluid-ref splitn))) + (if (> m 0) + (if (< i m) + (begin + (fluid-set! splitn (+ i 1)) + #t) + #f) + #t))))) (define (e-fullmatch e) - (f-or! 
(f-seq (f-mute e) f-eof (f-retfkn id)) - (f-return #f))) + (f-seq e-start (f-and! e) e-end f-eof)) (define (e-search e) - (f-or! (f-seq (f-mute e) (f-retfkn id)) - (f-seq (f-take 1) (Ds (e-search e))) - (f-return None))) - - -(define (e-sub e str) - (f-or! (f-seq (f-subst (f-mute e) str) (Ds (e-sub e str))) - (f-seq (f-take 1) (Ds (e-search e))) - (f-out-written))) - -(define (e-subn e str) - (let lp ((i 0)) - (f-or! (f-seq (f-subst (f-mute e) str) (Ds (lp (+ i 1)))) - (f-seq (f-take 1) (Ds (e-search e))) - (f-seq (f-out-written) (f-retfkn (lambda (c) (values c i))))))) - -(define (e-split e) - (f-or! (f-cons (f-seq (mk-token f-out-written) (f-mute e)) (e-split e)) - (f-cons (f-out-remaining) (f-out '())))) - -(define* (findall x s (#:flags 0)) - (call-with-values (lambda () (parse e-search x s flags)) - (lambda (m cont) - (let lp ((m m) (cont cont)) - (if (eq? m None) - '() - (cons m (call-with-values cont lp))))))) - -(define* (finditer x s (#:flags 0)) - ((make-generator () - (lambda (yield) - (call-with-values (lambda () (parse e-search x s flags)) - (lambda (m cont) - (let lp ((m m) (cont cont)) - (if (eq? m None) - #f - (begin - (yield m) - (call-with-values cont lp)))))))))) -|# - -(define (parse-reg str) (parse str ee)) + (let lp () + (f-or! (f-seq! e-start (f-and! e) e-end) (f-seq (f-reg ".") (Ds (lp)))))) + +(define-python-class Regexp () + (define __init__ + (lam (self s (= flags 0)) + (set self 'flags flags) + (set self 'pattern s) + (let ((l (compile-reg s))) + (set self '__re (list-ref l 0)) + (set self 'groups (- (list-ref l 1) 1)) + (set self 'groupindex (list-ref l 2))))) + + (define __repr__ + (lambda (self) + (format #f "re.compile('~a')" (ref self 'pattern)))) + + (define __str__ __repr__) + (define findall + (lam (self str (= flags 0)) + (findall- self str flags))) + + (define finditer + (lam (self str (= flags None)) + (if (eq? flags None) (set! 
flags (ref self 'flags))) + (finditer- self str flags))) + + (define flags 0) + (define fullmatch + (lam (self str (= flags None)) + (if (eq? flags None) (set! flags (ref self 'flags))) + (fullmatch- self str flags))) + + (define groupindex '()) + (define groups '()) + (define match + (lam (self str (= flags None)) + (if (eq? flags None) (set! flags (ref self 'flags))) + (match- self str flags))) + + (define search + (lam (self str (= flags None)) + (if (eq? flags None) (set! flags (ref self 'flags))) + (search- self str flags))) + + (define split + (lam (self str (= maxsplit 0) (= flags None)) + (if (eq? flags None) (set! flags (ref self 'flags))) + (split- self str maxsplit flags))) + + (define sub + (lam (self repl str (= count 0) (= flags None)) + (if (eq? flags None) (set! flags (ref self 'flags))) + (sub- self repl str count flags))) + + (define subn + (lam (self repl str (= count 0) (= flags None)) + (if (eq? flags None) (set! flags (ref self 'flags))) + (subn- self repl str count flags)))) + + +(def (match- re s (= flags 0)) + (if (isinstance re str) + (match- (Regexp re) s flags) + (begin + (set-flags flags) + (let ((m (parse s (f-seq e-start (ref re '__re) e-end)))) + (if m + (ReMatch m re (gs) (ge) s) + None))))) + +(define match match-) + +(def (search- re s (= flags 0)) + (if (isinstance re str) + (search- (Regexp re) s flags) + (begin + (set-flags flags) + (let ((m (parse s (e-search (ref re '__re))))) + (if m + (ReMatch m re (gs) (ge) s) + None))))) + +(define search search-) + +(def (fullmatch- re ss (= flags 0)) + (if (isinstance re str) + (fullmatch- (Regexp re) ss flags) + (begin + (set-flags flags) + (let ((m (parse ss (e-fullmatch (ref re '__re))))) + (if m + (ReMatch m re (gs) (ge) ss) + None))))) + +(define fullmatch fullmatch-) + +(define (grps m) (reverse ((ref m 'groups)))) + +(define-syntax-rule (mk split- grps) +(def (split- re ss (= maxsplit 0) (= flags 0)) + (if (isinstance re str) + (split- (Regexp re) ss maxsplit flags) + (begin + 
(set-flags flags) + (set-maxsplit maxsplit) + (let ((m (parse ss (ff* (f-seq! e-maxsplit + (e-search (ref re '__re)) + (f-ftr (lambda (c) (list (gs) (ge) c)))))))) + (let lp ((s 0) (l (map (lambda (m) (ReMatch (caddr m) re (car m) (cadr m) ss)) m)) (r '())) + (if (pair? l) + (let* ((m (car l)) + (i ((ref m 'start))) + (j ((ref m 'end)))) + (lp j (cdr l) (append + (grps m) + (list (pylist-slice ss s i None)) + r))) + (reverse (cons (pylist-slice ss s None None) r))))))))) + +(mk split- grps) +(define split split-) + +(define (id x) (list x)) +(mk splitm- id) +(define splitm splitm-) + +(def (finditer- re s (= flags 0)) + (if (isinstance re str) + (finditer- (Regexp re) s flags) + (let ((e (ref re '__re))) + (set-flags flags) + ((make-generator () + (lambda (yield) + (parse s + (let lp () + (f-seq + (f-or! + (f-seq! e-start (f-and! e) e-end + (f-ftr (lambda (c) (yield (ReMatch c re (gs) (ge) s)) '()))) + (f-seq (f-reg "."))) + (Ds (lp))))))))))) + +(define finditer finditer-) + +(def (findall- re s (= flags 0)) + (for ((m : (finditer re s flags))) ((l '())) + (let ((grps ((ref m 'groups)))) + (cons + (cond + ((null? grps) + ((ref m 'group))) + ((= (length grps) 1) + (car grps)) + (else + grps)) + l)) + #:final (reverse l))) +(define findall findall-) + +(define (mk-repl-str repl) + (define (take n) + (lambda (m) + (let ((s (pylist-ref m n))) + (if (eq? s None) + (set! s "")) + (reverse + (string->list s))))) + + (define (comp r) + (let lp ((r r) (l '())) + (if (pair? r) + (let ((x (car r))) + (if (char? x) + (lp (cdr r) (cons x l)) + (if (null? l) + r + (let ((l (reverse l))) + (cons (lambda (m) l) r))))) + (if (null? l) + '() + (let ((l (reverse l))) + (cons (lambda (m) l) r)))))) + + (define num? char-numeric?) + (let* ((l (string->list repl)) + (c (let lp ((l l) (r '())) + (ice-match l + ((#\\ (? num? x) (? num? y) . l) + (lp l (cons (take (string->number + (list->string (list x y)))) + (comp r)))) + ((#\\ (? num? x) . 
l) + (lp l (cons (take (string->number + (list->string (list x)))) + (comp r)))) + ((#\\ #\n . l) + (lp l (cons #\newline r))) + ((x . l) + (lp l (cons x r))) + (() + (reverse (comp r))))))) + (lambda (m) + (let lp ((c c) (r '())) + (if (pair? c) + (let ((x (car c))) + (if (char? x) + (lp (cdr c) (cons x r)) + (lp (cdr c) (append (x m) r)))) + (list->string (reverse r))))))) + + +(define (mk-repl repl) + (if (isinstance repl str) + (mk-repl-str repl) + repl)) + +(def (subn- re repl s (= count 0) (= flags 0)) + (let ((l (splitm re s count flags))) + (define f (mk-repl repl)) + (let lp ((l l) (r '()) (i 0)) + (ice-match l + ((x m . l) + (lp l (cons* (f m) x r) (+ i 1))) + ((x) + (list (py-join "" (reverse (cons x r))) i)) + (() + (list (py-join "" (reverse r)) i)))))) + +(define subn subn-) + +(def (sub- re repl s (= count 0) (= flags 0)) + (car (subn re repl s count flags))) + +(define sub sub-) + +(define (val x) + (if (eq? x None) + x + (car x))) + +(define (vali x) + (if (eq? x None) + (list -1 -1) + (cdr x))) + +(define-python-class ReMatch () + (define __init__ + (lambda (self m re s e ss) + (set self 're re) + (set self '__start s) + (set self '__end e) + (set self 'string ss) + + (let ((t (make-hash-table))) + + (for ((k v : (ref re 'groupindex))) () + (hash-set! t k None)) + + (for ((i : (range (ref re 'groups)))) () + (hash-set! t i None)) + + (let lp ((l (list-ref m 0))) + (ice-match l + ((((n . i) . v) . l) + (hash-set! t n v) + (hash-set! t i v) + (lp l)) + (((i . v) . l) + (hash-set! t i v) + (lp l)) + (() #t))) + + (set self '__t t) + (set self 'regs (__regs self))))) + + (define er (list 'er)) + (define group + (case-lambda + ((self nm) + (if (= nm 0) + (group self) + (let ((x (py-get (ref self '__t) nm er))) + (if (eq? 
x er) + (raise (IndexError "no souch group in regexp match")) + (val x))))) + ((self) + (pylist-slice (ref self 'string) (start self) (end self) None)))) + + (define __getitem__ + (lambda (self k) + (group self k))) + + (define groups + (lam (self (= default None)) + (let ((t (ref self '__t)) + (n (ref (ref self 're) 'groups))) + (let lp ((i 1)) + (if (<= i n) + (let ((w (val (hash-ref t i)))) + (cons (if (eq? w None) default w) (lp (+ i 1)))) + '()))))) + + (define __regs + (lambda (self) + (let ((t (ref self '__t)) + (n (ref (ref self 're) 'groups))) + (cons (span self) + (let lp ((i 1)) + (if (<= i n) + (cons (vali (hash-ref t i)) (lp (+ i 1))) + '())))))) + + (define groupdict + (lam (self (= default None)) + (let ((t (make-hash-table))) + (for ((k v : (ref self '__t))) () + (if (not (number? k)) + (let ((w (val v))) + (hash-set! t k (if (eq? w None) default w)))) + #:final t)))) + + (define start + (lambda (self) + (ref self '__start))) + + (define end + (lambda (self) + (ref self '__end))) + (define span + (lambda (self) + (list + (ref self '__start) + (ref self '__end)))) + + (define expand + (lambda (self template) + ((mk-repl template) self))) + + (define __repr__ + (lambda (self) + (format #f "ReMatch<~s>" (group self))))) + + + + +(define (compile s) (Regexp s)) + +(define (purge) (values)) + +(define escape + (let ((m (string->list "()[]#$*+-.^|\\"))) + (lambda (x) + (let lp ((l (string->list x)) (r '())) + (ice-match l + ((x . l) + (lp l + (if (member x m) + (cons* #\\ x r) + x))) + (() + (list->string (reverse r)))))))) |