diff options
author | Stefan Israelsson Tampe <stefan.itampe@gmail.com> | 2018-09-10 21:32:17 +0200 |
---|---|---|
committer | Stefan Israelsson Tampe <stefan.itampe@gmail.com> | 2018-09-10 21:32:17 +0200 |
commit | f75533465dbe70b153c75c85297422964b1a598d (patch) | |
tree | d69c4453ccdccdcdc0bad04afb0a628cdca02d89 | |
parent | b13d359c27a03c19ac113ced417d00155942ef11 (diff) |
json
-rw-r--r-- | modules/language/python/module/json.py | 369 | ||||
-rw-r--r-- | modules/language/python/module/json/decoder.py | 364 | ||||
-rw-r--r-- | modules/language/python/module/json/encoder.py | 439 | ||||
-rw-r--r-- | modules/language/python/module/json/scanner.py | 72 | ||||
-rw-r--r-- | modules/language/python/module/json/tool.py | 52 | ||||
-rw-r--r-- | modules/language/python/module/string.scm | 8 | ||||
-rw-r--r-- | modules/language/python/number.scm | 35 |
7 files changed, 1326 insertions, 13 deletions
diff --git a/modules/language/python/module/json.py b/modules/language/python/module/json.py new file mode 100644 index 0000000..0d6951f --- /dev/null +++ b/modules/language/python/module/json.py @@ -0,0 +1,369 @@ +module(json) + +r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format. + +:mod:`json` exposes an API familiar to users of the standard library +:mod:`marshal` and :mod:`pickle` modules. It is derived from a +version of the externally maintained simplejson library. + +Encoding basic Python object hierarchies:: + + >>> import json + >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) + '["foo", {"bar": ["baz", null, 1.0, 2]}]' + >>> print(json.dumps("\"foo\bar")) + "\"foo\bar" + >>> print(json.dumps('\u1234')) + "\u1234" + >>> print(json.dumps('\\')) + "\\" + >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)) + {"a": 0, "b": 0, "c": 0} + >>> from io import StringIO + >>> io = StringIO() + >>> json.dump(['streaming API'], io) + >>> io.getvalue() + '["streaming API"]' + +Compact encoding:: + + >>> import json + >>> from collections import OrderedDict + >>> mydict = OrderedDict([('4', 5), ('6', 7)]) + >>> json.dumps([1,2,3,mydict], separators=(',', ':')) + '[1,2,3,{"4":5,"6":7}]' + +Pretty printing:: + + >>> import json + >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)) + { + "4": 5, + "6": 7 + } + +Decoding JSON:: + + >>> import json + >>> obj = ['foo', {'bar': ['baz', None, 1.0, 2]}] + >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj + True + >>> json.loads('"\\"foo\\bar"') == '"foo\x08ar' + True + >>> from io import StringIO + >>> io = StringIO('["streaming API"]') + >>> json.load(io)[0] == 'streaming API' + True + +Specializing JSON object decoding:: + + >>> import json + >>> def as_complex(dct): + ... if '__complex__' in dct: + ... return complex(dct['real'], dct['imag']) + ... return dct + ... + >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', + ... object_hook=as_complex) + (1+2j) + >>> from decimal import Decimal + >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1') + True + +Specializing JSON object encoding:: + + >>> import json + >>> def encode_complex(obj): + ... if isinstance(obj, complex): + ... return [obj.real, obj.imag] + ... raise TypeError(repr(obj) + " is not JSON serializable") + ... + >>> json.dumps(2 + 1j, default=encode_complex) + '[2.0, 1.0]' + >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) + '[2.0, 1.0]' + >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) + '[2.0, 1.0]' + + +Using json.tool from the shell to validate and pretty-print:: + + $ echo '{"json":"obj"}' | python -m json.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m json.tool + Expecting property name enclosed in double quotes: line 1 column 3 (char 2) +""" +__version__ = '2.0.9' +__all__ = [ + 'dump', 'dumps', 'load', 'loads', + 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', +] + +__author__ = 'Bob Ippolito <bob@redivi.com>' + +from json.decoder import JSONDecoder, JSONDecodeError +from json.encoder import JSONEncoder +import codecs +pk(1) +_default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + default=None, +) +pk(2) +def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + default=None, sort_keys=False, **kw): + """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a + ``.write()``-supporting file-like object). + + If ``skipkeys`` is true then ``dict`` keys that are not basic types + (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped + instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the strings written to ``fp`` can + contain non-ASCII characters if they appear in strings contained in + ``obj``. Otherwise, all such characters are escaped in JSON strings. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) + in strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. + + If specified, ``separators`` should be an ``(item_separator, key_separator)`` + tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and + ``(',', ': ')`` otherwise. To get the most compact JSON representation, + you should specify ``(',', ':')`` to eliminate whitespace. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + If *sort_keys* is true (default: ``False``), then the output of + dictionaries will be sorted by key. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. + + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + default is None and not sort_keys and not kw): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, + default=default, sort_keys=sort_keys, **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + + +def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + default=None, sort_keys=False, **kw): + """Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is true then ``dict`` keys that are not basic types + (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped + instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the return value can contain non-ASCII + characters if they appear in strings contained in ``obj``. Otherwise, all + such characters are escaped in JSON strings. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. + + If specified, ``separators`` should be an ``(item_separator, key_separator)`` + tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and + ``(',', ': ')`` otherwise. To get the most compact JSON representation, + you should specify ``(',', ':')`` to eliminate whitespace. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + If *sort_keys* is true (default: ``False``), then the output of + dictionaries will be sorted by key. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. + + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + default is None and not sort_keys and not kw): + return _default_encoder.encode(obj) + if cls is None: + cls = JSONEncoder + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, default=default, sort_keys=sort_keys, + **kw).encode(obj) + + +_default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None) + + +def detect_encoding(b): + bstartswith = b.startswith + if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)): + return 'utf-32' + if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)): + return 'utf-16' + if bstartswith(codecs.BOM_UTF8): + return 'utf-8-sig' + + if len(b) >= 4: + if not b[0]: + # 00 00 -- -- - utf-32-be + # 00 XX -- -- - utf-16-be + return 'utf-16-be' if b[1] else 'utf-32-be' + if not b[1]: + # XX 00 00 00 - utf-32-le + # XX 00 00 XX - utf-16-le + # XX 00 XX -- - utf-16-le + return 'utf-16-le' if b[2] or b[3] else 'utf-32-le' + elif len(b) == 2: + if not b[0]: + # 00 XX - utf-16-be + return 'utf-16-be' + if not b[1]: + # XX 00 - utf-16-le + return 'utf-16-le' + # default + return 'utf-8' + + +def load(fp, *, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): + """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + ``object_pairs_hook`` is an optional function that will be called with the + result of any object literal decoded with an ordered list of pairs. The + return value of ``object_pairs_hook`` will be used instead of the ``dict``. + This feature can be used to implement custom decoders that rely on the + order that the key and value pairs are decoded (for example, + collections.OrderedDict will remember the order of insertion). If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg; otherwise ``JSONDecoder`` is used. + + """ + return loads(fp.read(), + cls=cls, object_hook=object_hook, + parse_float=parse_float, parse_int=parse_int, + parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw) + + +def loads(s, *, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): + """Deserialize ``s`` (a ``str``, ``bytes`` or ``bytearray`` instance + containing a JSON document) to a Python object. + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + ``object_pairs_hook`` is an optional function that will be called with the + result of any object literal decoded with an ordered list of pairs. The + return value of ``object_pairs_hook`` will be used instead of the ``dict``. + This feature can be used to implement custom decoders that rely on the + order that the key and value pairs are decoded (for example, + collections.OrderedDict will remember the order of insertion). If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. + + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg; otherwise ``JSONDecoder`` is used. + + The ``encoding`` argument is ignored and deprecated. + + """ + if isinstance(s, str): + if s.startswith('\ufeff'): + raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)", + s, 0) + else: + if not isinstance(s, (bytes, bytearray)): + raise TypeError('the JSON object must be str, bytes or bytearray, ' + 'not {!r}'.format(s.__class__.__name__)) + s = s.decode(detect_encoding(s), 'surrogatepass') + + if (cls is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and object_pairs_hook is None and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if object_pairs_hook is not None: + kw['object_pairs_hook'] = object_pairs_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + return cls(**kw).decode(s) diff --git a/modules/language/python/module/json/decoder.py b/modules/language/python/module/json/decoder.py new file mode 100644 index 0000000..7a41935 --- /dev/null +++ b/modules/language/python/module/json/decoder.py @@ -0,0 +1,364 @@ +module(json,decoder) + +"""Implementation of JSONDecoder +""" +import re + +import json.scanner as scanner + +c_scanstring = None + +__all__ = ['JSONDecoder', 'JSONDecodeError'] + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +NaN = float('nan') +PosInf = float('inf') +NegInf = float('-inf') + + +class JSONDecodeError(ValueError): + """Subclass of ValueError with the following additional properties: + + msg: The unformatted error message + doc: The JSON document being parsed + pos: The start index of doc where parsing failed + lineno: The line corresponding to pos + colno: The column corresponding to pos + + """ + # Note that this exception is used from _json + def __init__(self, msg, doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + colno = pos - doc.rfind('\n', 0, pos) + errmsg = '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos) + ValueError.__init__(self, errmsg) + self.msg = msg + self.doc = doc + self.pos = pos + self.lineno = lineno + self.colno = colno + + def __reduce__(self): + return self.__class__, (self.msg, self.doc, self.pos) + + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, +} + + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +BACKSLASH = { + '"': '"', '\\': '\\', '/': '/', + 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t', +} + +def _decode_uXXXX(s, pos): + esc = s[pos + 1:pos + 5] + if len(esc) == 4 and esc[1] not in 'xX': + try: + return int(esc, 16) + except ValueError: + pass + msg = "Invalid \\uXXXX escape" + raise JSONDecodeError(msg, s, pos) + +def py_scanstring(s, end, strict=True, + _b=BACKSLASH, _m=STRINGCHUNK.match): + """Scan the string s for a JSON string. End is the index of the + character in s after the quote that started the JSON string. + Unescapes all valid JSON string escape sequences and raises ValueError + on attempt to decode an invalid string. If strict is False then literal + control characters are allowed in the string. + + Returns a tuple of the decoded string and the index of the character in s + after the end quote.""" + chunks = [] + _append = chunks.append + begin = end - 1 + while 1: + chunk = _m(s, end) + if chunk is None: + raise JSONDecodeError("Unterminated string starting at", s, begin) + end = chunk.end() + content, terminator = chunk.groups() + # Content is contains zero or more unescaped string characters + if content: + _append(content) + # Terminator is the end of string, a literal control character, + # or a backslash denoting that an escape sequence follows + if terminator == '"': + break + elif terminator != '\\': + if strict: + #msg = "Invalid control character %r at" % (terminator,) + msg = "Invalid control character {0!r} at".format(terminator) + raise JSONDecodeError(msg, s, end) + else: + _append(terminator) + continue + try: + esc = s[end] + except IndexError: + raise JSONDecodeError("Unterminated string starting at", s, begin) + # If not a unicode escape sequence, must be in the lookup table + if esc != 'u': + try: + char = _b[esc] + except KeyError: + msg = "Invalid \\escape: {0!r}".format(esc) + raise JSONDecodeError(msg, s, end) + end += 1 + else: + uni = _decode_uXXXX(s, end) + end += 5 + if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u': + uni2 = _decode_uXXXX(s, end + 1) + if 0xdc00 <= uni2 <= 0xdfff: + uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) + end += 6 + char = chr(uni) + _append(char) + return ''.join(chunks), end + + +# Use speedup if available +scanstring = c_scanstring or py_scanstring + +WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) +WHITESPACE_STR = ' \t\n\r' + + +def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook, + memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + s, end = s_and_end + pairs = [] + pairs_append = pairs.append + # Backwards compatibility + if memo is None: + memo = {} + memo_get = memo.setdefault + # Use a slice to prevent IndexError from being raised, the following + # check will raise a more specific ValueError if the string is empty + nextchar = s[end:end + 1] + # Normally we expect nextchar == '"' + if nextchar != '"': + if nextchar in _ws: + end = _w(s, end).end() + nextchar = s[end:end + 1] + # Trivial empty object + if nextchar == '}': + if object_pairs_hook is not None: + result = object_pairs_hook(pairs) + return result, end + 1 + pairs = {} + if object_hook is not None: + pairs = object_hook(pairs) + return pairs, end + 1 + elif nextchar != '"': + raise JSONDecodeError( + "Expecting property name enclosed in double quotes", s, end) + end += 1 + while True: + key, end = scanstring(s, end, strict) + key = memo_get(key, key) + # To skip some function call overhead we optimize the fast paths where + # the JSON key separator is ": " or just ":". + if s[end:end + 1] != ':': + end = _w(s, end).end() + if s[end:end + 1] != ':': + raise JSONDecodeError("Expecting ':' delimiter", s, end) + end += 1 + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + try: + value, end = scan_once(s, end) + except StopIteration as err: + raise JSONDecodeError("Expecting value", s, err.value) from None + pairs_append((key, value)) + try: + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + end += 1 + + if nextchar == '}': + break + elif nextchar != ',': + raise JSONDecodeError("Expecting ',' delimiter", s, end - 1) + end = _w(s, end).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar != '"': + raise JSONDecodeError( + "Expecting property name enclosed in double quotes", s, end - 1) + if object_pairs_hook is not None: + result = object_pairs_hook(pairs) + return result, end + pairs = dict(pairs) + if object_hook is not None: + pairs = object_hook(pairs) + return pairs, end + +def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + s, end = s_and_end + values = [] + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + # Look-ahead for trivial empty array + if nextchar == ']': + return values, end + 1 + _append = values.append + while True: + try: + value, end = scan_once(s, end) + except StopIteration as err: + raise JSONDecodeError("Expecting value", s, err.value) from None + _append(value) + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == ']': + break + elif nextchar != ',': + raise JSONDecodeError("Expecting ',' delimiter", s, end - 1) + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + return values, end + +class f (): + def __init__ (self,x): + self.x= x + def __call__(self,c): + return self.x[c] + + +class JSONDecoder(object): + """Simple JSON <http://json.org> decoder + + Performs the following translations in decoding by default: + + +---------------+-------------------+ + | JSON | Python | + +===============+===================+ + | object | dict | + +---------------+-------------------+ + | array | list | + +---------------+-------------------+ + | string | str | + +---------------+-------------------+ + | number (int) | int | + +---------------+-------------------+ + | number (real) | float | + +---------------+-------------------+ + | true | True | + +---------------+-------------------+ + | false | False | + +---------------+-------------------+ + | null | None | + +---------------+-------------------+ + + It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as + their corresponding ``float`` values, which is outside the JSON spec. + + """ + + def __init__(self, *, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, strict=True, + object_pairs_hook=None): + """``object_hook``, if specified, will be called with the result + of every JSON object decoded and its return value will be used in + place of the given ``dict``. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + ``object_pairs_hook``, if specified will be called with the result of + every JSON object decoded with an ordered list of pairs. The return + value of ``object_pairs_hook`` will be used instead of the ``dict``. + This feature can be used to implement custom decoders that rely on the + order that the key and value pairs are decoded (for example, + collections.OrderedDict will remember the order of insertion). If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes + priority. + + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + If ``strict`` is false (true is the default), then control + characters will be allowed inside strings. Control characters in + this context are those with character codes in the 0-31 range, + including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``. + + """ + self.object_hook = object_hook + self.parse_float = parse_float or float + self.parse_int = parse_int or int + self.parse_constant = parse_constant or f(_CONSTANTS) + self.strict = strict + self.object_pairs_hook = object_pairs_hook + self.parse_object = JSONObject + self.parse_array = JSONArray + self.parse_string = scanstring + self.memo = {} + self.scan_once = scanner.make_scanner(self) + + + def decode(self, s, _w=WHITESPACE.match): + """Return the Python representation of ``s`` (a ``str`` instance + containing a JSON document). + + """ + obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + end = _w(s, end).end() + if end != len(s): + raise JSONDecodeError("Extra data", s, end) + return obj + + def raw_decode(self, s, idx=0): + """Decode a JSON document from ``s`` (a ``str`` beginning with + a JSON document) and return a 2-tuple of the Python + representation and the index in ``s`` where the document ended. + + This can be used to decode a JSON document from a string that may + have extraneous data at the end. + + """ + try: + obj, end = self.scan_once(s, idx) + except StopIteration as err: + raise JSONDecodeError("Expecting value", s, err.value) from None + return obj, end diff --git a/modules/language/python/module/json/encoder.py b/modules/language/python/module/json/encoder.py new file mode 100644 index 0000000..96ad651 --- /dev/null +++ b/modules/language/python/module/json/encoder.py @@ -0,0 +1,439 @@ +module(json,encoder) + +"""Implementation of JSONEncoder +""" +import re + +c_encode_basestring_ascii = None +c_encode_basestring = None +c_make_encoder = None + +__all__ = ['JSONEncoder'] + +ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') +ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') +HAS_UTF8 = re.compile(b'[\x80-\xff]') +ESCAPE_DCT = { + '\\': '\\\\', + '"': '\\"', + '\b': '\\b', + '\f': '\\f', + '\n': '\\n', + '\r': '\\r', + '\t': '\\t', +} + +for i in range(0x20): + ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) + #ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) + +INFINITY = float('inf') + + +def py_encode_basestring(s): + """Return a JSON representation of a Python string + + """ + def replace(match): + return ESCAPE_DCT[match.group(0)] + return '"' + ESCAPE.sub(replace, s) + '"' + + +encode_basestring = (c_encode_basestring or py_encode_basestring) + + +def py_encode_basestring_ascii(s): + """Return an ASCII-only JSON representation of a Python string + + """ + def replace(match): + s = match.group(0) + try: + return ESCAPE_DCT[s] + except KeyError: + n = ord(s) + if n < 0x10000: + return '\\u{0:04x}'.format(n) + #return '\\u%04x' % (n,) + else: + # surrogate pair + n -= 0x10000 + s1 = 0xd800 | ((n >> 10) & 0x3ff) + s2 = 0xdc00 | (n & 0x3ff) + return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) + return '"' + ESCAPE_ASCII.sub(replace, s) + '"' + + +encode_basestring_ascii = ( + c_encode_basestring_ascii or py_encode_basestring_ascii) + +class JSONEncoder(object): + """Extensible JSON <http://json.org> encoder for Python data structures. + + Supports the following objects and types by default: + + +-------------------+---------------+ + | Python | JSON | + +===================+===============+ + | dict | object | + +-------------------+---------------+ + | list, tuple | array | + +-------------------+---------------+ + | str | string | + +-------------------+---------------+ + | int, float | number | + +-------------------+---------------+ + | True | true | + +-------------------+---------------+ + | False | false | + +-------------------+---------------+ + | None | null | + +-------------------+---------------+ + + To extend this to recognize other objects, subclass and implement a + ``.default()`` method with another method that returns a serializable + object for ``o`` if possible, otherwise it should call the superclass + implementation (to raise ``TypeError``). + + """ + item_separator = ', ' + key_separator = ': ' + def __init__(self, *, skipkeys=False, ensure_ascii=True, + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, default=None): + """Constructor for JSONEncoder, with sensible defaults. + + If skipkeys is false, then it is a TypeError to attempt + encoding of keys that are not str, int, float or None. If + skipkeys is True, such items are simply skipped. + + If ensure_ascii is true, the output is guaranteed to be str + objects with all incoming non-ASCII characters escaped. If + ensure_ascii is false, the output can contain non-ASCII characters. + + If check_circular is true, then lists, dicts, and custom encoded + objects will be checked for circular references during encoding to + prevent an infinite recursion (which would cause an OverflowError). + Otherwise, no such check takes place. + + If allow_nan is true, then NaN, Infinity, and -Infinity will be + encoded as such. This behavior is not JSON specification compliant, + but is consistent with most JavaScript based encoders and decoders. + Otherwise, it will be a ValueError to encode such floats. + + If sort_keys is true, then the output of dictionaries will be + sorted by key; this is useful for regression tests to ensure + that JSON serializations can be compared on a day-to-day basis. + + If indent is a non-negative integer, then JSON array + elements and object members will be pretty-printed with that + indent level. An indent level of 0 will only insert newlines. + None is the most compact representation. + + If specified, separators should be an (item_separator, key_separator) + tuple. The default is (', ', ': ') if *indent* is ``None`` and + (',', ': ') otherwise. To get the most compact JSON representation, + you should specify (',', ':') to eliminate whitespace. + + If specified, default is a function that gets called for objects + that can't otherwise be serialized. It should return a JSON encodable + version of the object or raise a ``TypeError``. + + """ + + self.skipkeys = skipkeys + self.ensure_ascii = ensure_ascii + self.check_circular = check_circular + self.allow_nan = allow_nan + self.sort_keys = sort_keys + self.indent = indent + if separators is not None: + self.item_separator, self.key_separator = separators + elif indent is not None: + self.item_separator = ',' + if default is not None: + self.default = default + + def default(self, o): + """Implement this method in a subclass such that it returns + a serializable object for ``o``, or calls the base implementation + (to raise a ``TypeError``). + + For example, to support arbitrary iterators, you could + implement default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + # Let the base class default method raise the TypeError + return JSONEncoder.default(self, o) + + """ + raise TypeError("Object of type '%s' is not JSON serializable" % + o.__class__.__name__) + + def encode(self, o): + """Return a JSON string representation of a Python data structure. + + >>> from json.encoder import JSONEncoder + >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) + '{"foo": ["bar", "baz"]}' + + """ + # This is for extremely simple cases and benchmarks. + if isinstance(o, str): + if self.ensure_ascii: + return encode_basestring_ascii(o) + else: + return encode_basestring(o) + # This doesn't pass the iterator directly to ''.join() because the + # exceptions aren't as detailed. The list call should be roughly + # equivalent to the PySequence_Fast that ''.join() would do. + chunks = self.iterencode(o, _one_shot=True) + if not isinstance(chunks, (list, tuple)): + chunks = list(chunks) + return ''.join(chunks) + + def iterencode(self, o, _one_shot=False): + """Encode the given object and yield each string + representation as available. + + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + + """ + if self.check_circular: + markers = {} + else: + markers = None + if self.ensure_ascii: + _encoder = encode_basestring_ascii + else: + _encoder = encode_basestring + + def floatstr(o, allow_nan=self.allow_nan, + _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY): + # Check for specials. Note that this type of test is processor + # and/or platform-specific, so do tests which don't depend on the + # internals. + + if o != o: + text = 'NaN' + elif o == _inf: + text = 'Infinity' + elif o == _neginf: + text = '-Infinity' + else: + return _repr(o) + + if not allow_nan: + raise ValueError( + "Out of range float values are not JSON compliant: " + + repr(o)) + + return text + + + if (_one_shot and c_make_encoder is not None + and self.indent is None): + _iterencode = c_make_encoder( + markers, self.default, _encoder, self.indent, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, self.allow_nan) + else: + _iterencode = _make_iterencode( + markers, self.default, _encoder, self.indent, floatstr, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, _one_shot) + return _iterencode(o, 0) + +def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, + _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + ## HACK: hand-optimized bytecode; turn globals into locals + ValueError=ValueError, + dict=dict, + float=float, + id=id, + int=int, + isinstance=isinstance, + list=list, + str=str, + tuple=tuple, + _intstr=int.__str__ + ): + + if _indent is not None and not isinstance(_indent, str): + _indent = ' ' * _indent + + def _iterencode_list(lst, _current_indent_level): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + buf = '[' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + _indent * _current_indent_level + separator = _item_separator + newline_indent + buf += newline_indent + else: + newline_indent = None + separator = _item_separator + first = True + for value in lst: + if first: + first = False + else: + buf = separator + if isinstance(value, str): + yield buf + _encoder(value) + elif value is None: + yield buf + 'null' + elif value is True: + yield buf + 'true' + elif value is False: + yield buf + 'false' + elif isinstance(value, int): + # Subclasses of int/float may override __str__, but we still + # want to encode them as integers/floats in JSON. One example + # within the standard library is IntEnum. + yield buf + _intstr(value) + elif isinstance(value, float): + # see comment above for int + yield buf + _floatstr(value) + else: + yield buf + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + yield from chunks + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + _indent * _current_indent_level + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(dct, _current_indent_level): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + _indent * _current_indent_level + item_separator = _item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + item_separator = _item_separator + first = True + if _sort_keys: + items = sorted(dct.items(), key=lambda kv: kv[0]) + else: + items = dct.items() + for key, value in items: + if isinstance(key, str): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. + elif isinstance(key, float): + # see comment for int/float in _make_iterencode + key = _floatstr(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif isinstance(key, int): + # see comment for int/float in _make_iterencode + key = _intstr(key) + elif _skipkeys: + continue + else: + raise TypeError("key " + repr(key) + " is not a string") + if first: + first = False + else: + yield item_separator + yield _encoder(key) + yield _key_separator + if isinstance(value, str): + yield _encoder(value) + elif value is None: + yield 'null' + elif value is True: + yield 'true' + elif value is False: + yield 'false' + elif isinstance(value, int): + # see comment for int/float in _make_iterencode + yield _intstr(value) + elif isinstance(value, float): + # see comment for int/float in _make_iterencode + yield _floatstr(value) + else: + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + yield from chunks + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + _indent * _current_indent_level + yield '}' + if markers is not None: + del markers[markerid] + + def _iterencode(o, _current_indent_level): + if isinstance(o, str): + yield _encoder(o) + elif o is None: + yield 'null' + elif o is True: + yield 'true' + elif o is False: + yield 'false' + elif isinstance(o, int): + # see comment for int/float in _make_iterencode + yield _intstr(o) + elif isinstance(o, float): + # see comment for int/float in _make_iterencode + yield _floatstr(o) + elif isinstance(o, (list, tuple)): + yield from _iterencode_list(o, _current_indent_level) + elif isinstance(o, dict): + yield from _iterencode_dict(o, _current_indent_level) + else: + if markers is not None: + markerid = id(o) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + o = _default(o) + yield from _iterencode(o, _current_indent_level) + if markers is not None: + del markers[markerid] + return _iterencode + diff --git a/modules/language/python/module/json/scanner.py b/modules/language/python/module/json/scanner.py new file mode 100644 index 0000000..bd6a91a --- /dev/null +++ b/modules/language/python/module/json/scanner.py @@ -0,0 +1,72 @@ +module(json, scanner) + +"""JSON token scanner +""" +import re +c_make_scanner = None + +__all__ = ['make_scanner'] + +NUMBER_RE = re.compile( + r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', + (re.VERBOSE | re.MULTILINE | re.DOTALL)) + +def py_make_scanner(context): + parse_object = context.parse_object + parse_array = context.parse_array + parse_string = context.parse_string + match_number = NUMBER_RE.match + strict = context.strict + parse_float = context.parse_float + parse_int = context.parse_int + parse_constant = context.parse_constant + object_hook = context.object_hook + object_pairs_hook = context.object_pairs_hook + memo = context.memo + + def _scan_once(string, idx): + try: + nextchar = string[idx] + except IndexError: + raise StopIteration + + if nextchar == '"': + return parse_string(string, idx + 1, strict) + elif nextchar == '{': + return parse_object((string, idx + 1), strict, + _scan_once, object_hook, object_pairs_hook, memo) + elif nextchar == '[': + return parse_array((string, idx + 1), _scan_once) + elif nextchar == 'n' and string[idx:idx + 4] == 'null': + return None, idx + 4 + elif nextchar == 't' and string[idx:idx + 4] == 'true': + return True, idx + 4 + elif nextchar == 'f' and string[idx:idx + 5] == 'false': + return False, idx + 5 + + m = match_number(string, idx) + if m is not None: + integer, frac, exp = m.groups() + if frac or exp: + res = parse_float(integer + (frac or '') + (exp or '')) + else: + res = parse_int(integer) + return res, m.end() + elif nextchar == 'N' and string[idx:idx + 3] == 'NaN': + return parse_constant('NaN'), idx + 3 + elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity': + return parse_constant('Infinity'), idx + 8 + elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': + return parse_constant('-Infinity'), idx + 9 + else: + raise StopIteration + + def scan_once(string, idx): + try: + return _scan_once(string, idx) + finally: + memo.clear() + + return _scan_once + +make_scanner = c_make_scanner or py_make_scanner diff --git a/modules/language/python/module/json/tool.py b/modules/language/python/module/json/tool.py new file mode 100644 index 0000000..c55a4b4 --- /dev/null +++ b/modules/language/python/module/json/tool.py @@ -0,0 +1,52 @@ +module(json,tool) + +r"""Command-line tool to validate and pretty-print JSON + +Usage:: + + $ echo '{"json":"obj"}' | python -m json.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m json.tool + Expecting property name enclosed in double quotes: line 1 column 3 (char 2) + +""" +import argparse +import collections +import json +import sys + + +def main(): + prog = 'python -m json.tool' + description = ('A simple command line interface for json module ' + 'to validate and pretty-print JSON objects.') + parser = argparse.ArgumentParser(prog=prog, description=description) + parser.add_argument('infile', nargs='?', type=argparse.FileType(), + help='a JSON file to be validated or pretty-printed') + parser.add_argument('outfile', nargs='?', type=argparse.FileType('w'), + help='write the output of infile to outfile') + parser.add_argument('--sort-keys', action='store_true', default=False, + help='sort the output of dictionaries alphabetically by key') + options = parser.parse_args() + + infile = options.infile or sys.stdin + outfile = options.outfile or sys.stdout + sort_keys = options.sort_keys + with infile: + try: + if sort_keys: + obj = json.load(infile) + else: + obj = json.load(infile, + object_pairs_hook=collections.OrderedDict) + except ValueError as e: + raise SystemExit(e) + with outfile: + json.dump(obj, outfile, sort_keys=sort_keys, indent=4) + outfile.write('\n') + + +if __name__ == '__main__': + main() diff --git a/modules/language/python/module/string.scm b/modules/language/python/module/string.scm index 63adfcd..3255d99 100644 --- a/modules/language/python/module/string.scm +++ b/modules/language/python/module/string.scm @@ -259,7 +259,9 @@ (f-scope fieldName))))))))) -(define e (f-seq (ff* (f-or! (tag fieldName) nontag)) f-eof)) +(define e (f-seq (ff* (f-or! (tag (mk-token (f-scope fieldName))) + nontag)) + f-eof)) (set! (@@ (parser stis-parser lang python3-parser) f-formatter) tag) @@ -364,7 +366,7 @@ (() f))))) (_ - (throw TypeError "wromg field name format"))))) + (throw (TypeError (+ "wrong field name format" field_name))))))) (define get_value (lambda (self key args kwargs) @@ -400,7 +402,7 @@ ((equal? conversion "a") (ascii value)) (else - (throw TypeError "conversion" conversion)))))) + (throw (TypeError (+ "conversion " conversion)))))))) (define (ascii x) (bytes x)) diff --git a/modules/language/python/number.scm b/modules/language/python/number.scm index 6ede96c..089095d 100644 --- a/modules/language/python/number.scm +++ b/modules/language/python/number.scm @@ -460,9 +460,12 @@ (define (projc? x) (if (number? x) - (if (not (complex? x)) - x - #f) + (cond + ((or (integer? x) (rational? x)) + (exact->inexact x)) + ((real? x) + x) + (raise (ValueError "cannot make a float out of a complex"))) (and (or (is-a? x <py-complex>) (is-a? x <py-int>) @@ -479,14 +482,26 @@ (let lp ((n n)) (cond ((projc? n) => - (lambda (n) - (slot-set! self 'x n))) + (lambda (x) x)) ((string? n) - (lp (string->number n))) - (else - (aif it (slot-ref n '__float__) - (slot-set! self 'x it) - (raise ValueError "could not make float from " n))))))))) + (cond + ((equal? n "nan") + (nan)) + ((equal? n "inf") + (inf)) + ((equal? n "-inf") + (- (inf))) + (else + (string->number n)))) + ((is-a? n <py-float>) + (slot-ref n '__float__))))))) + + (define __new__ + (lambda (cls a . l) + (__init__ cls a)))) + + + (name-object float) |