You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1006 lines
35 KiB
Python
1006 lines
35 KiB
Python
# Copyright 2015 Google Inc. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import enum
|
|
import math
|
|
import re
|
|
from typing import (
|
|
Any,
|
|
Callable,
|
|
IO,
|
|
Iterable,
|
|
Mapping,
|
|
Optional,
|
|
Set,
|
|
Tuple,
|
|
Type,
|
|
Union,
|
|
)
|
|
import unicodedata
|
|
|
|
from json5.parser import Parser
|
|
|
|
|
|
# Used when encoding keys, below. Compiled lazily the first time
# `JSON5Encoder.is_reserved_word()` is called and cached here so the
# regex is only built once per process.
_reserved_word_re: Optional[re.Pattern] = None
|
|
|
|
|
|
class QuoteStyle(enum.Enum):
    """Controls how strings will be quoted during encoding.

    By default, for compatibility with the `json` module and older versions of
    `json5`, strings (not being used as keys and that are legal identifiers)
    will always be double-quoted, and any double quotes in the string will be
    escaped. This is `QuoteStyle.ALWAYS_DOUBLE`. If you pass
    `QuoteStyle.ALWAYS_SINGLE`, then strings will always be single-quoted, and
    any single quotes in the string will be escaped. If you pass
    `QuoteStyle.PREFER_DOUBLE`, then the behavior is the same as ALWAYS_DOUBLE
    and strings will be double-quoted *unless* the string contains more double
    quotes than single quotes, in which case the string will be single-quoted
    and single quotes will be escaped. If you pass `QuoteStyle.PREFER_SINGLE`,
    then the behavior is the same as ALWAYS_SINGLE and strings will be
    single-quoted *unless* the string contains more single quotes than double
    quotes, in which case the string will be double-quoted and any double
    quotes will be escaped.

    *Note:* PREFER_DOUBLE and PREFER_SINGLE can impact performance, since in
    order to know which encoding to use you have to iterate over the entire
    string to count the number of single and double quotes. The code guesses
    at an encoding while doing so, but if it guesses wrong, the entire string
    has to be re-encoded, which will slow things down. If you are very
    concerned about performance (a) you probably shouldn't be using this
    library in the first place, because it just isn't very fast, and (b) you
    should use ALWAYS_DOUBLE or ALWAYS_SINGLE, which won't have this issue.
    """

    ALWAYS_DOUBLE = 'always_double'
    ALWAYS_SINGLE = 'always_single'
    PREFER_DOUBLE = 'prefer_double'
    PREFER_SINGLE = 'prefer_single'
|
|
|
|
|
|
def load(
    fp: IO,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
) -> Any:
    """Deserialize ``fp`` (a ``.read()``-supporting file-like object
    containing a JSON5 document) to a Python object.

    The arguments mirror ``json.load()``, with these differences:

    - the `cls` keyword is ignored.
    - `allow_duplicate_keys`: by default this is True, for compatibility
      with ``json.load()``; when set to False, a ValueError is raised if
      an object contains duplicate keys.
    - `consume_trailing`: when True (the default), any characters left
      over after a valid value has been parsed must be whitespace; when
      False, parsing stops as soon as a valid value has been read and
      any remaining characters are ignored.
    - `start`: a zero-based offset at which to begin parsing. The entire
      file is read into memory before parsing begins, so repeatedly
      extracting values this way works but is not very efficient.

    Raises:
    - `ValueError` if given an invalid document (unlike the `json`
      module, which raises `json.JSONDecodeError`).
    - `UnicodeDecodeError` if given a byte string that is not legal in
      the given `encoding` (UTF-8 by default); this matches the `json`
      module.
    """
    contents = fp.read()
    value, error, _ = parse(
        contents,
        encoding=encoding,
        cls=cls,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        strict=strict,
        object_pairs_hook=object_pairs_hook,
        allow_duplicate_keys=allow_duplicate_keys,
        consume_trailing=consume_trailing,
        start=start,
    )
    if error:
        raise ValueError(error)
    return value
|
|
|
|
|
|
def loads(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
):
    """Deserialize ``s`` (a string containing a JSON5 document) to a
    Python object.

    The arguments mirror ``json.loads()``, with these differences:

    - the `cls` keyword is ignored.
    - `allow_duplicate_keys`: by default this is True, for compatibility
      with ``json.loads()``; when set to False, a ValueError is raised
      if an object contains duplicate keys.
    - `consume_trailing`: when True (the default), any characters left
      over after a valid value has been parsed must be whitespace; when
      False, parsing stops as soon as a valid value has been read and
      any remaining characters are ignored.
    - `start`: a zero-based offset into the string to start parsing at.

    Raises:
    - `ValueError` if given an invalid document (unlike the `json`
      module, which raises `json.JSONDecodeError`).
    - `UnicodeDecodeError` if given a byte string that is not legal in
      the given `encoding` (UTF-8 by default); this matches the `json`
      module.
    """
    value, error, _ = parse(
        s,
        encoding=encoding,
        cls=cls,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        strict=strict,
        object_pairs_hook=object_pairs_hook,
        allow_duplicate_keys=allow_duplicate_keys,
        consume_trailing=consume_trailing,
        start=start,
    )
    if error:
        raise ValueError(error)
    return value
|
|
|
|
|
|
def parse(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
):
    """Parse ``s`` and return ``(value, error_string, position)``.

    This behaves like `loads()` except that errors are returned instead
    of raised:

    - On success, `value` is the deserialized object, `error_string` is
      None, and `position` is one past the zero-based offset where the
      parser stopped reading.
    - On failure, `value` is None, `error_string` is the message of the
      exception that would otherwise have been raised, and `position`
      is the zero-based farthest offset where the parser hit an error.

    Passing `consume_trailing=False` and feeding the returned position
    back in as `start` lets you extract a sequence of values from a
    single string, e.g.:

        >>> import json5
        >>> s = '1 2 3 4'
        >>> values = []
        >>> start = 0
        >>> while True:
        ...     v, err, pos = json5.parse(
        ...         s, start=start, consume_trailing=False)
        ...     if err:
        ...         raise ValueError(err)
        ...     values.append(v)
        ...     start = pos
        ...     if start == len(s) or s[start:].isspace():
        ...         break
        >>> values
        [1, 2, 3, 4]

    Raises:
    - `UnicodeDecodeError` if given a byte string that is not legal in
      the given `encoding` (UTF-8 by default); this matches the `json`
      module.
    - `ValueError` only for an empty input string; all other errors are
      reported via the returned `error_string`.
    """
    assert cls is None, 'Custom decoders are not supported'

    if isinstance(s, bytes):
        s = s.decode(encoding or 'utf-8')

    if not s:
        raise ValueError('Empty strings are not legal JSON5')

    parser = Parser(s, '<string>', pos=start or 0)
    ast, err, pos = parser.parse(
        global_vars={'_strict': strict, '_consume_trailing': consume_trailing}
    )
    if err:
        return None, err, pos

    try:
        value = _convert(
            ast,
            object_hook=object_hook,
            parse_float=parse_float,
            parse_int=parse_int,
            parse_constant=parse_constant,
            object_pairs_hook=object_pairs_hook,
            allow_duplicate_keys=allow_duplicate_keys,
        )
    except ValueError as e:
        return None, str(e), pos
    return value, None, pos
|
|
|
|
|
|
def _convert(
    ast,
    object_hook,
    parse_float,
    parse_int,
    parse_constant,
    object_pairs_hook,
    allow_duplicate_keys,
):
    """Converts a parsed AST into Python values, applying the user hooks.

    Any of the parse_* arguments may be None, in which case the built-in
    conversions (float, int, and a float-based Infinity/NaN parser) are
    used instead.
    """

    def _parse_fp_constant(text):
        # Map JSON5's 'Infinity'/'NaN' spellings (possibly signed) onto
        # the literals that Python's float() accepts.
        return float(text.replace('Infinity', 'inf').replace('NaN', 'nan'))

    def _make_object(pairs):
        # Enforce key uniqueness first, then hand the pairs off to
        # whichever hook (if any) the caller supplied.
        if not allow_duplicate_keys:
            seen_keys = set()
            for key, _ in pairs:
                if key in seen_keys:
                    raise ValueError(f'Duplicate key "{key}" found in object')
                seen_keys.add(key)

        if object_pairs_hook:
            return object_pairs_hook(pairs)
        if object_hook:
            return object_hook(dict(pairs))
        return dict(pairs)

    return _walk_ast(
        ast,
        _make_object,
        parse_float or float,
        parse_int or int,
        parse_constant or _parse_fp_constant,
    )
|
|
|
|
|
|
def _walk_ast(
|
|
el,
|
|
dictify: Callable[[Iterable[Tuple[str, Any]]], Any],
|
|
parse_float,
|
|
parse_int,
|
|
parse_constant,
|
|
):
|
|
if el == 'None':
|
|
return None
|
|
if el == 'True':
|
|
return True
|
|
if el == 'False':
|
|
return False
|
|
ty, v = el
|
|
if ty == 'number':
|
|
if v.startswith('0x') or v.startswith('0X'):
|
|
return parse_int(v, base=16)
|
|
if '.' in v or 'e' in v or 'E' in v:
|
|
return parse_float(v)
|
|
if 'Infinity' in v or 'NaN' in v:
|
|
return parse_constant(v)
|
|
return parse_int(v)
|
|
if ty == 'string':
|
|
return v
|
|
if ty == 'object':
|
|
pairs = []
|
|
for key, val_expr in v:
|
|
val = _walk_ast(
|
|
val_expr, dictify, parse_float, parse_int, parse_constant
|
|
)
|
|
pairs.append((key, val))
|
|
return dictify(pairs)
|
|
if ty == 'array':
|
|
return [
|
|
_walk_ast(el, dictify, parse_float, parse_int, parse_constant)
|
|
for el in v
|
|
]
|
|
raise ValueError('unknown el: ' + el) # pragma: no cover
|
|
|
|
|
|
def dump(
    obj: Any,
    fp: IO,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: Optional[Type['JSON5Encoder']] = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
    **kw,
):
    """Serialize ``obj`` as a JSON5-formatted stream written to ``fp``,
    a ``.write()``-supporting file-like object.

    Accepts the same keyword arguments as ``dumps()``, below.

    Calling ``dump(obj, fp, quote_keys=True, trailing_commas=False, \
allow_duplicate_keys=True)``
    should produce exactly the same output as ``json.dump(obj, fp).``
    """
    # Encode everything first, then write once.
    text = dumps(
        obj=obj,
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        cls=cls,
        indent=indent,
        separators=separators,
        default=default,
        sort_keys=sort_keys,
        quote_keys=quote_keys,
        trailing_commas=trailing_commas,
        allow_duplicate_keys=allow_duplicate_keys,
        quote_style=quote_style,
        **kw,
    )
    fp.write(text)
|
|
|
|
|
|
def dumps(
    obj: Any,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: Optional[Type['JSON5Encoder']] = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
    **kw,
):
    """Serialize ``obj`` to a JSON5-formatted string.

    Accepts the same arguments as ``json.dumps()``, except that:

    - The ``encoding`` keyword is ignored; Unicode strings are always
      written.
    - Object keys that are legal identifiers are left unquoted unless
      you pass ``quote_keys=True``.
    - When lists and objects span multiple lines of output (i.e., when
      ``indent`` >= 0), the last item gets a trailing comma unless you
      pass ``trailing_commas=False``.
    - Numbers, booleans, and ``None`` used as dict keys are converted to
      the corresponding JSON string values, e.g. "1", "true", or
      "null". By default this matches the `json` module's behavior and
      can produce malformed JSON with duplicated keys if you mix key
      types that convert to the same string (e.g. ``{1: "foo",
      "1": "bar"}``); pass ``allow_duplicate_keys=False`` to raise an
      exception instead.
    - ``quote_style`` controls how strings are quoted; see the
      `QuoteStyle` class for details. Strings used as unquoted keys are
      unaffected. *Added in version 0.10.0.*

    Other keyword arguments are passed through to the encoder class so
    custom encoders can receive them; otherwise they are ignored, for
    some amount of forward-compatibility.

    *Note:* like the standard `json` module, ints and floats are encoded
    via ``int.__repr__``/``float.__repr__``, bypassing any custom
    ``__repr__`` on subclasses. To override that, subclass
    `JSON5Encoder`, override ``encode()`` to handle your type, and pass
    the subclass as ``cls``.

    Calling ``dumps(obj, quote_keys=True, trailing_commas=False, \
allow_duplicate_keys=True)``
    should produce exactly the same output as ``json.dumps(obj).``
    """
    encoder_cls = cls or JSON5Encoder
    encoder = encoder_cls(
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        indent=indent,
        separators=separators,
        default=default,
        sort_keys=sort_keys,
        quote_keys=quote_keys,
        trailing_commas=trailing_commas,
        allow_duplicate_keys=allow_duplicate_keys,
        quote_style=quote_style,
        **kw,
    )
    return encoder.encode(obj, seen=set(), level=0, as_key=False)
|
|
|
|
|
|
class JSON5Encoder:
    """Encodes Python values as JSON5-formatted strings; used by `dumps()`.

    Subclass this (and pass the subclass as `cls` to `dump()`/`dumps()`)
    to customize serialization; typically you override `encode()` or
    `default()`.
    """

    def __init__(
        self,
        *,
        skipkeys: bool = False,
        ensure_ascii: bool = True,
        check_circular: bool = True,
        allow_nan: bool = True,
        indent: Optional[Union[int, str]] = None,
        separators: Optional[Tuple[str, str]] = None,
        default: Optional[Callable[[Any], Any]] = None,
        sort_keys: bool = False,
        quote_keys: bool = False,
        trailing_commas: bool = True,
        allow_duplicate_keys: bool = True,
        quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
        **kw,
    ):
        """Provides a class that may be overridden to customize the behavior
        of `dumps()`. The keyword args are the same as for that function.

        *Added in version 0.10.0*"""
        # Ignore unrecognized keyword arguments in the hope of providing
        # some level of backwards- and forwards-compatibility.
        del kw

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.indent = indent
        self.separators = separators
        if separators is None:
            # Match the `json` module: a space after the comma only when
            # not indenting (the newline provides separation otherwise).
            separators = (', ', ': ') if indent is None else (',', ': ')
        self.item_separator, self.kv_separator = separators
        self.default_fn = default or _raise_type_error
        self.sort_keys = sort_keys
        self.quote_keys = quote_keys
        self.trailing_commas = trailing_commas
        self.allow_duplicate_keys = allow_duplicate_keys
        self.quote_style = quote_style

    def default(self, obj: Any) -> Any:
        """Provides a last-ditch option to encode a value that the encoder
        doesn't otherwise recognize, by converting `obj` to a value that
        *can* (and will) be serialized by the other methods in the class.

        Note: this must not return a serialized value (i.e., string)
        directly, as that'll result in a doubly-encoded value."""
        return self.default_fn(obj)

    def encode(
        self,
        obj: Any,
        seen: Set,
        level: int,
        *,
        as_key: bool,
    ) -> str:
        """Returns a JSON5-encoded version of an arbitrary object. This can
        be used to provide customized serialization of objects. Overridden
        methods of this class should handle their custom objects and then
        fall back to super.encode() if they've been passed a normal object.

        `seen` is used for duplicate object tracking when `check_circular`
        is True.

        `level` represents the current indentation level, which increases
        by one for each recursive invocation of encode (i.e., whenever
        we're encoding the values of a dict or a list).

        May raise `TypeError` if the object is the wrong type to be
        encoded (i.e., your custom routine can't handle it either), and
        `ValueError` if there's something wrong with the value, e.g.
        a float value of NaN when `allow_nan` is false.

        If `as_key` is true, the return value should be a double-quoted string
        representation of the object, unless obj is a string that can be an
        identifier (and quote_keys is false and obj isn't a reserved word).
        If the object should not be used as a key, `TypeError` should be
        raised; that allows the base implementation to implement `skipkeys`
        properly.
        """
        seen = seen or set()
        s = self._encode_basic_type(obj, as_key=as_key)
        if s is not None:
            return s

        if as_key:
            # Bug fix: this message used to be the literal
            # 'Invalid key f{obj}' due to a stray 'f' inside the braces.
            raise TypeError(f'Invalid key {obj}')
        return self._encode_non_basic_type(obj, seen, level)

    def _encode_basic_type(self, obj: Any, *, as_key: bool) -> Optional[str]:
        """Returns the encoding of a str/bool/None/int/float, or None if
        the object is not one of those basic types."""

        if isinstance(obj, str):
            return self._encode_str(obj, as_key=as_key)

        # Check for True/False before ints because True and False are
        # also considered ints and so would be represented as 1 and 0
        # if we did ints first.
        if obj is True:
            return '"true"' if as_key else 'true'
        if obj is False:
            return '"false"' if as_key else 'false'
        if obj is None:
            return '"null"' if as_key else 'null'

        if isinstance(obj, int):
            return self._encode_int(obj, as_key=as_key)

        if isinstance(obj, float):
            return self._encode_float(obj, as_key=as_key)

        return None

    def _encode_int(self, obj: int, *, as_key: bool) -> str:
        # Call int.__repr__ directly (not repr(obj)) to bypass custom
        # representations on int subclasses, matching the `json` module.
        s = int.__repr__(obj)
        return f'"{s}"' if as_key else s

    def _encode_float(self, obj: float, *, as_key: bool) -> str:
        # Infinity/-Infinity/NaN are only legal output if allow_nan is
        # set; everything else uses float.__repr__ (again bypassing
        # subclass representations, matching the `json` module).
        if obj == float('inf'):
            allowed = self.allow_nan
            s = 'Infinity'
        elif obj == float('-inf'):
            allowed = self.allow_nan
            s = '-Infinity'
        elif math.isnan(obj):
            allowed = self.allow_nan
            s = 'NaN'
        else:
            allowed = True
            s = float.__repr__(obj)

        if not allowed:
            # Bug fix: this was previously the literal string
            # 'Illegal JSON5 value: f{obj}' (missing f-string prefix),
            # so the offending value was never interpolated.
            raise ValueError(f'Illegal JSON5 value: {obj}')
        return f'"{s}"' if as_key else s

    def _encode_str(self, obj: str, *, as_key: bool) -> str:
        # Keys that are legal, non-reserved identifiers may be written
        # bare (unquoted) unless quote_keys forces quoting.
        if (
            as_key
            and self.is_identifier(obj)
            and not self.quote_keys
            and not self.is_reserved_word(obj)
        ):
            return obj

        return self._encode_quoted_str(obj, self.quote_style)

    def _encode_quoted_str(self, obj: str, quote_style: QuoteStyle) -> str:
        """Returns a quoted string with a minimal number of escaped quotes."""
        ret = []
        double_quotes_seen = 0
        single_quotes_seen = 0
        sq = "'"
        dq = '"'
        for ch in obj:
            if ch == dq:
                # At first we will guess at which quotes to escape. If
                # we guess wrong, we reencode the string below.
                double_quotes_seen += 1
                if quote_style in (
                    QuoteStyle.ALWAYS_DOUBLE,
                    QuoteStyle.PREFER_DOUBLE,
                ):
                    encoded_ch = self._escape_ch(dq)
                else:
                    encoded_ch = dq
            elif ch == sq:
                single_quotes_seen += 1
                if quote_style in (
                    QuoteStyle.ALWAYS_SINGLE,
                    QuoteStyle.PREFER_SINGLE,
                ):
                    encoded_ch = self._escape_ch(sq)
                else:
                    encoded_ch = sq
            elif ch == '\\':
                encoded_ch = self._escape_ch(ch)
            else:
                o = ord(ch)
                if o < 32:
                    # Control characters are always escaped.
                    encoded_ch = self._escape_ch(ch)
                elif o < 128:
                    encoded_ch = ch
                elif not self.ensure_ascii and ch not in ('\u2028', '\u2029'):
                    # U+2028/U+2029 are line terminators in JS and must
                    # always be escaped even when writing raw Unicode.
                    encoded_ch = ch
                else:
                    encoded_ch = self._escape_ch(ch)
            ret.append(encoded_ch)

        # We may have guessed wrong and need to reencode the string.
        if (
            double_quotes_seen > single_quotes_seen
            and quote_style == QuoteStyle.PREFER_DOUBLE
        ):
            return self._encode_quoted_str(obj, QuoteStyle.ALWAYS_SINGLE)
        if (
            single_quotes_seen > double_quotes_seen
            and quote_style == QuoteStyle.PREFER_SINGLE
        ):
            return self._encode_quoted_str(obj, QuoteStyle.ALWAYS_DOUBLE)

        if quote_style in (QuoteStyle.ALWAYS_DOUBLE, QuoteStyle.PREFER_DOUBLE):
            return '"' + ''.join(ret) + '"'
        return "'" + ''.join(ret) + "'"

    def _escape_ch(self, ch: str) -> str:
        """Returns the backslash-escaped representation of the char."""
        if ch == '\\':
            return '\\\\'
        if ch == "'":
            return r'\''
        if ch == '"':
            return r'\"'
        if ch == '\n':
            return r'\n'
        if ch == '\r':
            return r'\r'
        if ch == '\t':
            return r'\t'
        if ch == '\b':
            return r'\b'
        if ch == '\f':
            return r'\f'
        if ch == '\v':
            return r'\v'
        if ch == '\0':
            return r'\0'

        o = ord(ch)
        if o < 65536:
            return rf'\u{o:04x}'

        # Characters outside the BMP are written as a UTF-16 surrogate
        # pair of \uXXXX escapes.
        val = o - 0x10000
        high = 0xD800 + (val >> 10)
        low = 0xDC00 + (val & 0x3FF)
        return rf'\u{high:04x}\u{low:04x}'

    def _encode_non_basic_type(self, obj, seen: Set, level: int) -> str:
        # Basic types can't be recursive so we only check for circularity
        # on non-basic types. If for some reason the caller was using a
        # subclass of a basic type and wanted to check circularity on it,
        # it'd have to do so directly in a subclass of JSON5Encoder.
        if self.check_circular:
            i = id(obj)
            if i in seen:
                raise ValueError('Circular reference detected.')
            seen.add(i)

        # Ideally we'd use collections.abc.Mapping and collections.abc.Sequence
        # here, but for backwards-compatibility with potential old callers,
        # we only check for the two attributes we need in each case.
        if hasattr(obj, 'keys') and hasattr(obj, '__getitem__'):
            s = self._encode_dict(obj, seen, level + 1)
        elif hasattr(obj, '__getitem__') and hasattr(obj, '__iter__'):
            s = self._encode_array(obj, seen, level + 1)
        else:
            s = self.encode(self.default(obj), seen, level, as_key=False)
            assert s is not None

        if self.check_circular:
            seen.remove(i)
        return s

    def _encode_dict(self, obj: Any, seen: Set, level: int) -> str:
        if not obj:
            return '{}'

        indent_str, end_str = self._spacers(level)
        item_sep = self.item_separator + indent_str
        kv_sep = self.kv_separator

        if self.sort_keys:
            keys = sorted(obj.keys())
        else:
            keys = obj.keys()

        s = '{' + indent_str

        first_key = True
        new_keys = set()
        for key in keys:
            try:
                key_str = self.encode(key, seen, level, as_key=True)
            except TypeError:
                # `skipkeys` silently drops keys that can't be encoded.
                if self.skipkeys:
                    continue
                raise

            if not self.allow_duplicate_keys:
                # Compare the *encoded* keys so that, e.g., 1 and "1"
                # (which both encode to '"1"') are caught as duplicates.
                if key_str in new_keys:
                    raise ValueError(f'duplicate key {repr(key)}')
                new_keys.add(key_str)

            if first_key:
                first_key = False
            else:
                s += item_sep

            val_str = self.encode(obj[key], seen, level, as_key=False)
            s += key_str + kv_sep + val_str

        s += end_str + '}'
        return s

    def _encode_array(self, obj: Any, seen: Set, level: int) -> str:
        if not obj:
            return '[]'

        indent_str, end_str = self._spacers(level)
        item_sep = self.item_separator + indent_str
        return (
            '['
            + indent_str
            + item_sep.join(
                self.encode(el, seen, level, as_key=False) for el in obj
            )
            + end_str
            + ']'
        )

    def _spacers(self, level: int) -> Tuple[str, str]:
        """Returns (indent_str, end_str): the separator written before each
        item at this nesting level, and the text that closes the container
        (optional trailing comma plus the dedent for the closing bracket)."""
        if self.indent is not None:
            end_str = ''
            if self.trailing_commas:
                end_str = ','
            if isinstance(self.indent, int):
                if self.indent > 0:
                    indent_str = '\n' + ' ' * self.indent * level
                    end_str += '\n' + ' ' * self.indent * (level - 1)
                else:
                    indent_str = '\n'
                    end_str += '\n'
            else:
                indent_str = '\n' + self.indent * level
                end_str += '\n' + self.indent * (level - 1)
        else:
            indent_str = ''
            end_str = ''
        return indent_str, end_str

    def is_identifier(self, key: str) -> bool:
        """Returns whether the string could be used as a legal
        EcmaScript/JavaScript identifier.

        There should normally be no reason to override this, unless
        the definition of identifiers change in later versions of the
        JSON5 spec and this implementation hasn't been updated to handle
        the changes yet."""
        if not key or (
            not self._is_id_start(key[0]) and key[0] not in ('$', '_')
        ):
            return False
        for ch in key[1:]:
            if not self._is_id_continue(ch) and ch not in ('$', '_'):
                return False
        return True

    def _is_id_start(self, ch: str) -> bool:
        # Unicode general categories legal as an ID_Start per the
        # EcmaScript spec ('Li' was previously listed here but is not a
        # real Unicode category, so dropping it changes nothing).
        return unicodedata.category(ch) in (
            'Lu',
            'Ll',
            'Lt',
            'Lm',
            'Lo',
            'Nl',
        )

    def _is_id_continue(self, ch: str) -> bool:
        # ID_Start categories plus digits, combining marks, and
        # connector punctuation.
        return unicodedata.category(ch) in (
            'Lu',
            'Ll',
            'Lt',
            'Lm',
            'Lo',
            'Nl',
            'Nd',
            'Mn',
            'Mc',
            'Pc',
        )

    def is_reserved_word(self, key: str) -> bool:
        """Returns whether the key is a reserved word.

        There should normally be no need to override this, unless there
        have been reserved words added in later versions of the JSON5
        spec and this implementation has not yet been updated to handle
        the changes yet."""
        global _reserved_word_re
        if _reserved_word_re is None:
            # List taken from section 7.6.1 of ECMA-262, version 5.1.
            # https://262.ecma-international.org/5.1/#sec-7.6.1.
            # This includes currently reserved words, words reserved
            # for future use (both as of 5.1), null, true, and false.
            _reserved_word_re = re.compile(
                '('
                + '|'.join(
                    [
                        'break',
                        'case',
                        'catch',
                        'class',
                        'const',
                        'continue',
                        'debugger',
                        'default',
                        'delete',
                        'do',
                        'else',
                        'enum',
                        'export',
                        'extends',
                        'false',
                        'finally',
                        'for',
                        'function',
                        'if',
                        'implements',
                        'import',
                        'in',
                        'instanceof',
                        'interface',
                        'let',
                        'new',
                        'null',
                        'package',
                        'private',
                        'protected',
                        'public',
                        'return',
                        'static',
                        'super',
                        'switch',
                        'this',
                        'throw',
                        'true',
                        'try',
                        'typeof',
                        'var',
                        'void',
                        'while',
                        'with',
                        'yield',
                    ]
                )
                + ')$'
            )
        return _reserved_word_re.match(key) is not None
|
|
|
|
|
|
def _raise_type_error(obj) -> Any:
|
|
raise TypeError(f'{repr(obj)} is not JSON5 serializable')
|