WebKitTools/Scripts/webkitpy/thirdparty/simplejson/encoder.py
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 """
       
     2 Implementation of JSONEncoder
       
     3 """
       
     4 import re
       
     5 try:
       
     6     from simplejson import _speedups
       
     7 except ImportError:
       
     8     _speedups = None
       
     9 
       
    10 ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]')
       
    11 ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])')
       
    12 ESCAPE_DCT = {
       
    13     # escape all forward slashes to prevent </script> attack
       
    14     '/': '\\/',
       
    15     '\\': '\\\\',
       
    16     '"': '\\"',
       
    17     '\b': '\\b',
       
    18     '\f': '\\f',
       
    19     '\n': '\\n',
       
    20     '\r': '\\r',
       
    21     '\t': '\\t',
       
    22 }
       
    23 for i in range(0x20):
       
    24     ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
       
    25 
       
    26 # assume this produces an infinity on all machines (probably not guaranteed)
       
    27 INFINITY = float('1e66666')
       
    28 
       
    29 def floatstr(o, allow_nan=True):
       
    30     # Check for specials.  Note that this type of test is processor- and/or
       
    31     # platform-specific, so do tests which don't depend on the internals.
       
    32 
       
    33     if o != o:
       
    34         text = 'NaN'
       
    35     elif o == INFINITY:
       
    36         text = 'Infinity'
       
    37     elif o == -INFINITY:
       
    38         text = '-Infinity'
       
    39     else:
       
    40         return repr(o)
       
    41 
       
    42     if not allow_nan:
       
    43         raise ValueError("Out of range float values are not JSON compliant: %r"
       
    44             % (o,))
       
    45 
       
    46     return text
       
    47 
       
    48 
       
    49 def encode_basestring(s):
       
    50     """
       
    51     Return a JSON representation of a Python string
       
    52     """
       
    53     def replace(match):
       
    54         return ESCAPE_DCT[match.group(0)]
       
    55     return '"' + ESCAPE.sub(replace, s) + '"'
       
    56 
       
    57 def encode_basestring_ascii(s):
       
    58     def replace(match):
       
    59         s = match.group(0)
       
    60         try:
       
    61             return ESCAPE_DCT[s]
       
    62         except KeyError:
       
    63             n = ord(s)
       
    64             if n < 0x10000:
       
    65                 return '\\u%04x' % (n,)
       
    66             else:
       
    67                 # surrogate pair
       
    68                 n -= 0x10000
       
    69                 s1 = 0xd800 | ((n >> 10) & 0x3ff)
       
    70                 s2 = 0xdc00 | (n & 0x3ff)
       
    71                 return '\\u%04x\\u%04x' % (s1, s2)
       
    72     return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
       
    73         
       
    74 try:
       
    75     encode_basestring_ascii = _speedups.encode_basestring_ascii
       
    76     _need_utf8 = True
       
    77 except AttributeError:
       
    78     _need_utf8 = False
       
    79 
       
    80 class JSONEncoder(object):
       
    81     """
       
    82     Extensible JSON <http://json.org> encoder for Python data structures.
       
    83 
       
    84     Supports the following objects and types by default:
       
    85     
       
    86     +-------------------+---------------+
       
    87     | Python            | JSON          |
       
    88     +===================+===============+
       
    89     | dict              | object        |
       
    90     +-------------------+---------------+
       
    91     | list, tuple       | array         |
       
    92     +-------------------+---------------+
       
    93     | str, unicode      | string        |
       
    94     +-------------------+---------------+
       
    95     | int, long, float  | number        |
       
    96     +-------------------+---------------+
       
    97     | True              | true          |
       
    98     +-------------------+---------------+
       
    99     | False             | false         |
       
   100     +-------------------+---------------+
       
   101     | None              | null          |
       
   102     +-------------------+---------------+
       
   103 
       
   104     To extend this to recognize other objects, subclass and implement a
       
   105     ``.default()`` method with another method that returns a serializable
       
   106     object for ``o`` if possible, otherwise it should call the superclass
       
   107     implementation (to raise ``TypeError``).
       
   108     """
       
   109     __all__ = ['__init__', 'default', 'encode', 'iterencode']
       
   110     item_separator = ', '
       
   111     key_separator = ': '
       
   112     def __init__(self, skipkeys=False, ensure_ascii=True,
       
   113             check_circular=True, allow_nan=True, sort_keys=False,
       
   114             indent=None, separators=None, encoding='utf-8'):
       
   115         """
       
   116         Constructor for JSONEncoder, with sensible defaults.
       
   117 
       
   118         If skipkeys is False, then it is a TypeError to attempt
       
   119         encoding of keys that are not str, int, long, float or None.  If
       
   120         skipkeys is True, such items are simply skipped.
       
   121 
       
   122         If ensure_ascii is True, the output is guaranteed to be str
       
   123         objects with all incoming unicode characters escaped.  If
       
   124         ensure_ascii is false, the output will be unicode object.
       
   125 
       
   126         If check_circular is True, then lists, dicts, and custom encoded
       
   127         objects will be checked for circular references during encoding to
       
   128         prevent an infinite recursion (which would cause an OverflowError).
       
   129         Otherwise, no such check takes place.
       
   130 
       
   131         If allow_nan is True, then NaN, Infinity, and -Infinity will be
       
   132         encoded as such.  This behavior is not JSON specification compliant,
       
   133         but is consistent with most JavaScript based encoders and decoders.
       
   134         Otherwise, it will be a ValueError to encode such floats.
       
   135 
       
   136         If sort_keys is True, then the output of dictionaries will be
       
   137         sorted by key; this is useful for regression tests to ensure
       
   138         that JSON serializations can be compared on a day-to-day basis.
       
   139 
       
   140         If indent is a non-negative integer, then JSON array
       
   141         elements and object members will be pretty-printed with that
       
   142         indent level.  An indent level of 0 will only insert newlines.
       
   143         None is the most compact representation.
       
   144 
       
   145         If specified, separators should be a (item_separator, key_separator)
       
   146         tuple. The default is (', ', ': '). To get the most compact JSON
       
   147         representation you should specify (',', ':') to eliminate whitespace.
       
   148 
       
   149         If encoding is not None, then all input strings will be
       
   150         transformed into unicode using that encoding prior to JSON-encoding. 
       
   151         The default is UTF-8.
       
   152         """
       
   153 
       
   154         self.skipkeys = skipkeys
       
   155         self.ensure_ascii = ensure_ascii
       
   156         self.check_circular = check_circular
       
   157         self.allow_nan = allow_nan
       
   158         self.sort_keys = sort_keys
       
   159         self.indent = indent
       
   160         self.current_indent_level = 0
       
   161         if separators is not None:
       
   162             self.item_separator, self.key_separator = separators
       
   163         self.encoding = encoding
       
   164 
       
   165     def _newline_indent(self):
       
   166         return '\n' + (' ' * (self.indent * self.current_indent_level))
       
   167 
       
   168     def _iterencode_list(self, lst, markers=None):
       
   169         if not lst:
       
   170             yield '[]'
       
   171             return
       
   172         if markers is not None:
       
   173             markerid = id(lst)
       
   174             if markerid in markers:
       
   175                 raise ValueError("Circular reference detected")
       
   176             markers[markerid] = lst
       
   177         yield '['
       
   178         if self.indent is not None:
       
   179             self.current_indent_level += 1
       
   180             newline_indent = self._newline_indent()
       
   181             separator = self.item_separator + newline_indent
       
   182             yield newline_indent
       
   183         else:
       
   184             newline_indent = None
       
   185             separator = self.item_separator
       
   186         first = True
       
   187         for value in lst:
       
   188             if first:
       
   189                 first = False
       
   190             else:
       
   191                 yield separator
       
   192             for chunk in self._iterencode(value, markers):
       
   193                 yield chunk
       
   194         if newline_indent is not None:
       
   195             self.current_indent_level -= 1
       
   196             yield self._newline_indent()
       
   197         yield ']'
       
   198         if markers is not None:
       
   199             del markers[markerid]
       
   200 
       
   201     def _iterencode_dict(self, dct, markers=None):
       
   202         if not dct:
       
   203             yield '{}'
       
   204             return
       
   205         if markers is not None:
       
   206             markerid = id(dct)
       
   207             if markerid in markers:
       
   208                 raise ValueError("Circular reference detected")
       
   209             markers[markerid] = dct
       
   210         yield '{'
       
   211         key_separator = self.key_separator
       
   212         if self.indent is not None:
       
   213             self.current_indent_level += 1
       
   214             newline_indent = self._newline_indent()
       
   215             item_separator = self.item_separator + newline_indent
       
   216             yield newline_indent
       
   217         else:
       
   218             newline_indent = None
       
   219             item_separator = self.item_separator
       
   220         first = True
       
   221         if self.ensure_ascii:
       
   222             encoder = encode_basestring_ascii
       
   223         else:
       
   224             encoder = encode_basestring
       
   225         allow_nan = self.allow_nan
       
   226         if self.sort_keys:
       
   227             keys = dct.keys()
       
   228             keys.sort()
       
   229             items = [(k, dct[k]) for k in keys]
       
   230         else:
       
   231             items = dct.iteritems()
       
   232         _encoding = self.encoding
       
   233         _do_decode = (_encoding is not None
       
   234             and not (_need_utf8 and _encoding == 'utf-8'))
       
   235         for key, value in items:
       
   236             if isinstance(key, str):
       
   237                 if _do_decode:
       
   238                     key = key.decode(_encoding)
       
   239             elif isinstance(key, basestring):
       
   240                 pass
       
   241             # JavaScript is weakly typed for these, so it makes sense to
       
   242             # also allow them.  Many encoders seem to do something like this.
       
   243             elif isinstance(key, float):
       
   244                 key = floatstr(key, allow_nan)
       
   245             elif isinstance(key, (int, long)):
       
   246                 key = str(key)
       
   247             elif key is True:
       
   248                 key = 'true'
       
   249             elif key is False:
       
   250                 key = 'false'
       
   251             elif key is None:
       
   252                 key = 'null'
       
   253             elif self.skipkeys:
       
   254                 continue
       
   255             else:
       
   256                 raise TypeError("key %r is not a string" % (key,))
       
   257             if first:
       
   258                 first = False
       
   259             else:
       
   260                 yield item_separator
       
   261             yield encoder(key)
       
   262             yield key_separator
       
   263             for chunk in self._iterencode(value, markers):
       
   264                 yield chunk
       
   265         if newline_indent is not None:
       
   266             self.current_indent_level -= 1
       
   267             yield self._newline_indent()
       
   268         yield '}'
       
   269         if markers is not None:
       
   270             del markers[markerid]
       
   271 
       
   272     def _iterencode(self, o, markers=None):
       
   273         if isinstance(o, basestring):
       
   274             if self.ensure_ascii:
       
   275                 encoder = encode_basestring_ascii
       
   276             else:
       
   277                 encoder = encode_basestring
       
   278             _encoding = self.encoding
       
   279             if (_encoding is not None and isinstance(o, str)
       
   280                     and not (_need_utf8 and _encoding == 'utf-8')):
       
   281                 o = o.decode(_encoding)
       
   282             yield encoder(o)
       
   283         elif o is None:
       
   284             yield 'null'
       
   285         elif o is True:
       
   286             yield 'true'
       
   287         elif o is False:
       
   288             yield 'false'
       
   289         elif isinstance(o, (int, long)):
       
   290             yield str(o)
       
   291         elif isinstance(o, float):
       
   292             yield floatstr(o, self.allow_nan)
       
   293         elif isinstance(o, (list, tuple)):
       
   294             for chunk in self._iterencode_list(o, markers):
       
   295                 yield chunk
       
   296         elif isinstance(o, dict):
       
   297             for chunk in self._iterencode_dict(o, markers):
       
   298                 yield chunk
       
   299         else:
       
   300             if markers is not None:
       
   301                 markerid = id(o)
       
   302                 if markerid in markers:
       
   303                     raise ValueError("Circular reference detected")
       
   304                 markers[markerid] = o
       
   305             for chunk in self._iterencode_default(o, markers):
       
   306                 yield chunk
       
   307             if markers is not None:
       
   308                 del markers[markerid]
       
   309 
       
   310     def _iterencode_default(self, o, markers=None):
       
   311         newobj = self.default(o)
       
   312         return self._iterencode(newobj, markers)
       
   313 
       
   314     def default(self, o):
       
   315         """
       
   316         Implement this method in a subclass such that it returns
       
   317         a serializable object for ``o``, or calls the base implementation
       
   318         (to raise a ``TypeError``).
       
   319 
       
   320         For example, to support arbitrary iterators, you could
       
   321         implement default like this::
       
   322             
       
   323             def default(self, o):
       
   324                 try:
       
   325                     iterable = iter(o)
       
   326                 except TypeError:
       
   327                     pass
       
   328                 else:
       
   329                     return list(iterable)
       
   330                 return JSONEncoder.default(self, o)
       
   331         """
       
   332         raise TypeError("%r is not JSON serializable" % (o,))
       
   333 
       
   334     def encode(self, o):
       
   335         """
       
   336         Return a JSON string representation of a Python data structure.
       
   337 
       
   338         >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
       
   339         '{"foo":["bar", "baz"]}'
       
   340         """
       
   341         # This is for extremely simple cases and benchmarks...
       
   342         if isinstance(o, basestring):
       
   343             if isinstance(o, str):
       
   344                 _encoding = self.encoding
       
   345                 if (_encoding is not None 
       
   346                         and not (_encoding == 'utf-8' and _need_utf8)):
       
   347                     o = o.decode(_encoding)
       
   348             return encode_basestring_ascii(o)
       
   349         # This doesn't pass the iterator directly to ''.join() because it
       
   350         # sucks at reporting exceptions.  It's going to do this internally
       
   351         # anyway because it uses PySequence_Fast or similar.
       
   352         chunks = list(self.iterencode(o))
       
   353         return ''.join(chunks)
       
   354 
       
   355     def iterencode(self, o):
       
   356         """
       
   357         Encode the given object and yield each string
       
   358         representation as available.
       
   359         
       
   360         For example::
       
   361             
       
   362             for chunk in JSONEncoder().iterencode(bigobject):
       
   363                 mysocket.write(chunk)
       
   364         """
       
   365         if self.check_circular:
       
   366             markers = {}
       
   367         else:
       
   368             markers = None
       
   369         return self._iterencode(o, markers)
       
   370 
       
   371 __all__ = ['JSONEncoder']