|
1 """Implementation of JSONDecoder |
|
2 """ |
|
3 |
|
4 import re |
|
5 import sys |
|
6 |
|
7 from json.scanner import Scanner, pattern |
|
8 try: |
|
9 from _json import scanstring as c_scanstring |
|
10 except ImportError: |
|
11 c_scanstring = None |
|
12 |
|
# Only the decoder class is listed as this module's public interface.
__all__ = ['JSONDecoder']
|
14 |
|
# Regex flags shared by the patterns compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE

# Non-finite float values, created once at import time.
NaN = float('nan')
PosInf = float('inf')
NegInf = float('-inf')
|
18 |
|
19 |
|
def linecol(doc, pos):
    """Translate a character offset into a ``(line, column)`` pair.

    ``doc`` is the text being decoded and ``pos`` is an index into it.
    Both returned numbers are 1-based, so ``pos == 0`` maps to ``(1, 1)``
    and the first character after a newline is column 1 of its line.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() yields -1 when no newline precedes pos, which makes the first
    # line count its columns from 1 exactly like every later line does.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
|
27 |
|
28 |
|
def errmsg(msg, doc, pos, end=None):
    """Build an error message saying where in ``doc`` a problem lies.

    ``pos`` is reported as line, column and character index.  When ``end``
    is given, the message describes the span from ``pos`` to ``end``
    instead of a single position.
    """
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        template = (
            '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})')
        return template.format(
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    template = '{0}: line {1} column {2} (char {3})'
    return template.format(msg, start_line, start_col, pos)
|
37 |
|
38 |
|
# Maps each literal constant token to the Python value it decodes to.
_CONSTANTS = {
    'null': None,
    'true': True,
    'false': False,
    'NaN': NaN,
    'Infinity': PosInf,
    '-Infinity': NegInf,
}
|
47 |
|
48 |
|
def JSONConstant(match, context, c=_CONSTANTS):
    """Convert a matched constant token into a Python value.

    If ``context`` has a non-None ``parse_constant`` attribute, it is called
    with the matched text; otherwise the text is looked up in ``c``.
    Returns a ``(value, None)`` pair.
    """
    token = match.group(0)
    hook = getattr(context, 'parse_constant', None)
    value = c[token] if hook is None else hook(token)
    return value, None
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
|
58 |
|
59 |
|
def JSONNumber(match, context):
    """Convert a matched number token into a Python number.

    Tokens with a fraction or exponent are passed to ``context.parse_float``
    (``float`` when that is missing or falsy); all other tokens are passed
    to ``context.parse_int`` (``int`` when missing or falsy).  Returns a
    ``(value, None)`` pair.
    """
    # Re-match with this function's own regex over the same span to obtain
    # the integer / fraction / exponent groups.  The ``regex`` attribute is
    # presumably attached by the pattern(...) call below -- verify in
    # json.scanner.
    start, stop = match.span()
    parts = JSONNumber.regex.match(match.string, start, stop)
    integer, frac, exp = parts.groups()
    if not (frac or exp):
        to_int = getattr(context, 'parse_int', None) or int
        return to_int(integer), None
    to_float = getattr(context, 'parse_float', None) or float
    return to_float(integer + (frac or '') + (exp or '')), None
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
|
71 |
|
72 |
|
# Lazily matches a run of ordinary characters (group 1) followed by the
# first quote, backslash or control character in \x00-\x1f (group 2).
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes: the character that follows a backslash and the
# text it stands for.
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

# Encoding used by py_scanstring when the caller passes None.
DEFAULT_ENCODING = "utf-8"
|
80 |
|
81 |
|
def _decode_hex4(esc):
    """Return the integer value of ``esc``, which must be 4 hex digits.

    Raises ValueError for anything else.  The digits are checked explicitly
    because ``int(text, 16)`` also tolerates signs, surrounding whitespace
    and a ``0x`` prefix, none of which are valid in a ``\\uXXXX`` escape.
    """
    # strip() leaves a non-empty remainder exactly when some character is
    # not a hex digit.
    if len(esc) != 4 or esc.strip('0123456789abcdefABCDEF'):
        raise ValueError
    return int(esc, 16)


def py_scanstring(s, end, encoding=None, strict=True,
                  _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan a JSON string in ``s``, starting right after its opening quote.

    ``end`` is the index of the first character after the opening quote.
    ``encoding`` (``DEFAULT_ENCODING`` when None) is used to decode chunks
    that are not already ``unicode``.  When ``strict`` is true, a control
    character inside the string raises ValueError; otherwise it is kept.
    ``_b`` and ``_m`` are pre-bound lookups and are not meant to be passed.

    Returns ``(text, index)`` where ``text`` is the decoded ``unicode``
    string and ``index`` is the position just past the closing quote.

    Raises ValueError for an unterminated string, an unknown backslash
    escape, a malformed ``\\uXXXX`` escape, or (on wide builds) a high
    surrogate that is not followed by a valid low-surrogate escape.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used only for error messages.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        # Ordinary text up to the terminator, then the terminator itself
        # (a quote, a backslash or a control character).
        content, terminator = chunk.groups()
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            # Simple one-character escape.
            try:
                m = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # Unicode escape: 'u' followed by exactly four hex digits.
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            try:
                uni = _decode_hex4(s[end + 1:end + 5])
                if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                    # High surrogate on a wide build: a second escape
                    # holding the low surrogate must follow immediately.
                    msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                    if s[end + 5:end + 7] != '\\u':
                        raise ValueError
                    uni2 = _decode_hex4(s[end + 7:end + 11])
                    # Without this range check an arbitrary second escape
                    # would be folded into a wrong code point.
                    if not 0xdc00 <= uni2 <= 0xdfff:
                        raise ValueError
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    next_end += 6
                m = unichr(uni)
            except ValueError:
                raise ValueError(errmsg(msg, s, end))
            end = next_end
        _append(m)
    return u''.join(chunks), end
|
144 |
|
145 |
|
# Prefer the C implementation from _json when it could be imported;
# otherwise fall back to the pure-Python scanner.
scanstring = py_scanstring if c_scanstring is None else c_scanstring
|
151 |
|
def JSONString(match, context):
    """Decode the string literal whose opening quote ``match`` consumed.

    ``encoding`` and ``strict`` are read from ``context`` (defaulting to
    ``None`` and ``True``) and handed to ``scanstring``, whose result is
    returned unchanged.
    """
    source = match.string
    after_quote = match.end()
    return scanstring(
        source,
        after_quote,
        getattr(context, 'encoding', None),
        getattr(context, 'strict', True),
    )
pattern(r'"')(JSONString)
|
157 |
|
158 |
|
# Matches a (possibly empty) run of whitespace; its match method is used in
# this module to skip whitespace between tokens.
WHITESPACE = re.compile(r'\s*', FLAGS)
|
160 |
|
161 |
|
def JSONObject(match, context, _w=WHITESPACE.match):
    """Parse the JSON object whose opening brace ``match`` consumed.

    Keys are read with ``scanstring`` and values with ``JSONScanner``.
    Returns ``(result, end)`` where ``end`` is the index just past the
    closing brace.  ``result`` is the dict of decoded pairs, or whatever
    ``context.object_hook`` returns for that dict when a hook is set; the
    hook is applied to every object, including an empty one.

    Raises ValueError when a property name, a ``:`` or ``,`` delimiter, or
    a value is missing.
    """
    pairs = {}
    s = match.string
    end = _w(s, match.end()).end()
    nextchar = s[end:end + 1]
    object_hook = getattr(context, 'object_hook', None)
    # Trivial empty object.  The hook must still run here so that it sees
    # every decoded object, not only the non-empty ones.
    if nextchar == '}':
        if object_hook is not None:
            pairs = object_hook(pairs)
        return pairs, end + 1
    if nextchar != '"':
        raise ValueError(errmsg("Expecting property name", s, end))
    # Step past the opening quote of the first key.
    end += 1
    encoding = getattr(context, 'encoding', None)
    strict = getattr(context, 'strict', True)
    iterscan = JSONScanner.iterscan
    while True:
        key, end = scanstring(s, end, encoding, strict)
        end = _w(s, end).end()
        if s[end:end + 1] != ':':
            raise ValueError(errmsg("Expecting : delimiter", s, end))
        end = _w(s, end + 1).end()
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == '}':
            break
        if nextchar != ',':
            # end was already advanced, so end - 1 is the offending char.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
pattern(r'{')(JSONObject)
|
204 |
|
205 |
|
def JSONArray(match, context, _w=WHITESPACE.match):
    """Parse the JSON array whose opening bracket ``match`` consumed.

    Elements are decoded with ``JSONScanner``.  Returns ``(values, end)``
    where ``values`` is the list of decoded elements and ``end`` is the
    index just past the closing bracket.

    Raises ValueError when an element or a ``,`` delimiter is missing.
    """
    values = []
    s = match.string
    end = _w(s, match.end()).end()
    # Look-ahead for trivial empty array
    nextchar = s[end:end + 1]
    if nextchar == ']':
        return values, end + 1
    iterscan = JSONScanner.iterscan
    while True:
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        values.append(value)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        if nextchar != ',':
            # end was already advanced past the offending character, so
            # report end - 1, the same way JSONObject does.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
    return values, end
pattern(r'\[')(JSONArray)
|
231 |
|
232 |
|
# The token handlers of this module, in the order handed to the scanner.
ANYTHING = [JSONObject, JSONArray, JSONString, JSONConstant, JSONNumber]

# Module-level scanner; JSONObject and JSONArray use it for nested values.
JSONScanner = Scanner(ANYTHING)
|
242 |
|
243 |
|
class JSONDecoder(object):
    """Decoder for JSON <http://json.org> documents.

    Unless customised, JSON values become Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are accepted too and become the
    matching ``float`` values, even though the JSON spec does not define
    them.
    """

    _scanner = Scanner(ANYTHING)
    __all__ = ['__init__', 'decode', 'raw_decode']

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True):
        """Store the decoding options on the instance.

        ``encoding`` names the encoding used to interpret ``str`` input
        (utf-8 when None).  It has no effect on ``unicode`` input.  Only
        encodings that are a superset of ASCII currently work; text in any
        other encoding should be passed in as ``unicode``.

        ``object_hook``, when given, is called with the ``dict`` built for a
        decoded JSON object, and its return value is used in place of that
        ``dict``.  This allows custom deserialization (e.g. JSON-RPC class
        hinting).

        ``parse_float``, when given, is called with the text of every JSON
        float; the default is equivalent to ``float(num_str)``.  Useful for
        another datatype or parser (e.g. ``decimal.Decimal``).

        ``parse_int``, when given, is called with the text of every JSON
        integer; the default is equivalent to ``int(num_str)``.  Useful for
        another datatype or parser (e.g. ``float``).

        ``parse_constant``, when given, is called with one of the strings
        -Infinity, Infinity, NaN, null, true, false.  It can be used to
        raise an exception when invalid JSON numbers are encountered.

        ``strict`` is passed on to the string scanner; the pure-Python
        scanner in this module rejects control characters inside strings
        when it is true and keeps them when it is false.
        """
        self.strict = strict
        self.parse_constant = parse_constant
        self.parse_int = parse_int
        self.parse_float = parse_float
        self.object_hook = object_hook
        self.encoding = encoding

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python value for ``s``, a complete JSON document.

        ``s`` is a ``str`` or ``unicode`` instance.  Leading and trailing
        whitespace is skipped; anything else after the document raises
        ValueError.
        """
        start = _w(s, 0).end()
        obj, stop = self.raw_decode(s, idx=start)
        stop = _w(s, stop).end()
        if stop == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, stop, len(s)))

    def raw_decode(self, s, **kw):
        """Decode the JSON document that ``s`` begins with.

        ``s`` is a ``str`` or ``unicode`` instance.  Returns a 2-tuple of
        the decoded Python value and the index in ``s`` where the document
        ended, so input with extraneous trailing data can still be decoded.
        Keyword arguments are forwarded to the scanner; ``context`` defaults
        to this decoder.

        Raises ValueError when no document can be decoded.
        """
        if 'context' not in kw:
            kw['context'] = self
        try:
            result = self._scanner.iterscan(s, **kw).next()
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        obj, end = result
        return obj, end