|
1 /* csv module */ |
|
2 |
|
3 /* |
|
4 |
|
5 This module provides the low-level underpinnings of a CSV reading/writing |
|
6 module. Users should not use this module directly, but import the csv.py |
|
7 module instead. |
|
8 |
|
9 **** For people modifying this code, please note that as of this writing |
|
10 **** (2003-03-23), it is intended that this code should work with Python |
|
11 **** 2.2. |
|
12 |
|
13 */ |
|
14 |
|
15 #define MODULE_VERSION "1.0" |
|
16 |
|
17 #include "Python.h" |
|
18 #include "structmember.h" |
|
19 |
|
20 |
|
/* begin 2.2 compatibility macros */
/*
 * Each macro below is only defined when the Python headers did not
 * already provide it (every definition is guarded by #ifndef).
 */
#ifndef PyDoc_STRVAR
/* Define macros for inline documentation. */
#define PyDoc_VAR(name)         static char name[]
#define PyDoc_STRVAR(name,str)  PyDoc_VAR(name) = PyDoc_STR(str)
/* Doc strings collapse to "" unless WITH_DOC_STRINGS is defined. */
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str)          str
#else
#define PyDoc_STR(str)          ""
#endif
#endif  /* ifndef PyDoc_STRVAR */

/* Declaration prefix for the module init function (C or C++ build). */
#ifndef PyMODINIT_FUNC
#       if defined(__cplusplus)
#               define PyMODINIT_FUNC extern "C" void
#       else /* __cplusplus */
#               define PyMODINIT_FUNC void
#       endif /* __cplusplus */
#endif

/*
 * Py_CLEAR(op): if op is non-NULL, set op to NULL first and only then
 * drop the reference that it held.
 */
#ifndef Py_CLEAR
#define Py_CLEAR(op) \
    do { \
        if (op) { \
            PyObject *tmp = (PyObject *)(op); \
            (op) = NULL; \
            Py_DECREF(tmp); \
        } \
    } while (0)
#endif
/*
 * Py_VISIT(op): for use inside a traverse function.  It relies on the
 * enclosing function having variables named `visit` and `arg`, and it
 * returns from that function as soon as visit() yields non-zero.
 */
#ifndef Py_VISIT
#define Py_VISIT(op) \
    do { \
        if (op) { \
            int vret = visit((PyObject *)(op), arg); \
            if (vret) \
                return vret; \
        } \
    } while (0)
#endif

/* end 2.2 compatibility macros */
|
63 |
|
/* True when o is an instance of PyBaseString_Type (or a subtype). */
#define IS_BASESTRING(o) \
    PyObject_TypeCheck(o, &PyBaseString_Type)

/* Module-level state shared by the dialect, reader and writer code. */
static PyObject *error_obj;     /* CSV exception */
static PyObject *dialects;      /* Dialect registry */
static long field_limit = 128 * 1024;   /* max parsed field size */
|
70 |
|
/*
 * States of the reader's character-driven parser; parse_process_char()
 * switches on the current state for every input character.
 */
typedef enum {
    START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
    IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;
|
76 |
|
/* Quoting policies a dialect can select. */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
} QuoteStyle;

/* Pairs a QuoteStyle value with its symbolic name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;

/*
 * Table of all known quoting styles.  The final entry has a NULL name
 * and acts as the terminator that dialect_check_quoting() stops on.
 */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
|
93 |
|
/*
 * Instance layout of the Dialect type: the parsing/formatting options
 * after they have been converted to C values by dialect_new().
 * A '\0' in a char option means "not set" (see get_nullchar_as_None()).
 */
typedef struct {
    PyObject_HEAD

    int doublequote;            /* is " represented by ""? */
    char delimiter;             /* field separator */
    char quotechar;             /* quote character */
    char escapechar;            /* escape character */
    int skipinitialspace;       /* ignore spaces following delimiter? */
    PyObject *lineterminator;   /* string to write between records */
    int quoting;                /* style of quoting to write */

    int strict;                 /* raise exception on bad CSV */
} DialectObj;

/* Forward declaration; the type object is defined further down. */
staticforward PyTypeObject Dialect_Type;
|
109 |
|
/*
 * Instance layout of the reader type: the input iterator, the dialect
 * in force, and the parser's working state for the current record.
 */
typedef struct {
    PyObject_HEAD

    PyObject *input_iter;       /* iterate over this for input lines */

    DialectObj *dialect;        /* parsing dialect */

    PyObject *fields;           /* field list for current record */
    ParserState state;          /* current CSV parse state */
    char *field;                /* build current field in here */
    int field_size;             /* size of allocated buffer */
    int field_len;              /* length of current field */
    int numeric_field;          /* treat field as numeric */
    unsigned long line_num;     /* Source-file line number */
} ReaderObj;

/* Forward declaration; the type object is defined further down. */
staticforward PyTypeObject Reader_Type;

/* Exact type test (subtypes of Reader_Type do not match). */
#define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
|
129 |
|
/*
 * Instance layout of the writer type: the output callable, the dialect
 * in force, and the buffer in which one output record is assembled.
 */
typedef struct {
    PyObject_HEAD

    PyObject *writeline;        /* write output lines to this file */

    DialectObj *dialect;        /* parsing dialect */

    char *rec;                  /* buffer for parser.join */
    int rec_size;               /* size of allocated record */
    int rec_len;                /* length of record */
    int num_fields;             /* number of fields in record */
} WriterObj;

/* Forward declaration; the type object is defined later in the file. */
staticforward PyTypeObject Writer_Type;
|
144 |
|
145 /* |
|
146 * DIALECT class |
|
147 */ |
|
148 |
|
149 static PyObject * |
|
150 get_dialect_from_registry(PyObject * name_obj) |
|
151 { |
|
152 PyObject *dialect_obj; |
|
153 |
|
154 dialect_obj = PyDict_GetItem(dialects, name_obj); |
|
155 if (dialect_obj == NULL) { |
|
156 if (!PyErr_Occurred()) |
|
157 PyErr_Format(error_obj, "unknown dialect"); |
|
158 } |
|
159 else |
|
160 Py_INCREF(dialect_obj); |
|
161 return dialect_obj; |
|
162 } |
|
163 |
|
164 static PyObject * |
|
165 get_string(PyObject *str) |
|
166 { |
|
167 Py_XINCREF(str); |
|
168 return str; |
|
169 } |
|
170 |
|
171 static PyObject * |
|
172 get_nullchar_as_None(char c) |
|
173 { |
|
174 if (c == '\0') { |
|
175 Py_INCREF(Py_None); |
|
176 return Py_None; |
|
177 } |
|
178 else |
|
179 return PyString_FromStringAndSize((char*)&c, 1); |
|
180 } |
|
181 |
|
182 static PyObject * |
|
183 Dialect_get_lineterminator(DialectObj *self) |
|
184 { |
|
185 return get_string(self->lineterminator); |
|
186 } |
|
187 |
|
188 static PyObject * |
|
189 Dialect_get_escapechar(DialectObj *self) |
|
190 { |
|
191 return get_nullchar_as_None(self->escapechar); |
|
192 } |
|
193 |
|
194 static PyObject * |
|
195 Dialect_get_quotechar(DialectObj *self) |
|
196 { |
|
197 return get_nullchar_as_None(self->quotechar); |
|
198 } |
|
199 |
|
200 static PyObject * |
|
201 Dialect_get_quoting(DialectObj *self) |
|
202 { |
|
203 return PyInt_FromLong(self->quoting); |
|
204 } |
|
205 |
|
206 static int |
|
207 _set_bool(const char *name, int *target, PyObject *src, int dflt) |
|
208 { |
|
209 if (src == NULL) |
|
210 *target = dflt; |
|
211 else |
|
212 *target = PyObject_IsTrue(src); |
|
213 return 0; |
|
214 } |
|
215 |
|
216 static int |
|
217 _set_int(const char *name, int *target, PyObject *src, int dflt) |
|
218 { |
|
219 if (src == NULL) |
|
220 *target = dflt; |
|
221 else { |
|
222 if (!PyInt_Check(src)) { |
|
223 PyErr_Format(PyExc_TypeError, |
|
224 "\"%s\" must be an integer", name); |
|
225 return -1; |
|
226 } |
|
227 *target = PyInt_AsLong(src); |
|
228 } |
|
229 return 0; |
|
230 } |
|
231 |
|
232 static int |
|
233 _set_char(const char *name, char *target, PyObject *src, char dflt) |
|
234 { |
|
235 if (src == NULL) |
|
236 *target = dflt; |
|
237 else { |
|
238 if (src == Py_None || PyString_Size(src) == 0) |
|
239 *target = '\0'; |
|
240 else if (!PyString_Check(src) || PyString_Size(src) != 1) { |
|
241 PyErr_Format(PyExc_TypeError, |
|
242 "\"%s\" must be an 1-character string", |
|
243 name); |
|
244 return -1; |
|
245 } |
|
246 else { |
|
247 char *s = PyString_AsString(src); |
|
248 if (s == NULL) |
|
249 return -1; |
|
250 *target = s[0]; |
|
251 } |
|
252 } |
|
253 return 0; |
|
254 } |
|
255 |
|
256 static int |
|
257 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) |
|
258 { |
|
259 if (src == NULL) |
|
260 *target = PyString_FromString(dflt); |
|
261 else { |
|
262 if (src == Py_None) |
|
263 *target = NULL; |
|
264 else if (!IS_BASESTRING(src)) { |
|
265 PyErr_Format(PyExc_TypeError, |
|
266 "\"%s\" must be an string", name); |
|
267 return -1; |
|
268 } |
|
269 else { |
|
270 Py_XDECREF(*target); |
|
271 Py_INCREF(src); |
|
272 *target = src; |
|
273 } |
|
274 } |
|
275 return 0; |
|
276 } |
|
277 |
|
278 static int |
|
279 dialect_check_quoting(int quoting) |
|
280 { |
|
281 StyleDesc *qs = quote_styles; |
|
282 |
|
283 for (qs = quote_styles; qs->name; qs++) { |
|
284 if (qs->style == quoting) |
|
285 return 0; |
|
286 } |
|
287 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value"); |
|
288 return -1; |
|
289 } |
|
290 |
|
/* Byte offset of field x inside DialectObj. */
#define D_OFF(x) offsetof(DialectObj, x)

/*
 * DialectObj fields exposed directly as read-only attributes.  The
 * remaining options are exposed through Dialect_getsetlist instead.
 */
static struct PyMemberDef Dialect_memberlist[] = {
    { "delimiter",          T_CHAR, D_OFF(delimiter), READONLY },
    { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
    { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
    { "strict",             T_INT, D_OFF(strict), READONLY },
    { NULL }
};
|
300 |
|
/*
 * Attributes computed by getter functions.  No setter is supplied for
 * any entry.
 */
static PyGetSetDef Dialect_getsetlist[] = {
    { "escapechar",     (getter)Dialect_get_escapechar},
    { "lineterminator", (getter)Dialect_get_lineterminator},
    { "quotechar",      (getter)Dialect_get_quotechar},
    { "quoting",        (getter)Dialect_get_quoting},
    {NULL},
};
|
308 |
|
309 static void |
|
310 Dialect_dealloc(DialectObj *self) |
|
311 { |
|
312 Py_XDECREF(self->lineterminator); |
|
313 Py_TYPE(self)->tp_free((PyObject *)self); |
|
314 } |
|
315 |
|
/*
 * Keyword names accepted by dialect_new().  The order must stay in
 * step with the argument addresses passed to
 * PyArg_ParseTupleAndKeywords() there.
 */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
|
328 |
|
329 static PyObject * |
|
330 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) |
|
331 { |
|
332 DialectObj *self; |
|
333 PyObject *ret = NULL; |
|
334 PyObject *dialect = NULL; |
|
335 PyObject *delimiter = NULL; |
|
336 PyObject *doublequote = NULL; |
|
337 PyObject *escapechar = NULL; |
|
338 PyObject *lineterminator = NULL; |
|
339 PyObject *quotechar = NULL; |
|
340 PyObject *quoting = NULL; |
|
341 PyObject *skipinitialspace = NULL; |
|
342 PyObject *strict = NULL; |
|
343 |
|
344 if (!PyArg_ParseTupleAndKeywords(args, kwargs, |
|
345 "|OOOOOOOOO", dialect_kws, |
|
346 &dialect, |
|
347 &delimiter, |
|
348 &doublequote, |
|
349 &escapechar, |
|
350 &lineterminator, |
|
351 "echar, |
|
352 "ing, |
|
353 &skipinitialspace, |
|
354 &strict)) |
|
355 return NULL; |
|
356 |
|
357 if (dialect != NULL) { |
|
358 if (IS_BASESTRING(dialect)) { |
|
359 dialect = get_dialect_from_registry(dialect); |
|
360 if (dialect == NULL) |
|
361 return NULL; |
|
362 } |
|
363 else |
|
364 Py_INCREF(dialect); |
|
365 /* Can we reuse this instance? */ |
|
366 if (PyObject_TypeCheck(dialect, &Dialect_Type) && |
|
367 delimiter == 0 && |
|
368 doublequote == 0 && |
|
369 escapechar == 0 && |
|
370 lineterminator == 0 && |
|
371 quotechar == 0 && |
|
372 quoting == 0 && |
|
373 skipinitialspace == 0 && |
|
374 strict == 0) |
|
375 return dialect; |
|
376 } |
|
377 |
|
378 self = (DialectObj *)type->tp_alloc(type, 0); |
|
379 if (self == NULL) { |
|
380 Py_XDECREF(dialect); |
|
381 return NULL; |
|
382 } |
|
383 self->lineterminator = NULL; |
|
384 |
|
385 Py_XINCREF(delimiter); |
|
386 Py_XINCREF(doublequote); |
|
387 Py_XINCREF(escapechar); |
|
388 Py_XINCREF(lineterminator); |
|
389 Py_XINCREF(quotechar); |
|
390 Py_XINCREF(quoting); |
|
391 Py_XINCREF(skipinitialspace); |
|
392 Py_XINCREF(strict); |
|
393 if (dialect != NULL) { |
|
394 #define DIALECT_GETATTR(v, n) \ |
|
395 if (v == NULL) \ |
|
396 v = PyObject_GetAttrString(dialect, n) |
|
397 DIALECT_GETATTR(delimiter, "delimiter"); |
|
398 DIALECT_GETATTR(doublequote, "doublequote"); |
|
399 DIALECT_GETATTR(escapechar, "escapechar"); |
|
400 DIALECT_GETATTR(lineterminator, "lineterminator"); |
|
401 DIALECT_GETATTR(quotechar, "quotechar"); |
|
402 DIALECT_GETATTR(quoting, "quoting"); |
|
403 DIALECT_GETATTR(skipinitialspace, "skipinitialspace"); |
|
404 DIALECT_GETATTR(strict, "strict"); |
|
405 PyErr_Clear(); |
|
406 } |
|
407 |
|
408 /* check types and convert to C values */ |
|
409 #define DIASET(meth, name, target, src, dflt) \ |
|
410 if (meth(name, target, src, dflt)) \ |
|
411 goto err |
|
412 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ','); |
|
413 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1); |
|
414 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0); |
|
415 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n"); |
|
416 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"'); |
|
417 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL); |
|
418 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0); |
|
419 DIASET(_set_bool, "strict", &self->strict, strict, 0); |
|
420 |
|
421 /* validate options */ |
|
422 if (dialect_check_quoting(self->quoting)) |
|
423 goto err; |
|
424 if (self->delimiter == 0) { |
|
425 PyErr_SetString(PyExc_TypeError, "delimiter must be set"); |
|
426 goto err; |
|
427 } |
|
428 if (quotechar == Py_None && quoting == NULL) |
|
429 self->quoting = QUOTE_NONE; |
|
430 if (self->quoting != QUOTE_NONE && self->quotechar == 0) { |
|
431 PyErr_SetString(PyExc_TypeError, |
|
432 "quotechar must be set if quoting enabled"); |
|
433 goto err; |
|
434 } |
|
435 if (self->lineterminator == 0) { |
|
436 PyErr_SetString(PyExc_TypeError, "lineterminator must be set"); |
|
437 goto err; |
|
438 } |
|
439 |
|
440 ret = (PyObject *)self; |
|
441 Py_INCREF(self); |
|
442 err: |
|
443 Py_XDECREF(self); |
|
444 Py_XDECREF(dialect); |
|
445 Py_XDECREF(delimiter); |
|
446 Py_XDECREF(doublequote); |
|
447 Py_XDECREF(escapechar); |
|
448 Py_XDECREF(lineterminator); |
|
449 Py_XDECREF(quotechar); |
|
450 Py_XDECREF(quoting); |
|
451 Py_XDECREF(skipinitialspace); |
|
452 Py_XDECREF(strict); |
|
453 return ret; |
|
454 } |
|
455 |
|
456 |
|
/* Doc string installed in the tp_doc slot of Dialect_Type. */
PyDoc_STRVAR(Dialect_Type_doc,
"CSV dialect\n"
"\n"
"The Dialect type records CSV parsing and generation options.\n");

/*
 * Type object for _csv.Dialect.  Instances are created by dialect_new;
 * attributes come from Dialect_memberlist and Dialect_getsetlist.  The
 * type may be subclassed (Py_TPFLAGS_BASETYPE).
 */
static PyTypeObject Dialect_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_csv.Dialect",                         /* tp_name */
    sizeof(DialectObj),                     /* tp_basicsize */
    0,                                      /* tp_itemsize */
    /*  methods  */
    (destructor)Dialect_dealloc,            /* tp_dealloc */
    (printfunc)0,                           /* tp_print */
    (getattrfunc)0,                         /* tp_getattr */
    (setattrfunc)0,                         /* tp_setattr */
    (cmpfunc)0,                             /* tp_compare */
    (reprfunc)0,                            /* tp_repr */
    0,                                      /* tp_as_number */
    0,                                      /* tp_as_sequence */
    0,                                      /* tp_as_mapping */
    (hashfunc)0,                            /* tp_hash */
    (ternaryfunc)0,                         /* tp_call */
    (reprfunc)0,                            /* tp_str */
    0,                                      /* tp_getattro */
    0,                                      /* tp_setattro */
    0,                                      /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    Dialect_Type_doc,                       /* tp_doc */
    0,                                      /* tp_traverse */
    0,                                      /* tp_clear */
    0,                                      /* tp_richcompare */
    0,                                      /* tp_weaklistoffset */
    0,                                      /* tp_iter */
    0,                                      /* tp_iternext */
    0,                                      /* tp_methods */
    Dialect_memberlist,                     /* tp_members */
    Dialect_getsetlist,                     /* tp_getset */
    0,                                      /* tp_base */
    0,                                      /* tp_dict */
    0,                                      /* tp_descr_get */
    0,                                      /* tp_descr_set */
    0,                                      /* tp_dictoffset */
    0,                                      /* tp_init */
    0,                                      /* tp_alloc */
    dialect_new,                            /* tp_new */
    0,                                      /* tp_free */
};
|
504 |
|
505 /* |
|
506 * Return an instance of the dialect type, given a Python instance or kwarg |
|
507 * description of the dialect |
|
508 */ |
|
509 static PyObject * |
|
510 _call_dialect(PyObject *dialect_inst, PyObject *kwargs) |
|
511 { |
|
512 PyObject *ctor_args; |
|
513 PyObject *dialect; |
|
514 |
|
515 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst); |
|
516 if (ctor_args == NULL) |
|
517 return NULL; |
|
518 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs); |
|
519 Py_DECREF(ctor_args); |
|
520 return dialect; |
|
521 } |
|
522 |
|
523 /* |
|
524 * READER |
|
525 */ |
|
526 static int |
|
527 parse_save_field(ReaderObj *self) |
|
528 { |
|
529 PyObject *field; |
|
530 |
|
531 field = PyString_FromStringAndSize(self->field, self->field_len); |
|
532 if (field == NULL) |
|
533 return -1; |
|
534 self->field_len = 0; |
|
535 if (self->numeric_field) { |
|
536 PyObject *tmp; |
|
537 |
|
538 self->numeric_field = 0; |
|
539 tmp = PyNumber_Float(field); |
|
540 if (tmp == NULL) { |
|
541 Py_DECREF(field); |
|
542 return -1; |
|
543 } |
|
544 Py_DECREF(field); |
|
545 field = tmp; |
|
546 } |
|
547 PyList_Append(self->fields, field); |
|
548 Py_DECREF(field); |
|
549 return 0; |
|
550 } |
|
551 |
|
552 static int |
|
553 parse_grow_buff(ReaderObj *self) |
|
554 { |
|
555 if (self->field_size == 0) { |
|
556 self->field_size = 4096; |
|
557 if (self->field != NULL) |
|
558 PyMem_Free(self->field); |
|
559 self->field = PyMem_Malloc(self->field_size); |
|
560 } |
|
561 else { |
|
562 if (self->field_size > INT_MAX / 2) { |
|
563 PyErr_NoMemory(); |
|
564 return 0; |
|
565 } |
|
566 self->field_size *= 2; |
|
567 self->field = PyMem_Realloc(self->field, self->field_size); |
|
568 } |
|
569 if (self->field == NULL) { |
|
570 PyErr_NoMemory(); |
|
571 return 0; |
|
572 } |
|
573 return 1; |
|
574 } |
|
575 |
|
576 static int |
|
577 parse_add_char(ReaderObj *self, char c) |
|
578 { |
|
579 if (self->field_len >= field_limit) { |
|
580 PyErr_Format(error_obj, "field larger than field limit (%ld)", |
|
581 field_limit); |
|
582 return -1; |
|
583 } |
|
584 if (self->field_len == self->field_size && !parse_grow_buff(self)) |
|
585 return -1; |
|
586 self->field[self->field_len++] = c; |
|
587 return 0; |
|
588 } |
|
589 |
|
/*
 * Feed one character to the parser state machine.
 *
 * c == '\0' is the end-of-line marker: Reader_iternext() rejects real
 * NUL bytes and passes 0 once after each input line.  The record is
 * complete when the state is back at START_RECORD after that marker.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
parse_process_char(ReaderObj *self, char c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == '\0')
            /* empty line - return [] */
            break;
        else if (c == '\n' || c == '\r') {
            self->state = EAT_CRNL;
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* save empty field - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /* start quoted field */
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace)
            /* ignore space at start of field */
            ;
        else if (c == dialect->delimiter) {
            /* save empty field */
            if (parse_save_field(self) < 0)
                return -1;
        }
        else {
            /* begin new unquoted field */
            if (dialect->quoting == QUOTE_NONNUMERIC)
                self->numeric_field = 1;
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /* an escape at end of line stores a newline in the field */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        if (c == '\0')
            /* end of line: stay in this state, so the caller's loop
               fetches another line and the field continues there */
            ;
        else if (c == dialect->escapechar) {
            /* Possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* an escape at end of line stores a newline in the field */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in an quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (!dialect->strict) {
            /* non-strict: keep the character, continue as unquoted */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            PyErr_Format(error_obj, "'%c' expected after '%c'",
                         dialect->delimiter,
                         dialect->quotechar);
            return -1;
        }
        break;

    case EAT_CRNL:
        /* after a line break only more line breaks or end of line
           are accepted */
        if (c == '\n' || c == '\r')
            ;
        else if (c == '\0')
            self->state = START_RECORD;
        else {
            PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
            return -1;
        }
        break;

    }
    return 0;
}
|
759 |
|
760 static int |
|
761 parse_reset(ReaderObj *self) |
|
762 { |
|
763 Py_XDECREF(self->fields); |
|
764 self->fields = PyList_New(0); |
|
765 if (self->fields == NULL) |
|
766 return -1; |
|
767 self->field_len = 0; |
|
768 self->state = START_RECORD; |
|
769 self->numeric_field = 0; |
|
770 return 0; |
|
771 } |
|
772 |
|
773 static PyObject * |
|
774 Reader_iternext(ReaderObj *self) |
|
775 { |
|
776 PyObject *lineobj; |
|
777 PyObject *fields = NULL; |
|
778 char *line, c; |
|
779 int linelen; |
|
780 |
|
781 if (parse_reset(self) < 0) |
|
782 return NULL; |
|
783 do { |
|
784 lineobj = PyIter_Next(self->input_iter); |
|
785 if (lineobj == NULL) { |
|
786 /* End of input OR exception */ |
|
787 if (!PyErr_Occurred() && self->field_len != 0) |
|
788 PyErr_Format(error_obj, |
|
789 "newline inside string"); |
|
790 return NULL; |
|
791 } |
|
792 ++self->line_num; |
|
793 |
|
794 line = PyString_AsString(lineobj); |
|
795 linelen = PyString_Size(lineobj); |
|
796 |
|
797 if (line == NULL || linelen < 0) { |
|
798 Py_DECREF(lineobj); |
|
799 return NULL; |
|
800 } |
|
801 while (linelen--) { |
|
802 c = *line++; |
|
803 if (c == '\0') { |
|
804 Py_DECREF(lineobj); |
|
805 PyErr_Format(error_obj, |
|
806 "line contains NULL byte"); |
|
807 goto err; |
|
808 } |
|
809 if (parse_process_char(self, c) < 0) { |
|
810 Py_DECREF(lineobj); |
|
811 goto err; |
|
812 } |
|
813 } |
|
814 Py_DECREF(lineobj); |
|
815 if (parse_process_char(self, 0) < 0) |
|
816 goto err; |
|
817 } while (self->state != START_RECORD); |
|
818 |
|
819 fields = self->fields; |
|
820 self->fields = NULL; |
|
821 err: |
|
822 return fields; |
|
823 } |
|
824 |
|
825 static void |
|
826 Reader_dealloc(ReaderObj *self) |
|
827 { |
|
828 PyObject_GC_UnTrack(self); |
|
829 Py_XDECREF(self->dialect); |
|
830 Py_XDECREF(self->input_iter); |
|
831 Py_XDECREF(self->fields); |
|
832 if (self->field != NULL) |
|
833 PyMem_Free(self->field); |
|
834 PyObject_GC_Del(self); |
|
835 } |
|
836 |
|
837 static int |
|
838 Reader_traverse(ReaderObj *self, visitproc visit, void *arg) |
|
839 { |
|
840 Py_VISIT(self->dialect); |
|
841 Py_VISIT(self->input_iter); |
|
842 Py_VISIT(self->fields); |
|
843 return 0; |
|
844 } |
|
845 |
|
846 static int |
|
847 Reader_clear(ReaderObj *self) |
|
848 { |
|
849 Py_CLEAR(self->dialect); |
|
850 Py_CLEAR(self->input_iter); |
|
851 Py_CLEAR(self->fields); |
|
852 return 0; |
|
853 } |
|
854 |
|
855 PyDoc_STRVAR(Reader_Type_doc, |
|
856 "CSV reader\n" |
|
857 "\n" |
|
858 "Reader objects are responsible for reading and parsing tabular data\n" |
|
859 "in CSV format.\n" |
|
860 ); |
|
861 |
|
862 static struct PyMethodDef Reader_methods[] = { |
|
863 { NULL, NULL } |
|
864 }; |
|
865 #define R_OFF(x) offsetof(ReaderObj, x) |
|
866 |
|
867 static struct PyMemberDef Reader_memberlist[] = { |
|
868 { "dialect", T_OBJECT, R_OFF(dialect), RO }, |
|
869 { "line_num", T_ULONG, R_OFF(line_num), RO }, |
|
870 { NULL } |
|
871 }; |
|
872 |
|
873 |
|
874 static PyTypeObject Reader_Type = { |
|
875 PyVarObject_HEAD_INIT(NULL, 0) |
|
876 "_csv.reader", /*tp_name*/ |
|
877 sizeof(ReaderObj), /*tp_basicsize*/ |
|
878 0, /*tp_itemsize*/ |
|
879 /* methods */ |
|
880 (destructor)Reader_dealloc, /*tp_dealloc*/ |
|
881 (printfunc)0, /*tp_print*/ |
|
882 (getattrfunc)0, /*tp_getattr*/ |
|
883 (setattrfunc)0, /*tp_setattr*/ |
|
884 (cmpfunc)0, /*tp_compare*/ |
|
885 (reprfunc)0, /*tp_repr*/ |
|
886 0, /*tp_as_number*/ |
|
887 0, /*tp_as_sequence*/ |
|
888 0, /*tp_as_mapping*/ |
|
889 (hashfunc)0, /*tp_hash*/ |
|
890 (ternaryfunc)0, /*tp_call*/ |
|
891 (reprfunc)0, /*tp_str*/ |
|
892 0, /*tp_getattro*/ |
|
893 0, /*tp_setattro*/ |
|
894 0, /*tp_as_buffer*/ |
|
895 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | |
|
896 Py_TPFLAGS_HAVE_GC, /*tp_flags*/ |
|
897 Reader_Type_doc, /*tp_doc*/ |
|
898 (traverseproc)Reader_traverse, /*tp_traverse*/ |
|
899 (inquiry)Reader_clear, /*tp_clear*/ |
|
900 0, /*tp_richcompare*/ |
|
901 0, /*tp_weaklistoffset*/ |
|
902 PyObject_SelfIter, /*tp_iter*/ |
|
903 (getiterfunc)Reader_iternext, /*tp_iternext*/ |
|
904 Reader_methods, /*tp_methods*/ |
|
905 Reader_memberlist, /*tp_members*/ |
|
906 0, /*tp_getset*/ |
|
907 |
|
908 }; |
|
909 |
|
910 static PyObject * |
|
911 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args) |
|
912 { |
|
913 PyObject * iterator, * dialect = NULL; |
|
914 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type); |
|
915 |
|
916 if (!self) |
|
917 return NULL; |
|
918 |
|
919 self->dialect = NULL; |
|
920 self->fields = NULL; |
|
921 self->input_iter = NULL; |
|
922 self->field = NULL; |
|
923 self->field_size = 0; |
|
924 self->line_num = 0; |
|
925 |
|
926 if (parse_reset(self) < 0) { |
|
927 Py_DECREF(self); |
|
928 return NULL; |
|
929 } |
|
930 |
|
931 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) { |
|
932 Py_DECREF(self); |
|
933 return NULL; |
|
934 } |
|
935 self->input_iter = PyObject_GetIter(iterator); |
|
936 if (self->input_iter == NULL) { |
|
937 PyErr_SetString(PyExc_TypeError, |
|
938 "argument 1 must be an iterator"); |
|
939 Py_DECREF(self); |
|
940 return NULL; |
|
941 } |
|
942 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args); |
|
943 if (self->dialect == NULL) { |
|
944 Py_DECREF(self); |
|
945 return NULL; |
|
946 } |
|
947 |
|
948 PyObject_GC_Track(self); |
|
949 return (PyObject *)self; |
|
950 } |
|
951 |
|
952 /* |
|
953 * WRITER |
|
954 */ |
|
955 /* ---------------------------------------------------------------- */ |
|
956 static void |
|
957 join_reset(WriterObj *self) |
|
958 { |
|
959 self->rec_len = 0; |
|
960 self->num_fields = 0; |
|
961 } |
|
962 |
|
963 #define MEM_INCR 32768 |
|
964 |
|
/* Calculate new record length or append field to record.  Return new
 * record length.
 *
 * The function is run twice per field (see join_append()):
 *   copy_phase == 0: only counts; nothing is written to self->rec.
 *                    *quoted may be switched on when the data turns
 *                    out to need quoting.
 *   copy_phase == 1: writes the characters into self->rec, which the
 *                    caller must already have made large enough.
 *
 * field is read up to its terminating '\0'.  quote_empty asks for an
 * empty field to be quoted.
 *
 * Returns the record length after this field, or -1 with an exception
 * set.  self->rec_len itself is not modified here.
 */
static int
join_append_data(WriterObj *self, char *field, int quote_empty,
                 int *quoted, int copy_phase)
{
    DialectObj *dialect = self->dialect;
    int i, rec_len;
    char *lineterm;

/* Count one character, and also store it when in the copy phase. */
#define ADDCH(c) \
    do {\
        if (copy_phase) \
            self->rec[rec_len] = c;\
        rec_len++;\
    } while(0)

    lineterm = PyString_AsString(dialect->lineterminator);
    if (lineterm == NULL)
        return -1;

    rec_len = self->rec_len;

    /* If this is not the first field we need a field separator */
    if (self->num_fields > 0)
        ADDCH(dialect->delimiter);

    /* Handle preceding quote */
    /* (only in the copy phase; the counting phase accounts for both
       quote characters together at the end) */
    if (copy_phase && *quoted)
        ADDCH(dialect->quotechar);

    /* Copy/count field data */
    for (i = 0;; i++) {
        char c = field[i];
        int want_escape = 0;

        if (c == '\0')
            break;

        /* Special characters: delimiter, escapechar, quotechar and
           any character of the line terminator. */
        if (c == dialect->delimiter ||
            c == dialect->escapechar ||
            c == dialect->quotechar ||
            strchr(lineterm, c)) {
            if (dialect->quoting == QUOTE_NONE)
                want_escape = 1;
            else {
                if (c == dialect->quotechar) {
                    /* double the quote, or escape it if doubling
                       is disabled */
                    if (dialect->doublequote)
                        ADDCH(dialect->quotechar);
                    else
                        want_escape = 1;
                }
                if (!want_escape)
                    *quoted = 1;
            }
            if (want_escape) {
                if (!dialect->escapechar) {
                    PyErr_Format(error_obj,
                                 "need to escape, but no escapechar set");
                    return -1;
                }
                ADDCH(dialect->escapechar);
            }
        }
        /* Copy field character into record buffer.
         */
        ADDCH(c);
    }

    /* If field is empty check if it needs to be quoted.
     */
    if (i == 0 && quote_empty) {
        if (dialect->quoting == QUOTE_NONE) {
            PyErr_Format(error_obj,
                         "single empty field record must be quoted");
            return -1;
        }
        else
            *quoted = 1;
    }

    if (*quoted) {
        if (copy_phase)
            ADDCH(dialect->quotechar);
        else
            rec_len += 2;       /* opening and closing quote */
    }
    return rec_len;
#undef ADDCH
}
|
1056 |
|
1057 static int |
|
1058 join_check_rec_size(WriterObj *self, int rec_len) |
|
1059 { |
|
1060 |
|
1061 if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) { |
|
1062 PyErr_NoMemory(); |
|
1063 return 0; |
|
1064 } |
|
1065 |
|
1066 if (rec_len > self->rec_size) { |
|
1067 if (self->rec_size == 0) { |
|
1068 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; |
|
1069 if (self->rec != NULL) |
|
1070 PyMem_Free(self->rec); |
|
1071 self->rec = PyMem_Malloc(self->rec_size); |
|
1072 } |
|
1073 else { |
|
1074 char *old_rec = self->rec; |
|
1075 |
|
1076 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; |
|
1077 self->rec = PyMem_Realloc(self->rec, self->rec_size); |
|
1078 if (self->rec == NULL) |
|
1079 PyMem_Free(old_rec); |
|
1080 } |
|
1081 if (self->rec == NULL) { |
|
1082 PyErr_NoMemory(); |
|
1083 return 0; |
|
1084 } |
|
1085 } |
|
1086 return 1; |
|
1087 } |
|
1088 |
|
1089 static int |
|
1090 join_append(WriterObj *self, char *field, int *quoted, int quote_empty) |
|
1091 { |
|
1092 int rec_len; |
|
1093 |
|
1094 rec_len = join_append_data(self, field, quote_empty, quoted, 0); |
|
1095 if (rec_len < 0) |
|
1096 return 0; |
|
1097 |
|
1098 /* grow record buffer if necessary */ |
|
1099 if (!join_check_rec_size(self, rec_len)) |
|
1100 return 0; |
|
1101 |
|
1102 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1); |
|
1103 self->num_fields++; |
|
1104 |
|
1105 return 1; |
|
1106 } |
|
1107 |
|
1108 static int |
|
1109 join_append_lineterminator(WriterObj *self) |
|
1110 { |
|
1111 int terminator_len; |
|
1112 char *terminator; |
|
1113 |
|
1114 terminator_len = PyString_Size(self->dialect->lineterminator); |
|
1115 if (terminator_len == -1) |
|
1116 return 0; |
|
1117 |
|
1118 /* grow record buffer if necessary */ |
|
1119 if (!join_check_rec_size(self, self->rec_len + terminator_len)) |
|
1120 return 0; |
|
1121 |
|
1122 terminator = PyString_AsString(self->dialect->lineterminator); |
|
1123 if (terminator == NULL) |
|
1124 return 0; |
|
1125 memmove(self->rec + self->rec_len, terminator, terminator_len); |
|
1126 self->rec_len += terminator_len; |
|
1127 |
|
1128 return 1; |
|
1129 } |
|
1130 |
|
1131 PyDoc_STRVAR(csv_writerow_doc, |
|
1132 "writerow(sequence)\n" |
|
1133 "\n" |
|
1134 "Construct and write a CSV record from a sequence of fields. Non-string\n" |
|
1135 "elements will be converted to string."); |
|
1136 |
|
1137 static PyObject * |
|
1138 csv_writerow(WriterObj *self, PyObject *seq) |
|
1139 { |
|
1140 DialectObj *dialect = self->dialect; |
|
1141 int len, i; |
|
1142 |
|
1143 if (!PySequence_Check(seq)) |
|
1144 return PyErr_Format(error_obj, "sequence expected"); |
|
1145 |
|
1146 len = PySequence_Length(seq); |
|
1147 if (len < 0) |
|
1148 return NULL; |
|
1149 |
|
1150 /* Join all fields in internal buffer. |
|
1151 */ |
|
1152 join_reset(self); |
|
1153 for (i = 0; i < len; i++) { |
|
1154 PyObject *field; |
|
1155 int append_ok; |
|
1156 int quoted; |
|
1157 |
|
1158 field = PySequence_GetItem(seq, i); |
|
1159 if (field == NULL) |
|
1160 return NULL; |
|
1161 |
|
1162 switch (dialect->quoting) { |
|
1163 case QUOTE_NONNUMERIC: |
|
1164 quoted = !PyNumber_Check(field); |
|
1165 break; |
|
1166 case QUOTE_ALL: |
|
1167 quoted = 1; |
|
1168 break; |
|
1169 default: |
|
1170 quoted = 0; |
|
1171 break; |
|
1172 } |
|
1173 |
|
1174 if (PyString_Check(field)) { |
|
1175 append_ok = join_append(self, |
|
1176 PyString_AS_STRING(field), |
|
1177 "ed, len == 1); |
|
1178 Py_DECREF(field); |
|
1179 } |
|
1180 else if (field == Py_None) { |
|
1181 append_ok = join_append(self, "", "ed, len == 1); |
|
1182 Py_DECREF(field); |
|
1183 } |
|
1184 else { |
|
1185 PyObject *str; |
|
1186 |
|
1187 str = PyObject_Str(field); |
|
1188 Py_DECREF(field); |
|
1189 if (str == NULL) |
|
1190 return NULL; |
|
1191 |
|
1192 append_ok = join_append(self, PyString_AS_STRING(str), |
|
1193 "ed, len == 1); |
|
1194 Py_DECREF(str); |
|
1195 } |
|
1196 if (!append_ok) |
|
1197 return NULL; |
|
1198 } |
|
1199 |
|
1200 /* Add line terminator. |
|
1201 */ |
|
1202 if (!join_append_lineterminator(self)) |
|
1203 return 0; |
|
1204 |
|
1205 return PyObject_CallFunction(self->writeline, |
|
1206 "(s#)", self->rec, self->rec_len); |
|
1207 } |
|
1208 |
|
1209 PyDoc_STRVAR(csv_writerows_doc, |
|
1210 "writerows(sequence of sequences)\n" |
|
1211 "\n" |
|
1212 "Construct and write a series of sequences to a csv file. Non-string\n" |
|
1213 "elements will be converted to string."); |
|
1214 |
|
1215 static PyObject * |
|
1216 csv_writerows(WriterObj *self, PyObject *seqseq) |
|
1217 { |
|
1218 PyObject *row_iter, *row_obj, *result; |
|
1219 |
|
1220 row_iter = PyObject_GetIter(seqseq); |
|
1221 if (row_iter == NULL) { |
|
1222 PyErr_SetString(PyExc_TypeError, |
|
1223 "writerows() argument must be iterable"); |
|
1224 return NULL; |
|
1225 } |
|
1226 while ((row_obj = PyIter_Next(row_iter))) { |
|
1227 result = csv_writerow(self, row_obj); |
|
1228 Py_DECREF(row_obj); |
|
1229 if (!result) { |
|
1230 Py_DECREF(row_iter); |
|
1231 return NULL; |
|
1232 } |
|
1233 else |
|
1234 Py_DECREF(result); |
|
1235 } |
|
1236 Py_DECREF(row_iter); |
|
1237 if (PyErr_Occurred()) |
|
1238 return NULL; |
|
1239 Py_INCREF(Py_None); |
|
1240 return Py_None; |
|
1241 } |
|
1242 |
|
1243 static struct PyMethodDef Writer_methods[] = { |
|
1244 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc}, |
|
1245 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc}, |
|
1246 { NULL, NULL } |
|
1247 }; |
|
1248 |
|
1249 #define W_OFF(x) offsetof(WriterObj, x) |
|
1250 |
|
1251 static struct PyMemberDef Writer_memberlist[] = { |
|
1252 { "dialect", T_OBJECT, W_OFF(dialect), RO }, |
|
1253 { NULL } |
|
1254 }; |
|
1255 |
|
1256 static void |
|
1257 Writer_dealloc(WriterObj *self) |
|
1258 { |
|
1259 PyObject_GC_UnTrack(self); |
|
1260 Py_XDECREF(self->dialect); |
|
1261 Py_XDECREF(self->writeline); |
|
1262 if (self->rec != NULL) |
|
1263 PyMem_Free(self->rec); |
|
1264 PyObject_GC_Del(self); |
|
1265 } |
|
1266 |
|
1267 static int |
|
1268 Writer_traverse(WriterObj *self, visitproc visit, void *arg) |
|
1269 { |
|
1270 Py_VISIT(self->dialect); |
|
1271 Py_VISIT(self->writeline); |
|
1272 return 0; |
|
1273 } |
|
1274 |
|
1275 static int |
|
1276 Writer_clear(WriterObj *self) |
|
1277 { |
|
1278 Py_CLEAR(self->dialect); |
|
1279 Py_CLEAR(self->writeline); |
|
1280 return 0; |
|
1281 } |
|
1282 |
|
1283 PyDoc_STRVAR(Writer_Type_doc, |
|
1284 "CSV writer\n" |
|
1285 "\n" |
|
1286 "Writer objects are responsible for generating tabular data\n" |
|
1287 "in CSV format from sequence input.\n" |
|
1288 ); |
|
1289 |
|
1290 static PyTypeObject Writer_Type = { |
|
1291 PyVarObject_HEAD_INIT(NULL, 0) |
|
1292 "_csv.writer", /*tp_name*/ |
|
1293 sizeof(WriterObj), /*tp_basicsize*/ |
|
1294 0, /*tp_itemsize*/ |
|
1295 /* methods */ |
|
1296 (destructor)Writer_dealloc, /*tp_dealloc*/ |
|
1297 (printfunc)0, /*tp_print*/ |
|
1298 (getattrfunc)0, /*tp_getattr*/ |
|
1299 (setattrfunc)0, /*tp_setattr*/ |
|
1300 (cmpfunc)0, /*tp_compare*/ |
|
1301 (reprfunc)0, /*tp_repr*/ |
|
1302 0, /*tp_as_number*/ |
|
1303 0, /*tp_as_sequence*/ |
|
1304 0, /*tp_as_mapping*/ |
|
1305 (hashfunc)0, /*tp_hash*/ |
|
1306 (ternaryfunc)0, /*tp_call*/ |
|
1307 (reprfunc)0, /*tp_str*/ |
|
1308 0, /*tp_getattro*/ |
|
1309 0, /*tp_setattro*/ |
|
1310 0, /*tp_as_buffer*/ |
|
1311 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | |
|
1312 Py_TPFLAGS_HAVE_GC, /*tp_flags*/ |
|
1313 Writer_Type_doc, |
|
1314 (traverseproc)Writer_traverse, /*tp_traverse*/ |
|
1315 (inquiry)Writer_clear, /*tp_clear*/ |
|
1316 0, /*tp_richcompare*/ |
|
1317 0, /*tp_weaklistoffset*/ |
|
1318 (getiterfunc)0, /*tp_iter*/ |
|
1319 (getiterfunc)0, /*tp_iternext*/ |
|
1320 Writer_methods, /*tp_methods*/ |
|
1321 Writer_memberlist, /*tp_members*/ |
|
1322 0, /*tp_getset*/ |
|
1323 }; |
|
1324 |
|
1325 static PyObject * |
|
1326 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) |
|
1327 { |
|
1328 PyObject * output_file, * dialect = NULL; |
|
1329 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type); |
|
1330 |
|
1331 if (!self) |
|
1332 return NULL; |
|
1333 |
|
1334 self->dialect = NULL; |
|
1335 self->writeline = NULL; |
|
1336 |
|
1337 self->rec = NULL; |
|
1338 self->rec_size = 0; |
|
1339 self->rec_len = 0; |
|
1340 self->num_fields = 0; |
|
1341 |
|
1342 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) { |
|
1343 Py_DECREF(self); |
|
1344 return NULL; |
|
1345 } |
|
1346 self->writeline = PyObject_GetAttrString(output_file, "write"); |
|
1347 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) { |
|
1348 PyErr_SetString(PyExc_TypeError, |
|
1349 "argument 1 must have a \"write\" method"); |
|
1350 Py_DECREF(self); |
|
1351 return NULL; |
|
1352 } |
|
1353 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args); |
|
1354 if (self->dialect == NULL) { |
|
1355 Py_DECREF(self); |
|
1356 return NULL; |
|
1357 } |
|
1358 PyObject_GC_Track(self); |
|
1359 return (PyObject *)self; |
|
1360 } |
|
1361 |
|
1362 /* |
|
1363 * DIALECT REGISTRY |
|
1364 */ |
|
1365 static PyObject * |
|
1366 csv_list_dialects(PyObject *module, PyObject *args) |
|
1367 { |
|
1368 return PyDict_Keys(dialects); |
|
1369 } |
|
1370 |
|
1371 static PyObject * |
|
1372 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) |
|
1373 { |
|
1374 PyObject *name_obj, *dialect_obj = NULL; |
|
1375 PyObject *dialect; |
|
1376 |
|
1377 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj)) |
|
1378 return NULL; |
|
1379 if (!IS_BASESTRING(name_obj)) { |
|
1380 PyErr_SetString(PyExc_TypeError, |
|
1381 "dialect name must be a string or unicode"); |
|
1382 return NULL; |
|
1383 } |
|
1384 dialect = _call_dialect(dialect_obj, kwargs); |
|
1385 if (dialect == NULL) |
|
1386 return NULL; |
|
1387 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) { |
|
1388 Py_DECREF(dialect); |
|
1389 return NULL; |
|
1390 } |
|
1391 Py_DECREF(dialect); |
|
1392 Py_INCREF(Py_None); |
|
1393 return Py_None; |
|
1394 } |
|
1395 |
|
1396 static PyObject * |
|
1397 csv_unregister_dialect(PyObject *module, PyObject *name_obj) |
|
1398 { |
|
1399 if (PyDict_DelItem(dialects, name_obj) < 0) |
|
1400 return PyErr_Format(error_obj, "unknown dialect"); |
|
1401 Py_INCREF(Py_None); |
|
1402 return Py_None; |
|
1403 } |
|
1404 |
|
1405 static PyObject * |
|
1406 csv_get_dialect(PyObject *module, PyObject *name_obj) |
|
1407 { |
|
1408 return get_dialect_from_registry(name_obj); |
|
1409 } |
|
1410 |
|
1411 static PyObject * |
|
1412 csv_field_size_limit(PyObject *module, PyObject *args) |
|
1413 { |
|
1414 PyObject *new_limit = NULL; |
|
1415 long old_limit = field_limit; |
|
1416 |
|
1417 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit)) |
|
1418 return NULL; |
|
1419 if (new_limit != NULL) { |
|
1420 if (!PyInt_Check(new_limit)) { |
|
1421 PyErr_Format(PyExc_TypeError, |
|
1422 "limit must be an integer"); |
|
1423 return NULL; |
|
1424 } |
|
1425 field_limit = PyInt_AsLong(new_limit); |
|
1426 } |
|
1427 return PyInt_FromLong(old_limit); |
|
1428 } |
|
1429 |
|
1430 /* |
|
1431 * MODULE |
|
1432 */ |
|
1433 |
|
1434 PyDoc_STRVAR(csv_module_doc, |
|
1435 "CSV parsing and writing.\n" |
|
1436 "\n" |
|
1437 "This module provides classes that assist in the reading and writing\n" |
|
1438 "of Comma Separated Value (CSV) files, and implements the interface\n" |
|
1439 "described by PEP 305. Although many CSV files are simple to parse,\n" |
|
1440 "the format is not formally defined by a stable specification and\n" |
|
1441 "is subtle enough that parsing lines of a CSV file with something\n" |
|
1442 "like line.split(\",\") is bound to fail. The module supports three\n" |
|
1443 "basic APIs: reading, writing, and registration of dialects.\n" |
|
1444 "\n" |
|
1445 "\n" |
|
1446 "DIALECT REGISTRATION:\n" |
|
1447 "\n" |
|
1448 "Readers and writers support a dialect argument, which is a convenient\n" |
|
1449 "handle on a group of settings. When the dialect argument is a string,\n" |
|
1450 "it identifies one of the dialects previously registered with the module.\n" |
|
1451 "If it is a class or instance, the attributes of the argument are used as\n" |
|
1452 "the settings for the reader or writer:\n" |
|
1453 "\n" |
|
1454 " class excel:\n" |
|
1455 " delimiter = ','\n" |
|
1456 " quotechar = '\"'\n" |
|
1457 " escapechar = None\n" |
|
1458 " doublequote = True\n" |
|
1459 " skipinitialspace = False\n" |
|
1460 " lineterminator = '\\r\\n'\n" |
|
1461 " quoting = QUOTE_MINIMAL\n" |
|
1462 "\n" |
|
1463 "SETTINGS:\n" |
|
1464 "\n" |
|
1465 " * quotechar - specifies a one-character string to use as the \n" |
|
1466 " quoting character. It defaults to '\"'.\n" |
|
1467 " * delimiter - specifies a one-character string to use as the \n" |
|
1468 " field separator. It defaults to ','.\n" |
|
1469 " * skipinitialspace - specifies how to interpret whitespace which\n" |
|
1470 " immediately follows a delimiter. It defaults to False, which\n" |
|
1471 " means that whitespace immediately following a delimiter is part\n" |
|
1472 " of the following field.\n" |
|
1473 " * lineterminator - specifies the character sequence which should \n" |
|
1474 " terminate rows.\n" |
|
1475 " * quoting - controls when quotes should be generated by the writer.\n" |
|
1476 " It can take on any of the following module constants:\n" |
|
1477 "\n" |
|
1478 " csv.QUOTE_MINIMAL means only when required, for example, when a\n" |
|
1479 " field contains either the quotechar or the delimiter\n" |
|
1480 " csv.QUOTE_ALL means that quotes are always placed around fields.\n" |
|
1481 " csv.QUOTE_NONNUMERIC means that quotes are always placed around\n" |
|
1482 " fields which do not parse as integers or floating point\n" |
|
1483 " numbers.\n" |
|
1484 " csv.QUOTE_NONE means that quotes are never placed around fields.\n" |
|
1485 " * escapechar - specifies a one-character string used to escape \n" |
|
1486 " the delimiter when quoting is set to QUOTE_NONE.\n" |
|
1487 " * doublequote - controls the handling of quotes inside fields. When\n" |
|
1488 " True, two consecutive quotes are interpreted as one during read,\n" |
|
1489 " and when writing, each quote character embedded in the data is\n" |
|
1490 " written as two quotes\n"); |
|
1491 |
|
1492 PyDoc_STRVAR(csv_reader_doc, |
|
1493 " csv_reader = reader(iterable [, dialect='excel']\n" |
|
1494 " [optional keyword args])\n" |
|
1495 " for row in csv_reader:\n" |
|
1496 " process(row)\n" |
|
1497 "\n" |
|
1498 "The \"iterable\" argument can be any object that returns a line\n" |
|
1499 "of input for each iteration, such as a file object or a list. The\n" |
|
1500 "optional \"dialect\" parameter is discussed below. The function\n" |
|
1501 "also accepts optional keyword arguments which override settings\n" |
|
1502 "provided by the dialect.\n" |
|
1503 "\n" |
|
1504 "The returned object is an iterator. Each iteration returns a row\n" |
|
1505 "of the CSV file (which can span multiple input lines):\n"); |
|
1506 |
|
1507 PyDoc_STRVAR(csv_writer_doc, |
|
1508 " csv_writer = csv.writer(fileobj [, dialect='excel']\n" |
|
1509 " [optional keyword args])\n" |
|
1510 " for row in sequence:\n" |
|
1511 " csv_writer.writerow(row)\n" |
|
1512 "\n" |
|
1513 " [or]\n" |
|
1514 "\n" |
|
1515 " csv_writer = csv.writer(fileobj [, dialect='excel']\n" |
|
1516 " [optional keyword args])\n" |
|
1517 " csv_writer.writerows(rows)\n" |
|
1518 "\n" |
|
1519 "The \"fileobj\" argument can be any object that supports the file API.\n"); |
|
1520 |
|
1521 PyDoc_STRVAR(csv_list_dialects_doc, |
|
1522 "Return a list of all know dialect names.\n" |
|
1523 " names = csv.list_dialects()"); |
|
1524 |
|
1525 PyDoc_STRVAR(csv_get_dialect_doc, |
|
1526 "Return the dialect instance associated with name.\n" |
|
1527 " dialect = csv.get_dialect(name)"); |
|
1528 |
|
1529 PyDoc_STRVAR(csv_register_dialect_doc, |
|
1530 "Create a mapping from a string name to a dialect class.\n" |
|
1531 " dialect = csv.register_dialect(name, dialect)"); |
|
1532 |
|
1533 PyDoc_STRVAR(csv_unregister_dialect_doc, |
|
1534 "Delete the name/dialect mapping associated with a string name.\n" |
|
1535 " csv.unregister_dialect(name)"); |
|
1536 |
|
1537 PyDoc_STRVAR(csv_field_size_limit_doc, |
|
1538 "Sets an upper limit on parsed fields.\n" |
|
1539 " csv.field_size_limit([limit])\n" |
|
1540 "\n" |
|
1541 "Returns old limit. If limit is not given, no new limit is set and\n" |
|
1542 "the old limit is returned"); |
|
1543 |
|
1544 static struct PyMethodDef csv_methods[] = { |
|
1545 { "reader", (PyCFunction)csv_reader, |
|
1546 METH_VARARGS | METH_KEYWORDS, csv_reader_doc}, |
|
1547 { "writer", (PyCFunction)csv_writer, |
|
1548 METH_VARARGS | METH_KEYWORDS, csv_writer_doc}, |
|
1549 { "list_dialects", (PyCFunction)csv_list_dialects, |
|
1550 METH_NOARGS, csv_list_dialects_doc}, |
|
1551 { "register_dialect", (PyCFunction)csv_register_dialect, |
|
1552 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc}, |
|
1553 { "unregister_dialect", (PyCFunction)csv_unregister_dialect, |
|
1554 METH_O, csv_unregister_dialect_doc}, |
|
1555 { "get_dialect", (PyCFunction)csv_get_dialect, |
|
1556 METH_O, csv_get_dialect_doc}, |
|
1557 { "field_size_limit", (PyCFunction)csv_field_size_limit, |
|
1558 METH_VARARGS, csv_field_size_limit_doc}, |
|
1559 { NULL, NULL } |
|
1560 }; |
|
1561 |
|
1562 PyMODINIT_FUNC |
|
1563 init_csv(void) |
|
1564 { |
|
1565 PyObject *module; |
|
1566 StyleDesc *style; |
|
1567 |
|
1568 if (PyType_Ready(&Dialect_Type) < 0) |
|
1569 return; |
|
1570 |
|
1571 if (PyType_Ready(&Reader_Type) < 0) |
|
1572 return; |
|
1573 |
|
1574 if (PyType_Ready(&Writer_Type) < 0) |
|
1575 return; |
|
1576 |
|
1577 /* Create the module and add the functions */ |
|
1578 module = Py_InitModule3("_csv", csv_methods, csv_module_doc); |
|
1579 if (module == NULL) |
|
1580 return; |
|
1581 |
|
1582 /* Add version to the module. */ |
|
1583 if (PyModule_AddStringConstant(module, "__version__", |
|
1584 MODULE_VERSION) == -1) |
|
1585 return; |
|
1586 |
|
1587 /* Add _dialects dictionary */ |
|
1588 dialects = PyDict_New(); |
|
1589 if (dialects == NULL) |
|
1590 return; |
|
1591 if (PyModule_AddObject(module, "_dialects", dialects)) |
|
1592 return; |
|
1593 |
|
1594 /* Add quote styles into dictionary */ |
|
1595 for (style = quote_styles; style->name; style++) { |
|
1596 if (PyModule_AddIntConstant(module, style->name, |
|
1597 style->style) == -1) |
|
1598 return; |
|
1599 } |
|
1600 |
|
1601 /* Add the Dialect type */ |
|
1602 Py_INCREF(&Dialect_Type); |
|
1603 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type)) |
|
1604 return; |
|
1605 |
|
1606 /* Add the CSV exception object to the module. */ |
|
1607 error_obj = PyErr_NewException("_csv.Error", NULL, NULL); |
|
1608 if (error_obj == NULL) |
|
1609 return; |
|
1610 PyModule_AddObject(module, "Error", error_obj); |
|
1611 } |