|
1 package Text::CSV; |
|
2 |
|
3 # Copyright (c) 1997 Alan Citterman. All rights reserved. |
|
4 # This program is free software; you can redistribute it and/or |
|
5 # modify it under the same terms as Perl itself. |
|
6 |
|
7 ################################################################################ |
|
8 # HISTORY |
|
9 # |
|
10 # Written by: |
|
11 # Alan Citterman <alan@mfgrtl.com> |
|
12 # |
|
13 # Version 0.01 06/05/1997 |
|
14 # original version |
|
15 # Version 0.02x 13/05/2010 |
|
16 # Hacked at symbian.org to permit top bit set characters in CSV |
|
17 ################################################################################ |
|
18 |
|
19 require 5.002; |
|
20 |
|
21 use strict; |
|
22 |
|
23 BEGIN { |
|
24 # use Exporter (); |
|
25 # use AutoLoader qw(AUTOLOAD); |
|
26 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); |
|
27 $VERSION = '0.02x'; |
|
28 # @ISA = qw(Exporter AutoLoader); |
|
29 @EXPORT = qw(); |
|
30 @EXPORT_OK = qw(); |
|
31 %EXPORT_TAGS = qw(); |
|
32 } |
|
33 |
|
34 1; |
|
35 |
|
36 ################################################################################ |
|
37 # version |
|
38 # |
|
39 # class/object method expecting no arguments and returning the version number |
|
40 # of Text::CSV. there are no side-effects. |
|
41 ################################################################################ |
|
42 sub version { |
|
43 return $VERSION; |
|
44 } |
|
45 |
|
46 ################################################################################ |
|
47 # new |
|
48 # |
|
49 # class/object method expecting no arguments and returning a reference to a |
|
50 # newly created Text::CSV object. |
|
51 ################################################################################ |
|
52 sub new { |
|
53 my $proto = shift; |
|
54 my $class = ref($proto) || $proto; |
|
55 my $self = {}; |
|
56 $self->{'_STATUS'} = undef; |
|
57 $self->{'_ERROR_INPUT'} = undef; |
|
58 $self->{'_STRING'} = undef; |
|
59 $self->{'_FIELDS'} = undef; |
|
60 bless $self, $class; |
|
61 return $self; |
|
62 } |
|
63 |
|
64 ################################################################################ |
|
65 # status |
|
66 # |
|
67 # object method returning the success or failure of the most recent combine() |
|
68 # or parse(). there are no side-effects. |
|
69 ################################################################################ |
|
70 sub status { |
|
71 my $self = shift; |
|
72 return $self->{'_STATUS'}; |
|
73 } |
|
74 |
|
75 ################################################################################ |
|
76 # error_input |
|
77 # |
|
78 # object method returning the first invalid argument to the most recent |
|
79 # combine() or parse(). there are no side-effects. |
|
80 ################################################################################ |
|
81 sub error_input { |
|
82 my $self = shift; |
|
83 return $self->{'_ERROR_INPUT'}; |
|
84 } |
|
85 |
|
86 ################################################################################ |
|
87 # string |
|
88 # |
|
89 # object method returning the result of the most recent combine() or the |
|
90 # input to the most recent parse(), whichever is more recent. there are no |
|
91 # side-effects. |
|
92 ################################################################################ |
|
93 sub string { |
|
94 my $self = shift; |
|
95 return $self->{'_STRING'}; |
|
96 } |
|
97 |
|
98 ################################################################################ |
|
99 # fields |
|
100 # |
|
101 # object method returning the result of the most recent parse() or the input |
|
102 # to the most recent combine(), whichever is more recent. there are no |
|
103 # side-effects. |
|
104 ################################################################################ |
|
105 sub fields { |
|
106 my $self = shift; |
|
107 if (ref($self->{'_FIELDS'})) { |
|
108 return @{$self->{'_FIELDS'}}; |
|
109 } |
|
110 return undef; |
|
111 } |
|
112 |
|
113 ################################################################################ |
|
114 # combine |
|
115 # |
|
116 # object method returning success or failure. the given arguments are |
|
117 # combined into a single comma-separated value. failure can be the result of |
|
118 # no arguments or an argument containing an invalid character. side-effects |
|
119 # include: |
|
120 # setting status() |
|
121 # setting fields() |
|
122 # setting string() |
|
123 # setting error_input() |
|
124 ################################################################################ |
|
125 sub combine { |
|
126 my $self = shift; |
|
127 my @part = @_; |
|
128 $self->{'_FIELDS'} = \@part; |
|
129 $self->{'_ERROR_INPUT'} = undef; |
|
130 $self->{'_STATUS'} = 0; |
|
131 $self->{'_STRING'} = ''; |
|
132 my $column = ''; |
|
133 my $combination = ''; |
|
134 my $skip_comma = 1; |
|
135 if ($#part >= 0) { |
|
136 |
|
137 # at least one argument was given for "combining"... |
|
138 for $column (@part) { |
|
139 # if ($column =~ /[^\t\040-\176]/) { |
|
140 # |
|
141 # # an argument contained an invalid character... |
|
142 # $self->{'_ERROR_INPUT'} = $column; |
|
143 # return $self->{'_STATUS'}; |
|
144 # } |
|
145 if ($skip_comma) { |
|
146 |
|
147 # do not put a comma before the first argument... |
|
148 $skip_comma = 0; |
|
149 } else { |
|
150 |
|
151 # do put a comma before all arguments except the first argument... |
|
152 $combination .= ','; |
|
153 } |
|
154 $column =~ s/\042/\042\042/go; |
|
155 $combination .= "\042"; |
|
156 $combination .= $column; |
|
157 $combination .= "\042"; |
|
158 } |
|
159 $self->{'_STRING'} = $combination; |
|
160 $self->{'_STATUS'} = 1; |
|
161 } |
|
162 return $self->{'_STATUS'}; |
|
163 } |
|
164 |
|
165 ################################################################################ |
|
166 # parse |
|
167 # |
|
168 # object method returning success or failure. the given argument is expected |
|
169 # to be a valid comma-separated value. failure can be the result of |
|
170 # no arguments or an argument containing an invalid sequence of characters. |
|
171 # side-effects include: |
|
172 # setting status() |
|
173 # setting fields() |
|
174 # setting string() |
|
175 # setting error_input() |
|
176 ################################################################################ |
|
177 sub parse { |
|
178 my $self = shift; |
|
179 $self->{'_STRING'} = shift; |
|
180 $self->{'_FIELDS'} = undef; |
|
181 $self->{'_ERROR_INPUT'} = $self->{'_STRING'}; |
|
182 $self->{'_STATUS'} = 0; |
|
183 if (!defined($self->{'_STRING'})) { |
|
184 return $self->{'_STATUS'}; |
|
185 } |
|
186 my $keep_biting = 1; |
|
187 my $palatable = 0; |
|
188 my $line = $self->{'_STRING'}; |
|
189 if ($line =~ /\n$/) { |
|
190 chop($line); |
|
191 if ($line =~ /\r$/) { |
|
192 chop($line); |
|
193 } |
|
194 } |
|
195 my $mouthful = ''; |
|
196 my @part = (); |
|
197 while ($keep_biting and ($palatable = $self->_bite(\$line, \$mouthful, \$keep_biting))) { |
|
198 push(@part, $mouthful); |
|
199 } |
|
200 if ($palatable) { |
|
201 $self->{'_ERROR_INPUT'} = undef; |
|
202 $self->{'_FIELDS'} = \@part; |
|
203 } |
|
204 return $self->{'_STATUS'} = $palatable; |
|
205 } |
|
206 |
|
207 ################################################################################ |
|
208 # _bite |
|
209 # |
|
210 # *private* class/object method returning success or failure. the arguments |
|
211 # are: |
|
212 # - a reference to a comma-separated value string |
|
213 # - a reference to a return string |
|
214 # - a reference to a return boolean |
|
215 # upon success the first comma-separated value of the csv string is |
|
216 # transferred to the return string and the boolean is set to true if a comma |
|
217 # followed that value. in other words, "bite" one value off of csv |
|
218 # returning the remaining string, the "piece" bitten, and if there's any |
|
219 # more. failure can be the result of the csv string containing an invalid |
|
220 # sequence of characters. |
|
221 # |
|
222 # from the csv string and |
|
223 # to be a valid comma-separated value. failure can be the result of |
|
224 # no arguments or an argument containing an invalid sequence of characters. |
|
225 # side-effects include: |
|
226 # setting status() |
|
227 # setting fields() |
|
228 # setting string() |
|
229 # setting error_input() |
|
230 ################################################################################ |
|
231 sub _bite { |
|
232 my ($self, $line_ref, $piece_ref, $bite_again_ref) = @_; |
|
233 my $in_quotes = 0; |
|
234 my $ok = 0; |
|
235 $$piece_ref = ''; |
|
236 $$bite_again_ref = 0; |
|
237 while (1) { |
|
238 if (length($$line_ref) < 1) { |
|
239 |
|
240 # end of string... |
|
241 if ($in_quotes) { |
|
242 |
|
243 # end of string, missing closing double-quote... |
|
244 last; |
|
245 } else { |
|
246 |
|
247 # proper end of string... |
|
248 $ok = 1; |
|
249 last; |
|
250 } |
|
251 } elsif ($$line_ref =~ /^\042/) { |
|
252 |
|
253 # double-quote... |
|
254 if ($in_quotes) { |
|
255 if (length($$line_ref) == 1) { |
|
256 |
|
257 # closing double-quote at end of string... |
|
258 substr($$line_ref, 0, 1) = ''; |
|
259 $ok = 1; |
|
260 last; |
|
261 } elsif ($$line_ref =~ /^\042\042/) { |
|
262 |
|
263 # an embedded double-quote... |
|
264 $$piece_ref .= "\042"; |
|
265 substr($$line_ref, 0, 2) = ''; |
|
266 } elsif ($$line_ref =~ /^\042,/) { |
|
267 |
|
268 # closing double-quote followed by a comma... |
|
269 substr($$line_ref, 0, 2) = ''; |
|
270 $$bite_again_ref = 1; |
|
271 $ok = 1; |
|
272 last; |
|
273 } else { |
|
274 |
|
275 # double-quote, followed by undesirable character (bad character sequence)... |
|
276 last; |
|
277 } |
|
278 } else { |
|
279 if (length($$piece_ref) < 1) { |
|
280 |
|
281 # starting double-quote at beginning of string |
|
282 $in_quotes = 1; |
|
283 substr($$line_ref, 0, 1) = ''; |
|
284 } else { |
|
285 |
|
286 # double-quote, outside of double-quotes (bad character sequence)... |
|
287 last; |
|
288 } |
|
289 } |
|
290 } elsif ($$line_ref =~ /^,/) { |
|
291 |
|
292 # comma... |
|
293 if ($in_quotes) { |
|
294 |
|
295 # a comma, inside double-quotes... |
|
296 $$piece_ref .= substr($$line_ref, 0 ,1); |
|
297 substr($$line_ref, 0, 1) = ''; |
|
298 } else { |
|
299 |
|
300 # a comma, which separates values... |
|
301 substr($$line_ref, 0, 1) = ''; |
|
302 $$bite_again_ref = 1; |
|
303 $ok = 1; |
|
304 last; |
|
305 } |
|
306 } elsif ($$line_ref =~ /^[\t\040-\176]/) { |
|
307 |
|
308 # a tab, space, or printable... |
|
309 $$piece_ref .= substr($$line_ref, 0 ,1); |
|
310 substr($$line_ref, 0, 1) = ''; |
|
311 } else { |
|
312 |
|
313 # an undesirable character... But use it anyway |
|
314 $$piece_ref .= substr($$line_ref, 0 ,1); |
|
315 substr($$line_ref, 0, 1) = ''; |
|
316 } |
|
317 } |
|
318 return $ok; |
|
319 } |
|
320 |
|
321 =head1 NAME |
|
322 |
|
323 Text::CSV - comma-separated values manipulation routines |
|
324 |
|
325 =head1 SYNOPSIS |
|
326 |
|
327 use Text::CSV; |
|
328 |
|
329 $version = Text::CSV->version(); # get the module version |
|
330 |
|
331 $csv = Text::CSV->new(); # create a new object |
|
332 |
|
333 $status = $csv->combine(@columns); # combine columns into a string |
|
334 $line = $csv->string(); # get the combined string |
|
335 |
|
336 $status = $csv->parse($line); # parse a CSV string into fields |
|
337 @columns = $csv->fields(); # get the parsed fields |
|
338 |
|
339 $status = $csv->status(); # get the most recent status |
|
340 $bad_argument = $csv->error_input(); # get the most recent bad argument |
|
341 |
|
342 =head1 DESCRIPTION |
|
343 |
|
344 Text::CSV provides facilities for the composition and decomposition of |
|
345 comma-separated values. An instance of the Text::CSV class can combine |
|
346 fields into a CSV string and parse a CSV string into fields. |
|
347 |
|
348 =head1 FUNCTIONS |
|
349 |
|
350 =over 4 |
|
351 |
|
352 =item version |
|
353 |
|
354 $version = Text::CSV->version(); |
|
355 |
|
356 This function may be called as a class or an object method. It returns the current |
|
357 module version. |
|
358 |
|
359 =item new |
|
360 |
|
361 $csv = Text::CSV->new(); |
|
362 |
|
363 This function may be called as a class or an object method. It returns a reference to a |
|
364 newly created Text::CSV object. |
|
365 |
|
366 =item combine |
|
367 |
|
368 $status = $csv->combine(@columns); |
|
369 |
|
370 This object function constructs a CSV string from the arguments, returning |
|
371 success or failure. Failure can result from lack of arguments or an argument |
|
372 containing an invalid character. Upon success, C<string()> can be called to |
|
373 retrieve the resultant CSV string. Upon failure, the value returned by |
|
374 C<string()> is undefined and C<error_input()> can be called to retrieve an |
|
375 invalid argument. |
|
376 |
|
377 =item string |
|
378 |
|
379 $line = $csv->string(); |
|
380 |
|
381 This object function returns the input to C<parse()> or the resultant CSV string of |
|
382 C<combine()>, whichever was called more recently. |
|
383 |
|
384 =item parse |
|
385 |
|
386 $status = $csv->parse($line); |
|
387 |
|
388 This object function decomposes a CSV string into fields, returning |
|
389 success or failure. Failure can result from a lack of argument or the given CSV |
|
390 string is improperly formatted. Upon success, C<fields()> can be called to |
|
391 retrieve the decomposed fields . Upon failure, the value returned by |
|
392 C<fields()> is undefined and C<error_input()> can be called to retrieve the |
|
393 invalid argument. |
|
394 |
|
395 =item fields |
|
396 |
|
397 @columns = $csv->fields(); |
|
398 |
|
399 This object function returns the input to C<combine()> or the resultant decomposed |
|
400 fields of C<parse()>, whichever was called more recently. |
|
401 |
|
402 =item status |
|
403 |
|
404 $status = $csv->status(); |
|
405 |
|
406 This object function returns success (or failure) of C<combine()> or C<parse()>, |
|
407 whichever was called more recently. |
|
408 |
|
409 =item error_input |
|
410 |
|
411 $bad_argument = $csv->error_input(); |
|
412 |
|
413 This object function returns the erroneous argument (if it exists) of C<combine()> |
|
414 or C<parse()>, whichever was called more recently. |
|
415 |
|
416 =back |
|
417 |
|
418 =head1 EXAMPLE |
|
419 |
|
420 require Text::CSV; |
|
421 |
|
422 my $csv = Text::CSV->new; |
|
423 |
|
424 my $column = ''; |
|
425 my $sample_input_string = '"I said, ""Hi!""",Yes,"",2.34,,"1.09"'; |
|
426 if ($csv->parse($sample_input_string)) { |
|
427 my @field = $csv->fields; |
|
428 my $count = 0; |
|
429 for $column (@field) { |
|
430 print ++$count, " => ", $column, "\n"; |
|
431 } |
|
432 print "\n"; |
|
433 } else { |
|
434 my $err = $csv->error_input; |
|
435 print "parse() failed on argument: ", $err, "\n"; |
|
436 } |
|
437 |
|
438 my @sample_input_fields = ('You said, "Hello!"', |
|
439 5.67, |
|
440 'Surely', |
|
441 '', |
|
442 '3.14159'); |
|
443 if ($csv->combine(@sample_input_fields)) { |
|
444 my $string = $csv->string; |
|
445 print $string, "\n"; |
|
446 } else { |
|
447 my $err = $csv->error_input; |
|
448 print "combine() failed on argument: ", $err, "\n"; |
|
449 } |
|
450 |
|
451 =head1 CAVEATS |
|
452 |
|
453 This module is based upon a working definition of CSV format which may not be |
|
454 the most general. |
|
455 |
|
456 =over 4 |
|
457 |
|
458 =item 1 |
|
459 |
|
460 Allowable characters within a CSV field include 0x09 (tab) and the inclusive |
|
461 range of 0x20 (space) through 0x7E (tilde). |
|
462 |
|
463 =item 2 |
|
464 |
|
465 A field within CSV may be surrounded by double-quotes. |
|
466 |
|
467 =item 3 |
|
468 |
|
469 A field within CSV must be surrounded by double-quotes to contain a comma. |
|
470 |
|
471 =item 4 |
|
472 |
|
473 A field within CSV must be surrounded by double-quotes to contain an embedded |
|
474 double-quote, represented by a pair of consecutive double-quotes. |
|
475 |
|
476 =item 5 |
|
477 |
|
478 A CSV string may be terminated by 0x0A (line feed) or by 0x0D,0x0A |
|
479 (carriage return, line feed). |
|
480 |
|
481 =head1 AUTHOR |
|
482 |
|
483 Alan Citterman F<E<lt>alan@mfgrtl.comE<gt>> |
|
484 |
|
485 =head1 SEE ALSO |
|
486 |
|
487 perl(1) |
|
488 |
|
489 =cut |