package Text::CSV;# Copyright (c) 1997 Alan Citterman. All rights reserved.# This program is free software; you can redistribute it and/or# modify it under the same terms as Perl itself.################################################################################# HISTORY## Written by:# Alan Citterman <alan@mfgrtl.com>## Version 0.01 06/05/1997# original version################################################################################require 5.002;use strict;BEGIN { use Exporter (); use AutoLoader qw(AUTOLOAD); use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); $VERSION = '0.01'; @ISA = qw(Exporter AutoLoader); @EXPORT = qw(); @EXPORT_OK = qw(); %EXPORT_TAGS = qw();}1;__END__################################################################################# version## class/object method expecting no arguments and returning the version number# of Text::CSV. there are no side-effects.################################################################################sub version { return $VERSION;}################################################################################# new## class/object method expecting no arguments and returning a reference to a# newly created Text::CSV object.################################################################################sub new { my $proto = shift; my $class = ref($proto) || $proto; my $self = {}; $self->{'_STATUS'} = undef; $self->{'_ERROR_INPUT'} = undef; $self->{'_STRING'} = undef; $self->{'_FIELDS'} = undef; bless $self, $class; return $self;}################################################################################# status## object method returning the success or failure of the most recent combine()# or parse(). there are no side-effects.################################################################################sub status { my $self = shift; return $self->{'_STATUS'};}################################################################################# error_input## object method returning the first invalid argument to the most recent# combine() or parse(). there are no side-effects.################################################################################sub error_input { my $self = shift; return $self->{'_ERROR_INPUT'};}################################################################################# string## object method returning the result of the most recent combine() or the# input to the most recent parse(), whichever is more recent. there are no# side-effects.################################################################################sub string { my $self = shift; return $self->{'_STRING'};}################################################################################# fields## object method returning the result of the most recent parse() or the input# to the most recent combine(), whichever is more recent. there are no# side-effects.################################################################################sub fields { my $self = shift; if (ref($self->{'_FIELDS'})) { return @{$self->{'_FIELDS'}}; } return undef;}################################################################################# combine## object method returning success or failure. the given arguments are# combined into a single comma-separated value. failure can be the result of# no arguments or an argument containing an invalid character. side-effects# include:# setting status()# setting fields()# setting string()# setting error_input()################################################################################sub combine { my $self = shift; my @part = @_; $self->{'_FIELDS'} = \@part; $self->{'_ERROR_INPUT'} = undef; $self->{'_STATUS'} = 0; $self->{'_STRING'} = ''; my $column = ''; my $combination = ''; my $skip_comma = 1; if ($#part >= 0) { # at least one argument was given for "combining"... for $column (@part) { if ($column =~ /[^\t\040-\176]/) { # an argument contained an invalid character... $self->{'_ERROR_INPUT'} = $column; return $self->{'_STATUS'}; } if ($skip_comma) { # do not put a comma before the first argument... $skip_comma = 0; } else { # do put a comma before all arguments except the first argument... $combination .= ','; } $column =~ s/\042/\042\042/go; $combination .= "\042"; $combination .= $column; $combination .= "\042"; } $self->{'_STRING'} = $combination; $self->{'_STATUS'} = 1; } return $self->{'_STATUS'};}################################################################################# parse## object method returning success or failure. the given argument is expected# to be a valid comma-separated value. failure can be the result of# no arguments or an argument containing an invalid sequence of characters.# side-effects include:# setting status()# setting fields()# setting string()# setting error_input()################################################################################sub parse { my $self = shift; $self->{'_STRING'} = shift; $self->{'_FIELDS'} = undef; $self->{'_ERROR_INPUT'} = $self->{'_STRING'}; $self->{'_STATUS'} = 0; if (!defined($self->{'_STRING'})) { return $self->{'_STATUS'}; } my $keep_biting = 1; my $palatable = 0; my $line = $self->{'_STRING'}; if ($line =~ /\n$/) { chop($line); if ($line =~ /\r$/) { chop($line); } } my $mouthful = ''; my @part = (); while ($keep_biting and ($palatable = $self->_bite(\$line, \$mouthful, \$keep_biting))) { push(@part, $mouthful); } if ($palatable) { $self->{'_ERROR_INPUT'} = undef; $self->{'_FIELDS'} = \@part; } return $self->{'_STATUS'} = $palatable;}################################################################################# _bite## *private* class/object method returning success or failure. the arguments# are:# - a reference to a comma-separated value string# - a reference to a return string# - a reference to a return boolean# upon success the first comma-separated value of the csv string is# transferred to the return string and the boolean is set to true if a comma# followed that value. in other words, "bite" one value off of csv# returning the remaining string, the "piece" bitten, and if there's any# more. failure can be the result of the csv string containing an invalid# sequence of characters.# # from the csv string and # to be a valid comma-separated value. failure can be the result of# no arguments or an argument containing an invalid sequence of characters.# side-effects include:# setting status()# setting fields()# setting string()# setting error_input()################################################################################sub _bite { my ($self, $line_ref, $piece_ref, $bite_again_ref) = @_; my $in_quotes = 0; my $ok = 0; $$piece_ref = ''; $$bite_again_ref = 0; while (1) { if (length($$line_ref) < 1) { # end of string... if ($in_quotes) { # end of string, missing closing double-quote... last; } else { # proper end of string... $ok = 1; last; } } elsif ($$line_ref =~ /^\042/) { # double-quote... if ($in_quotes) { if (length($$line_ref) == 1) { # closing double-quote at end of string... substr($$line_ref, 0, 1) = ''; $ok = 1; last; } elsif ($$line_ref =~ /^\042\042/) { # an embedded double-quote... $$piece_ref .= "\042"; substr($$line_ref, 0, 2) = ''; } elsif ($$line_ref =~ /^\042,/) { # closing double-quote followed by a comma... substr($$line_ref, 0, 2) = ''; $$bite_again_ref = 1; $ok = 1; last; } else { # double-quote, followed by undesirable character (bad character sequence)... last; } } else { if (length($$piece_ref) < 1) { # starting double-quote at beginning of string $in_quotes = 1; substr($$line_ref, 0, 1) = ''; } else { # double-quote, outside of double-quotes (bad character sequence)... last; } } } elsif ($$line_ref =~ /^,/) { # comma... if ($in_quotes) { # a comma, inside double-quotes... $$piece_ref .= substr($$line_ref, 0 ,1); substr($$line_ref, 0, 1) = ''; } else { # a comma, which separates values... substr($$line_ref, 0, 1) = ''; $$bite_again_ref = 1; $ok = 1; last; } } elsif ($$line_ref =~ /^[\t\040-\176]/) { # a tab, space, or printable... $$piece_ref .= substr($$line_ref, 0 ,1); substr($$line_ref, 0, 1) = ''; } else { # an undesirable character... last; } } return $ok;}=head1 NAMEText::CSV - comma-separated values manipulation routines=head1 SYNOPSIS use Text::CSV; $version = Text::CSV->version(); # get the module version $csv = Text::CSV->new(); # create a new object $status = $csv->combine(@columns); # combine columns into a string $line = $csv->string(); # get the combined string $status = $csv->parse($line); # parse a CSV string into fields @columns = $csv->fields(); # get the parsed fields $status = $csv->status(); # get the most recent status $bad_argument = $csv->error_input(); # get the most recent bad argument=head1 DESCRIPTIONText::CSV provides facilities for the composition and decomposition ofcomma-separated values. An instance of the Text::CSV class can combinefields into a CSV string and parse a CSV string into fields.=head1 FUNCTIONS=over 4=item version $version = Text::CSV->version();This function may be called as a class or an object method. It returns the currentmodule version.=item new $csv = Text::CSV->new();This function may be called as a class or an object method. It returns a reference to anewly created Text::CSV object.=item combine $status = $csv->combine(@columns);This object function constructs a CSV string from the arguments, returningsuccess or failure. Failure can result from lack of arguments or an argumentcontaining an invalid character. Upon success, C<string()> can be called toretrieve the resultant CSV string. Upon failure, the value returned byC<string()> is undefined and C<error_input()> can be called to retrieve aninvalid argument.=item string $line = $csv->string();This object function returns the input to C<parse()> or the resultant CSV string ofC<combine()>, whichever was called more recently.=item parse $status = $csv->parse($line);This object function decomposes a CSV string into fields, returningsuccess or failure. Failure can result from a lack of argument or the given CSVstring is improperly formatted. Upon success, C<fields()> can be called toretrieve the decomposed fields . Upon failure, the value returned byC<fields()> is undefined and C<error_input()> can be called to retrieve theinvalid argument.=item fields @columns = $csv->fields();This object function returns the input to C<combine()> or the resultant decomposedfields of C<parse()>, whichever was called more recently.=item status $status = $csv->status();This object function returns success (or failure) of C<combine()> or C<parse()>,whichever was called more recently.=item error_input $bad_argument = $csv->error_input();This object function returns the erroneous argument (if it exists) of C<combine()>or C<parse()>, whichever was called more recently.=back=head1 EXAMPLE require Text::CSV; my $csv = Text::CSV->new; my $column = ''; my $sample_input_string = '"I said, ""Hi!""",Yes,"",2.34,,"1.09"'; if ($csv->parse($sample_input_string)) { my @field = $csv->fields; my $count = 0; for $column (@field) { print ++$count, " => ", $column, "\n"; } print "\n"; } else { my $err = $csv->error_input; print "parse() failed on argument: ", $err, "\n"; } my @sample_input_fields = ('You said, "Hello!"', 5.67, 'Surely', '', '3.14159'); if ($csv->combine(@sample_input_fields)) { my $string = $csv->string; print $string, "\n"; } else { my $err = $csv->error_input; print "combine() failed on argument: ", $err, "\n"; }=head1 CAVEATSThis module is based upon a working definition of CSV format which may not bethe most general.=over 4=item 1 Allowable characters within a CSV field include 0x09 (tab) and the inclusiverange of 0x20 (space) through 0x7E (tilde).=item 2A field within CSV may be surrounded by double-quotes.=item 3A field within CSV must be surrounded by double-quotes to contain a comma.=item 4A field within CSV must be surrounded by double-quotes to contain an embeddeddouble-quote, represented by a pair of consecutive double-quotes.=item 5A CSV string may be terminated by 0x0A (line feed) or by 0x0D,0x0A(carriage return, line feed).=head1 AUTHORAlan Citterman F<E<lt>alan@mfgrtl.comE<gt>>=head1 SEE ALSOperl(1)=cut