|
1 # |
|
2 # Copyright (C) 1999 Ken MacLeod |
|
3 # Portions derived from code in XML::Writer by David Megginson |
|
4 # XML::Handler::XMLWriter is free software; you can redistribute it and/or |
|
5 # modify it under the same terms as Perl itself. |
|
6 # |
|
7 # $Id: XMLWriter.pm,v 1.2 1999/12/22 21:15:00 kmacleod Exp $ |
|
8 # |
|
9 |
|
10 use strict; |
|
11 |
|
12 package XML::Handler::XMLWriter; |
|
13 use XML::Handler::Subs; |
|
14 |
|
15 use vars qw{ $VERSION @ISA $escapes }; |
|
16 |
|
17 # will be substituted by make-rel script |
|
18 $VERSION = "0.07"; |
|
19 |
|
20 @ISA = qw{ XML::Handler::Subs }; |
|
21 |
|
22 $escapes = { '&' => '&', |
|
23 '<' => '<', |
|
24 '>' => '>', |
|
25 '"' => '"' |
|
26 }; |
|
27 |
|
28 sub start_document { |
|
29 my ($self, $document) = @_; |
|
30 |
|
31 $self->SUPER::start_document($document); |
|
32 |
|
33 # create a temporary Output_ in case we're creating a standard |
|
34 # output file that we'll delete later. |
|
35 if (!$self->{AsString} && !defined($self->{Output})) { |
|
36 require IO::File; |
|
37 import IO::File; |
|
38 $self->{Output_} = new IO::File(">-"); |
|
39 } elsif (defined($self->{Output})) { |
|
40 $self->{Output_} = $self->{Output}; |
|
41 } |
|
42 |
|
43 if ($self->{AsString}) { |
|
44 $self->{Strings} = []; |
|
45 } |
|
46 |
|
47 $self->print("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); |
|
48 |
|
49 # FIXME support Doctype declarations |
|
50 } |
|
51 |
|
52 sub end_document { |
|
53 my ($self, $document) = @_; |
|
54 |
|
55 if (defined($self->{Output_})) { |
|
56 $self->{Output_}->print("\n"); |
|
57 delete $self->{Output_}; |
|
58 } |
|
59 |
|
60 my $string = undef; |
|
61 if (defined($self->{AsString})) { |
|
62 push @{$self->{Strings}}, "\n"; |
|
63 $string = join('', @{$self->{Strings}}); |
|
64 delete $self->{Strings}; |
|
65 } |
|
66 |
|
67 $self->SUPER::end_document($document); |
|
68 |
|
69 return($string); |
|
70 } |
|
71 |
|
72 sub start_element { |
|
73 my ($self, $element) = @_; |
|
74 |
|
75 if ($self->SUPER::start_element($element) == 0) { |
|
76 $self->print_start_element($element); |
|
77 } |
|
78 } |
|
79 |
|
80 sub print_start_element { |
|
81 my ($self, $element) = @_; |
|
82 |
|
83 my $output = "<$element->{Name}"; |
|
84 if (defined($element->{Attributes})) { |
|
85 foreach my $name (sort keys %{$element->{Attributes}}) { |
|
86 my $esc_value = $element->{Attributes}{$name}; |
|
87 $esc_value =~ s/([\&\<\>\"])/$escapes->{$1}/ge; |
|
88 $output .= " $name=\"$esc_value\""; |
|
89 } |
|
90 } |
|
91 |
|
92 if ($self->{Newlines}) { |
|
93 $output .= "\n"; |
|
94 } |
|
95 |
|
96 $output .= ">"; |
|
97 |
|
98 $self->print($output); |
|
99 } |
|
100 |
|
101 sub end_element { |
|
102 my ($self, $element) = @_; |
|
103 |
|
104 if ($self->SUPER::end_element($element) == 0) { |
|
105 $self->print_end_element($element); |
|
106 } |
|
107 } |
|
108 |
|
109 sub print_end_element { |
|
110 my ($self, $element) = @_; |
|
111 |
|
112 my $output = "</$element->{Name}" |
|
113 . ($self->{Newlines} ? "\n" : "") . ">"; |
|
114 |
|
115 $self->print($output); |
|
116 } |
|
117 sub characters { |
|
118 my ($self, $characters) = @_; |
|
119 |
|
120 my $output = $characters->{Data}; |
|
121 |
|
122 $output =~ s/([\&\<\>])/$escapes->{$1}/ge; |
|
123 |
|
124 $self->print($output); |
|
125 } |
|
126 |
|
127 sub processing_instruction { |
|
128 my ($self, $pi) = @_; |
|
129 |
|
130 my $nl = ($#{$self->{Names}} == -1) ? "\n" : ""; |
|
131 |
|
132 my $output; |
|
133 if ($self->{IsSGML}) { |
|
134 $output = "<?$pi->{Data}>\n"; |
|
135 } else { |
|
136 if ($pi->{Data}) { |
|
137 $output = "<?$pi->{Target} $pi->{Data}?>$nl"; |
|
138 } else { |
|
139 $output = "<?$pi->{Target}?>$nl"; |
|
140 } |
|
141 } |
|
142 |
|
143 $self->print($output); |
|
144 } |
|
145 |
|
146 sub ignorable_whitespace { |
|
147 my ($self, $whitespace) = @_; |
|
148 |
|
149 $self->print($whitespace->{Data}); |
|
150 } |
|
151 |
|
152 sub comment { |
|
153 my ($self, $comment) = @_; |
|
154 |
|
155 my $nl = ($#{$self->{Names}} == -1) ? "\n" : ""; |
|
156 |
|
157 my $output = "<!-- $comment->{Data} -->$nl"; |
|
158 |
|
159 $self->print($output); |
|
160 } |
|
161 |
|
162 sub print { |
|
163 my ($self, $output) = @_; |
|
164 |
|
165 $self->{Output_}->print($output) |
|
166 if (defined($self->{Output_})); |
|
167 |
|
168 push(@{$self->{Strings}}, $output) |
|
169 if (defined($self->{AsString})); |
|
170 } |
|
171 |
|
172 1; |
|
173 |
|
174 __END__ |
|
175 |
|
176 =head1 NAME |
|
177 |
|
178 XML::Handler::XMLWriter - a PerlSAX handler for writing readable XML |
|
179 |
|
180 =head1 SYNOPSIS |
|
181 |
|
182 use XML::Parser::PerlSAX; |
|
183 use XML::Handler::XMLWriter; |
|
184 |
|
185 $my_handler = XML::Handler::XMLWriter->new( I<OPTIONS> ); |
|
186 |
|
187 XML::Parser::PerlSAX->new->parse(Source => { SystemId => 'REC-xml-19980210.xml' }, |
|
188 Handler => $my_handler); |
|
189 |
|
190 =head1 DESCRIPTION |
|
191 |
|
192 C<XML::Handler::XMLWriter> is a PerlSAX handler for writing readable |
|
193 XML (in contrast to Canonical XML, for example). |
|
194 XML::Handler::XMLWriter can be used with a parser to reformat XML, |
|
195 with XML::DOM or XML::Grove to write out XML, or with other PerlSAX |
|
196 modules that generate events. |
|
197 |
|
198 C<XML::Handler::XMLWriter> is intended to be used with PerlSAX event |
|
199 generators and does not perform any checking itself (for example, |
|
200 matching start and end element events). If you want to generate XML |
|
201 directly from your Perl code, use the XML::Writer module. XML::Writer |
|
202 has an easy to use interface and performs many checks to make sure |
|
203 that the XML you generate is well-formed. |
|
204 |
|
205 C<XML::Handler::XMLWriter> is a subclass of C<XML::Handler::Subs>. |
|
206 C<XML::Handler::XMLWriter> can be further subclassed to alter it's |
|
207 behavior or to add element-specific handling. In the subclass, each |
|
208 time an element starts, a method by that name prefixed with `s_' is |
|
209 called with the element to be processed. Each time an element ends, a |
|
210 method with that name prefixed with `e_' is called. Any special |
|
211 characters in the element name are replaced by underscores. If there |
|
212 isn't a start or end method for an element, the default action is to |
|
213 write the start or end tag. Start and end methods can use the |
|
214 `C<print_start_element()>' and `C<print_end_element()>' methods to |
|
215 print start or end tags. Subclasses can call the `C<print()>' method |
|
216 to write additional output. |
|
217 |
|
218 Subclassing XML::Handler::XMLWriter in this way is similar to |
|
219 XML::Parser's Stream style. |
|
220 |
|
221 XML::Handler::Subs maintains a stack of element names, |
|
222 `C<$self->{Names}', and a stack of element nodes, `C<$self->{Nodes}>' |
|
223 that can be used by subclasses. The current element is pushed on the |
|
224 stacks before calling an element-name start method and popped off the |
|
225 stacks after calling the element-name end method. |
|
226 |
|
227 See XML::Handler::Subs for additional methods. |
|
228 |
|
229 In addition to the standard PerlSAX handler methods (see PerlSAX for |
|
230 descriptions), XML::Handler::XMLWriter supports the following methods: |
|
231 |
|
232 =over 4 |
|
233 |
|
234 =item new( I<OPTIONS> ) |
|
235 |
|
236 Creates and returns a new instance of XML::Handler::XMLWriter with the |
|
237 given I<OPTIONS>. Options may be changed at any time by modifying |
|
238 them directly in the hash returned. I<OPTIONS> can be a list of key, |
|
239 value pairs or a hash. The following I<OPTIONS> are supported: |
|
240 |
|
241 =over 4 |
|
242 |
|
243 =item Output |
|
244 |
|
245 An IO::Handle or one of it's subclasses (such as IO::File), if this |
|
246 parameter is not present and the AsString option is not used, the |
|
247 module will write to standard output. |
|
248 |
|
249 =item AsString |
|
250 |
|
251 Return the generated XML as a string from the `C<parse()>' method of |
|
252 the PerlSAX event generator. |
|
253 |
|
254 =item Newlines |
|
255 |
|
256 A true or false value; if this parameter is present and its value is |
|
257 true, then the module will insert an extra newline before the closing |
|
258 delimiter of start, end, and empty tags to guarantee that the document |
|
259 does not end up as a single, long line. If the paramter is not |
|
260 present, the module will not insert the newlines. |
|
261 |
|
262 =item IsSGML |
|
263 |
|
264 A true or false value; if this parameter is present and its value is |
|
265 true, then the module will generate SGML rather than XML. |
|
266 |
|
267 =back |
|
268 |
|
269 =item print_start_element($element) |
|
270 |
|
271 Print a start tag for `C<$element>'. This is the default action for |
|
272 the PerlSAX `C<start_element()>' handler, but subclasses may use this |
|
273 if they define a start method for an element. |
|
274 |
|
275 =item print_end_element($element) |
|
276 |
|
277 Prints an end tag for `C<$element>'. This is the default action for |
|
278 the PerlSAX `C<end_element()>' handler, but subclasses may use this |
|
279 if they define a start method for an element. |
|
280 |
|
281 =item print($output) |
|
282 |
|
283 Write `C<$output>' to Output and/or append it to the string to be |
|
284 returned. Subclasses may use this to write additional output. |
|
285 |
|
286 =back |
|
287 |
|
288 =head1 TODO |
|
289 |
|
290 =over 4 |
|
291 |
|
292 =item * |
|
293 |
|
294 An Elements option that provides finer control over newlines than the |
|
295 Newlines option, where you can choose before and after newline for |
|
296 element start and end tags. Inspired by the Python XMLWriter. |
|
297 |
|
298 =item * |
|
299 |
|
300 Support Doctype and XML declarations. |
|
301 |
|
302 =back |
|
303 |
|
304 =head1 AUTHOR |
|
305 |
|
306 Ken MacLeod, ken@bitsko.slc.ut.us |
|
307 This module is partially derived from XML::Writer by David Megginson. |
|
308 |
|
309 =head1 SEE ALSO |
|
310 |
|
311 perl(1), PerlSAX.pod(3) |
|
312 |
|
313 =cut |