1 /* |
|
2 * Licensed to the Apache Software Foundation (ASF) under one or more |
|
3 * contributor license agreements. See the NOTICE file distributed with |
|
4 * this work for additional information regarding copyright ownership. |
|
5 * The ASF licenses this file to You under the Apache License, Version 2.0 |
|
6 * (the "License"); you may not use this file except in compliance with |
|
7 * the License. You may obtain a copy of the License at |
|
8 * |
|
9 * http://www.apache.org/licenses/LICENSE-2.0 |
|
10 * |
|
11 * Unless required by applicable law or agreed to in writing, software |
|
12 * distributed under the License is distributed on an "AS IS" BASIS, |
|
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 * See the License for the specific language governing permissions and |
|
15 * limitations under the License. |
|
16 */ |
|
17 |
|
18 /* |
|
19 * $Id: XMLReader.hpp 568078 2007-08-21 11:43:25Z amassari $ |
|
20 */ |
|
21 |
|
22 #if !defined(XMLREADER_HPP) |
|
23 #define XMLREADER_HPP |
|
24 |
|
25 #include <xercesc/util/XMLChar.hpp> |
|
26 #include <xercesc/framework/XMLRecognizer.hpp> |
|
27 #include <xercesc/framework/XMLBuffer.hpp> |
|
28 #include <xercesc/util/TranscodingException.hpp> |
|
29 |
|
30 XERCES_CPP_NAMESPACE_BEGIN |
|
31 |
|
32 class InputSource; |
|
33 class BinInputStream; |
|
34 class ReaderMgr; |
|
35 class XMLScanner; |
|
36 class XMLTranscoder; |
|
37 |
|
38 |
|
39 // --------------------------------------------------------------------------- |
|
40 // Instances of this class are used to manage the content of entities. The |
|
41 // scanner maintains a stack of these, one for each entity (this means entity |
|
42 // in the sense of any parsed file or internal entity) currently being |
|
43 // scanned. This class, given a binary input stream will handle reading in |
|
44 // the data and decoding it from its external encoding into the internal |
|
45 // Unicode format. Once internalized, this class provides the access |
|
46 // methods to read in the data in various ways, maintains line and column |
|
47 // information, and provides high performance character attribute checking |
|
48 // methods. |
|
49 // |
|
50 // This is NOT to be derived from. |
|
51 // |
|
52 // --------------------------------------------------------------------------- |
|
53 class XMLPARSER_EXPORT XMLReader : public XMemory |
|
54 { |
|
55 public: |
|
56 // ----------------------------------------------------------------------- |
|
57 // Public types |
|
58 // ----------------------------------------------------------------------- |
|
59 enum Types |
|
60 { |
|
61 Type_PE |
|
62 , Type_General |
|
63 }; |
|
64 |
|
65 enum Sources |
|
66 { |
|
67 Source_Internal |
|
68 , Source_External |
|
69 }; |
|
70 |
|
71 enum RefFrom |
|
72 { |
|
73 RefFrom_Literal |
|
74 , RefFrom_NonLiteral |
|
75 }; |
|
76 |
|
77 enum XMLVersion |
|
78 { |
|
79 XMLV1_0 |
|
80 , XMLV1_1 |
|
81 , XMLV_Unknown |
|
82 }; |
|
83 |
|
84 |
|
85 // ----------------------------------------------------------------------- |
|
86 // Public, query methods |
|
87 // ----------------------------------------------------------------------- |
|
88 bool isAllSpaces |
|
89 ( |
|
90 const XMLCh* const toCheck |
|
91 , const unsigned int count |
|
92 ) const; |
|
93 |
|
94 bool containsWhiteSpace |
|
95 ( |
|
96 const XMLCh* const toCheck |
|
97 , const unsigned int count |
|
98 ) const; |
|
99 |
|
100 |
|
101 bool isXMLLetter(const XMLCh toCheck) const; |
|
102 bool isFirstNameChar(const XMLCh toCheck) const; |
|
103 bool isNameChar(const XMLCh toCheck) const; |
|
104 bool isPlainContentChar(const XMLCh toCheck) const; |
|
105 bool isSpecialStartTagChar(const XMLCh toCheck) const; |
|
106 bool isXMLChar(const XMLCh toCheck) const; |
|
107 bool isWhitespace(const XMLCh toCheck) const; |
|
108 bool isControlChar(const XMLCh toCheck) const; |
|
109 bool isPublicIdChar(const XMLCh toCheck) const; |
|
110 bool isFirstNCNameChar(const XMLCh toCheck) const; |
|
111 bool isNCNameChar(const XMLCh toCheck) const; |
|
112 |
|
113 // ----------------------------------------------------------------------- |
|
114 // Constructors and Destructor |
|
115 // ----------------------------------------------------------------------- |
|
116 XMLReader |
|
117 ( |
|
118 const XMLCh* const pubId |
|
119 , const XMLCh* const sysId |
|
120 , BinInputStream* const streamToAdopt |
|
121 , const RefFrom from |
|
122 , const Types type |
|
123 , const Sources source |
|
124 , const bool throwAtEnd = false |
|
125 , const bool calculateSrcOfs = true |
|
126 , const XMLVersion xmlVersion = XMLV1_0 |
|
127 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager |
|
128 ); |
|
129 |
|
130 XMLReader |
|
131 ( |
|
132 const XMLCh* const pubId |
|
133 , const XMLCh* const sysId |
|
134 , BinInputStream* const streamToAdopt |
|
135 , const XMLCh* const encodingStr |
|
136 , const RefFrom from |
|
137 , const Types type |
|
138 , const Sources source |
|
139 , const bool throwAtEnd = false |
|
140 , const bool calculateSrcOfs = true |
|
141 , const XMLVersion xmlVersion = XMLV1_0 |
|
142 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager |
|
143 ); |
|
144 |
|
145 XMLReader |
|
146 ( |
|
147 const XMLCh* const pubId |
|
148 , const XMLCh* const sysId |
|
149 , BinInputStream* const streamToAdopt |
|
150 , XMLRecognizer::Encodings encodingEnum |
|
151 , const RefFrom from |
|
152 , const Types type |
|
153 , const Sources source |
|
154 , const bool throwAtEnd = false |
|
155 , const bool calculateSrcOfs = true |
|
156 , const XMLVersion xmlVersion = XMLV1_0 |
|
157 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager |
|
158 ); |
|
159 |
|
160 ~XMLReader(); |
|
161 |
|
162 |
|
163 // ----------------------------------------------------------------------- |
|
164 // Character buffer management methods |
|
165 // ----------------------------------------------------------------------- |
|
166 unsigned long charsLeftInBuffer() const; |
|
167 bool refreshCharBuffer(); |
|
168 |
|
169 |
|
170 // ----------------------------------------------------------------------- |
|
171 // Scanning methods |
|
172 // ----------------------------------------------------------------------- |
|
173 bool getName(XMLBuffer& toFill, const bool token); |
|
174 bool getQName(XMLBuffer& toFill, int* colonPosition); |
|
175 bool getNextChar(XMLCh& chGotten); |
|
176 bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten); |
|
177 void movePlainContentChars(XMLBuffer &dest); |
|
178 bool getSpaces(XMLBuffer& toFill); |
|
179 bool getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck); |
|
180 bool peekNextChar(XMLCh& chGotten); |
|
181 bool skipIfQuote(XMLCh& chGotten); |
|
182 bool skipSpaces(bool& skippedSomething, bool inDecl = false); |
|
183 bool skippedChar(const XMLCh toSkip); |
|
184 bool skippedSpace(); |
|
185 bool skippedString(const XMLCh* const toSkip); |
|
186 bool peekString(const XMLCh* const toPeek); |
|
187 |
|
188 |
|
189 // ----------------------------------------------------------------------- |
|
190 // Getter methods |
|
191 // ----------------------------------------------------------------------- |
|
192 XMLSSize_t getColumnNumber() const; |
|
193 const XMLCh* getEncodingStr() const; |
|
194 XMLSSize_t getLineNumber() const; |
|
195 bool getNoMoreFlag() const; |
|
196 const XMLCh* getPublicId() const; |
|
197 unsigned int getReaderNum() const; |
|
198 RefFrom getRefFrom() const; |
|
199 Sources getSource() const; |
|
200 unsigned int getSrcOffset() const; |
|
201 const XMLCh* getSystemId() const; |
|
202 bool getThrowAtEnd() const; |
|
203 Types getType() const; |
|
204 |
|
205 |
|
206 // ----------------------------------------------------------------------- |
|
207 // Setter methods |
|
208 // ----------------------------------------------------------------------- |
|
209 bool setEncoding |
|
210 ( |
|
211 const XMLCh* const newEncoding |
|
212 ); |
|
213 void setReaderNum(const unsigned int newNum); |
|
214 void setThrowAtEnd(const bool newValue); |
|
215 void setXMLVersion(const XMLVersion version); |
|
216 |
|
217 |
|
218 private: |
|
219 // ----------------------------------------------------------------------- |
|
220 // Unimplemented constructors and operators |
|
221 // ----------------------------------------------------------------------- |
|
222 XMLReader(const XMLReader&); |
|
223 XMLReader& operator=(const XMLReader&); |
|
224 |
|
225 // --------------------------------------------------------------------------- |
|
226 // Class Constants |
|
227 // |
|
228 // kCharBufSize |
|
229 // The size of the character spool buffer that we use. Its not terribly |
|
230 // large because its just getting filled with data from a raw byte |
|
231 // buffer as we go along. We don't want to decode all the text at |
|
232 // once before we find out that there is an error. |
|
233 // |
|
234 // NOTE: This is a size in characters, not bytes. |
|
235 // |
|
236 // kRawBufSize |
|
237 // The size of the raw buffer from which raw bytes are spooled out |
|
238 // as we transcode chunks of data. As it is emptied, it is filled back |
|
239 // in again from the source stream. |
|
240 // --------------------------------------------------------------------------- |
|
241 enum Constants |
|
242 { |
|
243 kCharBufSize = 16 * 1024 |
|
244 , kRawBufSize = 48 * 1024 |
|
245 }; |
|
246 |
|
247 |
|
248 // ----------------------------------------------------------------------- |
|
249 // Private helper methods |
|
250 // ----------------------------------------------------------------------- |
|
251 void checkForSwapped(); |
|
252 |
|
253 void doInitCharSizeChecks(); |
|
254 |
|
255 void doInitDecode(); |
|
256 |
|
257 XMLByte getNextRawByte |
|
258 ( |
|
259 const bool eoiOk |
|
260 ); |
|
261 |
|
262 void refreshRawBuffer(); |
|
263 |
|
264 void setTranscoder |
|
265 ( |
|
266 const XMLCh* const newEncoding |
|
267 ); |
|
268 |
|
269 unsigned int xcodeMoreChars |
|
270 ( |
|
271 XMLCh* const bufToFill |
|
272 , unsigned char* const charSizes |
|
273 , const unsigned int maxChars |
|
274 ); |
|
275 |
|
276 void handleEOL |
|
277 ( |
|
278 XMLCh& curCh |
|
279 , bool inDecl = false |
|
280 ); |
|
281 |
|
282 // ----------------------------------------------------------------------- |
|
283 // Data members |
|
284 // |
|
285 // fCharIndex |
|
286 // The index into the character buffer. When this hits fCharsAvail |
|
287 // then its time to refill. |
|
288 // |
|
289 // fCharBuf |
|
290 // A buffer that the reader manager fills up with transcoded |
|
291 // characters a small amount at a time. |
|
292 // |
|
293 // fCharsAvail |
|
294 // The characters currently available in the character buffer. |
|
295 // |
|
296 // fCharSizeBuf |
|
297 // This buffer is an array that contains the number of source chars |
|
298 // eaten to create each char in the fCharBuf buffer. So the entry |
|
299 // fCharSizeBuf[x] is the number of source chars that were eaten |
|
300 // to make the internalized char fCharBuf[x]. This only contains |
|
301 // useful data if fSrcOfsSupported is true. |
|
302 // |
|
303 // fCharOfsBuf |
|
304 // This buffer is an array that contains the offset in the |
|
305 // fRawByteBuf buffer of each char in the fCharBuf buffer. It |
|
306 // only contains useful data if fSrcOfsSupported is true. |
|
307 // |
|
308 // fCurCol |
|
309 // fCurLine |
|
310 // The current line and column that we are in within this reader's |
|
311 // text. |
|
312 // |
|
313 // fEncoding |
|
314 // This is the rough encoding setting. This enum is set during |
|
315 // construction and just tells us the rough family of encoding that |
|
316 // we are doing. |
|
317 // |
|
318 // fEncodingStr |
|
319 // This is the name of the encoding we are using. It will be |
|
320 // provisionally set during construction, from the auto-sensed |
|
321 // encoding. But it might be overridden when the XMLDecl is finally |
|
322 // seen by the scanner. It can also be forced to a particular |
|
323 // encoding, in which case fForcedEncoding is set. |
|
324 // |
|
325 // fForcedEncoding |
|
326 // If the encoding if forced then this is set and all other |
|
327 // information will be ignored. This encoding will be taken as |
|
328 // gospel. This is done by calling an alternate constructor. |
|
329 // |
|
330 // fNoMore |
|
331 // This is set when the source text is exhausted. It lets us know |
|
332 // quickly that no more text is available. |
|
333 // |
|
334 // fRawBufIndex |
|
335 // The current index into the raw byte buffer. When its equal to |
|
336 // fRawBytesAvail then we need to read another buffer. |
|
337 // |
|
338 // fRawByteBuf |
|
339 // This is the raw byte buffer that is used to spool out bytes |
|
340 // from into the fCharBuf buffer, as we transcode in blocks. |
|
341 // |
|
342 // fRawBytesAvail |
|
343 // The number of bytes currently available in the raw buffer. This |
|
344 // helps deal with the last buffer's worth, which will usually not |
|
345 // be a full one. |
|
346 // |
|
347 // fReaderNum |
|
348 // Each reader from a particular reader manager (which means from a |
|
349 // particular document) is given a unique number. The reader manager |
|
350 // sets these numbers. They are used to catch things like partial |
|
351 // markup errors. |
|
352 // |
|
353 // fRefFrom |
|
354 // This flag is provided in the ctor, and tells us if we represent |
|
355 // some entity being expanded inside a literal. Sometimes things |
|
356 // happen differently inside and outside literals. |
|
357 // |
|
358 // fPublicId |
|
359 // fSystemId |
|
360 // These are the system and public ids of the source that this |
|
361 // reader is reading. |
|
362 // |
|
363 // fSentTrailingSpace |
|
364 // If we are a PE entity being read and we not referenced from a |
|
365 // literal, then a leading and trailing space must be faked into the |
|
366 // data. This lets us know we've done the trailing space already (so |
|
367 // we don't just keep doing it again and again.) |
|
368 // |
|
369 // fSource |
|
370 // Indicates whether the content this reader is spooling as already |
|
371 // been internalized. This will prevent multiple processing of |
|
372 // whitespace when an already internalized entity is being spooled |
|
373 // out. |
|
374 // |
|
375 // fSpareChar |
|
376 // Some encodings can create two chars in an atomic way, e.g. |
|
377 // surrogate pairs. We might not be able to store both, so we store |
|
378 // it here until the next buffer transcoding operation. |
|
379 // |
|
380 // fSrcOfsBase |
|
381 // This is the base offset within the source of this entity. Values |
|
382 // in the curent fCharSizeBuf array are relative to this value. |
|
383 // |
|
384 // fSrcOfsSupported |
|
385 // This flag is set to indicate whether source byte offset info |
|
386 // is supported. For intrinsic encodings, its always set since we |
|
387 // can always support it. For transcoder based encodings, we ask |
|
388 // the transcoder if it supports it or not. |
|
389 // |
|
390 // fStream |
|
391 // This is the input stream that provides the data for the reader. |
|
392 // Its always treated as a raw byte stream. The derived class will |
|
393 // ask for buffers of text from it and will handle making some |
|
394 // sense of it. |
|
395 // |
|
396 // fSwapped |
|
397 // If the encoding is one of the ones we do intrinsically, and its |
|
398 // in a different byte order from our native order, then this is |
|
399 // set to remind us to byte swap it during transcoding. |
|
400 // |
|
401 // fThrowAtEnd |
|
402 // Indicates whether the reader manager should throw an end of entity |
|
403 // exception at the end of this reader instance. This is usually |
|
404 // set for top level external entity references. It overrides the |
|
405 // reader manager's global flag that controls throwing at the end |
|
406 // of entities. Defaults to false. |
|
407 // |
|
408 // fTranscoder |
|
409 // If the encoding is not one that we handle intrinsically, then |
|
410 // we use an an external transcoder to do it. This class is an |
|
411 // abstraction that allows us to use pluggable external transcoding |
|
412 // services (via XMLTransService in util.) |
|
413 // |
|
414 // fType |
|
415 // Indicates whether this reader represents a PE or not. If this |
|
416 // flag is true and the fInLiteral flag is false, then we will put |
|
417 // out an extra space at the end. |
|
418 // |
|
419 // fgCharCharsTable; |
|
420 // Pointer to XMLChar table, depends on XML version |
|
421 // |
|
422 // fNEL |
|
423 // Boolean indicates if NEL and LSEP should be recognized as NEL |
|
424 // |
|
425 // fXMLVersion |
|
426 // Enum to indicate if this Reader is conforming to XML 1.0 or XML 1.1 |
|
427 // ----------------------------------------------------------------------- |
|
428 unsigned int fCharIndex; |
|
429 XMLCh fCharBuf[kCharBufSize]; |
|
430 unsigned int fCharsAvail; |
|
431 unsigned char fCharSizeBuf[kCharBufSize]; |
|
432 unsigned int fCharOfsBuf[kCharBufSize]; |
|
433 XMLSSize_t fCurCol; |
|
434 XMLSSize_t fCurLine; |
|
435 XMLRecognizer::Encodings fEncoding; |
|
436 XMLCh* fEncodingStr; |
|
437 bool fForcedEncoding; |
|
438 bool fNoMore; |
|
439 XMLCh* fPublicId; |
|
440 unsigned int fRawBufIndex; |
|
441 XMLByte fRawByteBuf[kRawBufSize]; |
|
442 unsigned int fRawBytesAvail; |
|
443 unsigned int fReaderNum; |
|
444 RefFrom fRefFrom; |
|
445 bool fSentTrailingSpace; |
|
446 Sources fSource; |
|
447 unsigned int fSrcOfsBase; |
|
448 bool fSrcOfsSupported; |
|
449 bool fCalculateSrcOfs; |
|
450 XMLCh* fSystemId; |
|
451 BinInputStream* fStream; |
|
452 bool fSwapped; |
|
453 bool fThrowAtEnd; |
|
454 XMLTranscoder* fTranscoder; |
|
455 Types fType; |
|
456 XMLByte* fgCharCharsTable; |
|
457 bool fNEL; |
|
458 XMLVersion fXMLVersion; |
|
459 MemoryManager* fMemoryManager; |
|
460 }; |
|
461 |
|
462 |
|
463 // --------------------------------------------------------------------------- |
|
464 // XMLReader: Public, query methods |
|
465 // --------------------------------------------------------------------------- |
|
466 inline bool XMLReader::isNameChar(const XMLCh toCheck) const |
|
467 { |
|
468 return ((fgCharCharsTable[toCheck] & gNameCharMask) != 0); |
|
469 } |
|
470 |
|
471 inline bool XMLReader::isNCNameChar(const XMLCh toCheck) const |
|
472 { |
|
473 return ((fgCharCharsTable[toCheck] & gNCNameCharMask) != 0); |
|
474 } |
|
475 |
|
476 inline bool XMLReader::isPlainContentChar(const XMLCh toCheck) const |
|
477 { |
|
478 return ((fgCharCharsTable[toCheck] & gPlainContentCharMask) != 0); |
|
479 } |
|
480 |
|
481 |
|
482 inline bool XMLReader::isFirstNameChar(const XMLCh toCheck) const |
|
483 { |
|
484 return ((fgCharCharsTable[toCheck] & gFirstNameCharMask) != 0); |
|
485 } |
|
486 |
|
487 inline bool XMLReader::isFirstNCNameChar(const XMLCh toCheck) const |
|
488 { |
|
489 return (((fgCharCharsTable[toCheck] & gFirstNameCharMask) != 0) |
|
490 && (toCheck != chColon)); |
|
491 } |
|
492 |
|
493 inline bool XMLReader::isSpecialStartTagChar(const XMLCh toCheck) const |
|
494 { |
|
495 return ((fgCharCharsTable[toCheck] & gSpecialStartTagCharMask) != 0); |
|
496 } |
|
497 |
|
498 inline bool XMLReader::isXMLChar(const XMLCh toCheck) const |
|
499 { |
|
500 return ((fgCharCharsTable[toCheck] & gXMLCharMask) != 0); |
|
501 } |
|
502 |
|
503 inline bool XMLReader::isXMLLetter(const XMLCh toCheck) const |
|
504 { |
|
505 return (((fgCharCharsTable[toCheck] & gFirstNameCharMask) != 0) |
|
506 && (toCheck != chColon) && (toCheck != chUnderscore)); |
|
507 } |
|
508 |
|
509 inline bool XMLReader::isWhitespace(const XMLCh toCheck) const |
|
510 { |
|
511 return ((fgCharCharsTable[toCheck] & gWhitespaceCharMask) != 0); |
|
512 } |
|
513 |
|
514 inline bool XMLReader::isControlChar(const XMLCh toCheck) const |
|
515 { |
|
516 return ((fgCharCharsTable[toCheck] & gControlCharMask) != 0); |
|
517 } |
|
518 |
|
519 // --------------------------------------------------------------------------- |
|
520 // XMLReader: Buffer management methods |
|
521 // --------------------------------------------------------------------------- |
|
522 inline unsigned long XMLReader::charsLeftInBuffer() const |
|
523 { |
|
524 return fCharsAvail - fCharIndex; |
|
525 } |
|
526 |
|
527 |
|
528 // --------------------------------------------------------------------------- |
|
529 // XMLReader: Getter methods |
|
530 // --------------------------------------------------------------------------- |
|
531 inline XMLSSize_t XMLReader::getColumnNumber() const |
|
532 { |
|
533 return fCurCol; |
|
534 } |
|
535 |
|
536 inline const XMLCh* XMLReader::getEncodingStr() const |
|
537 { |
|
538 return fEncodingStr; |
|
539 } |
|
540 |
|
541 inline XMLSSize_t XMLReader::getLineNumber() const |
|
542 { |
|
543 return fCurLine; |
|
544 } |
|
545 |
|
546 inline bool XMLReader::getNoMoreFlag() const |
|
547 { |
|
548 return fNoMore; |
|
549 } |
|
550 |
|
551 inline const XMLCh* XMLReader::getPublicId() const |
|
552 { |
|
553 return fPublicId; |
|
554 } |
|
555 |
|
556 inline unsigned int XMLReader::getReaderNum() const |
|
557 { |
|
558 return fReaderNum; |
|
559 } |
|
560 |
|
561 inline XMLReader::RefFrom XMLReader::getRefFrom() const |
|
562 { |
|
563 return fRefFrom; |
|
564 } |
|
565 |
|
566 inline XMLReader::Sources XMLReader::getSource() const |
|
567 { |
|
568 return fSource; |
|
569 } |
|
570 |
|
571 inline const XMLCh* XMLReader::getSystemId() const |
|
572 { |
|
573 return fSystemId; |
|
574 } |
|
575 |
|
576 inline bool XMLReader::getThrowAtEnd() const |
|
577 { |
|
578 return fThrowAtEnd; |
|
579 } |
|
580 |
|
581 inline XMLReader::Types XMLReader::getType() const |
|
582 { |
|
583 return fType; |
|
584 } |
|
585 |
|
586 // --------------------------------------------------------------------------- |
|
587 // XMLReader: Setter methods |
|
588 // --------------------------------------------------------------------------- |
|
589 inline void XMLReader::setReaderNum(const unsigned int newNum) |
|
590 { |
|
591 fReaderNum = newNum; |
|
592 } |
|
593 |
|
594 inline void XMLReader::setThrowAtEnd(const bool newValue) |
|
595 { |
|
596 fThrowAtEnd = newValue; |
|
597 } |
|
598 |
|
599 inline void XMLReader::setXMLVersion(const XMLVersion version) |
|
600 { |
|
601 fXMLVersion = version; |
|
602 if (version == XMLV1_1) { |
|
603 fNEL = true; |
|
604 fgCharCharsTable = XMLChar1_1::fgCharCharsTable1_1; |
|
605 } |
|
606 else { |
|
607 fNEL = XMLChar1_0::enableNEL; |
|
608 fgCharCharsTable = XMLChar1_0::fgCharCharsTable1_0; |
|
609 } |
|
610 |
|
611 } |
|
612 |
|
613 |
|
614 |
|
615 // --------------------------------------------------------------------------- |
|
616 // |
|
617 // XMLReader: movePlainContentChars() |
|
618 // |
|
619 // Move as many plain (no special handling of any sort required) content |
|
620 // characters as possible from this reader to the supplied destination buffer. |
|
621 // |
|
622 // This is THE hottest performance spot in the parser. |
|
623 // |
|
624 // --------------------------------------------------------------------------- |
|
625 inline void XMLReader::movePlainContentChars(XMLBuffer &dest) |
|
626 { |
|
627 unsigned int count = fCharIndex; |
|
628 |
|
629 while (fCharIndex < fCharsAvail) |
|
630 { |
|
631 if (!isPlainContentChar(fCharBuf[fCharIndex])) |
|
632 break; |
|
633 fCharIndex++; |
|
634 } |
|
635 |
|
636 if (count != fCharIndex) |
|
637 { |
|
638 fCurCol += (fCharIndex - count); |
|
639 dest.append(&fCharBuf[count], fCharIndex - count); |
|
640 } |
|
641 } |
|
642 |
|
643 |
|
644 // --------------------------------------------------------------------------- |
|
645 // XMLReader: getNextCharIfNot() method inlined for speed |
|
646 // --------------------------------------------------------------------------- |
|
647 inline bool XMLReader::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten) |
|
648 { |
|
649 // |
|
650 // See if there is at least a char in the buffer. Else, do the buffer |
|
651 // reload logic. |
|
652 // |
|
653 if (fCharIndex >= fCharsAvail) |
|
654 { |
|
655 // If fNoMore is set, then we have nothing else to give |
|
656 if (fNoMore) |
|
657 return false; |
|
658 |
|
659 // Try to refresh |
|
660 if (!refreshCharBuffer()) |
|
661 return false; |
|
662 } |
|
663 |
|
664 // Check the next char |
|
665 if (fCharBuf[fCharIndex] == chNotToGet) |
|
666 return false; |
|
667 |
|
668 // Its not the one we want to skip so bump the index |
|
669 chGotten = fCharBuf[fCharIndex++]; |
|
670 |
|
671 // Handle end of line normalization and line/col member maintenance. |
|
672 // |
|
673 // we can have end-of-line combinations with a leading |
|
674 // chCR(xD), chLF(xA), chNEL(x85), or chLineSeparator(x2028) |
|
675 // |
|
676 // 0000000000001101 chCR |
|
677 // 0000000000001010 chLF |
|
678 // 0000000010000101 chNEL |
|
679 // 0010000000101000 chLineSeparator |
|
680 // ----------------------- |
|
681 // 1101111101010000 == ~(chCR|chLF|chNEL|chLineSeparator) |
|
682 // |
|
683 // if the result of the logical-& operation is |
|
684 // true : 'curCh' can not be chCR, chLF, chNEL or chLineSeparator |
|
685 // false : 'curCh' can be chCR, chLF, chNEL or chLineSeparator |
|
686 // |
|
687 if ( chGotten & (XMLCh) ~(chCR|chLF|chNEL|chLineSeparator) ) |
|
688 { |
|
689 fCurCol++; |
|
690 } else |
|
691 { |
|
692 handleEOL(chGotten, false); |
|
693 } |
|
694 |
|
695 return true; |
|
696 } |
|
697 |
|
698 // --------------------------------------------------------------------------- |
|
699 // XMLReader: getNextChar() method inlined for speed |
|
700 // --------------------------------------------------------------------------- |
|
701 inline bool XMLReader::getNextChar(XMLCh& chGotten) |
|
702 { |
|
703 // |
|
704 // See if there is at least a char in the buffer. Else, do the buffer |
|
705 // reload logic. |
|
706 // |
|
707 if (fCharIndex >= fCharsAvail) |
|
708 { |
|
709 // If fNoMore is set, then we have nothing else to give |
|
710 if (fNoMore) |
|
711 return false; |
|
712 |
|
713 // Try to refresh |
|
714 if (!refreshCharBuffer()) |
|
715 return false; |
|
716 } |
|
717 |
|
718 chGotten = fCharBuf[fCharIndex++]; |
|
719 |
|
720 // Handle end of line normalization and line/col member maintenance. |
|
721 // |
|
722 // we can have end-of-line combinations with a leading |
|
723 // chCR(xD), chLF(xA), chNEL(x85), or chLineSeparator(x2028) |
|
724 // |
|
725 // 0000000000001101 chCR |
|
726 // 0000000000001010 chLF |
|
727 // 0000000010000101 chNEL |
|
728 // 0010000000101000 chLineSeparator |
|
729 // ----------------------- |
|
730 // 1101111101010000 == ~(chCR|chLF|chNEL|chLineSeparator) |
|
731 // |
|
732 // if the result of the logical-& operation is |
|
733 // true : 'curCh' can not be chCR, chLF, chNEL or chLineSeparator |
|
734 // false : 'curCh' can be chCR, chLF, chNEL or chLineSeparator |
|
735 // |
|
736 if ( chGotten & (XMLCh) ~(chCR|chLF|chNEL|chLineSeparator) ) |
|
737 { |
|
738 fCurCol++; |
|
739 } else |
|
740 { |
|
741 handleEOL(chGotten, false); |
|
742 } |
|
743 |
|
744 return true; |
|
745 } |
|
746 |
|
747 |
|
748 // --------------------------------------------------------------------------- |
|
749 // XMLReader: peekNextChar() method inlined for speed |
|
750 // --------------------------------------------------------------------------- |
|
751 inline bool XMLReader::peekNextChar(XMLCh& chGotten) |
|
752 { |
|
753 // |
|
754 // If there is something still in the buffer, get it. Else do the reload |
|
755 // scenario. |
|
756 // |
|
757 if (fCharIndex >= fCharsAvail) |
|
758 { |
|
759 // Try to refresh the buffer |
|
760 if (!refreshCharBuffer()) |
|
761 { |
|
762 chGotten = chNull; |
|
763 return false; |
|
764 } |
|
765 } |
|
766 |
|
767 chGotten = fCharBuf[fCharIndex]; |
|
768 |
|
769 // |
|
770 // Even though we are only peeking, we have to act the same as the |
|
771 // normal char get method in regards to newline normalization, though |
|
772 // its not as complicated as the actual character getting method's. |
|
773 // |
|
774 if ((chGotten == chCR || (fNEL && (chGotten == chNEL || chGotten == chLineSeparator))) |
|
775 && (fSource == Source_External)) |
|
776 chGotten = chLF; |
|
777 |
|
778 return true; |
|
779 } |
|
780 |
|
781 XERCES_CPP_NAMESPACE_END |
|
782 |
|
783 #endif |
|