secureswitools/swisistools/source/xmlparser/xerces/include/xercesc/internal/XMLReader.hpp
changeset 4 3eebb1e54d3a
parent 3 127731b7107d
child 5 aba6b8104af3
equal deleted inserted replaced
3:127731b7107d 4:3eebb1e54d3a
     1 /*
       
     2  * Licensed to the Apache Software Foundation (ASF) under one or more
       
     3  * contributor license agreements.  See the NOTICE file distributed with
       
     4  * this work for additional information regarding copyright ownership.
       
     5  * The ASF licenses this file to You under the Apache License, Version 2.0
       
     6  * (the "License"); you may not use this file except in compliance with
       
     7  * the License.  You may obtain a copy of the License at
       
     8  * 
       
     9  *      http://www.apache.org/licenses/LICENSE-2.0
       
    10  * 
       
    11  * Unless required by applicable law or agreed to in writing, software
       
    12  * distributed under the License is distributed on an "AS IS" BASIS,
       
    13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
       
    14  * See the License for the specific language governing permissions and
       
    15  * limitations under the License.
       
    16  */
       
    17 
       
    18 /*
       
    19  * $Id: XMLReader.hpp 568078 2007-08-21 11:43:25Z amassari $
       
    20  */
       
    21 
       
    22 #if !defined(XMLREADER_HPP)
       
    23 #define XMLREADER_HPP
       
    24 
       
    25 #include <xercesc/util/XMLChar.hpp>
       
    26 #include <xercesc/framework/XMLRecognizer.hpp>
       
    27 #include <xercesc/framework/XMLBuffer.hpp>
       
    28 #include <xercesc/util/TranscodingException.hpp>
       
    29 
       
    30 XERCES_CPP_NAMESPACE_BEGIN
       
    31 
       
    32 class InputSource;
       
    33 class BinInputStream;
       
    34 class ReaderMgr;
       
    35 class XMLScanner;
       
    36 class XMLTranscoder;
       
    37 
       
    38 
       
    39 // ---------------------------------------------------------------------------
       
    40 //  Instances of this class are used to manage the content of entities. The
       
    41 //  scanner maintains a stack of these, one for each entity (this means entity
       
    42 //  in the sense of any parsed file or internal entity) currently being
       
    43 //  scanned. This class, given a binary input stream will handle reading in
       
     44 //  the data and decoding it from its external encoding into the internal
       
     45 //  Unicode format. Once internalized, this class provides the access
       
    46 //  methods to read in the data in various ways, maintains line and column
       
    47 //  information, and provides high performance character attribute checking
       
    48 //  methods.
       
    49 //
       
    50 //  This is NOT to be derived from.
       
    51 //
       
    52 // ---------------------------------------------------------------------------
       
    53 class XMLPARSER_EXPORT XMLReader : public XMemory
       
    54 {
       
    55 public:
       
    56     // -----------------------------------------------------------------------
       
    57     //  Public types
       
    58     // -----------------------------------------------------------------------
       
    59     enum Types
       
    60     {
       
    61         Type_PE
       
    62         , Type_General
       
    63     };
       
    64 
       
    65     enum Sources
       
    66     {
       
    67         Source_Internal
       
    68         , Source_External
       
    69     };
       
    70 
       
    71     enum RefFrom
       
    72     {
       
    73         RefFrom_Literal
       
    74         , RefFrom_NonLiteral
       
    75     };
       
    76 
       
    77     enum XMLVersion
       
    78     {
       
    79         XMLV1_0
       
    80         , XMLV1_1
       
    81         , XMLV_Unknown
       
    82     };
       
    83 
       
    84 
       
    85     // -----------------------------------------------------------------------
       
    86     //  Public, query methods
       
    87     // -----------------------------------------------------------------------
       
    88     bool isAllSpaces
       
    89     (
       
    90         const   XMLCh* const    toCheck
       
    91         , const unsigned int    count
       
    92     ) const;
       
    93 
       
    94     bool containsWhiteSpace
       
    95     (
       
    96         const   XMLCh* const    toCheck
       
    97         , const unsigned int    count
       
    98     ) const;
       
    99 
       
   100 
       
   101     bool isXMLLetter(const XMLCh toCheck) const;
       
   102     bool isFirstNameChar(const XMLCh toCheck) const;
       
   103     bool isNameChar(const XMLCh toCheck) const;    
       
   104     bool isPlainContentChar(const XMLCh toCheck) const;
       
   105     bool isSpecialStartTagChar(const XMLCh toCheck) const;
       
   106     bool isXMLChar(const XMLCh toCheck) const;
       
   107     bool isWhitespace(const XMLCh toCheck) const;
       
   108     bool isControlChar(const XMLCh toCheck) const;
       
   109     bool isPublicIdChar(const XMLCh toCheck) const;
       
   110     bool isFirstNCNameChar(const XMLCh toCheck) const;
       
   111     bool isNCNameChar(const XMLCh toCheck) const;    
       
   112 
       
   113     // -----------------------------------------------------------------------
       
   114     //  Constructors and Destructor
       
   115     // -----------------------------------------------------------------------
       
   116     XMLReader
       
   117     (
       
   118         const   XMLCh* const          pubId
       
   119         , const XMLCh* const          sysId
       
   120         ,       BinInputStream* const streamToAdopt
       
   121         , const RefFrom               from
       
   122         , const Types                 type
       
   123         , const Sources               source
       
   124         , const bool                  throwAtEnd = false
       
   125         , const bool                  calculateSrcOfs = true
       
   126         , const XMLVersion            xmlVersion = XMLV1_0
       
   127         ,       MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
       
   128     );
       
   129 
       
   130     XMLReader
       
   131     (
       
   132         const   XMLCh* const          pubId
       
   133         , const XMLCh* const          sysId
       
   134         ,       BinInputStream* const streamToAdopt
       
   135         , const XMLCh* const          encodingStr
       
   136         , const RefFrom               from
       
   137         , const Types                 type
       
   138         , const Sources               source
       
   139         , const bool                  throwAtEnd = false
       
   140         , const bool                  calculateSrcOfs = true
       
   141         , const XMLVersion            xmlVersion = XMLV1_0
       
   142         ,       MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
       
   143     );
       
   144 
       
   145     XMLReader
       
   146     (
       
   147         const   XMLCh* const          pubId
       
   148         , const XMLCh* const          sysId
       
   149         ,       BinInputStream* const streamToAdopt
       
   150         , XMLRecognizer::Encodings    encodingEnum
       
   151         , const RefFrom               from
       
   152         , const Types                 type
       
   153         , const Sources               source
       
   154         , const bool                  throwAtEnd = false
       
   155         , const bool                  calculateSrcOfs = true
       
   156         , const XMLVersion            xmlVersion = XMLV1_0
       
   157         ,       MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
       
   158     );
       
   159 
       
   160     ~XMLReader();
       
   161 
       
   162 
       
   163     // -----------------------------------------------------------------------
       
   164     //  Character buffer management methods
       
   165     // -----------------------------------------------------------------------
       
   166     unsigned long charsLeftInBuffer() const;
       
   167     bool refreshCharBuffer();
       
   168 
       
   169 
       
   170     // -----------------------------------------------------------------------
       
   171     //  Scanning methods
       
   172     // -----------------------------------------------------------------------
       
   173     bool getName(XMLBuffer& toFill, const bool token);
       
   174     bool getQName(XMLBuffer& toFill, int* colonPosition);
       
   175     bool getNextChar(XMLCh& chGotten);
       
   176     bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten);
       
   177     void movePlainContentChars(XMLBuffer &dest);
       
   178     bool getSpaces(XMLBuffer& toFill);
       
   179     bool getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
       
   180     bool peekNextChar(XMLCh& chGotten);
       
   181     bool skipIfQuote(XMLCh& chGotten);
       
   182     bool skipSpaces(bool& skippedSomething, bool inDecl = false);
       
   183     bool skippedChar(const XMLCh toSkip);
       
   184     bool skippedSpace();
       
   185     bool skippedString(const XMLCh* const toSkip);
       
   186     bool peekString(const XMLCh* const toPeek);
       
   187 
       
   188 
       
   189     // -----------------------------------------------------------------------
       
   190     //  Getter methods
       
   191     // -----------------------------------------------------------------------
       
   192     XMLSSize_t getColumnNumber() const;
       
   193     const XMLCh* getEncodingStr() const;
       
   194     XMLSSize_t getLineNumber() const;
       
   195     bool getNoMoreFlag() const;
       
   196     const XMLCh* getPublicId() const;
       
   197     unsigned int getReaderNum() const;
       
   198     RefFrom getRefFrom() const;
       
   199     Sources getSource() const;
       
   200     unsigned int getSrcOffset() const;
       
   201     const XMLCh* getSystemId() const;
       
   202     bool getThrowAtEnd() const;
       
   203     Types getType() const;
       
   204 
       
   205 
       
   206     // -----------------------------------------------------------------------
       
   207     //  Setter methods
       
   208     // -----------------------------------------------------------------------
       
   209     bool setEncoding
       
   210     (
       
   211         const   XMLCh* const    newEncoding
       
   212     );
       
   213     void setReaderNum(const unsigned int newNum);
       
   214     void setThrowAtEnd(const bool newValue);
       
   215     void setXMLVersion(const XMLVersion version);
       
   216 
       
   217 
       
   218 private:
       
   219     // -----------------------------------------------------------------------
       
   220     //  Unimplemented constructors and operators
       
   221     // -----------------------------------------------------------------------
       
   222     XMLReader(const XMLReader&);
       
   223     XMLReader& operator=(const XMLReader&);
       
   224 
       
   225     // ---------------------------------------------------------------------------
       
   226     //  Class Constants
       
   227     //
       
   228     //  kCharBufSize
       
   229     //      The size of the character spool buffer that we use. Its not terribly
       
   230     //      large because its just getting filled with data from a raw byte
       
   231     //      buffer as we go along. We don't want to decode all the text at
       
   232     //      once before we find out that there is an error.
       
   233     //
       
   234     //      NOTE: This is a size in characters, not bytes.
       
   235     //
       
   236     //  kRawBufSize
       
   237     //      The size of the raw buffer from which raw bytes are spooled out
       
   238     //      as we transcode chunks of data. As it is emptied, it is filled back
       
   239     //      in again from the source stream.
       
   240     // ---------------------------------------------------------------------------
       
   241     enum Constants
       
   242     {
       
   243         kCharBufSize        = 16 * 1024
       
   244         , kRawBufSize       = 48 * 1024
       
   245     };
       
   246 
       
   247 
       
   248     // -----------------------------------------------------------------------
       
   249     //  Private helper methods
       
   250     // -----------------------------------------------------------------------
       
   251     void checkForSwapped();
       
   252 
       
   253     void doInitCharSizeChecks();
       
   254 
       
   255     void doInitDecode();
       
   256 
       
   257     XMLByte getNextRawByte
       
   258     (
       
   259         const   bool            eoiOk
       
   260     );
       
   261 
       
   262     void refreshRawBuffer();
       
   263 
       
   264     void setTranscoder
       
   265     (
       
   266         const   XMLCh* const    newEncoding
       
   267     );
       
   268 
       
   269     unsigned int xcodeMoreChars
       
   270     (
       
   271                 XMLCh* const            bufToFill
       
   272         ,       unsigned char* const    charSizes
       
   273         , const unsigned int            maxChars
       
   274     );
       
   275 
       
   276     void handleEOL
       
   277     (
       
   278               XMLCh&   curCh
       
   279             , bool     inDecl = false
       
   280     );
       
   281 
       
   282     // -----------------------------------------------------------------------
       
   283     //  Data members
       
   284     //
       
   285     //  fCharIndex
       
   286     //      The index into the character buffer. When this hits fCharsAvail
       
   287     //      then its time to refill.
       
   288     //
       
   289     //  fCharBuf
       
   290     //      A buffer that the reader manager fills up with transcoded
       
   291     //      characters a small amount at a time.
       
   292     //
       
   293     //  fCharsAvail
       
   294     //      The characters currently available in the character buffer.
       
   295     //
       
   296     //  fCharSizeBuf
       
   297     //      This buffer is an array that contains the number of source chars
       
   298     //      eaten to create each char in the fCharBuf buffer. So the entry
       
   299     //      fCharSizeBuf[x] is the number of source chars that were eaten
       
   300     //      to make the internalized char fCharBuf[x]. This only contains
       
   301     //      useful data if fSrcOfsSupported is true.
       
   302     //
       
   303     //  fCharOfsBuf
       
   304     //      This buffer is an array that contains the offset in the
       
   305     //      fRawByteBuf buffer of each char in the fCharBuf buffer. It
       
   306     //      only contains useful data if fSrcOfsSupported is true.
       
   307     //
       
   308     //  fCurCol
       
   309     //  fCurLine
       
   310     //      The current line and column that we are in within this reader's
       
   311     //      text.
       
   312     //
       
   313     //  fEncoding
       
   314     //      This is the rough encoding setting. This enum is set during
       
   315     //      construction and just tells us the rough family of encoding that
       
   316     //      we are doing.
       
   317     //
       
   318     //  fEncodingStr
       
   319     //      This is the name of the encoding we are using. It will be
       
   320     //      provisionally set during construction, from the auto-sensed
       
   321     //      encoding. But it might be overridden when the XMLDecl is finally
       
   322     //      seen by the scanner. It can also be forced to a particular
       
   323     //      encoding, in which case fForcedEncoding is set.
       
   324     //
       
   325     //  fForcedEncoding
       
   326     //      If the encoding if forced then this is set and all other
       
   327     //      information will be ignored. This encoding will be taken as
       
   328     //      gospel. This is done by calling an alternate constructor.
       
   329     //
       
   330     //  fNoMore
       
   331     //      This is set when the source text is exhausted. It lets us know
       
   332     //      quickly that no more text is available.
       
   333     //
       
   334     //  fRawBufIndex
       
   335     //      The current index into the raw byte buffer. When its equal to
       
   336     //      fRawBytesAvail then we need to read another buffer.
       
   337     //
       
   338     //  fRawByteBuf
       
   339     //      This is the raw byte buffer that is used to spool out bytes
       
   340     //      from into the fCharBuf buffer, as we transcode in blocks.
       
   341     //
       
   342     //  fRawBytesAvail
       
   343     //      The number of bytes currently available in the raw buffer. This
       
   344     //      helps deal with the last buffer's worth, which will usually not
       
   345     //      be a full one.
       
   346     //
       
   347     //  fReaderNum
       
   348     //      Each reader from a particular reader manager (which means from a
       
   349     //      particular document) is given a unique number. The reader manager
       
   350     //      sets these numbers. They are used to catch things like partial
       
   351     //      markup errors.
       
   352     //
       
   353     //  fRefFrom
       
   354     //      This flag is provided in the ctor, and tells us if we represent
       
   355     //      some entity being expanded inside a literal. Sometimes things
       
   356     //      happen differently inside and outside literals.
       
   357     //
       
   358     //  fPublicId
       
   359     //  fSystemId
       
   360     //      These are the system and public ids of the source that this
       
   361     //      reader is reading.
       
   362     //
       
   363     //  fSentTrailingSpace
       
   364     //      If we are a PE entity being read and we not referenced from a
       
   365     //      literal, then a leading and trailing space must be faked into the
       
   366     //      data. This lets us know we've done the trailing space already (so
       
   367     //      we don't just keep doing it again and again.)
       
   368     //
       
   369     //  fSource
       
   370     //      Indicates whether the content this reader is spooling as already
       
   371     //      been internalized. This will prevent multiple processing of
       
   372     //      whitespace when an already internalized entity is being spooled
       
   373     //      out.
       
   374     //
       
   375     //  fSpareChar
       
   376     //      Some encodings can create two chars in an atomic way, e.g.
       
   377     //      surrogate pairs. We might not be able to store both, so we store
       
   378     //      it here until the next buffer transcoding operation.
       
   379     //
       
   380     //  fSrcOfsBase
       
   381     //      This is the base offset within the source of this entity. Values
       
   382     //      in the curent fCharSizeBuf array are relative to this value.
       
   383     //
       
   384     //  fSrcOfsSupported
       
   385     //      This flag is set to indicate whether source byte offset info
       
   386     //      is supported. For intrinsic encodings, its always set since we
       
   387     //      can always support it. For transcoder based encodings, we ask
       
   388     //      the transcoder if it supports it or not.
       
   389     //
       
   390     //  fStream
       
   391     //      This is the input stream that provides the data for the reader.
       
   392     //      Its always treated as a raw byte stream. The derived class will
       
   393     //      ask for buffers of text from it and will handle making some
       
   394     //      sense of it.
       
   395     //
       
   396     //  fSwapped
       
   397     //      If the encoding is one of the ones we do intrinsically, and its
       
   398     //      in a different byte order from our native order, then this is
       
   399     //      set to remind us to byte swap it during transcoding.
       
   400     //
       
   401     //  fThrowAtEnd
       
   402     //      Indicates whether the reader manager should throw an end of entity
       
   403     //      exception at the end of this reader instance. This is usually
       
   404     //      set for top level external entity references. It overrides the
       
   405     //      reader manager's global flag that controls throwing at the end
       
   406     //      of entities. Defaults to false.
       
   407     //
       
   408     //  fTranscoder
       
   409     //      If the encoding is not one that we handle intrinsically, then
       
   410     //      we use an an external transcoder to do it. This class is an
       
   411     //      abstraction that allows us to use pluggable external transcoding
       
   412     //      services (via XMLTransService in util.)
       
   413     //
       
   414     //  fType
       
   415     //      Indicates whether this reader represents a PE or not. If this
       
   416     //      flag is true and the fInLiteral flag is false, then we will put
       
   417     //      out an extra space at the end.
       
   418     //
       
   419     //  fgCharCharsTable;
       
   420     //      Pointer to XMLChar table, depends on XML version
       
   421     //
       
   422     //  fNEL
       
   423     //      Boolean indicates if NEL and LSEP should be recognized as NEL
       
   424     //
       
   425     //  fXMLVersion
       
   426     //      Enum to indicate if this Reader is conforming to XML 1.0 or XML 1.1
       
   427     // -----------------------------------------------------------------------
       
   428     unsigned int                fCharIndex;
       
   429     XMLCh                       fCharBuf[kCharBufSize];
       
   430     unsigned int                fCharsAvail;
       
   431     unsigned char               fCharSizeBuf[kCharBufSize];
       
   432     unsigned int                fCharOfsBuf[kCharBufSize];
       
   433     XMLSSize_t                  fCurCol;
       
   434     XMLSSize_t                  fCurLine;
       
   435     XMLRecognizer::Encodings    fEncoding;
       
   436     XMLCh*                      fEncodingStr;
       
   437     bool                        fForcedEncoding;
       
   438     bool                        fNoMore;
       
   439     XMLCh*                      fPublicId;
       
   440     unsigned int                fRawBufIndex;
       
   441     XMLByte                     fRawByteBuf[kRawBufSize];
       
   442     unsigned int                fRawBytesAvail;
       
   443     unsigned int                fReaderNum;
       
   444     RefFrom                     fRefFrom;
       
   445     bool                        fSentTrailingSpace;
       
   446     Sources                     fSource;
       
   447     unsigned int                fSrcOfsBase;
       
   448     bool                        fSrcOfsSupported;
       
   449     bool                        fCalculateSrcOfs;
       
   450     XMLCh*                      fSystemId;
       
   451     BinInputStream*             fStream;
       
   452     bool                        fSwapped;
       
   453     bool                        fThrowAtEnd;
       
   454     XMLTranscoder*              fTranscoder;
       
   455     Types                       fType;
       
   456     XMLByte*                    fgCharCharsTable;
       
   457     bool                        fNEL;
       
   458     XMLVersion                  fXMLVersion;
       
   459     MemoryManager*              fMemoryManager;
       
   460 };
       
   461 
       
   462 
       
   463 // ---------------------------------------------------------------------------
       
   464 //  XMLReader: Public, query methods
       
   465 // ---------------------------------------------------------------------------
       
   466 inline bool XMLReader::isNameChar(const XMLCh toCheck) const
       
   467 {
       
   468     return ((fgCharCharsTable[toCheck] & gNameCharMask) != 0);
       
   469 }
       
   470 
       
   471 inline bool XMLReader::isNCNameChar(const XMLCh toCheck) const
       
   472 {
       
   473     return ((fgCharCharsTable[toCheck] & gNCNameCharMask) != 0);
       
   474 }
       
   475 
       
   476 inline bool XMLReader::isPlainContentChar(const XMLCh toCheck) const
       
   477 {
       
   478     return ((fgCharCharsTable[toCheck] & gPlainContentCharMask) != 0);
       
   479 }
       
   480 
       
   481 
       
   482 inline bool XMLReader::isFirstNameChar(const XMLCh toCheck) const
       
   483 {
       
   484     return ((fgCharCharsTable[toCheck] & gFirstNameCharMask) != 0);
       
   485 }
       
   486 
       
   487 inline bool XMLReader::isFirstNCNameChar(const XMLCh toCheck) const
       
   488 {
       
   489     return (((fgCharCharsTable[toCheck] & gFirstNameCharMask) != 0) 
       
   490             && (toCheck != chColon));
       
   491 }
       
   492 
       
   493 inline bool XMLReader::isSpecialStartTagChar(const XMLCh toCheck) const
       
   494 {
       
   495     return ((fgCharCharsTable[toCheck] & gSpecialStartTagCharMask) != 0);
       
   496 }
       
   497 
       
   498 inline bool XMLReader::isXMLChar(const XMLCh toCheck) const
       
   499 {
       
   500     return ((fgCharCharsTable[toCheck] & gXMLCharMask) != 0);
       
   501 }
       
   502 
       
   503 inline bool XMLReader::isXMLLetter(const XMLCh toCheck) const
       
   504 {
       
   505     return (((fgCharCharsTable[toCheck] & gFirstNameCharMask) != 0)
       
   506             && (toCheck != chColon) && (toCheck != chUnderscore));
       
   507 }
       
   508 
       
   509 inline bool XMLReader::isWhitespace(const XMLCh toCheck) const
       
   510 {
       
   511     return ((fgCharCharsTable[toCheck] & gWhitespaceCharMask) != 0);
       
   512 }
       
   513 
       
   514 inline bool XMLReader::isControlChar(const XMLCh toCheck) const
       
   515 {
       
   516     return ((fgCharCharsTable[toCheck] & gControlCharMask) != 0);
       
   517 }
       
   518 
       
   519 // ---------------------------------------------------------------------------
       
   520 //  XMLReader: Buffer management methods
       
   521 // ---------------------------------------------------------------------------
       
   522 inline unsigned long XMLReader::charsLeftInBuffer() const
       
   523 {
       
   524     return fCharsAvail - fCharIndex;
       
   525 }
       
   526 
       
   527 
       
   528 // ---------------------------------------------------------------------------
       
   529 //  XMLReader: Getter methods
       
   530 // ---------------------------------------------------------------------------
       
   531 inline XMLSSize_t XMLReader::getColumnNumber() const
       
   532 {
       
   533     return fCurCol;
       
   534 }
       
   535 
       
   536 inline const XMLCh* XMLReader::getEncodingStr() const
       
   537 {
       
   538     return fEncodingStr;
       
   539 }
       
   540 
       
   541 inline XMLSSize_t XMLReader::getLineNumber() const
       
   542 {
       
   543     return fCurLine;
       
   544 }
       
   545 
       
   546 inline bool XMLReader::getNoMoreFlag() const
       
   547 {
       
   548     return fNoMore;
       
   549 }
       
   550 
       
   551 inline const XMLCh* XMLReader::getPublicId() const
       
   552 {
       
   553     return fPublicId;
       
   554 }
       
   555 
       
   556 inline unsigned int XMLReader::getReaderNum() const
       
   557 {
       
   558     return fReaderNum;
       
   559 }
       
   560 
       
   561 inline XMLReader::RefFrom XMLReader::getRefFrom() const
       
   562 {
       
   563     return fRefFrom;
       
   564 }
       
   565 
       
   566 inline XMLReader::Sources XMLReader::getSource() const
       
   567 {
       
   568     return fSource;
       
   569 }
       
   570 
       
   571 inline const XMLCh* XMLReader::getSystemId() const
       
   572 {
       
   573     return fSystemId;
       
   574 }
       
   575 
       
   576 inline bool XMLReader::getThrowAtEnd() const
       
   577 {
       
   578     return fThrowAtEnd;
       
   579 }
       
   580 
       
   581 inline XMLReader::Types XMLReader::getType() const
       
   582 {
       
   583     return fType;
       
   584 }
       
   585 
       
   586 // ---------------------------------------------------------------------------
       
   587 //  XMLReader: Setter methods
       
   588 // ---------------------------------------------------------------------------
       
   589 inline void XMLReader::setReaderNum(const unsigned int newNum)
       
   590 {
       
   591     fReaderNum = newNum;
       
   592 }
       
   593 
       
   594 inline void XMLReader::setThrowAtEnd(const bool newValue)
       
   595 {
       
   596     fThrowAtEnd = newValue;
       
   597 }
       
   598 
       
   599 inline void XMLReader::setXMLVersion(const XMLVersion version)
       
   600 {
       
   601     fXMLVersion = version;
       
   602     if (version == XMLV1_1) {
       
   603         fNEL = true;
       
   604         fgCharCharsTable = XMLChar1_1::fgCharCharsTable1_1;
       
   605     }
       
   606     else {
       
   607         fNEL = XMLChar1_0::enableNEL;
       
   608         fgCharCharsTable = XMLChar1_0::fgCharCharsTable1_0;
       
   609     }
       
   610 
       
   611 }
       
   612 
       
   613 
       
   614 
       
   615 // ---------------------------------------------------------------------------
       
   616 //
       
   617 //  XMLReader: movePlainContentChars()
       
   618 //
       
   619 //       Move as many plain (no special handling of any sort required) content
       
   620 //       characters as possible from this reader to the supplied destination buffer.
       
   621 //
       
   622 //       This is THE hottest performance spot in the parser.
       
   623 //
       
   624 // ---------------------------------------------------------------------------
       
   625 inline void XMLReader::movePlainContentChars(XMLBuffer &dest)
       
   626 {
       
   627     unsigned int count = fCharIndex;
       
   628 
       
   629     while (fCharIndex < fCharsAvail)
       
   630     {
       
   631         if (!isPlainContentChar(fCharBuf[fCharIndex]))
       
   632             break;
       
   633         fCharIndex++;
       
   634     }
       
   635 
       
   636     if (count != fCharIndex)
       
   637     {
       
   638         fCurCol    += (fCharIndex - count);
       
   639         dest.append(&fCharBuf[count], fCharIndex - count);
       
   640     }
       
   641 }
       
   642 
       
   643 
       
   644 // ---------------------------------------------------------------------------
       
   645 //  XMLReader: getNextCharIfNot() method inlined for speed
       
   646 // ---------------------------------------------------------------------------
       
   647 inline bool XMLReader::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
       
   648 {
       
   649     //
       
   650     //  See if there is at least a char in the buffer. Else, do the buffer
       
   651     //  reload logic.
       
   652     //
       
   653     if (fCharIndex >= fCharsAvail)
       
   654     {
       
   655         // If fNoMore is set, then we have nothing else to give
       
   656         if (fNoMore)
       
   657             return false;
       
   658 
       
   659         // Try to refresh
       
   660         if (!refreshCharBuffer())
       
   661             return false;
       
   662     }
       
   663 
       
   664     // Check the next char
       
   665     if (fCharBuf[fCharIndex] == chNotToGet)
       
   666         return false;
       
   667 
       
   668     // Its not the one we want to skip so bump the index
       
   669     chGotten = fCharBuf[fCharIndex++];
       
   670 
       
   671     // Handle end of line normalization and line/col member maintenance.
       
   672     //
       
   673     // we can have end-of-line combinations with a leading
       
   674     // chCR(xD), chLF(xA), chNEL(x85), or chLineSeparator(x2028)
       
   675     //
       
   676     // 0000000000001101 chCR
       
   677     // 0000000000001010 chLF
       
   678     // 0000000010000101 chNEL
       
   679     // 0010000000101000 chLineSeparator
       
   680     // -----------------------
       
   681     // 1101111101010000 == ~(chCR|chLF|chNEL|chLineSeparator)
       
   682     //
       
   683     // if the result of the logical-& operation is
       
   684     // true  : 'curCh' can not be chCR, chLF, chNEL or chLineSeparator
       
   685     // false : 'curCh' can be chCR, chLF, chNEL or chLineSeparator
       
   686     //
       
   687     if ( chGotten & (XMLCh) ~(chCR|chLF|chNEL|chLineSeparator) )
       
   688     {
       
   689         fCurCol++;
       
   690     } else
       
   691     {
       
   692         handleEOL(chGotten, false);
       
   693     }
       
   694 
       
   695     return true;
       
   696 }
       
   697 
       
   698 // ---------------------------------------------------------------------------
       
   699 //  XMLReader: getNextChar() method inlined for speed
       
   700 // ---------------------------------------------------------------------------
       
   701 inline bool XMLReader::getNextChar(XMLCh& chGotten)
       
   702 {
       
   703     //
       
   704     //  See if there is at least a char in the buffer. Else, do the buffer
       
   705     //  reload logic.
       
   706     //
       
   707     if (fCharIndex >= fCharsAvail)
       
   708     {
       
   709         // If fNoMore is set, then we have nothing else to give
       
   710         if (fNoMore)
       
   711             return false;
       
   712 
       
   713         // Try to refresh
       
   714         if (!refreshCharBuffer())
       
   715             return false;
       
   716     }
       
   717 
       
   718     chGotten = fCharBuf[fCharIndex++];
       
   719 
       
   720     // Handle end of line normalization and line/col member maintenance.
       
   721     //
       
   722     // we can have end-of-line combinations with a leading
       
   723     // chCR(xD), chLF(xA), chNEL(x85), or chLineSeparator(x2028)
       
   724     //
       
   725     // 0000000000001101 chCR
       
   726     // 0000000000001010 chLF
       
   727     // 0000000010000101 chNEL
       
   728     // 0010000000101000 chLineSeparator
       
   729     // -----------------------
       
   730     // 1101111101010000 == ~(chCR|chLF|chNEL|chLineSeparator)
       
   731     //
       
   732     // if the result of the logical-& operation is
       
   733     // true  : 'curCh' can not be chCR, chLF, chNEL or chLineSeparator
       
   734     // false : 'curCh' can be chCR, chLF, chNEL or chLineSeparator
       
   735     //
       
   736     if ( chGotten & (XMLCh) ~(chCR|chLF|chNEL|chLineSeparator) )
       
   737     {
       
   738         fCurCol++;
       
   739     } else
       
   740     {
       
   741         handleEOL(chGotten, false);
       
   742     }
       
   743 
       
   744     return true;
       
   745 }
       
   746 
       
   747 
       
   748 // ---------------------------------------------------------------------------
       
   749 //  XMLReader: peekNextChar() method inlined for speed
       
   750 // ---------------------------------------------------------------------------
       
   751 inline bool XMLReader::peekNextChar(XMLCh& chGotten)
       
   752 {
       
   753     //
       
   754     //  If there is something still in the buffer, get it. Else do the reload
       
   755     //  scenario.
       
   756     //
       
   757     if (fCharIndex >= fCharsAvail)
       
   758     {
       
   759         // Try to refresh the buffer
       
   760         if (!refreshCharBuffer())
       
   761         {
       
   762             chGotten = chNull;
       
   763             return false;
       
   764         }
       
   765     }
       
   766 
       
   767     chGotten = fCharBuf[fCharIndex];
       
   768 
       
   769     //
       
   770     //  Even though we are only peeking, we have to act the same as the
       
   771     //  normal char get method in regards to newline normalization, though
       
   772     //  its not as complicated as the actual character getting method's.
       
   773     //
       
   774     if ((chGotten == chCR || (fNEL && (chGotten == chNEL || chGotten == chLineSeparator)))
       
   775         && (fSource == Source_External))
       
   776         chGotten = chLF;
       
   777 
       
   778     return true;
       
   779 }
       
   780 
       
   781 XERCES_CPP_NAMESPACE_END
       
   782 
       
   783 #endif