diff -r 127731b7107d -r 3eebb1e54d3a secureswitools/swisistools/source/xmlparser/xerces/include/xercesc/internal/XMLReader.hpp --- a/secureswitools/swisistools/source/xmlparser/xerces/include/xercesc/internal/XMLReader.hpp Fri Jan 22 09:56:12 2010 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,783 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * $Id: XMLReader.hpp 568078 2007-08-21 11:43:25Z amassari $ - */ - -#if !defined(XMLREADER_HPP) -#define XMLREADER_HPP - -#include -#include -#include -#include - -XERCES_CPP_NAMESPACE_BEGIN - -class InputSource; -class BinInputStream; -class ReaderMgr; -class XMLScanner; -class XMLTranscoder; - - -// --------------------------------------------------------------------------- -// Instances of this class are used to manage the content of entities. The -// scanner maintains a stack of these, one for each entity (this means entity -// in the sense of any parsed file or internal entity) currently being -// scanned. This class, given a binary input stream will handle reading in -// the data and decoding it from its external decoding into the internal -// Unicode format. Once internallized, this class provides the access -// methods to read in the data in various ways, maintains line and column -// information, and provides high performance character attribute checking -// methods. -// -// This is NOT to be derived from. -// -// --------------------------------------------------------------------------- -class XMLPARSER_EXPORT XMLReader : public XMemory -{ -public: - // ----------------------------------------------------------------------- - // Public types - // ----------------------------------------------------------------------- - enum Types - { - Type_PE - , Type_General - }; - - enum Sources - { - Source_Internal - , Source_External - }; - - enum RefFrom - { - RefFrom_Literal - , RefFrom_NonLiteral - }; - - enum XMLVersion - { - XMLV1_0 - , XMLV1_1 - , XMLV_Unknown - }; - - - // ----------------------------------------------------------------------- - // Public, query methods - // ----------------------------------------------------------------------- - bool isAllSpaces - ( - const XMLCh* const toCheck - , const unsigned int count - ) const; - - bool containsWhiteSpace - ( - const XMLCh* const toCheck - , const unsigned int count - ) const; - - - bool isXMLLetter(const XMLCh toCheck) const; - bool isFirstNameChar(const XMLCh toCheck) const; - bool isNameChar(const XMLCh toCheck) const; - bool isPlainContentChar(const XMLCh toCheck) const; - bool isSpecialStartTagChar(const XMLCh toCheck) const; - bool isXMLChar(const XMLCh toCheck) const; - bool isWhitespace(const XMLCh toCheck) const; - bool isControlChar(const XMLCh toCheck) const; - bool isPublicIdChar(const XMLCh toCheck) const; - bool isFirstNCNameChar(const XMLCh toCheck) const; - bool isNCNameChar(const XMLCh toCheck) const; - - // ----------------------------------------------------------------------- - // Constructors and Destructor - // ----------------------------------------------------------------------- - XMLReader - ( - const XMLCh* const pubId - , const XMLCh* const sysId - , BinInputStream* const streamToAdopt - , const RefFrom from - , const Types type - , const Sources source - , const bool throwAtEnd = false - , const bool calculateSrcOfs = true - , const XMLVersion xmlVersion = XMLV1_0 - , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager - ); - - XMLReader - ( - const XMLCh* const pubId - , const XMLCh* const sysId - , BinInputStream* const streamToAdopt - , const XMLCh* const encodingStr - , const RefFrom from - , const Types type - , const Sources source - , const bool throwAtEnd = false - , const bool calculateSrcOfs = true - , const XMLVersion xmlVersion = XMLV1_0 - , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager - ); - - XMLReader - ( - const XMLCh* const pubId - , const XMLCh* const sysId - , BinInputStream* const streamToAdopt - , XMLRecognizer::Encodings encodingEnum - , const RefFrom from - , const Types type - , const Sources source - , const bool throwAtEnd = false - , const bool calculateSrcOfs = true - , const XMLVersion xmlVersion = XMLV1_0 - , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager - ); - - ~XMLReader(); - - - // ----------------------------------------------------------------------- - // Character buffer management methods - // ----------------------------------------------------------------------- - unsigned long charsLeftInBuffer() const; - bool refreshCharBuffer(); - - - // ----------------------------------------------------------------------- - // Scanning methods - // ----------------------------------------------------------------------- - bool getName(XMLBuffer& toFill, const bool token); - bool getQName(XMLBuffer& toFill, int* colonPosition); - bool getNextChar(XMLCh& chGotten); - bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten); - void movePlainContentChars(XMLBuffer &dest); - bool getSpaces(XMLBuffer& toFill); - bool getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck); - bool peekNextChar(XMLCh& chGotten); - bool skipIfQuote(XMLCh& chGotten); - bool skipSpaces(bool& skippedSomething, bool inDecl = false); - bool skippedChar(const XMLCh toSkip); - bool skippedSpace(); - bool skippedString(const XMLCh* const toSkip); - bool peekString(const XMLCh* const toPeek); - - - // ----------------------------------------------------------------------- - // Getter methods - // ----------------------------------------------------------------------- - XMLSSize_t getColumnNumber() const; - const XMLCh* getEncodingStr() const; - XMLSSize_t getLineNumber() const; - bool getNoMoreFlag() const; - const XMLCh* getPublicId() const; - unsigned int getReaderNum() const; - RefFrom getRefFrom() const; - Sources getSource() const; - unsigned int getSrcOffset() const; - const XMLCh* getSystemId() const; - bool getThrowAtEnd() const; - Types getType() const; - - - // ----------------------------------------------------------------------- - // Setter methods - // ----------------------------------------------------------------------- - bool setEncoding - ( - const XMLCh* const newEncoding - ); - void setReaderNum(const unsigned int newNum); - void setThrowAtEnd(const bool newValue); - void setXMLVersion(const XMLVersion version); - - -private: - // ----------------------------------------------------------------------- - // Unimplemented constructors and operators - // ----------------------------------------------------------------------- - XMLReader(const XMLReader&); - XMLReader& operator=(const XMLReader&); - - // --------------------------------------------------------------------------- - // Class Constants - // - // kCharBufSize - // The size of the character spool buffer that we use. Its not terribly - // large because its just getting filled with data from a raw byte - // buffer as we go along. We don't want to decode all the text at - // once before we find out that there is an error. - // - // NOTE: This is a size in characters, not bytes. - // - // kRawBufSize - // The size of the raw buffer from which raw bytes are spooled out - // as we transcode chunks of data. As it is emptied, it is filled back - // in again from the source stream. - // --------------------------------------------------------------------------- - enum Constants - { - kCharBufSize = 16 * 1024 - , kRawBufSize = 48 * 1024 - }; - - - // ----------------------------------------------------------------------- - // Private helper methods - // ----------------------------------------------------------------------- - void checkForSwapped(); - - void doInitCharSizeChecks(); - - void doInitDecode(); - - XMLByte getNextRawByte - ( - const bool eoiOk - ); - - void refreshRawBuffer(); - - void setTranscoder - ( - const XMLCh* const newEncoding - ); - - unsigned int xcodeMoreChars - ( - XMLCh* const bufToFill - , unsigned char* const charSizes - , const unsigned int maxChars - ); - - void handleEOL - ( - XMLCh& curCh - , bool inDecl = false - ); - - // ----------------------------------------------------------------------- - // Data members - // - // fCharIndex - // The index into the character buffer. When this hits fCharsAvail - // then its time to refill. - // - // fCharBuf - // A buffer that the reader manager fills up with transcoded - // characters a small amount at a time. - // - // fCharsAvail - // The characters currently available in the character buffer. - // - // fCharSizeBuf - // This buffer is an array that contains the number of source chars - // eaten to create each char in the fCharBuf buffer. So the entry - // fCharSizeBuf[x] is the number of source chars that were eaten - // to make the internalized char fCharBuf[x]. This only contains - // useful data if fSrcOfsSupported is true. - // - // fCharOfsBuf - // This buffer is an array that contains the offset in the - // fRawByteBuf buffer of each char in the fCharBuf buffer. It - // only contains useful data if fSrcOfsSupported is true. - // - // fCurCol - // fCurLine - // The current line and column that we are in within this reader's - // text. - // - // fEncoding - // This is the rough encoding setting. This enum is set during - // construction and just tells us the rough family of encoding that - // we are doing. - // - // fEncodingStr - // This is the name of the encoding we are using. It will be - // provisionally set during construction, from the auto-sensed - // encoding. But it might be overridden when the XMLDecl is finally - // seen by the scanner. It can also be forced to a particular - // encoding, in which case fForcedEncoding is set. - // - // fForcedEncoding - // If the encoding if forced then this is set and all other - // information will be ignored. This encoding will be taken as - // gospel. This is done by calling an alternate constructor. - // - // fNoMore - // This is set when the source text is exhausted. It lets us know - // quickly that no more text is available. - // - // fRawBufIndex - // The current index into the raw byte buffer. When its equal to - // fRawBytesAvail then we need to read another buffer. - // - // fRawByteBuf - // This is the raw byte buffer that is used to spool out bytes - // from into the fCharBuf buffer, as we transcode in blocks. - // - // fRawBytesAvail - // The number of bytes currently available in the raw buffer. This - // helps deal with the last buffer's worth, which will usually not - // be a full one. - // - // fReaderNum - // Each reader from a particular reader manager (which means from a - // particular document) is given a unique number. The reader manager - // sets these numbers. They are used to catch things like partial - // markup errors. - // - // fRefFrom - // This flag is provided in the ctor, and tells us if we represent - // some entity being expanded inside a literal. Sometimes things - // happen differently inside and outside literals. - // - // fPublicId - // fSystemId - // These are the system and public ids of the source that this - // reader is reading. - // - // fSentTrailingSpace - // If we are a PE entity being read and we not referenced from a - // literal, then a leading and trailing space must be faked into the - // data. This lets us know we've done the trailing space already (so - // we don't just keep doing it again and again.) - // - // fSource - // Indicates whether the content this reader is spooling as already - // been internalized. This will prevent multiple processing of - // whitespace when an already internalized entity is being spooled - // out. - // - // fSpareChar - // Some encodings can create two chars in an atomic way, e.g. - // surrogate pairs. We might not be able to store both, so we store - // it here until the next buffer transcoding operation. - // - // fSrcOfsBase - // This is the base offset within the source of this entity. Values - // in the curent fCharSizeBuf array are relative to this value. - // - // fSrcOfsSupported - // This flag is set to indicate whether source byte offset info - // is supported. For intrinsic encodings, its always set since we - // can always support it. For transcoder based encodings, we ask - // the transcoder if it supports it or not. - // - // fStream - // This is the input stream that provides the data for the reader. - // Its always treated as a raw byte stream. The derived class will - // ask for buffers of text from it and will handle making some - // sense of it. - // - // fSwapped - // If the encoding is one of the ones we do intrinsically, and its - // in a different byte order from our native order, then this is - // set to remind us to byte swap it during transcoding. - // - // fThrowAtEnd - // Indicates whether the reader manager should throw an end of entity - // exception at the end of this reader instance. This is usually - // set for top level external entity references. It overrides the - // reader manager's global flag that controls throwing at the end - // of entities. Defaults to false. - // - // fTranscoder - // If the encoding is not one that we handle intrinsically, then - // we use an an external transcoder to do it. This class is an - // abstraction that allows us to use pluggable external transcoding - // services (via XMLTransService in util.) - // - // fType - // Indicates whether this reader represents a PE or not. If this - // flag is true and the fInLiteral flag is false, then we will put - // out an extra space at the end. - // - // fgCharCharsTable; - // Pointer to XMLChar table, depends on XML version - // - // fNEL - // Boolean indicates if NEL and LSEP should be recognized as NEL - // - // fXMLVersion - // Enum to indicate if this Reader is conforming to XML 1.0 or XML 1.1 - // ----------------------------------------------------------------------- - unsigned int fCharIndex; - XMLCh fCharBuf[kCharBufSize]; - unsigned int fCharsAvail; - unsigned char fCharSizeBuf[kCharBufSize]; - unsigned int fCharOfsBuf[kCharBufSize]; - XMLSSize_t fCurCol; - XMLSSize_t fCurLine; - XMLRecognizer::Encodings fEncoding; - XMLCh* fEncodingStr; - bool fForcedEncoding; - bool fNoMore; - XMLCh* fPublicId; - unsigned int fRawBufIndex; - XMLByte fRawByteBuf[kRawBufSize]; - unsigned int fRawBytesAvail; - unsigned int fReaderNum; - RefFrom fRefFrom; - bool fSentTrailingSpace; - Sources fSource; - unsigned int fSrcOfsBase; - bool fSrcOfsSupported; - bool fCalculateSrcOfs; - XMLCh* fSystemId; - BinInputStream* fStream; - bool fSwapped; - bool fThrowAtEnd; - XMLTranscoder* fTranscoder; - Types fType; - XMLByte* fgCharCharsTable; - bool fNEL; - XMLVersion fXMLVersion; - MemoryManager* fMemoryManager; -}; - - -// --------------------------------------------------------------------------- -// XMLReader: Public, query methods -// --------------------------------------------------------------------------- -inline bool XMLReader::isNameChar(const XMLCh toCheck) const -{ - return ((fgCharCharsTable[toCheck] & gNameCharMask) != 0); -} - -inline bool XMLReader::isNCNameChar(const XMLCh toCheck) const -{ - return ((fgCharCharsTable[toCheck] & gNCNameCharMask) != 0); -} - -inline bool XMLReader::isPlainContentChar(const XMLCh toCheck) const -{ - return ((fgCharCharsTable[toCheck] & gPlainContentCharMask) != 0); -} - - -inline bool XMLReader::isFirstNameChar(const XMLCh toCheck) const -{ - return ((fgCharCharsTable[toCheck] & gFirstNameCharMask) != 0); -} - -inline bool XMLReader::isFirstNCNameChar(const XMLCh toCheck) const -{ - return (((fgCharCharsTable[toCheck] & gFirstNameCharMask) != 0) - && (toCheck != chColon)); -} - -inline bool XMLReader::isSpecialStartTagChar(const XMLCh toCheck) const -{ - return ((fgCharCharsTable[toCheck] & gSpecialStartTagCharMask) != 0); -} - -inline bool XMLReader::isXMLChar(const XMLCh toCheck) const -{ - return ((fgCharCharsTable[toCheck] & gXMLCharMask) != 0); -} - -inline bool XMLReader::isXMLLetter(const XMLCh toCheck) const -{ - return (((fgCharCharsTable[toCheck] & gFirstNameCharMask) != 0) - && (toCheck != chColon) && (toCheck != chUnderscore)); -} - -inline bool XMLReader::isWhitespace(const XMLCh toCheck) const -{ - return ((fgCharCharsTable[toCheck] & gWhitespaceCharMask) != 0); -} - -inline bool XMLReader::isControlChar(const XMLCh toCheck) const -{ - return ((fgCharCharsTable[toCheck] & gControlCharMask) != 0); -} - -// --------------------------------------------------------------------------- -// XMLReader: Buffer management methods -// --------------------------------------------------------------------------- -inline unsigned long XMLReader::charsLeftInBuffer() const -{ - return fCharsAvail - fCharIndex; -} - - -// --------------------------------------------------------------------------- -// XMLReader: Getter methods -// --------------------------------------------------------------------------- -inline XMLSSize_t XMLReader::getColumnNumber() const -{ - return fCurCol; -} - -inline const XMLCh* XMLReader::getEncodingStr() const -{ - return fEncodingStr; -} - -inline XMLSSize_t XMLReader::getLineNumber() const -{ - return fCurLine; -} - -inline bool XMLReader::getNoMoreFlag() const -{ - return fNoMore; -} - -inline const XMLCh* XMLReader::getPublicId() const -{ - return fPublicId; -} - -inline unsigned int XMLReader::getReaderNum() const -{ - return fReaderNum; -} - -inline XMLReader::RefFrom XMLReader::getRefFrom() const -{ - return fRefFrom; -} - -inline XMLReader::Sources XMLReader::getSource() const -{ - return fSource; -} - -inline const XMLCh* XMLReader::getSystemId() const -{ - return fSystemId; -} - -inline bool XMLReader::getThrowAtEnd() const -{ - return fThrowAtEnd; -} - -inline XMLReader::Types XMLReader::getType() const -{ - return fType; -} - -// --------------------------------------------------------------------------- -// XMLReader: Setter methods -// --------------------------------------------------------------------------- -inline void XMLReader::setReaderNum(const unsigned int newNum) -{ - fReaderNum = newNum; -} - -inline void XMLReader::setThrowAtEnd(const bool newValue) -{ - fThrowAtEnd = newValue; -} - -inline void XMLReader::setXMLVersion(const XMLVersion version) -{ - fXMLVersion = version; - if (version == XMLV1_1) { - fNEL = true; - fgCharCharsTable = XMLChar1_1::fgCharCharsTable1_1; - } - else { - fNEL = XMLChar1_0::enableNEL; - fgCharCharsTable = XMLChar1_0::fgCharCharsTable1_0; - } - -} - - - -// --------------------------------------------------------------------------- -// -// XMLReader: movePlainContentChars() -// -// Move as many plain (no special handling of any sort required) content -// characters as possible from this reader to the supplied destination buffer. -// -// This is THE hottest performance spot in the parser. -// -// --------------------------------------------------------------------------- -inline void XMLReader::movePlainContentChars(XMLBuffer &dest) -{ - unsigned int count = fCharIndex; - - while (fCharIndex < fCharsAvail) - { - if (!isPlainContentChar(fCharBuf[fCharIndex])) - break; - fCharIndex++; - } - - if (count != fCharIndex) - { - fCurCol += (fCharIndex - count); - dest.append(&fCharBuf[count], fCharIndex - count); - } -} - - -// --------------------------------------------------------------------------- -// XMLReader: getNextCharIfNot() method inlined for speed -// --------------------------------------------------------------------------- -inline bool XMLReader::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten) -{ - // - // See if there is at least a char in the buffer. Else, do the buffer - // reload logic. - // - if (fCharIndex >= fCharsAvail) - { - // If fNoMore is set, then we have nothing else to give - if (fNoMore) - return false; - - // Try to refresh - if (!refreshCharBuffer()) - return false; - } - - // Check the next char - if (fCharBuf[fCharIndex] == chNotToGet) - return false; - - // Its not the one we want to skip so bump the index - chGotten = fCharBuf[fCharIndex++]; - - // Handle end of line normalization and line/col member maintenance. - // - // we can have end-of-line combinations with a leading - // chCR(xD), chLF(xA), chNEL(x85), or chLineSeparator(x2028) - // - // 0000000000001101 chCR - // 0000000000001010 chLF - // 0000000010000101 chNEL - // 0010000000101000 chLineSeparator - // ----------------------- - // 1101111101010000 == ~(chCR|chLF|chNEL|chLineSeparator) - // - // if the result of the logical-& operation is - // true : 'curCh' can not be chCR, chLF, chNEL or chLineSeparator - // false : 'curCh' can be chCR, chLF, chNEL or chLineSeparator - // - if ( chGotten & (XMLCh) ~(chCR|chLF|chNEL|chLineSeparator) ) - { - fCurCol++; - } else - { - handleEOL(chGotten, false); - } - - return true; -} - -// --------------------------------------------------------------------------- -// XMLReader: getNextChar() method inlined for speed -// --------------------------------------------------------------------------- -inline bool XMLReader::getNextChar(XMLCh& chGotten) -{ - // - // See if there is at least a char in the buffer. Else, do the buffer - // reload logic. - // - if (fCharIndex >= fCharsAvail) - { - // If fNoMore is set, then we have nothing else to give - if (fNoMore) - return false; - - // Try to refresh - if (!refreshCharBuffer()) - return false; - } - - chGotten = fCharBuf[fCharIndex++]; - - // Handle end of line normalization and line/col member maintenance. - // - // we can have end-of-line combinations with a leading - // chCR(xD), chLF(xA), chNEL(x85), or chLineSeparator(x2028) - // - // 0000000000001101 chCR - // 0000000000001010 chLF - // 0000000010000101 chNEL - // 0010000000101000 chLineSeparator - // ----------------------- - // 1101111101010000 == ~(chCR|chLF|chNEL|chLineSeparator) - // - // if the result of the logical-& operation is - // true : 'curCh' can not be chCR, chLF, chNEL or chLineSeparator - // false : 'curCh' can be chCR, chLF, chNEL or chLineSeparator - // - if ( chGotten & (XMLCh) ~(chCR|chLF|chNEL|chLineSeparator) ) - { - fCurCol++; - } else - { - handleEOL(chGotten, false); - } - - return true; -} - - -// --------------------------------------------------------------------------- -// XMLReader: peekNextChar() method inlined for speed -// --------------------------------------------------------------------------- -inline bool XMLReader::peekNextChar(XMLCh& chGotten) -{ - // - // If there is something still in the buffer, get it. Else do the reload - // scenario. - // - if (fCharIndex >= fCharsAvail) - { - // Try to refresh the buffer - if (!refreshCharBuffer()) - { - chGotten = chNull; - return false; - } - } - - chGotten = fCharBuf[fCharIndex]; - - // - // Even though we are only peeking, we have to act the same as the - // normal char get method in regards to newline normalization, though - // its not as complicated as the actual character getting method's. - // - if ((chGotten == chCR || (fNEL && (chGotten == chNEL || chGotten == chLineSeparator))) - && (fSource == Source_External)) - chGotten = chLF; - - return true; -} - -XERCES_CPP_NAMESPACE_END - -#endif