secureswitools/swisistools/source/xmlparser/xerces/include/xercesc/util/regx/RegxParser.hpp
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Mon, 18 Jan 2010 20:28:24 +0200
changeset 2 661f3784fe57
parent 1 c42dffbd5b4f
permissions -rw-r--r--
Revision: 201001 Kit: 201003

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Id: RegxParser.hpp 568078 2007-08-21 11:43:25Z amassari $
 */

/*
 *	A regular expression parser
 */
#if !defined(REGXPARSER_HPP)
#define REGXPARSER_HPP

// ---------------------------------------------------------------------------
//  Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/RefVectorOf.hpp>
#include <xercesc/util/XMLUniDefs.hpp>

XERCES_CPP_NAMESPACE_BEGIN

// ---------------------------------------------------------------------------
//  Forward Declaration
// ---------------------------------------------------------------------------
class Token;
class RangeToken;
class TokenFactory;

// ---------------------------------------------------------------------------
//  RegxParser
//
//  Regular expression parser. The public surface consists of the token-type
//  constants, a handful of state accessors and the parse() entry point, which
//  takes a pattern string plus an options value and returns a Token*.
//
//  Most of the process*/parse* steps are declared virtual and protected, so a
//  derived parser can override individual pieces. Their implementations are
//  not part of this header.
//
//  The copy constructor and copy assignment operator are declared private
//  (see "Unimplemented constructors and operators" below).
// ---------------------------------------------------------------------------
class XMLUTIL_EXPORT RegxParser : public XMemory
{
public:

	// -----------------------------------------------------------------------
    //  Public constant data
    // -----------------------------------------------------------------------
    // Parse tokens
    // Numeric identifiers for the token kinds; the values are contiguous,
    // running from 0 to 24.
	enum {
		REGX_T_CHAR                     = 0,
		REGX_T_EOF                      = 1,
		REGX_T_OR                       = 2,
		REGX_T_STAR                     = 3,
		REGX_T_PLUS                     = 4,
		REGX_T_QUESTION                 = 5,
		REGX_T_LPAREN                   = 6,
		REGX_T_RPAREN                   = 7,
		REGX_T_DOT                      = 8,
		REGX_T_LBRACKET                 = 9,
		REGX_T_BACKSOLIDUS              = 10,
		REGX_T_CARET                    = 11,
		REGX_T_DOLLAR                   = 12,
		REGX_T_LPAREN2                  = 13,
		REGX_T_LOOKAHEAD                = 14,
		REGX_T_NEGATIVELOOKAHEAD        = 15,
		REGX_T_LOOKBEHIND               = 16,
		REGX_T_NEGATIVELOOKBEHIND       = 17,
		REGX_T_INDEPENDENT              = 18,
		REGX_T_SET_OPERATIONS           = 19,
		REGX_T_POSIX_CHARCLASS_START    = 20,
		REGX_T_COMMENT                  = 21,
		REGX_T_MODIFIERS                = 22,
		REGX_T_CONDITION                = 23,
		REGX_T_XMLSCHEMA_CC_SUBTRACTION	= 24
	};

    // Static constants declared here only; their values are defined outside
    // this header. NOTE(review): presumably the values used for the parse
    // context (see getParseContext/setParseContext) - confirm against the
    // implementation file.
	static const unsigned short S_NORMAL;
	static const unsigned short S_INBRACKETS;
	static const unsigned short S_INXBRACKETS;

	// -----------------------------------------------------------------------
    //  Public Constructors and Destructor
    // -----------------------------------------------------------------------
    // 'manager' defaults to XMLPlatformUtils::fgMemoryManager when omitted.
	RegxParser(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
	virtual ~RegxParser();

    // -----------------------------------------------------------------------
    //  Getter methods
    // -----------------------------------------------------------------------
    // Each getter returns the corresponding private data member unchanged:
    //   getParseContext   -> fParseContext
    //   getState          -> fState
    //   getCharData       -> fCharData
    //   getNoParen        -> fNoGroups
    //   getOffset         -> fOffset
    //   hasBackReferences -> fHasBackReferences
    //   getTokenFactory   -> fTokenFactory
    unsigned short getParseContext() const;
    unsigned short getState() const;
    XMLInt32       getCharData() const;
    int            getNoParen() const;
	int            getOffset() const;
	bool           hasBackReferences() const;
    TokenFactory*  getTokenFactory() const;

	// -----------------------------------------------------------------------
    //  Setter methods
    // -----------------------------------------------------------------------
    // Both setters simply store their argument (into fParseContext and
    // fTokenFactory respectively); no validation is performed.
	void setParseContext(const unsigned short value);
    void setTokenFactory(TokenFactory* const tokFactory);

	// -----------------------------------------------------------------------
    //  Public Parsing methods
    // -----------------------------------------------------------------------
    // Entry point: takes the pattern string and an options value and returns
    // a Token*. NOTE(review): ownership of the returned Token is not visible
    // from this header - confirm against the implementation/TokenFactory.
	Token* parse(const XMLCh* const regxStr, const int options);

protected:
    // -----------------------------------------------------------------------
    //  Protected Helper methods
    // -----------------------------------------------------------------------
    virtual bool        checkQuestion(const int off);
	virtual XMLInt32    decodeEscaped();
    // Returns fMemoryManager.
    MemoryManager*      getMemoryManager() const;
    // -----------------------------------------------------------------------
    //  Protected Parsing/Processing methods
    // -----------------------------------------------------------------------
    // processNext(), parseRegx() and processBacksolidus_pP() are non-virtual;
    // every other method in this section may be overridden by a subclass.
	void                processNext();
	Token*              parseRegx(const bool matchingRParen = false);
	virtual Token*      processCaret();
    virtual Token*      processDollar();
	virtual Token*      processLook(const unsigned short tokType);
    virtual Token*      processBacksolidus_A();
    virtual Token*      processBacksolidus_z();
    virtual Token*      processBacksolidus_Z();
    virtual Token*      processBacksolidus_b();
    virtual Token*      processBacksolidus_B();
    virtual Token*      processBacksolidus_lt();
    virtual Token*      processBacksolidus_gt();
    virtual Token*      processBacksolidus_c();
    virtual Token*      processBacksolidus_C();
    virtual Token*      processBacksolidus_i();
    virtual Token*      processBacksolidus_I();
    virtual Token*      processBacksolidus_g();
    virtual Token*      processBacksolidus_X();
    virtual Token*      processBackReference();
	virtual Token*      processStar(Token* const tok);
	virtual Token*      processPlus(Token* const tok);
	virtual Token*      processQuestion(Token* const tok);
    virtual Token*      processParen();
    virtual Token*      processParen2();
    virtual Token*      processCondition();
    virtual Token*      processModifiers();
    virtual Token*      processIndependent();
    virtual RangeToken* parseCharacterClass(const bool useNRange);
    virtual RangeToken* parseSetOperations();
	virtual XMLInt32    processCInCharacterClass(RangeToken* const tok,
                                                 const XMLInt32 ch);
    RangeToken*         processBacksolidus_pP(const XMLInt32 ch);

    // -----------------------------------------------------------------------
    //  Protected PreCreated RangeToken access methods
    // -----------------------------------------------------------------------
	virtual Token*      getTokenForShorthand(const XMLInt32 ch);

private:
    // -----------------------------------------------------------------------
    //  Private parsing/processing methods
    // -----------------------------------------------------------------------
    Token* parseTerm(const bool matchingRParen = false);
	Token* parseFactor();
	Token* parseAtom();

    // -----------------------------------------------------------------------
    //  Unimplemented constructors and operators
    // -----------------------------------------------------------------------
    // Declared private so that RegxParser objects cannot be copied or
    // assigned from outside the class.
    RegxParser(const RegxParser&);
    RegxParser& operator=(const RegxParser&);

	// -----------------------------------------------------------------------
    //  Private data types
    // -----------------------------------------------------------------------
    // Simple record pairing a reference number with a position; instances
    // are held in fReferences.
    class ReferencePosition : public XMemory
    {
        public :
            ReferencePosition(const int refNo, const int position);

            int	fReferenceNo;
			int	fPosition;
    };

    // -----------------------------------------------------------------------
    //  Private Helper methods
    // -----------------------------------------------------------------------
    // isSet:   true when every bit of 'flag' is also set in fOptions.
    // hexChar: numeric value of 'ch' when it lies in chDigit_0..chDigit_9,
    //          chLatin_A..chLatin_F or chLatin_a..chLatin_f; -1 otherwise.
    bool isSet(const int flag);
	int hexChar(const XMLInt32 ch);

	// -----------------------------------------------------------------------
    //  Private data members
	// -----------------------------------------------------------------------
    // fMemoryManager, fHasBackReferences, fOffset, fNoGroups, fParseContext,
    // fState, fCharData and fTokenFactory are exposed through the accessors
    // above; fOptions is the bit set tested by isSet().
    MemoryManager*                  fMemoryManager;
	bool                            fHasBackReferences;
	int                             fOptions;
	int                             fOffset;
	int                             fNoGroups;
	unsigned short                  fParseContext;
	int                             fStringLen;
	unsigned short                  fState;
	XMLInt32                        fCharData;
	XMLCh*                          fString;
	RefVectorOf<ReferencePosition>* fReferences;
    TokenFactory*                   fTokenFactory;
};


// ---------------------------------------------------------------------------
//  RegxParser: Getter Methods
// ---------------------------------------------------------------------------
// Accessor for the parse context currently stored in fParseContext.
inline unsigned short
RegxParser::getParseContext() const
{
    return fParseContext;
}

// Accessor for the value stored in fState.
inline unsigned short
RegxParser::getState() const
{
    return fState;
}

// Accessor for the character value stored in fCharData.
inline XMLInt32
RegxParser::getCharData() const
{
    return fCharData;
}

// Accessor for the group counter; note that it reports fNoGroups.
inline int
RegxParser::getNoParen() const
{
    return fNoGroups;
}

// Accessor for the offset stored in fOffset.
inline int
RegxParser::getOffset() const
{
    return fOffset;
}

// Reports the flag stored in fHasBackReferences.
inline bool
RegxParser::hasBackReferences() const
{
    return fHasBackReferences;
}

// Accessor for the token factory pointer stored in fTokenFactory.
inline TokenFactory*
RegxParser::getTokenFactory() const
{
    return fTokenFactory;
}

// Accessor for the memory manager pointer stored in fMemoryManager.
inline MemoryManager*
RegxParser::getMemoryManager() const
{
    return fMemoryManager;
}
// ---------------------------------------------------------------------------
//  RegxParser: Setter Methods
// ---------------------------------------------------------------------------
// Stores 'value' as the new parse context (fParseContext); no checks are made.
inline void
RegxParser::setParseContext(const unsigned short value)
{
    fParseContext = value;
}

// Stores 'tokFactory' in fTokenFactory; the previous pointer is overwritten
// without being released here.
inline void
RegxParser::setTokenFactory(TokenFactory* const tokFactory)
{
    fTokenFactory = tokFactory;
}

// ---------------------------------------------------------------------------
//  RegxParser: Helper Methods
// ---------------------------------------------------------------------------
// Returns true only when every bit present in 'flag' is also present in
// fOptions (a multi-bit flag must match completely).
inline bool
RegxParser::isSet(const int flag)
{
    const int matchedBits = fOptions & flag;
    return matchedBits == flag;
}


// Converts a single hexadecimal digit character to its numeric value.
//
//   chDigit_0 .. chDigit_9  ->  0 .. 9
//   chLatin_A .. chLatin_F  -> 10 .. 15
//   chLatin_a .. chLatin_f  -> 10 .. 15
//
// Any other character yields -1.
inline int
RegxParser::hexChar(const XMLInt32 ch)
{
    // Decimal digit.
    if (ch >= chDigit_0 && ch <= chDigit_9)
        return ch - chDigit_0;

    // Upper-case hex letter.
    if (ch >= chLatin_A && ch <= chLatin_F)
        return ch - chLatin_A + 10;

    // Lower-case hex letter.
    if (ch >= chLatin_a && ch <= chLatin_f)
        return ch - chLatin_a + 10;

    // Not a hexadecimal digit.
    return -1;
}

XERCES_CPP_NAMESPACE_END

#endif

/**
  *	End file RegxParser.hpp
  */