 * strength language and converting the character stream into
 * language token stream. Note: Regular expression syntax
 * (e.g. "file*.tx?") itself is not supported
 */
70 namespace Lex { |
70 namespace Lex { |
71 |
71 |
72 |
72 typedef const wchar_t* token_type_t; |
73 /** |
73 |
74 * Basic token types |
74 |
75 */ |
75 extern token_type_t TOKEN_UNKNOWN; |
76 enum TokenType { |
76 extern token_type_t TOKEN_EOF; |
77 TOKEN_UNKNOWN = 0, |
77 extern token_type_t TOKEN_WS; |
78 TOKEN_EOF = 1, |
78 extern token_type_t TOKEN_COMMENT; |
79 TOKEN_WS, |
79 extern token_type_t TOKEN_ID; |
80 TOKEN_ID, |
80 extern token_type_t TOKEN_STRLIT; |
81 TOKEN_STRLIT, |
81 extern token_type_t TOKEN_INTLIT; |
82 TOKEN_INTLIT, |
82 extern token_type_t TOKEN_REALLIT; |
83 TOKEN_REALLIT, |
83 extern token_type_t TOKEN_LIT; |
84 TOKEN_LIT, |
|
85 |
|
86 TOKEN_LAST_RESERVED // 8 |
|
87 }; |
|
88 |
84 |
89 class LexException : public ITxtCtxtExc { |
85 class LexException : public ITxtCtxtExc { |
90 public: |
86 public: |
91 LexException(const wchar_t* what, const wchar_t* where); |
87 LexException(const wchar_t* what, const wchar_t* where); |
92 virtual ~LexException(); |
88 virtual ~LexException(); |
 * hazardous, if the original tokenized string is modified or
 * released.
 */
107 class Token { |
103 class Token { |
108 public: |
104 public: |
109 Token(int type, const wchar_t* begin, const wchar_t* end); |
105 Token(token_type_t type, const wchar_t* begin, const wchar_t* end); |
110 Token(); |
106 Token(); |
111 int type() const; |
107 const wchar_t* type() const; |
112 const wchar_t* begin() const; |
108 const wchar_t* begin() const; |
113 const wchar_t* end() const; |
109 const wchar_t* end() const; |
114 int length() const; |
110 int length() const; |
115 std::wstring text() const; |
111 std::wstring text() const; |
116 private: |
112 private: |
117 int type_; |
113 token_type_t type_; |
118 const wchar_t* begin_; |
114 const wchar_t* begin_; |
119 const wchar_t* end_; |
115 const wchar_t* end_; |
120 }; |
116 }; |

/**
219 const wchar_t* end_; |
215 const wchar_t* end_; |
220 }; |
216 }; |
221 |
217 |
222 class SymbolTokenizer : public Tokenizer { |
218 class SymbolTokenizer : public Tokenizer { |
223 public: |
219 public: |
224 SymbolTokenizer(int tokenType, const wchar_t* symbol); |
220 SymbolTokenizer(const wchar_t* tokenType, const wchar_t* symbol); |
225 virtual void reset(); |
221 virtual void reset(); |
226 virtual Token get(); |
222 virtual Token get(); |
227 virtual TokenizerState consume(const wchar_t* cursor); |
223 virtual TokenizerState consume(const wchar_t* cursor); |
228 private: |
224 private: |
229 const wchar_t* begin_; |
225 const wchar_t* begin_; |
230 const wchar_t* end_; |
226 const wchar_t* end_; |
231 int tokenType_; |
227 token_type_t tokenType_; |
232 const wchar_t* symbol_; |
228 const wchar_t* symbol_; |
233 }; |
229 }; |
234 |
230 |
|
231 /** |
|
232 * C style line comment, e.g. // comment |
|
233 */ |
|
234 class LineCommentTokenizer : public Tokenizer { |
|
235 public: |
|
236 LineCommentTokenizer(); |
|
237 virtual void reset(); |
|
238 virtual Token get(); |
|
239 virtual TokenizerState consume(const wchar_t* cursor); |
|
240 private: |
|
241 enum State { |
|
242 READY, |
|
243 SLASH_CONSUMED, |
|
244 COMMENT, |
|
245 FINISHED |
|
246 }; |
|
247 State state_; |
|
248 const wchar_t* begin_; |
|
249 const wchar_t* end_; |
|
250 }; |
|
251 |
|
252 /** |
|
253 * C++ style section comments. Like the one's surrounding this comment |
|
254 */ |
|
255 class SectionCommentTokenizer : public Tokenizer { |
|
256 public: |
|
257 SectionCommentTokenizer(); |
|
258 virtual void reset(); |
|
259 virtual Token get(); |
|
260 virtual TokenizerState consume(const wchar_t* cursor); |
|
261 private: |
|
262 enum State { |
|
263 READY, |
|
264 SLASH_CONSUMED, |
|
265 COMMENT, |
|
266 STAR_CONSUMED, |
|
267 FINISH |
|
268 }; |
|
269 State state_; |
|
270 const wchar_t* begin_; |
|
271 const wchar_t* end_; |
|
272 |
|
273 }; |
|
274 |
/**
 * Tokenizes text by using given tokenizers. Text is consumed
 * until no tokenizer is in hungry state e.g., all tokenizers
 * are either failed or finished. In case a number of
 * tokenizers have finished, the longest token is used. If a
301 */ |
341 */ |
302 virtual Token operator++(int) = 0; |
342 virtual Token operator++(int) = 0; |
303 |
343 |
304 virtual ~TokenIterator(); |
344 virtual ~TokenIterator(); |
305 }; |
345 }; |
|
346 |
|
347 class WhitespaceSplitter : public TokenIterator { |
|
348 public: |
|
349 WhitespaceSplitter(const wchar_t* text); |
|
350 virtual operator bool(); |
|
351 virtual Token operator++(int); |
|
352 public: |
|
353 const wchar_t* begin_; |
|
354 const wchar_t* end_; |
|
355 }; |
306 |
356 |
/**
 * Uses tokenizer for converting given text into token stream
 * and provides means for iterating through the token
 * stream's tokens.
};

328 /** |
378 /** |
329 * Filters out all tokens of type TOKEN_WS |
379 * Filters out all tokens of type TOKEN_WS |
330 */ |
380 */ |
331 class WhiteSpaceFilter : public TokenIterator { |
381 class StdFilter : public TokenIterator { |
332 public: |
382 public: |
333 WhiteSpaceFilter(TokenIterator& tokens); |
383 StdFilter(TokenIterator& tokens); |
334 virtual operator bool(); |
384 virtual operator bool(); |
335 virtual Token operator++(int); |
385 virtual Token operator++(int); |
336 private: |
386 private: |
337 void prepareNext(); |
387 void prepareNext(); |
338 private: // data |
388 private: // data |
423 class Lexer : public Lex::TokenReader { |
473 class Lexer : public Lex::TokenReader { |
424 public: |
474 public: |
425 Lexer(Lex::TokenIterator& tokens); |
475 Lexer(Lex::TokenIterator& tokens); |
426 // throws ParseException instead of LexException on EOF. |
476 // throws ParseException instead of LexException on EOF. |
427 virtual Lex::Token operator++(int); |
477 virtual Lex::Token operator++(int); |
428 Lex::Token eat(int tokenType); |
478 Lex::Token eat(Lex::token_type_t tokenType); |
429 void eatEof(); |
479 void eatEof(); |
430 std::wstring eatId(); |
480 std::wstring eatId(); |
431 std::wstring eatString(); |
481 std::wstring eatString(); |
432 long eatInteger(); |
482 long eatInteger(); |
433 double eatReal(); |
483 double eatReal(); |