--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/oss/loc/analysis/inc/private/breakiterator.h Fri Oct 15 12:09:28 2010 +0530
@@ -0,0 +1,118 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+
+#ifndef BREAKITERATOR_H_
+#define BREAKITERATOR_H_
+
+#include "statemachine.h"
+
+namespace analysis {
+
+ /**
+ * Abstract interface for finding word boundaries in text.
+ * All operations other than the destructor are pure virtual.
+ */
+ class BreakIterator {
+ public:
+
+ virtual ~BreakIterator();
+
+ /**
+ * Sets the text in which breaks are searched.
+ * @param text wide-character string (presumably null-terminated, as no length is passed -- TODO confirm)
+ */
+ virtual void setText(const wchar_t* text) = 0;
+
+ /**
+ * Returns true if a next boundary exists
+ */
+ virtual bool hasNext() = 0;
+
+ /**
+ * Returns the location of the current break in the string
+ */
+ virtual int current() = 0;
+
+ /**
+ * Finds the next break and returns the new location
+ */
+ virtual int next() = 0;
+ };
+
+ /**
+ * State machine and longest matching algorithm based break
+ * iterator. Used for finding word boundaries. The state machine
+ * is typically compiled from a dictionary.
+ *
+ * @tparam Encoding Describes the serialization format of the state machine
+ */
+ template<class Encoding>
+ class StateMachineBreakIterator : public BreakIterator {
+ public:
+
+ /**
+ * Constructs the break iterator to use the given state machine.
+ * Explicit, so that a StateMachine is never implicitly converted into a break iterator.
+ * NOTE(review): machine_ is a reference member, so the machine presumably must outlive this object -- confirm
+ */
+ explicit StateMachineBreakIterator(StateMachine<Encoding>& machine);
+
+ ~StateMachineBreakIterator();
+
+ public: // From BreakIterator
+
+ virtual void setText(const wchar_t* text);
+
+ virtual bool hasNext();
+
+ virtual int current();
+
+ virtual int next();
+
+ private:
+
+ /**
+ * Prepares the next break (the definition is not part of this header; presumably it updates next_ -- TODO confirm)
+ */
+ void prepareNext();
+
+ private:
+
+ /** State machine in use, e.g. compiled from a dictionary. Held by reference, which also makes this class non-assignable */
+ StateMachine<Encoding>& machine_;
+
+ /** Pointer to a state. Used for moving within the state machine */
+ StateCursor<Encoding> state_;
+
+ /** Text being processed; a raw pointer, so presumably not owned by this object -- TODO confirm */
+ const wchar_t* text_;
+
+ /** Cursor in the text */
+ int cursor_;
+
+ /** Current break */
+ int current_;
+
+ /** Next break */
+ int next_;
+ };
+
+}
+
+#include "breakiterator.inl"
+
+#endif /* BREAKITERATOR_H_ */