searchengine/oss/loc/analysis/inc/private/breakiterator.h
changeset 24 65456528cac2
equal deleted inserted replaced
23:d4d56f5e7c55 24:65456528cac2
       
     1 /*
       
     2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 #ifndef BREAKITERATOR_H_
       
    19 #define BREAKITERATOR_H_
       
    20 
       
    21 #include "statemachine.h"
       
    22 
       
    23 namespace analysis {
       
    24 
       
    25 	/**
       
    26 	 * Abstract class for finding word boundaries in text
       
    27 	 */
       
    28 	class BreakIterator {
       
    29 		
       
    30 	public:
       
    31 		
       
    32 		virtual ~BreakIterator(); 
       
    33 		
       
    34 		/**
       
    35 		 * Sets the text
       
    36 		 */
       
    37 		virtual void setText(const wchar_t* text) = 0; 
       
    38 		
       
    39 		/**
       
    40 		 * Returns true, if next boundary is exist 
       
    41 		 */
       
    42 		virtual bool hasNext() = 0; 
       
    43 		
       
    44 		/**
       
    45 		 * Returns the location of current break in string
       
    46 		 */
       
    47 		virtual int current() = 0; 
       
    48 
       
    49 		/**
       
    50 		 * Finds next break and returns the new location
       
    51 		 */
       
    52 		virtual int next() = 0;
       
    53 		
       
    54 	};
       
    55 	
       
    56 	/**
       
    57 	 * State machine and longest matching algorithm based break 
       
    58 	 * iterator. Used for finding word boundaries. State machine
       
    59 	 * is typically compiled from dictionary.
       
    60 	 * 
       
    61 	 * @tparam Encoding Describes the serialization format of the state machine
       
    62 	 */
       
    63 	template<class Encoding>
       
    64 	class StateMachineBreakIterator : public BreakIterator {
       
    65 
       
    66 		public:
       
    67 		
       
    68 			/**
       
    69 			 * Constructs the break iterator to use given state machine
       
    70 			 */
       
    71 			StateMachineBreakIterator(StateMachine<Encoding>& machine);
       
    72 			
       
    73 			~StateMachineBreakIterator(); 
       
    74 			
       
    75 		public: // From BreakIterator
       
    76 			
       
    77 			virtual void setText(const wchar_t* text);
       
    78 			
       
    79 			virtual bool hasNext(); 
       
    80 			
       
    81 			virtual int current(); 
       
    82 
       
    83 			virtual int next();
       
    84 
       
    85 		private: 
       
    86 	
       
    87 			/**
       
    88 			 * Prepares next
       
    89 			 */
       
    90 			void prepareNext(); 
       
    91 		
       
    92 		private: 
       
    93 		
       
    94 			/** Used state machine. E.g. compiled from dictionary */
       
    95 			StateMachine<Encoding>& machine_;
       
    96 			
       
    97 			/** Pointer to a state. Used for moving within state machine  */
       
    98 			StateCursor<Encoding> state_; 
       
    99 			
       
   100 			/** Compiled text */ 
       
   101 			const wchar_t* text_; 
       
   102 			
       
   103 			/** Cursor in text */
       
   104 			int cursor_;
       
   105 			
       
   106 			/** Current break */
       
   107 			int current_; 
       
   108 			
       
   109 			/** Next break */
       
   110 			int next_; 
       
   111 			
       
   112 	};
       
   113 
       
   114 }
       
   115 
       
   116 #include "breakiterator.inl"
       
   117 
       
   118 #endif /* BREAKITERATOR_H_ */