|
/*
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description: Break iterator interface for finding word boundaries in
*              text, and a state machine based implementation of it.
*
*/
|
17 |
|
18 #ifndef BREAKITERATOR_H_ |
|
19 #define BREAKITERATOR_H_ |
|
20 |
|
21 #include "statemachine.h" |
|
22 |
|
23 namespace analysis { |
|
24 |
|
/**
 * Abstract interface for finding word boundaries in text.
 *
 * A text is supplied with setText(); the breaks are then walked with
 * hasNext() / next(), and current() reports the location of the
 * current break.
 */
class BreakIterator {

public:

    /**
     * Virtual, so that implementations can be destroyed through a
     * BreakIterator pointer. Only declared here; the definition is
     * not part of this class declaration.
     */
    virtual ~BreakIterator();

    /**
     * Sets the text in which breaks are searched.
     *
     * NOTE(review): ownership and lifetime of the buffer are not
     * specified by this interface - presumably the caller must keep
     * it alive while iterating; confirm against the implementations.
     *
     * @param text the text to be analyzed
     */
    virtual void setText(const wchar_t* text) = 0;

    /**
     * Returns true if a next boundary exists.
     */
    virtual bool hasNext() = 0;

    /**
     * Returns the location of the current break in the string.
     */
    virtual int current() = 0;

    /**
     * Finds the next break and returns its location.
     */
    virtual int next() = 0;

};
|
55 |
|
56 /** |
|
57 * State machine and longest matching algorithm based break |
|
58 * iterator. Used for finding word boundaries. State machine |
|
59 * is typically compiled from dictionary. |
|
60 * |
|
61 * @tparam Encoding Describes the serialization format of the state machine |
|
62 */ |
|
63 template<class Encoding> |
|
64 class StateMachineBreakIterator : public BreakIterator { |
|
65 |
|
66 public: |
|
67 |
|
68 /** |
|
69 * Constructs the break iterator to use given state machine |
|
70 */ |
|
71 StateMachineBreakIterator(StateMachine<Encoding>& machine); |
|
72 |
|
73 ~StateMachineBreakIterator(); |
|
74 |
|
75 public: // From BreakIterator |
|
76 |
|
77 virtual void setText(const wchar_t* text); |
|
78 |
|
79 virtual bool hasNext(); |
|
80 |
|
81 virtual int current(); |
|
82 |
|
83 virtual int next(); |
|
84 |
|
85 private: |
|
86 |
|
87 /** |
|
88 * Prepares next |
|
89 */ |
|
90 void prepareNext(); |
|
91 |
|
92 private: |
|
93 |
|
94 /** Used state machine. E.g. compiled from dictionary */ |
|
95 StateMachine<Encoding>& machine_; |
|
96 |
|
97 /** Pointer to a state. Used for moving within state machine */ |
|
98 StateCursor<Encoding> state_; |
|
99 |
|
100 /** Compiled text */ |
|
101 const wchar_t* text_; |
|
102 |
|
103 /** Cursor in text */ |
|
104 int cursor_; |
|
105 |
|
106 /** Current break */ |
|
107 int current_; |
|
108 |
|
109 /** Next break */ |
|
110 int next_; |
|
111 |
|
112 }; |
|
113 |
|
114 } |
|
115 |
|
116 #include "breakiterator.inl" |
|
117 |
|
118 #endif /* BREAKITERATOR_H_ */ |