/*
 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
|
25 |
|
26 #ifndef TextIterator_h |
|
27 #define TextIterator_h |
|
28 |
|
29 #include "InlineTextBox.h" |
|
30 #include "Range.h" |
|
31 #include <wtf/Vector.h> |
|
32 |
|
33 namespace WebCore { |
|
34 |
|
35 // FIXME: Can't really answer this question correctly without knowing the white-space mode. |
|
36 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here. |
|
37 inline bool isCollapsibleWhitespace(UChar c) |
|
38 { |
|
39 switch (c) { |
|
40 case ' ': |
|
41 case '\n': |
|
42 return true; |
|
43 default: |
|
44 return false; |
|
45 } |
|
46 } |
|
47 |
|
48 String plainText(const Range*); |
|
49 UChar* plainTextToMallocAllocatedBuffer(const Range*, unsigned& bufferLength, bool isDisplayString); |
|
50 PassRefPtr<Range> findPlainText(const Range*, const String&, bool forward, bool caseSensitive); |
|
51 |
|
52 class BitStack { |
|
53 public: |
|
54 BitStack(); |
|
55 |
|
56 void push(bool); |
|
57 void pop(); |
|
58 |
|
59 bool top() const; |
|
60 unsigned size() const; |
|
61 |
|
62 private: |
|
63 unsigned m_size; |
|
64 Vector<unsigned, 1> m_words; |
|
65 }; |
|
66 |
|
// Iterates through the DOM range, returning all the text, and 0-length boundaries
// at points where replaced elements break up the text flow.  The text comes back in
// chunks so as to optimize for performance of the iteration.
|
70 |
|
// Bit flags selecting optional TextIterator behavior. Each non-default value
// occupies its own bit.
// NOTE(review): the flags appear to map by name onto the m_emits... /
// m_entersTextControls members of TextIterator -- confirm in the constructor.
enum TextIteratorBehavior {
    // No optional behavior requested.
    TextIteratorDefaultBehavior = 0,
    TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0,
    TextIteratorEntersTextControls = 1 << 1,
    // No comma after the final enumerator: a trailing comma in an
    // enumerator list is only valid from C++11 onwards.
    TextIteratorEmitsTextsWithoutTranscoding = 1 << 2
};
|
77 |
|
78 class TextIterator { |
|
79 public: |
|
80 TextIterator(); |
|
81 explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior); |
|
82 |
|
83 bool atEnd() const { return !m_positionNode; } |
|
84 void advance(); |
|
85 |
|
86 int length() const { return m_textLength; } |
|
87 const UChar* characters() const { return m_textCharacters; } |
|
88 |
|
89 PassRefPtr<Range> range() const; |
|
90 Node* node() const; |
|
91 |
|
92 static int rangeLength(const Range*, bool spacesForReplacedElements = false); |
|
93 static PassRefPtr<Range> rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false); |
|
94 static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount); |
|
95 |
|
96 private: |
|
97 void exitNode(); |
|
98 bool shouldRepresentNodeOffsetZero(); |
|
99 bool shouldEmitSpaceBeforeAndAfterNode(Node*); |
|
100 void representNodeOffsetZero(); |
|
101 bool handleTextNode(); |
|
102 bool handleReplacedElement(); |
|
103 bool handleNonTextNode(); |
|
104 void handleTextBox(); |
|
105 void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset); |
|
106 void emitText(Node* textNode, int textStartOffset, int textEndOffset); |
|
107 |
|
108 // Current position, not necessarily of the text being returned, but position |
|
109 // as we walk through the DOM tree. |
|
110 Node* m_node; |
|
111 int m_offset; |
|
112 bool m_handledNode; |
|
113 bool m_handledChildren; |
|
114 BitStack m_fullyClippedStack; |
|
115 |
|
116 // The range. |
|
117 Node* m_startContainer; |
|
118 int m_startOffset; |
|
119 Node* m_endContainer; |
|
120 int m_endOffset; |
|
121 Node* m_pastEndNode; |
|
122 |
|
123 // The current text and its position, in the form to be returned from the iterator. |
|
124 Node* m_positionNode; |
|
125 mutable Node* m_positionOffsetBaseNode; |
|
126 mutable int m_positionStartOffset; |
|
127 mutable int m_positionEndOffset; |
|
128 const UChar* m_textCharacters; |
|
129 int m_textLength; |
|
130 // Hold string m_textCharacters points to so we ensure it won't be deleted. |
|
131 String m_text; |
|
132 |
|
133 // Used when there is still some pending text from the current node; when these |
|
134 // are false and 0, we go back to normal iterating. |
|
135 bool m_needsAnotherNewline; |
|
136 InlineTextBox* m_textBox; |
|
137 |
|
138 // Used to do the whitespace collapsing logic. |
|
139 Node* m_lastTextNode; |
|
140 bool m_lastTextNodeEndedWithCollapsedSpace; |
|
141 UChar m_lastCharacter; |
|
142 |
|
143 // Used for whitespace characters that aren't in the DOM, so we can point at them. |
|
144 UChar m_singleCharacterBuffer; |
|
145 |
|
146 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text) |
|
147 Vector<InlineTextBox*> m_sortedTextBoxes; |
|
148 size_t m_sortedTextBoxesPosition; |
|
149 |
|
150 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content |
|
151 bool m_hasEmitted; |
|
152 |
|
153 // Used by selection preservation code. There should be one character emitted between every VisiblePosition |
|
154 // in the Range used to create the TextIterator. |
|
155 // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite |
|
156 // moveParagraphs to not clone/destroy moved content. |
|
157 bool m_emitsCharactersBetweenAllVisiblePositions; |
|
158 bool m_entersTextControls; |
|
159 |
|
160 // Used when we want texts for copying, pasting, and transposing. |
|
161 bool m_emitsTextWithoutTranscoding; |
|
162 }; |
|
163 |
|
164 // Iterates through the DOM range, returning all the text, and 0-length boundaries |
|
165 // at points where replaced elements break up the text flow. The text comes back in |
|
166 // chunks so as to optimize for performance of the iteration. |
|
167 class SimplifiedBackwardsTextIterator { |
|
168 public: |
|
169 SimplifiedBackwardsTextIterator(); |
|
170 explicit SimplifiedBackwardsTextIterator(const Range*); |
|
171 |
|
172 bool atEnd() const { return !m_positionNode; } |
|
173 void advance(); |
|
174 |
|
175 int length() const { return m_textLength; } |
|
176 const UChar* characters() const { return m_textCharacters; } |
|
177 |
|
178 PassRefPtr<Range> range() const; |
|
179 |
|
180 private: |
|
181 void exitNode(); |
|
182 bool handleTextNode(); |
|
183 bool handleReplacedElement(); |
|
184 bool handleNonTextNode(); |
|
185 void emitCharacter(UChar, Node*, int startOffset, int endOffset); |
|
186 |
|
187 // Current position, not necessarily of the text being returned, but position |
|
188 // as we walk through the DOM tree. |
|
189 Node* m_node; |
|
190 int m_offset; |
|
191 bool m_handledNode; |
|
192 bool m_handledChildren; |
|
193 BitStack m_fullyClippedStack; |
|
194 |
|
195 // End of the range. |
|
196 Node* m_startNode; |
|
197 int m_startOffset; |
|
198 // Start of the range. |
|
199 Node* m_endNode; |
|
200 int m_endOffset; |
|
201 |
|
202 // The current text and its position, in the form to be returned from the iterator. |
|
203 Node* m_positionNode; |
|
204 int m_positionStartOffset; |
|
205 int m_positionEndOffset; |
|
206 const UChar* m_textCharacters; |
|
207 int m_textLength; |
|
208 |
|
209 // Used to do the whitespace logic. |
|
210 Node* m_lastTextNode; |
|
211 UChar m_lastCharacter; |
|
212 |
|
213 // Used for whitespace characters that aren't in the DOM, so we can point at them. |
|
214 UChar m_singleCharacterBuffer; |
|
215 |
|
216 // The node after the last node this iterator should process. |
|
217 Node* m_pastStartNode; |
|
218 }; |
|
219 |
|
220 // Builds on the text iterator, adding a character position so we can walk one |
|
221 // character at a time, or faster, as needed. Useful for searching. |
|
222 class CharacterIterator { |
|
223 public: |
|
224 CharacterIterator(); |
|
225 explicit CharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior); |
|
226 |
|
227 void advance(int numCharacters); |
|
228 |
|
229 bool atBreak() const { return m_atBreak; } |
|
230 bool atEnd() const { return m_textIterator.atEnd(); } |
|
231 |
|
232 int length() const { return m_textIterator.length() - m_runOffset; } |
|
233 const UChar* characters() const { return m_textIterator.characters() + m_runOffset; } |
|
234 String string(int numChars); |
|
235 |
|
236 int characterOffset() const { return m_offset; } |
|
237 PassRefPtr<Range> range() const; |
|
238 |
|
239 private: |
|
240 int m_offset; |
|
241 int m_runOffset; |
|
242 bool m_atBreak; |
|
243 |
|
244 TextIterator m_textIterator; |
|
245 }; |
|
246 |
|
247 class BackwardsCharacterIterator { |
|
248 public: |
|
249 BackwardsCharacterIterator(); |
|
250 explicit BackwardsCharacterIterator(const Range*); |
|
251 |
|
252 void advance(int); |
|
253 |
|
254 bool atEnd() const { return m_textIterator.atEnd(); } |
|
255 |
|
256 PassRefPtr<Range> range() const; |
|
257 |
|
258 private: |
|
259 int m_offset; |
|
260 int m_runOffset; |
|
261 bool m_atBreak; |
|
262 |
|
263 SimplifiedBackwardsTextIterator m_textIterator; |
|
264 }; |
|
265 |
|
266 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved", |
|
267 // meaning they never end split up a word. This is useful for spellcheck or (perhaps one day) searching. |
|
268 class WordAwareIterator { |
|
269 public: |
|
270 WordAwareIterator(); |
|
271 explicit WordAwareIterator(const Range*); |
|
272 |
|
273 bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); } |
|
274 void advance(); |
|
275 |
|
276 int length() const; |
|
277 const UChar* characters() const; |
|
278 |
|
279 // Range of the text we're currently returning |
|
280 PassRefPtr<Range> range() const { return m_range; } |
|
281 |
|
282 private: |
|
283 // text from the previous chunk from the textIterator |
|
284 const UChar* m_previousText; |
|
285 int m_previousLength; |
|
286 |
|
287 // many chunks from textIterator concatenated |
|
288 Vector<UChar> m_buffer; |
|
289 |
|
290 // Did we have to look ahead in the textIterator to confirm the current chunk? |
|
291 bool m_didLookAhead; |
|
292 |
|
293 RefPtr<Range> m_range; |
|
294 |
|
295 TextIterator m_textIterator; |
|
296 }; |
|
297 |
|
298 } |
|
299 |
|
300 #endif |