WebCore/editing/TextIterator.h
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
       
     3  *
       
     4  * Redistribution and use in source and binary forms, with or without
       
     5  * modification, are permitted provided that the following conditions
       
     6  * are met:
       
     7  * 1. Redistributions of source code must retain the above copyright
       
     8  *    notice, this list of conditions and the following disclaimer.
       
     9  * 2. Redistributions in binary form must reproduce the above copyright
       
    10  *    notice, this list of conditions and the following disclaimer in the
       
    11  *    documentation and/or other materials provided with the distribution.
       
    12  *
       
    13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
       
    14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       
    16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
       
    17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
       
    18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
       
    19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
       
    20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
       
    21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
       
    23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
       
    24  */
       
    25 
       
    26 #ifndef TextIterator_h
       
    27 #define TextIterator_h
       
    28 
       
    29 #include "InlineTextBox.h"
       
    30 #include "Range.h"
       
    31 #include <wtf/Vector.h>
       
    32 
       
    33 namespace WebCore {
       
    34 
       
    35 // FIXME: Can't really answer this question correctly without knowing the white-space mode.
       
    36 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
       
    37 inline bool isCollapsibleWhitespace(UChar c)
       
    38 {
       
    39     switch (c) {
       
    40         case ' ':
       
    41         case '\n':
       
    42             return true;
       
    43         default:
       
    44             return false;
       
    45     }
       
    46 }
       
    47 
       
    48 String plainText(const Range*);
       
    49 UChar* plainTextToMallocAllocatedBuffer(const Range*, unsigned& bufferLength, bool isDisplayString);
       
    50 PassRefPtr<Range> findPlainText(const Range*, const String&, bool forward, bool caseSensitive);
       
    51 
       
    52 class BitStack {
       
    53 public:
       
    54     BitStack();
       
    55 
       
    56     void push(bool);
       
    57     void pop();
       
    58 
       
    59     bool top() const;
       
    60     unsigned size() const;
       
    61 
       
    62 private:
       
    63     unsigned m_size;
       
    64     Vector<unsigned, 1> m_words;
       
    65 };
       
    66 
       
    67 // Iterates through the DOM range, returning all the text, and 0-length boundaries
       
    68 // at points where replaced elements break up the text flow.  The text comes back in
       
    69 // chunks so as to optimize for performance of the iteration.
       
    70 
       
    71 enum TextIteratorBehavior {
       
    72     TextIteratorDefaultBehavior = 0,
       
    73     TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0,
       
    74     TextIteratorEntersTextControls = 1 << 1,
       
    75     TextIteratorEmitsTextsWithoutTranscoding = 1 << 2,
       
    76 };
       
    77 
       
    78 class TextIterator {
       
    79 public:
       
    80     TextIterator();
       
    81     explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
       
    82 
       
    83     bool atEnd() const { return !m_positionNode; }
       
    84     void advance();
       
    85     
       
    86     int length() const { return m_textLength; }
       
    87     const UChar* characters() const { return m_textCharacters; }
       
    88     
       
    89     PassRefPtr<Range> range() const;
       
    90     Node* node() const;
       
    91      
       
    92     static int rangeLength(const Range*, bool spacesForReplacedElements = false);
       
    93     static PassRefPtr<Range> rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
       
    94     static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
       
    95     
       
    96 private:
       
    97     void exitNode();
       
    98     bool shouldRepresentNodeOffsetZero();
       
    99     bool shouldEmitSpaceBeforeAndAfterNode(Node*);
       
   100     void representNodeOffsetZero();
       
   101     bool handleTextNode();
       
   102     bool handleReplacedElement();
       
   103     bool handleNonTextNode();
       
   104     void handleTextBox();
       
   105     void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
       
   106     void emitText(Node* textNode, int textStartOffset, int textEndOffset);
       
   107     
       
   108     // Current position, not necessarily of the text being returned, but position
       
   109     // as we walk through the DOM tree.
       
   110     Node* m_node;
       
   111     int m_offset;
       
   112     bool m_handledNode;
       
   113     bool m_handledChildren;
       
   114     BitStack m_fullyClippedStack;
       
   115     
       
   116     // The range.
       
   117     Node* m_startContainer;
       
   118     int m_startOffset;
       
   119     Node* m_endContainer;
       
   120     int m_endOffset;
       
   121     Node* m_pastEndNode;
       
   122     
       
   123     // The current text and its position, in the form to be returned from the iterator.
       
   124     Node* m_positionNode;
       
   125     mutable Node* m_positionOffsetBaseNode;
       
   126     mutable int m_positionStartOffset;
       
   127     mutable int m_positionEndOffset;
       
   128     const UChar* m_textCharacters;
       
   129     int m_textLength;
       
   130     // Hold string m_textCharacters points to so we ensure it won't be deleted.
       
   131     String m_text;
       
   132 
       
   133     // Used when there is still some pending text from the current node; when these
       
   134     // are false and 0, we go back to normal iterating.
       
   135     bool m_needsAnotherNewline;
       
   136     InlineTextBox* m_textBox;
       
   137     
       
   138     // Used to do the whitespace collapsing logic.
       
   139     Node* m_lastTextNode;    
       
   140     bool m_lastTextNodeEndedWithCollapsedSpace;
       
   141     UChar m_lastCharacter;
       
   142     
       
   143     // Used for whitespace characters that aren't in the DOM, so we can point at them.
       
   144     UChar m_singleCharacterBuffer;
       
   145     
       
   146     // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
       
   147     Vector<InlineTextBox*> m_sortedTextBoxes;
       
   148     size_t m_sortedTextBoxesPosition;
       
   149     
       
   150     // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
       
   151     bool m_hasEmitted;
       
   152     
       
   153     // Used by selection preservation code.  There should be one character emitted between every VisiblePosition
       
   154     // in the Range used to create the TextIterator.
       
   155     // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite 
       
   156     // moveParagraphs to not clone/destroy moved content.
       
   157     bool m_emitsCharactersBetweenAllVisiblePositions;
       
   158     bool m_entersTextControls;
       
   159 
       
   160     // Used when we want texts for copying, pasting, and transposing.
       
   161     bool m_emitsTextWithoutTranscoding;
       
   162 };
       
   163 
       
   164 // Iterates through the DOM range, returning all the text, and 0-length boundaries
       
   165 // at points where replaced elements break up the text flow. The text comes back in
       
   166 // chunks so as to optimize for performance of the iteration.
       
   167 class SimplifiedBackwardsTextIterator {
       
   168 public:
       
   169     SimplifiedBackwardsTextIterator();
       
   170     explicit SimplifiedBackwardsTextIterator(const Range*);
       
   171     
       
   172     bool atEnd() const { return !m_positionNode; }
       
   173     void advance();
       
   174     
       
   175     int length() const { return m_textLength; }
       
   176     const UChar* characters() const { return m_textCharacters; }
       
   177     
       
   178     PassRefPtr<Range> range() const;
       
   179         
       
   180 private:
       
   181     void exitNode();
       
   182     bool handleTextNode();
       
   183     bool handleReplacedElement();
       
   184     bool handleNonTextNode();
       
   185     void emitCharacter(UChar, Node*, int startOffset, int endOffset);
       
   186     
       
   187     // Current position, not necessarily of the text being returned, but position
       
   188     // as we walk through the DOM tree.
       
   189     Node* m_node;
       
   190     int m_offset;
       
   191     bool m_handledNode;
       
   192     bool m_handledChildren;
       
   193     BitStack m_fullyClippedStack;
       
   194 
       
   195     // End of the range.
       
   196     Node* m_startNode;
       
   197     int m_startOffset;
       
   198     // Start of the range.
       
   199     Node* m_endNode;
       
   200     int m_endOffset;
       
   201     
       
   202     // The current text and its position, in the form to be returned from the iterator.
       
   203     Node* m_positionNode;
       
   204     int m_positionStartOffset;
       
   205     int m_positionEndOffset;
       
   206     const UChar* m_textCharacters;
       
   207     int m_textLength;
       
   208 
       
   209     // Used to do the whitespace logic.
       
   210     Node* m_lastTextNode;    
       
   211     UChar m_lastCharacter;
       
   212     
       
   213     // Used for whitespace characters that aren't in the DOM, so we can point at them.
       
   214     UChar m_singleCharacterBuffer;
       
   215     
       
   216     // The node after the last node this iterator should process.
       
   217     Node* m_pastStartNode;
       
   218 };
       
   219 
       
   220 // Builds on the text iterator, adding a character position so we can walk one
       
   221 // character at a time, or faster, as needed. Useful for searching.
       
   222 class CharacterIterator {
       
   223 public:
       
   224     CharacterIterator();
       
   225     explicit CharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
       
   226     
       
   227     void advance(int numCharacters);
       
   228     
       
   229     bool atBreak() const { return m_atBreak; }
       
   230     bool atEnd() const { return m_textIterator.atEnd(); }
       
   231     
       
   232     int length() const { return m_textIterator.length() - m_runOffset; }
       
   233     const UChar* characters() const { return m_textIterator.characters() + m_runOffset; }
       
   234     String string(int numChars);
       
   235     
       
   236     int characterOffset() const { return m_offset; }
       
   237     PassRefPtr<Range> range() const;
       
   238         
       
   239 private:
       
   240     int m_offset;
       
   241     int m_runOffset;
       
   242     bool m_atBreak;
       
   243     
       
   244     TextIterator m_textIterator;
       
   245 };
       
   246     
       
   247 class BackwardsCharacterIterator {
       
   248 public:
       
   249     BackwardsCharacterIterator();
       
   250     explicit BackwardsCharacterIterator(const Range*);
       
   251 
       
   252     void advance(int);
       
   253 
       
   254     bool atEnd() const { return m_textIterator.atEnd(); }
       
   255 
       
   256     PassRefPtr<Range> range() const;
       
   257 
       
   258 private:
       
   259     int m_offset;
       
   260     int m_runOffset;
       
   261     bool m_atBreak;
       
   262 
       
   263     SimplifiedBackwardsTextIterator m_textIterator;
       
   264 };
       
   265 
       
   266 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
       
   267 // meaning they never end split up a word.  This is useful for spellcheck or (perhaps one day) searching.
       
   268 class WordAwareIterator {
       
   269 public:
       
   270     WordAwareIterator();
       
   271     explicit WordAwareIterator(const Range*);
       
   272 
       
   273     bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
       
   274     void advance();
       
   275     
       
   276     int length() const;
       
   277     const UChar* characters() const;
       
   278     
       
   279     // Range of the text we're currently returning
       
   280     PassRefPtr<Range> range() const { return m_range; }
       
   281 
       
   282 private:
       
   283     // text from the previous chunk from the textIterator
       
   284     const UChar* m_previousText;
       
   285     int m_previousLength;
       
   286 
       
   287     // many chunks from textIterator concatenated
       
   288     Vector<UChar> m_buffer;
       
   289     
       
   290     // Did we have to look ahead in the textIterator to confirm the current chunk?
       
   291     bool m_didLookAhead;
       
   292 
       
   293     RefPtr<Range> m_range;
       
   294 
       
   295     TextIterator m_textIterator;
       
   296 };
       
   297 
       
   298 }
       
   299 
       
   300 #endif