WebCore/editing/TextIterator.cpp
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
       
     3  * Copyright (C) 2005 Alexey Proskuryakov.
       
     4  *
       
     5  * Redistribution and use in source and binary forms, with or without
       
     6  * modification, are permitted provided that the following conditions
       
     7  * are met:
       
     8  * 1. Redistributions of source code must retain the above copyright
       
     9  *    notice, this list of conditions and the following disclaimer.
       
    10  * 2. Redistributions in binary form must reproduce the above copyright
       
    11  *    notice, this list of conditions and the following disclaimer in the
       
    12  *    documentation and/or other materials provided with the distribution.
       
    13  *
       
    14  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
       
    15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       
    17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
       
    18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
       
    19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
       
    20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
       
    21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
       
    22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
       
    24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
       
    25  */
       
    26 
       
    27 #include "config.h"
       
    28 #include "TextIterator.h"
       
    29 
       
    30 #include "CharacterNames.h"
       
    31 #include "Document.h"
       
    32 #include "HTMLElement.h"
       
    33 #include "HTMLNames.h"
       
    34 #include "htmlediting.h"
       
    35 #include "InlineTextBox.h"
       
    36 #include "Range.h"
       
    37 #include "RenderTableCell.h"
       
    38 #include "RenderTableRow.h"
       
    39 #include "RenderTextControl.h"
       
    40 #include "VisiblePosition.h"
       
    41 #include "visible_units.h"
       
    42 
       
    43 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
       
    44 #include "TextBreakIteratorInternalICU.h"
       
    45 #include <unicode/usearch.h>
       
    46 #endif
       
    47 
       
    48 using namespace WTF::Unicode;
       
    49 using namespace std;
       
    50 
       
    51 namespace WebCore {
       
    52 
       
    53 using namespace HTMLNames;
       
    54 
       
    55 // Buffer that knows how to compare with a search target.
       
    56 // Keeps enough of the previous text to be able to search in the future, but no more.
       
    57 // Non-breaking spaces are always equal to normal spaces.
       
    58 // Case folding is also done if <isCaseSensitive> is false.
       
    59 class SearchBuffer : public Noncopyable {
       
    60 public:
       
    61     SearchBuffer(const String& target, bool isCaseSensitive);
       
    62     ~SearchBuffer();
       
    63 
       
    64     // Returns number of characters appended; guaranteed to be in the range [1, length].
       
    65     size_t append(const UChar*, size_t length);
       
    66     void reachedBreak();
       
    67 
       
    68     // Result is the size in characters of what was found.
       
    69     // And <startOffset> is the number of characters back to the start of what was found.
       
    70     size_t search(size_t& startOffset);
       
    71     bool atBreak() const;
       
    72 
       
    73 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
       
    74 
       
    75 private:
       
    76     bool isBadMatch(const UChar*, size_t length) const;
       
    77 
       
    78     String m_target;
       
    79     Vector<UChar> m_buffer;
       
    80     size_t m_overlap;
       
    81     bool m_atBreak;
       
    82 
       
    83     bool m_targetRequiresKanaWorkaround;
       
    84     Vector<UChar> m_normalizedTarget;
       
    85     mutable Vector<UChar> m_normalizedMatch;
       
    86 
       
    87 #else
       
    88 
       
    89 private:
       
    90     void append(UChar, bool isCharacterStart);
       
    91     size_t length() const;
       
    92 
       
    93     String m_target;
       
    94     bool m_isCaseSensitive;
       
    95 
       
    96     Vector<UChar> m_buffer;
       
    97     Vector<bool> m_isCharacterStartBuffer;
       
    98     bool m_isBufferFull;
       
    99     size_t m_cursor;
       
   100 
       
   101 #endif
       
   102 };
       
   103 
       
   104 // --------
       
   105 
       
   106 static const unsigned bitsInWord = sizeof(unsigned) * 8;
       
   107 static const unsigned bitInWordMask = bitsInWord - 1;
       
   108 
       
   109 BitStack::BitStack()
       
   110     : m_size(0)
       
   111 {
       
   112 }
       
   113 
       
   114 void BitStack::push(bool bit)
       
   115 {
       
   116     unsigned index = m_size / bitsInWord;
       
   117     unsigned shift = m_size & bitInWordMask;
       
   118     if (!shift && index == m_words.size()) {
       
   119         m_words.grow(index + 1);
       
   120         m_words[index] = 0;
       
   121     }
       
   122     unsigned& word = m_words[index];
       
   123     unsigned mask = 1U << shift;
       
   124     if (bit)
       
   125         word |= mask;
       
   126     else
       
   127         word &= ~mask;
       
   128     ++m_size;
       
   129 }
       
   130 
       
   131 void BitStack::pop()
       
   132 {
       
   133     if (m_size)
       
   134         --m_size;
       
   135 }
       
   136 
       
   137 bool BitStack::top() const
       
   138 {
       
   139     if (!m_size)
       
   140         return false;
       
   141     unsigned shift = (m_size - 1) & bitInWordMask;
       
   142     return m_words.last() & (1U << shift);
       
   143 }
       
   144 
       
   145 unsigned BitStack::size() const
       
   146 {
       
   147     return m_size;
       
   148 }
       
   149 
       
   150 // --------
       
   151 
       
   152 static inline Node* parentCrossingShadowBoundaries(Node* node)
       
   153 {
       
   154     if (Node* parent = node->parentNode())
       
   155         return parent;
       
   156     return node->shadowParentNode();
       
   157 }
       
   158 
       
   159 #if !ASSERT_DISABLED
       
   160 
       
   161 static unsigned depthCrossingShadowBoundaries(Node* node)
       
   162 {
       
   163     unsigned depth = 0;
       
   164     for (Node* parent = parentCrossingShadowBoundaries(node); parent; parent = parentCrossingShadowBoundaries(parent))
       
   165         ++depth;
       
   166     return depth;
       
   167 }
       
   168 
       
   169 #endif
       
   170 
       
   171 // This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees.
       
   172 static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset)
       
   173 {
       
   174     if (!rangeEndContainer)
       
   175         return 0;
       
   176     if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) {
       
   177         if (Node* next = rangeEndContainer->childNode(rangeEndOffset))
       
   178             return next;
       
   179     }
       
   180     for (Node* node = rangeEndContainer; node; node = parentCrossingShadowBoundaries(node)) {
       
   181         if (Node* next = node->nextSibling())
       
   182             return next;
       
   183     }
       
   184     return 0;
       
   185 }
       
   186 
       
   187 static Node* previousInPostOrderCrossingShadowBoundaries(Node* rangeStartContainer, int rangeStartOffset)
       
   188 {
       
   189     if (!rangeStartContainer)
       
   190         return 0;
       
   191     if (rangeStartOffset > 0 && !rangeStartContainer->offsetInCharacters()) {
       
   192         if (Node* previous = rangeStartContainer->childNode(rangeStartOffset - 1))
       
   193             return previous;
       
   194     }
       
   195     for (Node* node = rangeStartContainer; node; node = parentCrossingShadowBoundaries(node)) {
       
   196         if (Node* previous = node->previousSibling())
       
   197             return previous;
       
   198     }
       
   199     return 0;
       
   200 }
       
   201 
       
   202 // --------
       
   203 
       
   204 static inline bool fullyClipsContents(Node* node)
       
   205 {
       
   206     RenderObject* renderer = node->renderer();
       
   207     if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip())
       
   208         return false;
       
   209     return toRenderBox(renderer)->size().isEmpty();
       
   210 }
       
   211 
       
   212 static inline bool ignoresContainerClip(Node* node)
       
   213 {
       
   214     RenderObject* renderer = node->renderer();
       
   215     if (!renderer || renderer->isText())
       
   216         return false;
       
   217     EPosition position = renderer->style()->position();
       
   218     return position == AbsolutePosition || position == FixedPosition;
       
   219 }
       
   220 
       
   221 static void pushFullyClippedState(BitStack& stack, Node* node)
       
   222 {
       
   223     ASSERT(stack.size() == depthCrossingShadowBoundaries(node));
       
   224 
       
   225     // Push true if this node full clips its contents, or if a parent already has fully
       
   226     // clipped and this is not a node that ignores its container's clip.
       
   227     stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node)));
       
   228 }
       
   229 
       
   230 static void setUpFullyClippedStack(BitStack& stack, Node* node)
       
   231 {
       
   232     // Put the nodes in a vector so we can iterate in reverse order.
       
   233     Vector<Node*, 100> ancestry;
       
   234     for (Node* parent = parentCrossingShadowBoundaries(node); parent; parent = parentCrossingShadowBoundaries(parent))
       
   235         ancestry.append(parent);
       
   236 
       
   237     // Call pushFullyClippedState on each node starting with the earliest ancestor.
       
   238     size_t size = ancestry.size();
       
   239     for (size_t i = 0; i < size; ++i)
       
   240         pushFullyClippedState(stack, ancestry[size - i - 1]);
       
   241     pushFullyClippedState(stack, node);
       
   242 
       
   243     ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node));
       
   244 }
       
   245 
       
   246 // --------
       
   247 
       
   248 TextIterator::TextIterator()
       
   249     : m_startContainer(0)
       
   250     , m_startOffset(0)
       
   251     , m_endContainer(0)
       
   252     , m_endOffset(0)
       
   253     , m_positionNode(0)
       
   254     , m_textCharacters(0)
       
   255     , m_textLength(0)
       
   256     , m_lastCharacter(0)
       
   257     , m_emitsCharactersBetweenAllVisiblePositions(false)
       
   258     , m_entersTextControls(false)
       
   259     , m_emitsTextWithoutTranscoding(false)
       
   260 {
       
   261 }
       
   262 
       
   263 TextIterator::TextIterator(const Range* r, TextIteratorBehavior behavior)
       
   264     : m_startContainer(0)
       
   265     , m_startOffset(0)
       
   266     , m_endContainer(0)
       
   267     , m_endOffset(0)
       
   268     , m_positionNode(0)
       
   269     , m_textCharacters(0)
       
   270     , m_textLength(0)
       
   271     , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions)
       
   272     , m_entersTextControls(behavior & TextIteratorEntersTextControls)
       
   273     , m_emitsTextWithoutTranscoding(behavior & TextIteratorEmitsTextsWithoutTranscoding)
       
   274 {
       
   275     if (!r)
       
   276         return;
       
   277 
       
   278     // get and validate the range endpoints
       
   279     Node* startContainer = r->startContainer();
       
   280     if (!startContainer)
       
   281         return;
       
   282     int startOffset = r->startOffset();
       
   283     Node* endContainer = r->endContainer();
       
   284     int endOffset = r->endOffset();
       
   285 
       
   286     // Callers should be handing us well-formed ranges. If we discover that this isn't
       
   287     // the case, we could consider changing this assertion to an early return.
       
   288     ASSERT(r->boundaryPointsValid());
       
   289 
       
   290     // remember range - this does not change
       
   291     m_startContainer = startContainer;
       
   292     m_startOffset = startOffset;
       
   293     m_endContainer = endContainer;
       
   294     m_endOffset = endOffset;
       
   295 
       
   296     // set up the current node for processing
       
   297     m_node = r->firstNode();
       
   298     if (!m_node)
       
   299         return;
       
   300     setUpFullyClippedStack(m_fullyClippedStack, m_node);
       
   301     m_offset = m_node == m_startContainer ? m_startOffset : 0;
       
   302     m_handledNode = false;
       
   303     m_handledChildren = false;
       
   304 
       
   305     // calculate first out of bounds node
       
   306     m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset);
       
   307 
       
   308     // initialize node processing state
       
   309     m_needsAnotherNewline = false;
       
   310     m_textBox = 0;
       
   311 
       
   312     // initialize record of previous node processing
       
   313     m_hasEmitted = false;
       
   314     m_lastTextNode = 0;
       
   315     m_lastTextNodeEndedWithCollapsedSpace = false;
       
   316     m_lastCharacter = 0;
       
   317 
       
   318 #ifndef NDEBUG
       
   319     // need this just because of the assert in advance()
       
   320     m_positionNode = m_node;
       
   321 #endif
       
   322 
       
   323     // identify the first run
       
   324     advance();
       
   325 }
       
   326 
       
   327 void TextIterator::advance()
       
   328 {
       
   329     // reset the run information
       
   330     m_positionNode = 0;
       
   331     m_textLength = 0;
       
   332 
       
   333     // handle remembered node that needed a newline after the text node's newline
       
   334     if (m_needsAnotherNewline) {
       
   335         // Emit the extra newline, and position it *inside* m_node, after m_node's 
       
   336         // contents, in case it's a block, in the same way that we position the first 
       
   337         // newline.  The range for the emitted newline should start where the line 
       
   338         // break begins.
       
   339         // FIXME: It would be cleaner if we emitted two newlines during the last 
       
   340         // iteration, instead of using m_needsAnotherNewline.
       
   341         Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node;
       
   342         emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
       
   343         m_needsAnotherNewline = false;
       
   344         return;
       
   345     }
       
   346 
       
   347     // handle remembered text box
       
   348     if (m_textBox) {
       
   349         handleTextBox();
       
   350         if (m_positionNode)
       
   351             return;
       
   352     }
       
   353 
       
   354     while (m_node && m_node != m_pastEndNode) {
       
   355         // if the range ends at offset 0 of an element, represent the
       
   356         // position, but not the content, of that element e.g. if the
       
   357         // node is a blockflow element, emit a newline that
       
   358         // precedes the element
       
   359         if (m_node == m_endContainer && m_endOffset == 0) {
       
   360             representNodeOffsetZero();
       
   361             m_node = 0;
       
   362             return;
       
   363         }
       
   364         
       
   365         RenderObject* renderer = m_node->renderer();
       
   366         if (!renderer) {
       
   367             m_handledNode = true;
       
   368             m_handledChildren = true;
       
   369         } else {
       
   370             // handle current node according to its type
       
   371             if (!m_handledNode) {
       
   372                 if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) // FIXME: What about CDATA_SECTION_NODE?
       
   373                     m_handledNode = handleTextNode();
       
   374                 else if (renderer && (renderer->isImage() || renderer->isWidget() ||
       
   375                          (renderer->node() && renderer->node()->isElementNode() &&
       
   376                           static_cast<Element*>(renderer->node())->isFormControlElement())))
       
   377                     m_handledNode = handleReplacedElement();
       
   378                 else
       
   379                     m_handledNode = handleNonTextNode();
       
   380                 if (m_positionNode)
       
   381                     return;
       
   382             }
       
   383         }
       
   384 
       
   385         // find a new current node to handle in depth-first manner,
       
   386         // calling exitNode() as we come back thru a parent node
       
   387         Node* next = m_handledChildren ? 0 : m_node->firstChild();
       
   388         m_offset = 0;
       
   389         if (!next) {
       
   390             next = m_node->nextSibling();
       
   391             if (!next) {
       
   392                 bool pastEnd = m_node->traverseNextNode() == m_pastEndNode;
       
   393                 Node* parentNode = parentCrossingShadowBoundaries(m_node);
       
   394                 while (!next && parentNode) {
       
   395                     if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode))
       
   396                         return;
       
   397                     bool haveRenderer = m_node->renderer();
       
   398                     m_node = parentNode;
       
   399                     m_fullyClippedStack.pop();
       
   400                     parentNode = parentCrossingShadowBoundaries(m_node);
       
   401                     if (haveRenderer)
       
   402                         exitNode();
       
   403                     if (m_positionNode) {
       
   404                         m_handledNode = true;
       
   405                         m_handledChildren = true;
       
   406                         return;
       
   407                     }
       
   408                     next = m_node->nextSibling();
       
   409                 }
       
   410             }
       
   411             m_fullyClippedStack.pop();            
       
   412         }
       
   413 
       
   414         // set the new current node
       
   415         m_node = next;
       
   416         if (m_node)
       
   417             pushFullyClippedState(m_fullyClippedStack, m_node);
       
   418         m_handledNode = false;
       
   419         m_handledChildren = false;
       
   420 
       
   421         // how would this ever be?
       
   422         if (m_positionNode)
       
   423             return;
       
   424     }
       
   425 }
       
   426 
       
   427 static inline bool compareBoxStart(const InlineTextBox* first, const InlineTextBox* second)
       
   428 {
       
   429     return first->start() < second->start();
       
   430 }
       
   431 
       
   432 bool TextIterator::handleTextNode()
       
   433 {
       
   434     if (m_fullyClippedStack.top())
       
   435         return false;
       
   436 
       
   437     RenderText* renderer = toRenderText(m_node->renderer());
       
   438     if (renderer->style()->visibility() != VISIBLE)
       
   439         return false;
       
   440         
       
   441     m_lastTextNode = m_node;
       
   442     String str = renderer->text();
       
   443 
       
   444     // handle pre-formatted text
       
   445     if (!renderer->style()->collapseWhiteSpace()) {
       
   446         int runStart = m_offset;
       
   447         if (m_lastTextNodeEndedWithCollapsedSpace) {
       
   448             emitCharacter(' ', m_node, 0, runStart, runStart);
       
   449             return false;
       
   450         }
       
   451         int strLength = str.length();
       
   452         int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX;
       
   453         int runEnd = min(strLength, end);
       
   454 
       
   455         if (runStart >= runEnd)
       
   456             return true;
       
   457 
       
   458         emitText(m_node, runStart, runEnd);
       
   459         return true;
       
   460     }
       
   461 
       
   462     if (!renderer->firstTextBox() && str.length() > 0) {
       
   463         m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space
       
   464         return true;
       
   465     }
       
   466 
       
   467     // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
       
   468     if (renderer->containsReversedText()) {
       
   469         m_sortedTextBoxes.clear();
       
   470         for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) {
       
   471             m_sortedTextBoxes.append(textBox);
       
   472         }
       
   473         std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), compareBoxStart); 
       
   474         m_sortedTextBoxesPosition = 0;
       
   475     }
       
   476     
       
   477     m_textBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox();
       
   478     handleTextBox();
       
   479     return true;
       
   480 }
       
   481 
       
   482 void TextIterator::handleTextBox()
       
   483 {    
       
   484     RenderText* renderer = toRenderText(m_node->renderer());
       
   485     String str = renderer->text();
       
   486     int start = m_offset;
       
   487     int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX;
       
   488     while (m_textBox) {
       
   489         int textBoxStart = m_textBox->start();
       
   490         int runStart = max(textBoxStart, start);
       
   491 
       
   492         // Check for collapsed space at the start of this run.
       
   493         InlineTextBox* firstTextBox = renderer->containsReversedText() ? m_sortedTextBoxes[0] : renderer->firstTextBox();
       
   494         bool needSpace = m_lastTextNodeEndedWithCollapsedSpace
       
   495             || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0);
       
   496         if (needSpace && !isCollapsibleWhitespace(m_lastCharacter) && m_lastCharacter) {
       
   497             if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') {
       
   498                 unsigned spaceRunStart = runStart - 1;
       
   499                 while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ')
       
   500                     --spaceRunStart;
       
   501                 emitText(m_node, spaceRunStart, spaceRunStart + 1);
       
   502             } else
       
   503                 emitCharacter(' ', m_node, 0, runStart, runStart);
       
   504             return;
       
   505         }
       
   506         int textBoxEnd = textBoxStart + m_textBox->len();
       
   507         int runEnd = min(textBoxEnd, end);
       
   508         
       
   509         // Determine what the next text box will be, but don't advance yet
       
   510         InlineTextBox* nextTextBox = 0;
       
   511         if (renderer->containsReversedText()) {
       
   512             if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size())
       
   513                 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1];
       
   514         } else 
       
   515             nextTextBox = m_textBox->nextTextBox();
       
   516 
       
   517         if (runStart < runEnd) {
       
   518             // Handle either a single newline character (which becomes a space),
       
   519             // or a run of characters that does not include a newline.
       
   520             // This effectively translates newlines to spaces without copying the text.
       
   521             if (str[runStart] == '\n') {
       
   522                 emitCharacter(' ', m_node, 0, runStart, runStart + 1);
       
   523                 m_offset = runStart + 1;
       
   524             } else {
       
   525                 int subrunEnd = str.find('\n', runStart);
       
   526                 if (subrunEnd == -1 || subrunEnd > runEnd)
       
   527                     subrunEnd = runEnd;
       
   528     
       
   529                 m_offset = subrunEnd;
       
   530                 emitText(m_node, runStart, subrunEnd);
       
   531             }
       
   532 
       
   533             // If we are doing a subrun that doesn't go to the end of the text box,
       
   534             // come back again to finish handling this text box; don't advance to the next one.
       
   535             if (m_positionEndOffset < textBoxEnd)
       
   536                 return;
       
   537 
       
   538             // Advance and return
       
   539             int nextRunStart = nextTextBox ? nextTextBox->start() : str.length();
       
   540             if (nextRunStart > runEnd)
       
   541                 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end
       
   542             m_textBox = nextTextBox;
       
   543             if (renderer->containsReversedText())
       
   544                 ++m_sortedTextBoxesPosition;
       
   545             return;
       
   546         }
       
   547         // Advance and continue
       
   548         m_textBox = nextTextBox;
       
   549         if (renderer->containsReversedText())
       
   550             ++m_sortedTextBoxesPosition;
       
   551     }
       
   552 }
       
   553 
       
   554 bool TextIterator::handleReplacedElement()
       
   555 {
       
   556     if (m_fullyClippedStack.top())
       
   557         return false;
       
   558 
       
   559     RenderObject* renderer = m_node->renderer();
       
   560     if (renderer->style()->visibility() != VISIBLE)
       
   561         return false;
       
   562 
       
   563     if (m_lastTextNodeEndedWithCollapsedSpace) {
       
   564         emitCharacter(' ', m_lastTextNode->parentNode(), m_lastTextNode, 1, 1);
       
   565         return false;
       
   566     }
       
   567 
       
   568     if (m_entersTextControls && renderer->isTextControl()) {
       
   569         if (HTMLElement* innerTextElement = toRenderTextControl(renderer)->innerTextElement()) {
       
   570             m_node = innerTextElement->shadowTreeRootNode();
       
   571             pushFullyClippedState(m_fullyClippedStack, m_node);
       
   572             m_offset = 0;
       
   573             return false;
       
   574         }
       
   575     }
       
   576 
       
   577     m_hasEmitted = true;
       
   578 
       
   579     if (m_emitsCharactersBetweenAllVisiblePositions) {
       
   580         // We want replaced elements to behave like punctuation for boundary 
       
   581         // finding, and to simply take up space for the selection preservation 
       
   582         // code in moveParagraphs, so we use a comma.
       
   583         emitCharacter(',', m_node->parentNode(), m_node, 0, 1);
       
   584         return true;
       
   585     }
       
   586 
       
   587     m_positionNode = m_node->parentNode();
       
   588     m_positionOffsetBaseNode = m_node;
       
   589     m_positionStartOffset = 0;
       
   590     m_positionEndOffset = 1;
       
   591 
       
   592     m_textCharacters = 0;
       
   593     m_textLength = 0;
       
   594 
       
   595     m_lastCharacter = 0;
       
   596 
       
   597     return true;
       
   598 }
       
   599 
       
   600 static bool shouldEmitTabBeforeNode(Node* node)
       
   601 {
       
   602     RenderObject* r = node->renderer();
       
   603     
       
   604     // Table cells are delimited by tabs.
       
   605     if (!r || !isTableCell(node))
       
   606         return false;
       
   607     
       
   608     // Want a tab before every cell other than the first one
       
   609     RenderTableCell* rc = toRenderTableCell(r);
       
   610     RenderTable* t = rc->table();
       
   611     return t && (t->cellBefore(rc) || t->cellAbove(rc));
       
   612 }
       
   613 
       
   614 static bool shouldEmitNewlineForNode(Node* node)
       
   615 {
       
   616     // br elements are represented by a single newline.
       
   617     RenderObject* r = node->renderer();
       
   618     if (!r)
       
   619         return node->hasTagName(brTag);
       
   620         
       
   621     return r->isBR();
       
   622 }
       
   623 
       
   624 static bool shouldEmitNewlinesBeforeAndAfterNode(Node* node)
       
   625 {
       
   626     // Block flow (versus inline flow) is represented by having
       
   627     // a newline both before and after the element.
       
   628     RenderObject* r = node->renderer();
       
   629     if (!r) {
       
   630         return (node->hasTagName(blockquoteTag)
       
   631                 || node->hasTagName(ddTag)
       
   632                 || node->hasTagName(divTag)
       
   633                 || node->hasTagName(dlTag)
       
   634                 || node->hasTagName(dtTag)
       
   635                 || node->hasTagName(h1Tag)
       
   636                 || node->hasTagName(h2Tag)
       
   637                 || node->hasTagName(h3Tag)
       
   638                 || node->hasTagName(h4Tag)
       
   639                 || node->hasTagName(h5Tag)
       
   640                 || node->hasTagName(h6Tag)
       
   641                 || node->hasTagName(hrTag)
       
   642                 || node->hasTagName(liTag)
       
   643                 || node->hasTagName(listingTag)
       
   644                 || node->hasTagName(olTag)
       
   645                 || node->hasTagName(pTag)
       
   646                 || node->hasTagName(preTag)
       
   647                 || node->hasTagName(trTag)
       
   648                 || node->hasTagName(ulTag));
       
   649     }
       
   650     
       
   651     // Need to make an exception for table cells, because they are blocks, but we
       
   652     // want them tab-delimited rather than having newlines before and after.
       
   653     if (isTableCell(node))
       
   654         return false;
       
   655     
       
   656     // Need to make an exception for table row elements, because they are neither
       
   657     // "inline" or "RenderBlock", but we want newlines for them.
       
   658     if (r->isTableRow()) {
       
   659         RenderTable* t = toRenderTableRow(r)->table();
       
   660         if (t && !t->isInline())
       
   661             return true;
       
   662     }
       
   663     
       
   664     return !r->isInline() && r->isRenderBlock() && !r->isFloatingOrPositioned() && !r->isBody();
       
   665 }
       
   666 
       
   667 static bool shouldEmitNewlineAfterNode(Node* node)
       
   668 {
       
   669     // FIXME: It should be better but slower to create a VisiblePosition here.
       
   670     if (!shouldEmitNewlinesBeforeAndAfterNode(node))
       
   671         return false;
       
   672     // Check if this is the very last renderer in the document.
       
   673     // If so, then we should not emit a newline.
       
   674     while ((node = node->traverseNextSibling()))
       
   675         if (node->renderer())
       
   676             return true;
       
   677     return false;
       
   678 }
       
   679 
       
   680 static bool shouldEmitNewlineBeforeNode(Node* node)
       
   681 {
       
   682     return shouldEmitNewlinesBeforeAndAfterNode(node); 
       
   683 }
       
   684 
       
   685 static bool shouldEmitExtraNewlineForNode(Node* node)
       
   686 {
       
   687     // When there is a significant collapsed bottom margin, emit an extra
       
   688     // newline for a more realistic result.  We end up getting the right
       
   689     // result even without margin collapsing. For example: <div><p>text</p></div>
       
   690     // will work right even if both the <div> and the <p> have bottom margins.
       
   691     RenderObject* r = node->renderer();
       
   692     if (!r || !r->isBox())
       
   693         return false;
       
   694     
       
   695     // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears
       
   696     // not to do this at all
       
   697     if (node->hasTagName(h1Tag)
       
   698         || node->hasTagName(h2Tag)
       
   699         || node->hasTagName(h3Tag)
       
   700         || node->hasTagName(h4Tag)
       
   701         || node->hasTagName(h5Tag)
       
   702         || node->hasTagName(h6Tag)
       
   703         || node->hasTagName(pTag)) {
       
   704         RenderStyle* style = r->style();
       
   705         if (style) {
       
   706             int bottomMargin = toRenderBox(r)->collapsedMarginBottom();
       
   707             int fontSize = style->fontDescription().computedPixelSize();
       
   708             if (bottomMargin * 2 >= fontSize)
       
   709                 return true;
       
   710         }
       
   711     }
       
   712     
       
   713     return false;
       
   714 }
       
   715 
       
   716 static int collapsedSpaceLength(RenderText* renderer, int textEnd)
       
   717 {
       
   718     const UChar* characters = renderer->text()->characters();
       
   719     int length = renderer->text()->length();
       
   720     for (int i = textEnd; i < length; ++i) {
       
   721         if (!renderer->style()->isCollapsibleWhiteSpace(characters[i]))
       
   722             return i - textEnd;
       
   723     }
       
   724 
       
   725     return length - textEnd;
       
   726 }
       
   727 
       
   728 static int maxOffsetIncludingCollapsedSpaces(Node* node)
       
   729 {
       
   730     int offset = caretMaxOffset(node);
       
   731 
       
   732     if (node->renderer() && node->renderer()->isText())
       
   733         offset += collapsedSpaceLength(toRenderText(node->renderer()), offset);
       
   734 
       
   735     return offset;
       
   736 }
       
   737 
       
   738 // Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic).
       
   739 bool TextIterator::shouldRepresentNodeOffsetZero()
       
   740 {
       
   741     if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isTable())
       
   742         return true;
       
   743         
       
   744     // Leave element positioned flush with start of a paragraph
       
   745     // (e.g. do not insert tab before a table cell at the start of a paragraph)
       
   746     if (m_lastCharacter == '\n')
       
   747         return false;
       
   748     
       
   749     // Otherwise, show the position if we have emitted any characters
       
   750     if (m_hasEmitted)
       
   751         return true;
       
   752     
       
   753     // We've not emitted anything yet. Generally, there is no need for any positioning then.
       
   754     // The only exception is when the element is visually not in the same line as
       
   755     // the start of the range (e.g. the range starts at the end of the previous paragraph).
       
   756     // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we
       
   757     // make quicker checks to possibly avoid that. Another check that we could make is
       
   758     // is whether the inline vs block flow changed since the previous visible element.
       
   759     // I think we're already in a special enough case that that won't be needed, tho.
       
   760 
       
   761     // No character needed if this is the first node in the range.
       
   762     if (m_node == m_startContainer)
       
   763         return false;
       
   764     
       
   765     // If we are outside the start container's subtree, assume we need to emit.
       
   766     // FIXME: m_startContainer could be an inline block
       
   767     if (!m_node->isDescendantOf(m_startContainer))
       
   768         return true;
       
   769 
       
   770     // If we started as m_startContainer offset 0 and the current node is a descendant of
       
   771     // the start container, we already had enough context to correctly decide whether to
       
   772     // emit after a preceding block. We chose not to emit (m_hasEmitted is false),
       
   773     // so don't second guess that now.
       
   774     // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably
       
   775     // immaterial since we likely would have already emitted something by now.
       
   776     if (m_startOffset == 0)
       
   777         return false;
       
   778         
       
   779     // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning.
       
   780     // Additionally, if the range we are iterating over contains huge sections of unrendered content, 
       
   781     // we would create VisiblePositions on every call to this function without this check.
       
   782     if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE)
       
   783         return false;
       
   784     
       
   785     // The startPos.isNotNull() check is needed because the start could be before the body,
       
   786     // and in that case we'll get null. We don't want to put in newlines at the start in that case.
       
   787     // The currPos.isNotNull() check is needed because positions in non-HTML content
       
   788     // (like SVG) do not have visible positions, and we don't want to emit for them either.
       
   789     VisiblePosition startPos = VisiblePosition(m_startContainer, m_startOffset, DOWNSTREAM);
       
   790     VisiblePosition currPos = VisiblePosition(m_node, 0, DOWNSTREAM);
       
   791     return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos);
       
   792 }
       
   793 
       
   794 bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node)
       
   795 {
       
   796     return node->renderer() && node->renderer()->isTable() && (node->renderer()->isInline() || m_emitsCharactersBetweenAllVisiblePositions);
       
   797 }
       
   798 
       
   799 void TextIterator::representNodeOffsetZero()
       
   800 {
       
   801     // Emit a character to show the positioning of m_node.
       
   802     
       
   803     // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can 
       
   804     // create VisiblePositions, which is expensive.  So, we perform the inexpensive checks
       
   805     // on m_node to see if it necessitates emitting a character first and will early return 
       
   806     // before encountering shouldRepresentNodeOffsetZero()s worse case behavior.
       
   807     if (shouldEmitTabBeforeNode(m_node)) {
       
   808         if (shouldRepresentNodeOffsetZero())
       
   809             emitCharacter('\t', m_node->parentNode(), m_node, 0, 0);
       
   810     } else if (shouldEmitNewlineBeforeNode(m_node)) {
       
   811         if (shouldRepresentNodeOffsetZero())
       
   812             emitCharacter('\n', m_node->parentNode(), m_node, 0, 0);
       
   813     } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) {
       
   814         if (shouldRepresentNodeOffsetZero())
       
   815             emitCharacter(' ', m_node->parentNode(), m_node, 0, 0);
       
   816     }
       
   817 }
       
   818 
       
   819 bool TextIterator::handleNonTextNode()
       
   820 {
       
   821     if (shouldEmitNewlineForNode(m_node))
       
   822         emitCharacter('\n', m_node->parentNode(), m_node, 0, 1);
       
   823     else if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR())
       
   824         emitCharacter(' ', m_node->parentNode(), m_node, 0, 1);
       
   825     else
       
   826         representNodeOffsetZero();
       
   827 
       
   828     return true;
       
   829 }
       
   830 
       
   831 void TextIterator::exitNode()
       
   832 {
       
   833     // prevent emitting a newline when exiting a collapsed block at beginning of the range
       
   834     // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could
       
   835     // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and
       
   836     // therefore look like a blank line.
       
   837     if (!m_hasEmitted)
       
   838         return;
       
   839         
       
   840     // Emit with a position *inside* m_node, after m_node's contents, in 
       
   841     // case it is a block, because the run should start where the 
       
   842     // emitted character is positioned visually.
       
   843     Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node;
       
   844     // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making
       
   845     // the logic in _web_attributedStringFromRange match.  We'll get that for free when we switch to use
       
   846     // TextIterator in _web_attributedStringFromRange.
       
   847     // See <rdar://problem/5428427> for an example of how this mismatch will cause problems.
       
   848     if (m_lastTextNode && shouldEmitNewlineAfterNode(m_node)) {
       
   849         // use extra newline to represent margin bottom, as needed
       
   850         bool addNewline = shouldEmitExtraNewlineForNode(m_node);
       
   851         
       
   852         // FIXME: We need to emit a '\n' as we leave an empty block(s) that
       
   853         // contain a VisiblePosition when doing selection preservation.
       
   854         if (m_lastCharacter != '\n') {
       
   855             // insert a newline with a position following this block's contents.
       
   856             emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
       
   857             // remember whether to later add a newline for the current node
       
   858             ASSERT(!m_needsAnotherNewline);
       
   859             m_needsAnotherNewline = addNewline;
       
   860         } else if (addNewline)
       
   861             // insert a newline with a position following this block's contents.
       
   862             emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
       
   863     }
       
   864     
       
   865     // If nothing was emitted, see if we need to emit a space.
       
   866     if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node))
       
   867         emitCharacter(' ', baseNode->parentNode(), baseNode, 1, 1);
       
   868 }
       
   869 
       
   870 void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset)
       
   871 {
       
   872     m_hasEmitted = true;
       
   873     
       
   874     // remember information with which to construct the TextIterator::range()
       
   875     // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode
       
   876     m_positionNode = textNode;
       
   877     m_positionOffsetBaseNode = offsetBaseNode;
       
   878     m_positionStartOffset = textStartOffset;
       
   879     m_positionEndOffset = textEndOffset;
       
   880  
       
   881     // remember information with which to construct the TextIterator::characters() and length()
       
   882     m_singleCharacterBuffer = c;
       
   883     m_textCharacters = &m_singleCharacterBuffer;
       
   884     m_textLength = 1;
       
   885 
       
   886     // remember some iteration state
       
   887     m_lastTextNodeEndedWithCollapsedSpace = false;
       
   888     m_lastCharacter = c;
       
   889 }
       
   890 
       
   891 void TextIterator::emitText(Node* textNode, int textStartOffset, int textEndOffset)
       
   892 {
       
   893     RenderText* renderer = toRenderText(m_node->renderer());
       
   894     m_text = m_emitsTextWithoutTranscoding ? renderer->textWithoutTranscoding() : renderer->text();
       
   895     ASSERT(m_text.characters());
       
   896 
       
   897     m_positionNode = textNode;
       
   898     m_positionOffsetBaseNode = 0;
       
   899     m_positionStartOffset = textStartOffset;
       
   900     m_positionEndOffset = textEndOffset;
       
   901     m_textCharacters = m_text.characters() + textStartOffset;
       
   902     m_textLength = textEndOffset - textStartOffset;
       
   903     m_lastCharacter = m_text[textEndOffset - 1];
       
   904 
       
   905     m_lastTextNodeEndedWithCollapsedSpace = false;
       
   906     m_hasEmitted = true;
       
   907 }
       
   908 
       
   909 PassRefPtr<Range> TextIterator::range() const
       
   910 {
       
   911     // use the current run information, if we have it
       
   912     if (m_positionNode) {
       
   913         if (m_positionOffsetBaseNode) {
       
   914             int index = m_positionOffsetBaseNode->nodeIndex();
       
   915             m_positionStartOffset += index;
       
   916             m_positionEndOffset += index;
       
   917             m_positionOffsetBaseNode = 0;
       
   918         }
       
   919         return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
       
   920     }
       
   921 
       
   922     // otherwise, return the end of the overall range we were given
       
   923     if (m_endContainer)
       
   924         return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset);
       
   925         
       
   926     return 0;
       
   927 }
       
   928     
       
   929 Node* TextIterator::node() const
       
   930 {
       
   931     RefPtr<Range> textRange = range();
       
   932     if (!textRange)
       
   933         return 0;
       
   934 
       
   935     Node* node = textRange->startContainer();
       
   936     if (!node)
       
   937         return 0;
       
   938     if (node->offsetInCharacters())
       
   939         return node;
       
   940     
       
   941     return node->childNode(textRange->startOffset());
       
   942 }
       
   943 
       
   944 // --------
       
   945 
       
   946 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator()
       
   947     : m_positionNode(0)
       
   948 {
       
   949 }
       
   950 
       
   951 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r)
       
   952     : m_positionNode(0)
       
   953 {
       
   954     if (!r)
       
   955         return;
       
   956 
       
   957     Node* startNode = r->startContainer();
       
   958     if (!startNode)
       
   959         return;
       
   960     Node* endNode = r->endContainer();
       
   961     int startOffset = r->startOffset();
       
   962     int endOffset = r->endOffset();
       
   963 
       
   964     if (!startNode->offsetInCharacters()) {
       
   965         if (startOffset >= 0 && startOffset < static_cast<int>(startNode->childNodeCount())) {
       
   966             startNode = startNode->childNode(startOffset);
       
   967             startOffset = 0;
       
   968         }
       
   969     }
       
   970     if (!endNode->offsetInCharacters()) {
       
   971         if (endOffset > 0 && endOffset <= static_cast<int>(endNode->childNodeCount())) {
       
   972             endNode = endNode->childNode(endOffset - 1);
       
   973             endOffset = lastOffsetInNode(endNode);
       
   974         }
       
   975     }
       
   976 
       
   977     m_node = endNode;
       
   978     setUpFullyClippedStack(m_fullyClippedStack, m_node);    
       
   979     m_offset = endOffset;
       
   980     m_handledNode = false;
       
   981     m_handledChildren = endOffset == 0;
       
   982 
       
   983     m_startNode = startNode;
       
   984     m_startOffset = startOffset;
       
   985     m_endNode = endNode;
       
   986     m_endOffset = endOffset;
       
   987     
       
   988 #ifndef NDEBUG
       
   989     // Need this just because of the assert.
       
   990     m_positionNode = endNode;
       
   991 #endif
       
   992 
       
   993     m_lastTextNode = 0;
       
   994     m_lastCharacter = '\n';
       
   995 
       
   996     m_pastStartNode = previousInPostOrderCrossingShadowBoundaries(startNode, startOffset);
       
   997 
       
   998     advance();
       
   999 }
       
  1000 
       
  1001 void SimplifiedBackwardsTextIterator::advance()
       
  1002 {
       
  1003     ASSERT(m_positionNode);
       
  1004 
       
  1005     m_positionNode = 0;
       
  1006     m_textLength = 0;
       
  1007 
       
  1008     while (m_node && m_node != m_pastStartNode) {
       
  1009         // Don't handle node if we start iterating at [node, 0].
       
  1010         if (!m_handledNode && !(m_node == m_endNode && m_endOffset == 0)) {
       
  1011             RenderObject* renderer = m_node->renderer();
       
  1012             if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) {
       
  1013                 // FIXME: What about CDATA_SECTION_NODE?
       
  1014                 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
       
  1015                     m_handledNode = handleTextNode();
       
  1016             } else if (renderer && (renderer->isImage() || renderer->isWidget())) {
       
  1017                 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
       
  1018                     m_handledNode = handleReplacedElement();
       
  1019             } else
       
  1020                 m_handledNode = handleNonTextNode();
       
  1021             if (m_positionNode)
       
  1022                 return;
       
  1023         }
       
  1024 
       
  1025         Node* next = m_handledChildren ? 0 : m_node->lastChild();
       
  1026         if (!next) {
       
  1027             // Exit empty containers as we pass over them or containers
       
  1028             // where [container, 0] is where we started iterating.
       
  1029             if (!m_handledNode &&
       
  1030                 canHaveChildrenForEditing(m_node) && 
       
  1031                 m_node->parentNode() && 
       
  1032                 (!m_node->lastChild() || (m_node == m_endNode && m_endOffset == 0))) {
       
  1033                 exitNode();
       
  1034                 if (m_positionNode) {
       
  1035                     m_handledNode = true;
       
  1036                     m_handledChildren = true;
       
  1037                     return;
       
  1038                 }            
       
  1039             }
       
  1040             // Exit all other containers.
       
  1041             next = m_node->previousSibling();
       
  1042             while (!next) {
       
  1043                 Node* parentNode = parentCrossingShadowBoundaries(m_node);
       
  1044                 if (!parentNode)
       
  1045                     break;
       
  1046                 m_node = parentNode;
       
  1047                 m_fullyClippedStack.pop();
       
  1048                 exitNode();
       
  1049                 if (m_positionNode) {
       
  1050                     m_handledNode = true;
       
  1051                     m_handledChildren = true;
       
  1052                     return;
       
  1053                 }
       
  1054                 next = m_node->previousSibling();
       
  1055             }
       
  1056             m_fullyClippedStack.pop();
       
  1057         }
       
  1058         
       
  1059         m_node = next;
       
  1060         if (m_node)
       
  1061             pushFullyClippedState(m_fullyClippedStack, m_node);
       
  1062         // For the purpose of word boundary detection,
       
  1063         // we should iterate all visible text and trailing (collapsed) whitespaces. 
       
  1064         m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(m_node) : 0;
       
  1065         m_handledNode = false;
       
  1066         m_handledChildren = false;
       
  1067         
       
  1068         if (m_positionNode)
       
  1069             return;
       
  1070     }
       
  1071 }
       
  1072 
       
  1073 bool SimplifiedBackwardsTextIterator::handleTextNode()
       
  1074 {
       
  1075     m_lastTextNode = m_node;
       
  1076 
       
  1077     RenderText* renderer = toRenderText(m_node->renderer());
       
  1078     String str = renderer->text();
       
  1079 
       
  1080     if (!renderer->firstTextBox() && str.length() > 0)
       
  1081         return true;
       
  1082 
       
  1083     m_positionEndOffset = m_offset;
       
  1084 
       
  1085     m_offset = (m_node == m_startNode) ? m_startOffset : 0;
       
  1086     m_positionNode = m_node;
       
  1087     m_positionStartOffset = m_offset;
       
  1088     m_textLength = m_positionEndOffset - m_positionStartOffset;
       
  1089     m_textCharacters = str.characters() + m_positionStartOffset;
       
  1090 
       
  1091     m_lastCharacter = str[m_positionEndOffset - 1];
       
  1092 
       
  1093     return true;
       
  1094 }
       
  1095 
       
  1096 bool SimplifiedBackwardsTextIterator::handleReplacedElement()
       
  1097 {
       
  1098     unsigned index = m_node->nodeIndex();
       
  1099     // We want replaced elements to behave like punctuation for boundary 
       
  1100     // finding, and to simply take up space for the selection preservation 
       
  1101     // code in moveParagraphs, so we use a comma.  Unconditionally emit
       
  1102     // here because this iterator is only used for boundary finding.
       
  1103     emitCharacter(',', m_node->parentNode(), index, index + 1);
       
  1104     return true;
       
  1105 }
       
  1106 
       
  1107 bool SimplifiedBackwardsTextIterator::handleNonTextNode()
       
  1108 {    
       
  1109     // We can use a linefeed in place of a tab because this simple iterator is only used to
       
  1110     // find boundaries, not actual content.  A linefeed breaks words, sentences, and paragraphs.
       
  1111     if (shouldEmitNewlineForNode(m_node) || shouldEmitNewlineAfterNode(m_node) || shouldEmitTabBeforeNode(m_node)) {
       
  1112         unsigned index = m_node->nodeIndex();
       
  1113         // The start of this emitted range is wrong. Ensuring correctness would require
       
  1114         // VisiblePositions and so would be slow. previousBoundary expects this.
       
  1115         emitCharacter('\n', m_node->parentNode(), index + 1, index + 1);
       
  1116     }
       
  1117     return true;
       
  1118 }
       
  1119 
       
  1120 void SimplifiedBackwardsTextIterator::exitNode()
       
  1121 {
       
  1122     if (shouldEmitNewlineForNode(m_node) || shouldEmitNewlineBeforeNode(m_node) || shouldEmitTabBeforeNode(m_node)) {
       
  1123         // The start of this emitted range is wrong. Ensuring correctness would require
       
  1124         // VisiblePositions and so would be slow. previousBoundary expects this.
       
  1125         emitCharacter('\n', m_node, 0, 0);
       
  1126     }
       
  1127 }
       
  1128 
       
  1129 void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset)
       
  1130 {
       
  1131     m_singleCharacterBuffer = c;
       
  1132     m_positionNode = node;
       
  1133     m_positionStartOffset = startOffset;
       
  1134     m_positionEndOffset = endOffset;
       
  1135     m_textCharacters = &m_singleCharacterBuffer;
       
  1136     m_textLength = 1;
       
  1137     m_lastCharacter = c;
       
  1138 }
       
  1139 
       
  1140 PassRefPtr<Range> SimplifiedBackwardsTextIterator::range() const
       
  1141 {
       
  1142     if (m_positionNode)
       
  1143         return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
       
  1144     
       
  1145     return Range::create(m_startNode->document(), m_startNode, m_startOffset, m_startNode, m_startOffset);
       
  1146 }
       
  1147 
       
  1148 // --------
       
  1149 
       
  1150 CharacterIterator::CharacterIterator()
       
  1151     : m_offset(0)
       
  1152     , m_runOffset(0)
       
  1153     , m_atBreak(true)
       
  1154 {
       
  1155 }
       
  1156 
       
  1157 CharacterIterator::CharacterIterator(const Range* r, TextIteratorBehavior behavior)
       
  1158     : m_offset(0)
       
  1159     , m_runOffset(0)
       
  1160     , m_atBreak(true)
       
  1161     , m_textIterator(r, behavior)
       
  1162 {
       
  1163     while (!atEnd() && m_textIterator.length() == 0)
       
  1164         m_textIterator.advance();
       
  1165 }
       
  1166 
       
  1167 PassRefPtr<Range> CharacterIterator::range() const
       
  1168 {
       
  1169     RefPtr<Range> r = m_textIterator.range();
       
  1170     if (!m_textIterator.atEnd()) {
       
  1171         if (m_textIterator.length() <= 1) {
       
  1172             ASSERT(m_runOffset == 0);
       
  1173         } else {
       
  1174             Node* n = r->startContainer();
       
  1175             ASSERT(n == r->endContainer());
       
  1176             int offset = r->startOffset() + m_runOffset;
       
  1177             ExceptionCode ec = 0;
       
  1178             r->setStart(n, offset, ec);
       
  1179             r->setEnd(n, offset + 1, ec);
       
  1180             ASSERT(!ec);
       
  1181         }
       
  1182     }
       
  1183     return r.release();
       
  1184 }
       
  1185 
       
  1186 void CharacterIterator::advance(int count)
       
  1187 {
       
  1188     if (count <= 0) {
       
  1189         ASSERT(count == 0);
       
  1190         return;
       
  1191     }
       
  1192     
       
  1193     m_atBreak = false;
       
  1194 
       
  1195     // easy if there is enough left in the current m_textIterator run
       
  1196     int remaining = m_textIterator.length() - m_runOffset;
       
  1197     if (count < remaining) {
       
  1198         m_runOffset += count;
       
  1199         m_offset += count;
       
  1200         return;
       
  1201     }
       
  1202 
       
  1203     // exhaust the current m_textIterator run
       
  1204     count -= remaining;
       
  1205     m_offset += remaining;
       
  1206     
       
  1207     // move to a subsequent m_textIterator run
       
  1208     for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
       
  1209         int runLength = m_textIterator.length();
       
  1210         if (runLength == 0)
       
  1211             m_atBreak = true;
       
  1212         else {
       
  1213             // see whether this is m_textIterator to use
       
  1214             if (count < runLength) {
       
  1215                 m_runOffset = count;
       
  1216                 m_offset += count;
       
  1217                 return;
       
  1218             }
       
  1219             
       
  1220             // exhaust this m_textIterator run
       
  1221             count -= runLength;
       
  1222             m_offset += runLength;
       
  1223         }
       
  1224     }
       
  1225 
       
  1226     // ran to the end of the m_textIterator... no more runs left
       
  1227     m_atBreak = true;
       
  1228     m_runOffset = 0;
       
  1229 }
       
  1230 
       
  1231 String CharacterIterator::string(int numChars)
       
  1232 {
       
  1233     Vector<UChar> result;
       
  1234     result.reserveInitialCapacity(numChars);
       
  1235     while (numChars > 0 && !atEnd()) {
       
  1236         int runSize = min(numChars, length());
       
  1237         result.append(characters(), runSize);
       
  1238         numChars -= runSize;
       
  1239         advance(runSize);
       
  1240     }
       
  1241     return String::adopt(result);
       
  1242 }
       
  1243 
       
  1244 static PassRefPtr<Range> characterSubrange(CharacterIterator& it, int offset, int length)
       
  1245 {
       
  1246     it.advance(offset);
       
  1247     RefPtr<Range> start = it.range();
       
  1248 
       
  1249     if (length > 1)
       
  1250         it.advance(length - 1);
       
  1251     RefPtr<Range> end = it.range();
       
  1252 
       
  1253     return Range::create(start->startContainer()->document(), 
       
  1254         start->startContainer(), start->startOffset(), 
       
  1255         end->endContainer(), end->endOffset());
       
  1256 }
       
  1257 
       
  1258 BackwardsCharacterIterator::BackwardsCharacterIterator()
       
  1259     : m_offset(0)
       
  1260     , m_runOffset(0)
       
  1261     , m_atBreak(true)
       
  1262 {
       
  1263 }
       
  1264 
       
  1265 BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range)
       
  1266     : m_offset(0)
       
  1267     , m_runOffset(0)
       
  1268     , m_atBreak(true)
       
  1269     , m_textIterator(range)
       
  1270 {
       
  1271     while (!atEnd() && !m_textIterator.length())
       
  1272         m_textIterator.advance();
       
  1273 }
       
  1274 
       
  1275 PassRefPtr<Range> BackwardsCharacterIterator::range() const
       
  1276 {
       
  1277     RefPtr<Range> r = m_textIterator.range();
       
  1278     if (!m_textIterator.atEnd()) {
       
  1279         if (m_textIterator.length() <= 1)
       
  1280             ASSERT(m_runOffset == 0);
       
  1281         else {
       
  1282             Node* n = r->startContainer();
       
  1283             ASSERT(n == r->endContainer());
       
  1284             int offset = r->endOffset() - m_runOffset;
       
  1285             ExceptionCode ec = 0;
       
  1286             r->setStart(n, offset - 1, ec);
       
  1287             r->setEnd(n, offset, ec);
       
  1288             ASSERT(!ec);
       
  1289         }
       
  1290     }
       
  1291     return r.release();
       
  1292 }
       
  1293 
       
  1294 void BackwardsCharacterIterator::advance(int count)
       
  1295 {
       
  1296     if (count <= 0) {
       
  1297         ASSERT(!count);
       
  1298         return;
       
  1299     }
       
  1300 
       
  1301     m_atBreak = false;
       
  1302 
       
  1303     int remaining = m_textIterator.length() - m_runOffset;
       
  1304     if (count < remaining) {
       
  1305         m_runOffset += count;
       
  1306         m_offset += count;
       
  1307         return;
       
  1308     }
       
  1309 
       
  1310     count -= remaining;
       
  1311     m_offset += remaining;
       
  1312 
       
  1313     for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
       
  1314         int runLength = m_textIterator.length();
       
  1315         if (runLength == 0)
       
  1316             m_atBreak = true;
       
  1317         else {
       
  1318             if (count < runLength) {
       
  1319                 m_runOffset = count;
       
  1320                 m_offset += count;
       
  1321                 return;
       
  1322             }
       
  1323             
       
  1324             count -= runLength;
       
  1325             m_offset += runLength;
       
  1326         }
       
  1327     }
       
  1328 
       
  1329     m_atBreak = true;
       
  1330     m_runOffset = 0;
       
  1331 }
       
  1332 
       
  1333 // --------
       
  1334 
       
  1335 WordAwareIterator::WordAwareIterator()
       
  1336     : m_previousText(0)
       
  1337     , m_didLookAhead(false)
       
  1338 {
       
  1339 }
       
  1340 
       
  1341 WordAwareIterator::WordAwareIterator(const Range* r)
       
  1342     : m_previousText(0)
       
  1343     , m_didLookAhead(true) // so we consider the first chunk from the text iterator
       
  1344     , m_textIterator(r)
       
  1345 {
       
  1346     advance(); // get in position over the first chunk of text
       
  1347 }
       
  1348 
       
  1349 // We're always in one of these modes:
       
  1350 // - The current chunk in the text iterator is our current chunk
       
  1351 //      (typically its a piece of whitespace, or text that ended with whitespace)
       
  1352 // - The previous chunk in the text iterator is our current chunk
       
  1353 //      (we looked ahead to the next chunk and found a word boundary)
       
  1354 // - We built up our own chunk of text from many chunks from the text iterator
       
  1355 
       
  1356 // FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries.
       
  1357 
       
  1358 void WordAwareIterator::advance()
       
  1359 {
       
  1360     m_previousText = 0;
       
  1361     m_buffer.clear();      // toss any old buffer we built up
       
  1362 
       
  1363     // If last time we did a look-ahead, start with that looked-ahead chunk now
       
  1364     if (!m_didLookAhead) {
       
  1365         ASSERT(!m_textIterator.atEnd());
       
  1366         m_textIterator.advance();
       
  1367     }
       
  1368     m_didLookAhead = false;
       
  1369 
       
  1370     // Go to next non-empty chunk 
       
  1371     while (!m_textIterator.atEnd() && m_textIterator.length() == 0)
       
  1372         m_textIterator.advance();
       
  1373     m_range = m_textIterator.range();
       
  1374 
       
  1375     if (m_textIterator.atEnd())
       
  1376         return;
       
  1377     
       
  1378     while (1) {
       
  1379         // If this chunk ends in whitespace we can just use it as our chunk.
       
  1380         if (isSpaceOrNewline(m_textIterator.characters()[m_textIterator.length() - 1]))
       
  1381             return;
       
  1382 
       
  1383         // If this is the first chunk that failed, save it in previousText before look ahead
       
  1384         if (m_buffer.isEmpty()) {
       
  1385             m_previousText = m_textIterator.characters();
       
  1386             m_previousLength = m_textIterator.length();
       
  1387         }
       
  1388 
       
  1389         // Look ahead to next chunk.  If it is whitespace or a break, we can use the previous stuff
       
  1390         m_textIterator.advance();
       
  1391         if (m_textIterator.atEnd() || m_textIterator.length() == 0 || isSpaceOrNewline(m_textIterator.characters()[0])) {
       
  1392             m_didLookAhead = true;
       
  1393             return;
       
  1394         }
       
  1395 
       
  1396         if (m_buffer.isEmpty()) {
       
  1397             // Start gobbling chunks until we get to a suitable stopping point
       
  1398             m_buffer.append(m_previousText, m_previousLength);
       
  1399             m_previousText = 0;
       
  1400         }
       
  1401         m_buffer.append(m_textIterator.characters(), m_textIterator.length());
       
  1402         int exception = 0;
       
  1403         m_range->setEnd(m_textIterator.range()->endContainer(), m_textIterator.range()->endOffset(), exception);
       
  1404     }
       
  1405 }
       
  1406 
       
  1407 int WordAwareIterator::length() const
       
  1408 {
       
  1409     if (!m_buffer.isEmpty())
       
  1410         return m_buffer.size();
       
  1411     if (m_previousText)
       
  1412         return m_previousLength;
       
  1413     return m_textIterator.length();
       
  1414 }
       
  1415 
       
  1416 const UChar* WordAwareIterator::characters() const
       
  1417 {
       
  1418     if (!m_buffer.isEmpty())
       
  1419         return m_buffer.data();
       
  1420     if (m_previousText)
       
  1421         return m_previousText;
       
  1422     return m_textIterator.characters();
       
  1423 }
       
  1424 
       
  1425 // --------
       
  1426 
       
  1427 static inline UChar foldQuoteMark(UChar c)
       
  1428 {
       
  1429     switch (c) {
       
  1430         case hebrewPunctuationGershayim:
       
  1431         case leftDoubleQuotationMark:
       
  1432         case rightDoubleQuotationMark:
       
  1433             return '"';
       
  1434         case hebrewPunctuationGeresh:
       
  1435         case leftSingleQuotationMark:
       
  1436         case rightSingleQuotationMark:
       
  1437             return '\'';
       
  1438         default:
       
  1439             return c;
       
  1440     }
       
  1441 }
       
  1442 
       
  1443 static inline void foldQuoteMarks(String& s)
       
  1444 {
       
  1445     s.replace(hebrewPunctuationGeresh, '\'');
       
  1446     s.replace(hebrewPunctuationGershayim, '"');
       
  1447     s.replace(leftDoubleQuotationMark, '"');
       
  1448     s.replace(leftSingleQuotationMark, '\'');
       
  1449     s.replace(rightDoubleQuotationMark, '"');
       
  1450     s.replace(rightSingleQuotationMark, '\'');
       
  1451 }
       
  1452 
       
  1453 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
       
  1454 
       
  1455 static inline void foldQuoteMarks(UChar* data, size_t length)
       
  1456 {
       
  1457     for (size_t i = 0; i < length; ++i)
       
  1458         data[i] = foldQuoteMark(data[i]);
       
  1459 }
       
  1460 
       
  1461 static const size_t minimumSearchBufferSize = 8192;
       
  1462 
       
  1463 #ifndef NDEBUG
       
  1464 static bool searcherInUse;
       
  1465 #endif
       
  1466 
       
  1467 static UStringSearch* createSearcher()
       
  1468 {
       
  1469     // Provide a non-empty pattern and non-empty text so usearch_open will not fail,
       
  1470     // but it doesn't matter exactly what it is, since we don't perform any searches
       
  1471     // without setting both the pattern and the text.
       
  1472     UErrorCode status = U_ZERO_ERROR;
       
  1473     UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, currentSearchLocaleID(), 0, &status);
       
  1474     ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
       
  1475     return searcher;
       
  1476 }
       
  1477 
       
  1478 static UStringSearch* searcher()
       
  1479 {
       
  1480     static UStringSearch* searcher = createSearcher();
       
  1481     return searcher;
       
  1482 }
       
  1483 
       
  1484 static inline void lockSearcher()
       
  1485 {
       
  1486 #ifndef NDEBUG
       
  1487     ASSERT(!searcherInUse);
       
  1488     searcherInUse = true;
       
  1489 #endif
       
  1490 }
       
  1491 
       
  1492 static inline void unlockSearcher()
       
  1493 {
       
  1494 #ifndef NDEBUG
       
  1495     ASSERT(searcherInUse);
       
  1496     searcherInUse = false;
       
  1497 #endif
       
  1498 }
       
  1499 
       
  1500 // ICU's search ignores the distinction between small kana letters and ones
       
  1501 // that are not small, and also characters that differ only in the voicing
       
  1502 // marks when considering only primary collation strength diffrences.
       
  1503 // This is not helpful for end users, since these differences make words
       
  1504 // distinct, so for our purposes we need these to be considered.
       
  1505 // The Unicode folks do not think the collation algorithm should be
       
  1506 // changed. To work around this, we would like to tailor the ICU searcher,
       
  1507 // but we can't get that to work yet. So instead, we check for cases where
       
  1508 // these differences occur, and skip those matches.
       
  1509 
       
  1510 // We refer to the above technique as the "kana workaround". The next few
       
  1511 // functions are helper functinos for the kana workaround.
       
  1512 
       
  1513 static inline bool isKanaLetter(UChar character)
       
  1514 {
       
  1515     // Hiragana letters.
       
  1516     if (character >= 0x3041 && character <= 0x3096)
       
  1517         return true;
       
  1518 
       
  1519     // Katakana letters.
       
  1520     if (character >= 0x30A1 && character <= 0x30FA)
       
  1521         return true;
       
  1522     if (character >= 0x31F0 && character <= 0x31FF)
       
  1523         return true;
       
  1524 
       
  1525     // Halfwidth katakana letters.
       
  1526     if (character >= 0xFF66 && character <= 0xFF9D && character != 0xFF70)
       
  1527         return true;
       
  1528 
       
  1529     return false;
       
  1530 }
       
  1531 
       
  1532 static inline bool isSmallKanaLetter(UChar character)
       
  1533 {
       
  1534     ASSERT(isKanaLetter(character));
       
  1535 
       
  1536     switch (character) {
       
  1537     case 0x3041: // HIRAGANA LETTER SMALL A
       
  1538     case 0x3043: // HIRAGANA LETTER SMALL I
       
  1539     case 0x3045: // HIRAGANA LETTER SMALL U
       
  1540     case 0x3047: // HIRAGANA LETTER SMALL E
       
  1541     case 0x3049: // HIRAGANA LETTER SMALL O
       
  1542     case 0x3063: // HIRAGANA LETTER SMALL TU
       
  1543     case 0x3083: // HIRAGANA LETTER SMALL YA
       
  1544     case 0x3085: // HIRAGANA LETTER SMALL YU
       
  1545     case 0x3087: // HIRAGANA LETTER SMALL YO
       
  1546     case 0x308E: // HIRAGANA LETTER SMALL WA
       
  1547     case 0x3095: // HIRAGANA LETTER SMALL KA
       
  1548     case 0x3096: // HIRAGANA LETTER SMALL KE
       
  1549     case 0x30A1: // KATAKANA LETTER SMALL A
       
  1550     case 0x30A3: // KATAKANA LETTER SMALL I
       
  1551     case 0x30A5: // KATAKANA LETTER SMALL U
       
  1552     case 0x30A7: // KATAKANA LETTER SMALL E
       
  1553     case 0x30A9: // KATAKANA LETTER SMALL O
       
  1554     case 0x30C3: // KATAKANA LETTER SMALL TU
       
  1555     case 0x30E3: // KATAKANA LETTER SMALL YA
       
  1556     case 0x30E5: // KATAKANA LETTER SMALL YU
       
  1557     case 0x30E7: // KATAKANA LETTER SMALL YO
       
  1558     case 0x30EE: // KATAKANA LETTER SMALL WA
       
  1559     case 0x30F5: // KATAKANA LETTER SMALL KA
       
  1560     case 0x30F6: // KATAKANA LETTER SMALL KE
       
  1561     case 0x31F0: // KATAKANA LETTER SMALL KU
       
  1562     case 0x31F1: // KATAKANA LETTER SMALL SI
       
  1563     case 0x31F2: // KATAKANA LETTER SMALL SU
       
  1564     case 0x31F3: // KATAKANA LETTER SMALL TO
       
  1565     case 0x31F4: // KATAKANA LETTER SMALL NU
       
  1566     case 0x31F5: // KATAKANA LETTER SMALL HA
       
  1567     case 0x31F6: // KATAKANA LETTER SMALL HI
       
  1568     case 0x31F7: // KATAKANA LETTER SMALL HU
       
  1569     case 0x31F8: // KATAKANA LETTER SMALL HE
       
  1570     case 0x31F9: // KATAKANA LETTER SMALL HO
       
  1571     case 0x31FA: // KATAKANA LETTER SMALL MU
       
  1572     case 0x31FB: // KATAKANA LETTER SMALL RA
       
  1573     case 0x31FC: // KATAKANA LETTER SMALL RI
       
  1574     case 0x31FD: // KATAKANA LETTER SMALL RU
       
  1575     case 0x31FE: // KATAKANA LETTER SMALL RE
       
  1576     case 0x31FF: // KATAKANA LETTER SMALL RO
       
  1577     case 0xFF67: // HALFWIDTH KATAKANA LETTER SMALL A
       
  1578     case 0xFF68: // HALFWIDTH KATAKANA LETTER SMALL I
       
  1579     case 0xFF69: // HALFWIDTH KATAKANA LETTER SMALL U
       
  1580     case 0xFF6A: // HALFWIDTH KATAKANA LETTER SMALL E
       
  1581     case 0xFF6B: // HALFWIDTH KATAKANA LETTER SMALL O
       
  1582     case 0xFF6C: // HALFWIDTH KATAKANA LETTER SMALL YA
       
  1583     case 0xFF6D: // HALFWIDTH KATAKANA LETTER SMALL YU
       
  1584     case 0xFF6E: // HALFWIDTH KATAKANA LETTER SMALL YO
       
  1585     case 0xFF6F: // HALFWIDTH KATAKANA LETTER SMALL TU
       
  1586         return true;
       
  1587     }
       
  1588     return false;
       
  1589 }
       
  1590 
       
  1591 enum VoicedSoundMarkType { NoVoicedSoundMark, VoicedSoundMark, SemiVoicedSoundMark };
       
  1592 
       
  1593 static inline VoicedSoundMarkType composedVoicedSoundMark(UChar character)
       
  1594 {
       
  1595     ASSERT(isKanaLetter(character));
       
  1596 
       
  1597     switch (character) {
       
  1598     case 0x304C: // HIRAGANA LETTER GA
       
  1599     case 0x304E: // HIRAGANA LETTER GI
       
  1600     case 0x3050: // HIRAGANA LETTER GU
       
  1601     case 0x3052: // HIRAGANA LETTER GE
       
  1602     case 0x3054: // HIRAGANA LETTER GO
       
  1603     case 0x3056: // HIRAGANA LETTER ZA
       
  1604     case 0x3058: // HIRAGANA LETTER ZI
       
  1605     case 0x305A: // HIRAGANA LETTER ZU
       
  1606     case 0x305C: // HIRAGANA LETTER ZE
       
  1607     case 0x305E: // HIRAGANA LETTER ZO
       
  1608     case 0x3060: // HIRAGANA LETTER DA
       
  1609     case 0x3062: // HIRAGANA LETTER DI
       
  1610     case 0x3065: // HIRAGANA LETTER DU
       
  1611     case 0x3067: // HIRAGANA LETTER DE
       
  1612     case 0x3069: // HIRAGANA LETTER DO
       
  1613     case 0x3070: // HIRAGANA LETTER BA
       
  1614     case 0x3073: // HIRAGANA LETTER BI
       
  1615     case 0x3076: // HIRAGANA LETTER BU
       
  1616     case 0x3079: // HIRAGANA LETTER BE
       
  1617     case 0x307C: // HIRAGANA LETTER BO
       
  1618     case 0x3094: // HIRAGANA LETTER VU
       
  1619     case 0x30AC: // KATAKANA LETTER GA
       
  1620     case 0x30AE: // KATAKANA LETTER GI
       
  1621     case 0x30B0: // KATAKANA LETTER GU
       
  1622     case 0x30B2: // KATAKANA LETTER GE
       
  1623     case 0x30B4: // KATAKANA LETTER GO
       
  1624     case 0x30B6: // KATAKANA LETTER ZA
       
  1625     case 0x30B8: // KATAKANA LETTER ZI
       
  1626     case 0x30BA: // KATAKANA LETTER ZU
       
  1627     case 0x30BC: // KATAKANA LETTER ZE
       
  1628     case 0x30BE: // KATAKANA LETTER ZO
       
  1629     case 0x30C0: // KATAKANA LETTER DA
       
  1630     case 0x30C2: // KATAKANA LETTER DI
       
  1631     case 0x30C5: // KATAKANA LETTER DU
       
  1632     case 0x30C7: // KATAKANA LETTER DE
       
  1633     case 0x30C9: // KATAKANA LETTER DO
       
  1634     case 0x30D0: // KATAKANA LETTER BA
       
  1635     case 0x30D3: // KATAKANA LETTER BI
       
  1636     case 0x30D6: // KATAKANA LETTER BU
       
  1637     case 0x30D9: // KATAKANA LETTER BE
       
  1638     case 0x30DC: // KATAKANA LETTER BO
       
  1639     case 0x30F4: // KATAKANA LETTER VU
       
  1640     case 0x30F7: // KATAKANA LETTER VA
       
  1641     case 0x30F8: // KATAKANA LETTER VI
       
  1642     case 0x30F9: // KATAKANA LETTER VE
       
  1643     case 0x30FA: // KATAKANA LETTER VO
       
  1644         return VoicedSoundMark;
       
  1645     case 0x3071: // HIRAGANA LETTER PA
       
  1646     case 0x3074: // HIRAGANA LETTER PI
       
  1647     case 0x3077: // HIRAGANA LETTER PU
       
  1648     case 0x307A: // HIRAGANA LETTER PE
       
  1649     case 0x307D: // HIRAGANA LETTER PO
       
  1650     case 0x30D1: // KATAKANA LETTER PA
       
  1651     case 0x30D4: // KATAKANA LETTER PI
       
  1652     case 0x30D7: // KATAKANA LETTER PU
       
  1653     case 0x30DA: // KATAKANA LETTER PE
       
  1654     case 0x30DD: // KATAKANA LETTER PO
       
  1655         return SemiVoicedSoundMark;
       
  1656     }
       
  1657     return NoVoicedSoundMark;
       
  1658 }
       
  1659 
       
  1660 static inline bool isCombiningVoicedSoundMark(UChar character)
       
  1661 {
       
  1662     switch (character) {
       
  1663     case 0x3099: // COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
       
  1664     case 0x309A: // COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
       
  1665         return true;
       
  1666     }
       
  1667     return false;
       
  1668 }
       
  1669 
       
  1670 static inline bool containsKanaLetters(const String& pattern)
       
  1671 {
       
  1672     const UChar* characters = pattern.characters();
       
  1673     unsigned length = pattern.length();
       
  1674     for (unsigned i = 0; i < length; ++i) {
       
  1675         if (isKanaLetter(characters[i]))
       
  1676             return true;
       
  1677     }
       
  1678     return false;
       
  1679 }
       
  1680 
       
  1681 static void normalizeCharacters(const UChar* characters, unsigned length, Vector<UChar>& buffer)
       
  1682 {
       
  1683     ASSERT(length);
       
  1684 
       
  1685     buffer.resize(length);
       
  1686 
       
  1687     UErrorCode status = U_ZERO_ERROR;
       
  1688     size_t bufferSize = unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), length, &status);
       
  1689     ASSERT(status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING || status == U_BUFFER_OVERFLOW_ERROR);
       
  1690     ASSERT(bufferSize);
       
  1691 
       
  1692     buffer.resize(bufferSize);
       
  1693 
       
  1694     if (status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING)
       
  1695         return;
       
  1696 
       
  1697     status = U_ZERO_ERROR;
       
  1698     unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), bufferSize, &status);
       
  1699     ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
       
  1700 }
       
  1701 
       
  1702 inline SearchBuffer::SearchBuffer(const String& target, bool isCaseSensitive)
       
  1703     : m_target(target)
       
  1704     , m_atBreak(true)
       
  1705     , m_targetRequiresKanaWorkaround(containsKanaLetters(m_target))
       
  1706 {
       
  1707     ASSERT(!m_target.isEmpty());
       
  1708 
       
  1709     // FIXME: We'd like to tailor the searcher to fold quote marks for us instead
       
  1710     // of doing it in a separate replacement pass here, but ICU doesn't offer a way
       
  1711     // to add tailoring on top of the locale-specific tailoring as of this writing.
       
  1712     foldQuoteMarks(m_target);
       
  1713 
       
  1714     size_t targetLength = m_target.length();
       
  1715     m_buffer.reserveInitialCapacity(max(targetLength * 8, minimumSearchBufferSize));
       
  1716     m_overlap = m_buffer.capacity() / 4;
       
  1717 
       
  1718     // Grab the single global searcher.
       
  1719     // If we ever have a reason to do more than once search buffer at once, we'll have
       
  1720     // to move to multiple searchers.
       
  1721     lockSearcher();
       
  1722 
       
  1723     UStringSearch* searcher = WebCore::searcher();
       
  1724     UCollator* collator = usearch_getCollator(searcher);
       
  1725 
       
  1726     UCollationStrength strength = isCaseSensitive ? UCOL_TERTIARY : UCOL_PRIMARY;
       
  1727     if (ucol_getStrength(collator) != strength) {
       
  1728         ucol_setStrength(collator, strength);
       
  1729         usearch_reset(searcher);
       
  1730     }
       
  1731 
       
  1732     UErrorCode status = U_ZERO_ERROR;
       
  1733     usearch_setPattern(searcher, m_target.characters(), targetLength, &status);
       
  1734     ASSERT(status == U_ZERO_ERROR);
       
  1735 
       
  1736     // The kana workaround requires a normalized copy of the target string.
       
  1737     if (m_targetRequiresKanaWorkaround)
       
  1738         normalizeCharacters(m_target.characters(), m_target.length(), m_normalizedTarget);
       
  1739 }
       
  1740 
       
  1741 inline SearchBuffer::~SearchBuffer()
       
  1742 {
       
  1743     unlockSearcher();
       
  1744 }
       
  1745 
       
  1746 inline size_t SearchBuffer::append(const UChar* characters, size_t length)
       
  1747 {
       
  1748     ASSERT(length);
       
  1749 
       
  1750     if (m_atBreak) {
       
  1751         m_buffer.shrink(0);
       
  1752         m_atBreak = false;
       
  1753     } else if (m_buffer.size() == m_buffer.capacity()) {
       
  1754         memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar));
       
  1755         m_buffer.shrink(m_overlap);
       
  1756     }
       
  1757 
       
  1758     size_t oldLength = m_buffer.size();
       
  1759     size_t usableLength = min(m_buffer.capacity() - oldLength, length);
       
  1760     ASSERT(usableLength);
       
  1761     m_buffer.append(characters, usableLength);
       
  1762     foldQuoteMarks(m_buffer.data() + oldLength, usableLength);
       
  1763     return usableLength;
       
  1764 }
       
  1765 
       
  1766 inline bool SearchBuffer::atBreak() const
       
  1767 {
       
  1768     return m_atBreak;
       
  1769 }
       
  1770 
       
  1771 inline void SearchBuffer::reachedBreak()
       
  1772 {
       
  1773     m_atBreak = true;
       
  1774 }
       
  1775 
       
  1776 inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const
       
  1777 {
       
  1778     // This function implements the kana workaround. If usearch treats
       
  1779     // it as a match, but we do not want to, then it's a "bad match".
       
  1780     if (!m_targetRequiresKanaWorkaround)
       
  1781         return false;
       
  1782 
       
  1783     // Normalize into a match buffer. We reuse a single buffer rather than
       
  1784     // creating a new one each time.
       
  1785     normalizeCharacters(match, matchLength, m_normalizedMatch);
       
  1786 
       
  1787     const UChar* a = m_normalizedTarget.begin();
       
  1788     const UChar* aEnd = m_normalizedTarget.end();
       
  1789 
       
  1790     const UChar* b = m_normalizedMatch.begin();
       
  1791     const UChar* bEnd = m_normalizedMatch.end();
       
  1792 
       
  1793     while (true) {
       
  1794         // Skip runs of non-kana-letter characters. This is necessary so we can
       
  1795         // correctly handle strings where the target and match have different-length
       
  1796         // runs of characters that match, while still double checking the correctness
       
  1797         // of matches of kana letters with other kana letters.
       
  1798         while (a != aEnd && !isKanaLetter(*a))
       
  1799             ++a;
       
  1800         while (b != bEnd && !isKanaLetter(*b))
       
  1801             ++b;
       
  1802 
       
  1803         // If we reached the end of either the target or the match, we should have
       
  1804         // reached the end of both; both should have the same number of kana letters.
       
  1805         if (a == aEnd || b == bEnd) {
       
  1806             ASSERT(a == aEnd);
       
  1807             ASSERT(b == bEnd);
       
  1808             return false;
       
  1809         }
       
  1810 
       
  1811         // Check for differences in the kana letter character itself.
       
  1812         if (isSmallKanaLetter(*a) != isSmallKanaLetter(*b))
       
  1813             return true;
       
  1814         if (composedVoicedSoundMark(*a) != composedVoicedSoundMark(*b))
       
  1815             return true;
       
  1816         ++a;
       
  1817         ++b;
       
  1818 
       
  1819         // Check for differences in combining voiced sound marks found after the letter.
       
  1820         while (1) {
       
  1821             if (!(a != aEnd && isCombiningVoicedSoundMark(*a))) {
       
  1822                 if (b != bEnd && isCombiningVoicedSoundMark(*b))
       
  1823                     return true;
       
  1824                 break;
       
  1825             }
       
  1826             if (!(b != bEnd && isCombiningVoicedSoundMark(*b)))
       
  1827                 return true;
       
  1828             if (*a != *b)
       
  1829                 return true;
       
  1830             ++a;
       
  1831             ++b;
       
  1832         }
       
  1833     }
       
  1834 }
       
  1835 
       
  1836 inline size_t SearchBuffer::search(size_t& start)
       
  1837 {
       
  1838     size_t size = m_buffer.size();
       
  1839     if (m_atBreak) {
       
  1840         if (!size)
       
  1841             return 0;
       
  1842     } else {
       
  1843         if (size != m_buffer.capacity())
       
  1844             return 0;
       
  1845     }
       
  1846 
       
  1847     UStringSearch* searcher = WebCore::searcher();
       
  1848 
       
  1849     UErrorCode status = U_ZERO_ERROR;
       
  1850     usearch_setText(searcher, m_buffer.data(), size, &status);
       
  1851     ASSERT(status == U_ZERO_ERROR);
       
  1852 
       
  1853     int matchStart = usearch_first(searcher, &status);
       
  1854     ASSERT(status == U_ZERO_ERROR);
       
  1855 
       
  1856 nextMatch:
       
  1857     if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
       
  1858         ASSERT(matchStart == USEARCH_DONE);
       
  1859         return 0;
       
  1860     }
       
  1861 
       
  1862     // Matches that start in the overlap area are only tentative.
       
  1863     // The same match may appear later, matching more characters,
       
  1864     // possibly including a combining character that's not yet in the buffer.
       
  1865     if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
       
  1866         memcpy(m_buffer.data(), m_buffer.data() + size - m_overlap, m_overlap * sizeof(UChar));
       
  1867         m_buffer.shrink(m_overlap);
       
  1868         return 0;
       
  1869     }
       
  1870 
       
  1871     size_t matchedLength = usearch_getMatchedLength(searcher);
       
  1872     ASSERT(matchStart + matchedLength <= size);
       
  1873 
       
  1874     // If this match is "bad", move on to the next match.
       
  1875     if (isBadMatch(m_buffer.data() + matchStart, matchedLength)) {
       
  1876         matchStart = usearch_next(searcher, &status);
       
  1877         ASSERT(status == U_ZERO_ERROR);
       
  1878         goto nextMatch;
       
  1879     }
       
  1880 
       
  1881     size_t newSize = size - (matchStart + 1);
       
  1882     memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
       
  1883     m_buffer.shrink(newSize);
       
  1884 
       
  1885     start = size - matchStart;
       
  1886     return matchedLength;
       
  1887 }
       
  1888 
       
  1889 #else // !ICU_UNICODE
       
  1890 
       
  1891 inline SearchBuffer::SearchBuffer(const String& target, bool isCaseSensitive)
       
  1892     : m_target(isCaseSensitive ? target : target.foldCase())
       
  1893     , m_isCaseSensitive(isCaseSensitive)
       
  1894     , m_buffer(m_target.length())
       
  1895     , m_isCharacterStartBuffer(m_target.length())
       
  1896     , m_isBufferFull(false)
       
  1897     , m_cursor(0)
       
  1898 {
       
  1899     ASSERT(!m_target.isEmpty());
       
  1900     m_target.replace(noBreakSpace, ' ');
       
  1901     foldQuoteMarks(m_target);
       
  1902 }
       
  1903 
       
  1904 inline SearchBuffer::~SearchBuffer()
       
  1905 {
       
  1906 }
       
  1907 
       
  1908 inline void SearchBuffer::reachedBreak()
       
  1909 {
       
  1910     m_cursor = 0;
       
  1911     m_isBufferFull = false;
       
  1912 }
       
  1913 
       
  1914 inline bool SearchBuffer::atBreak() const
       
  1915 {
       
  1916     return !m_cursor && !m_isBufferFull;
       
  1917 }
       
  1918 
       
  1919 inline void SearchBuffer::append(UChar c, bool isStart)
       
  1920 {
       
  1921     m_buffer[m_cursor] = c == noBreakSpace ? ' ' : foldQuoteMark(c);
       
  1922     m_isCharacterStartBuffer[m_cursor] = isStart;
       
  1923     if (++m_cursor == m_target.length()) {
       
  1924         m_cursor = 0;
       
  1925         m_isBufferFull = true;
       
  1926     }
       
  1927 }
       
  1928 
       
  1929 inline size_t SearchBuffer::append(const UChar* characters, size_t length)
       
  1930 {
       
  1931     ASSERT(length);
       
  1932     if (m_isCaseSensitive) {
       
  1933         append(characters[0], true);
       
  1934         return 1;
       
  1935     }
       
  1936     const int maxFoldedCharacters = 16; // sensible maximum is 3, this should be more than enough
       
  1937     UChar foldedCharacters[maxFoldedCharacters];
       
  1938     bool error;
       
  1939     int numFoldedCharacters = foldCase(foldedCharacters, maxFoldedCharacters, characters, 1, &error);
       
  1940     ASSERT(!error);
       
  1941     ASSERT(numFoldedCharacters);
       
  1942     ASSERT(numFoldedCharacters <= maxFoldedCharacters);
       
  1943     if (!error && numFoldedCharacters) {
       
  1944         numFoldedCharacters = min(numFoldedCharacters, maxFoldedCharacters);
       
  1945         append(foldedCharacters[0], true);
       
  1946         for (int i = 1; i < numFoldedCharacters; ++i)
       
  1947             append(foldedCharacters[i], false);
       
  1948     }
       
  1949     return 1;
       
  1950 }
       
  1951 
       
  1952 inline size_t SearchBuffer::search(size_t& start)
       
  1953 {
       
  1954     if (!m_isBufferFull)
       
  1955         return 0;
       
  1956     if (!m_isCharacterStartBuffer[m_cursor])
       
  1957         return 0;
       
  1958 
       
  1959     size_t tailSpace = m_target.length() - m_cursor;
       
  1960     if (memcmp(&m_buffer[m_cursor], m_target.characters(), tailSpace * sizeof(UChar)) != 0)
       
  1961         return 0;
       
  1962     if (memcmp(&m_buffer[0], m_target.characters() + tailSpace, m_cursor * sizeof(UChar)) != 0)
       
  1963         return 0;
       
  1964 
       
  1965     start = length();
       
  1966 
       
  1967     // Now that we've found a match once, we don't want to find it again, because those
       
  1968     // are the SearchBuffer semantics, allowing for a buffer where you append more than one
       
  1969     // character at a time. To do this we take advantage of m_isCharacterStartBuffer, but if
       
  1970     // we want to get rid of that in the future we could track this with a separate boolean
       
  1971     // or even move the characters to the start of the buffer and set m_isBufferFull to false.
       
  1972     m_isCharacterStartBuffer[m_cursor] = false;
       
  1973 
       
  1974     return start;
       
  1975 }
       
  1976 
       
  1977 // Returns the number of characters that were appended to the buffer (what we are searching in).
       
  1978 // That's not necessarily the same length as the passed-in target string, because case folding
       
  1979 // can make two strings match even though they're not the same length.
       
  1980 size_t SearchBuffer::length() const
       
  1981 {
       
  1982     size_t bufferSize = m_target.length();
       
  1983     size_t length = 0;
       
  1984     for (size_t i = 0; i < bufferSize; ++i)
       
  1985         length += m_isCharacterStartBuffer[i];
       
  1986     return length;
       
  1987 }
       
  1988 
       
  1989 #endif // !ICU_UNICODE
       
  1990 
       
  1991 // --------
       
  1992 
       
  1993 int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation)
       
  1994 {
       
  1995     int length = 0;
       
  1996     for (TextIterator it(r, forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); !it.atEnd(); it.advance())
       
  1997         length += it.length();
       
  1998     
       
  1999     return length;
       
  2000 }
       
  2001 
       
  2002 PassRefPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount)
       
  2003 {
       
  2004     CharacterIterator entireRangeIterator(entireRange);
       
  2005     return characterSubrange(entireRangeIterator, characterOffset, characterCount);
       
  2006 }
       
  2007 
       
  2008 PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool forSelectionPreservation)
       
  2009 {
       
  2010     RefPtr<Range> resultRange = scope->document()->createRange();
       
  2011 
       
  2012     int docTextPosition = 0;
       
  2013     int rangeEnd = rangeLocation + rangeLength;
       
  2014     bool startRangeFound = false;
       
  2015 
       
  2016     RefPtr<Range> textRunRange;
       
  2017 
       
  2018     TextIterator it(rangeOfContents(scope).get(), forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior);
       
  2019     
       
  2020     // FIXME: the atEnd() check shouldn't be necessary, workaround for <http://bugs.webkit.org/show_bug.cgi?id=6289>.
       
  2021     if (rangeLocation == 0 && rangeLength == 0 && it.atEnd()) {
       
  2022         textRunRange = it.range();
       
  2023         
       
  2024         ExceptionCode ec = 0;
       
  2025         resultRange->setStart(textRunRange->startContainer(), 0, ec);
       
  2026         ASSERT(!ec);
       
  2027         resultRange->setEnd(textRunRange->startContainer(), 0, ec);
       
  2028         ASSERT(!ec);
       
  2029         
       
  2030         return resultRange.release();
       
  2031     }
       
  2032 
       
  2033     for (; !it.atEnd(); it.advance()) {
       
  2034         int len = it.length();
       
  2035         textRunRange = it.range();
       
  2036         
       
  2037         bool foundStart = rangeLocation >= docTextPosition && rangeLocation <= docTextPosition + len;
       
  2038         bool foundEnd = rangeEnd >= docTextPosition && rangeEnd <= docTextPosition + len;
       
  2039         
       
  2040         // Fix textRunRange->endPosition(), but only if foundStart || foundEnd, because it is only
       
  2041         // in those cases that textRunRange is used.
       
  2042         if (foundEnd) {
       
  2043             // FIXME: This is a workaround for the fact that the end of a run is often at the wrong
       
  2044             // position for emitted '\n's.
       
  2045             if (len == 1 && it.characters()[0] == '\n') {
       
  2046                 scope->document()->updateLayoutIgnorePendingStylesheets();
       
  2047                 it.advance();
       
  2048                 if (!it.atEnd()) {
       
  2049                     RefPtr<Range> range = it.range();
       
  2050                     ExceptionCode ec = 0;
       
  2051                     textRunRange->setEnd(range->startContainer(), range->startOffset(), ec);
       
  2052                     ASSERT(!ec);
       
  2053                 } else {
       
  2054                     Position runStart = textRunRange->startPosition();
       
  2055                     Position runEnd = VisiblePosition(runStart).next().deepEquivalent();
       
  2056                     if (runEnd.isNotNull()) {
       
  2057                         ExceptionCode ec = 0;
       
  2058                         textRunRange->setEnd(runEnd.node(), runEnd.deprecatedEditingOffset(), ec);
       
  2059                         ASSERT(!ec);
       
  2060                     }
       
  2061                 }
       
  2062             }
       
  2063         }
       
  2064 
       
  2065         if (foundStart) {
       
  2066             startRangeFound = true;
       
  2067             int exception = 0;
       
  2068             if (textRunRange->startContainer()->isTextNode()) {
       
  2069                 int offset = rangeLocation - docTextPosition;
       
  2070                 resultRange->setStart(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception);
       
  2071             } else {
       
  2072                 if (rangeLocation == docTextPosition)
       
  2073                     resultRange->setStart(textRunRange->startContainer(), textRunRange->startOffset(), exception);
       
  2074                 else
       
  2075                     resultRange->setStart(textRunRange->endContainer(), textRunRange->endOffset(), exception);
       
  2076             }
       
  2077         }
       
  2078 
       
  2079         if (foundEnd) {
       
  2080             int exception = 0;
       
  2081             if (textRunRange->startContainer()->isTextNode()) {
       
  2082                 int offset = rangeEnd - docTextPosition;
       
  2083                 resultRange->setEnd(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception);
       
  2084             } else {
       
  2085                 if (rangeEnd == docTextPosition)
       
  2086                     resultRange->setEnd(textRunRange->startContainer(), textRunRange->startOffset(), exception);
       
  2087                 else
       
  2088                     resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception);
       
  2089             }
       
  2090             docTextPosition += len;
       
  2091             break;
       
  2092         }
       
  2093         docTextPosition += len;
       
  2094     }
       
  2095     
       
  2096     if (!startRangeFound)
       
  2097         return 0;
       
  2098     
       
  2099     if (rangeLength != 0 && rangeEnd > docTextPosition) { // rangeEnd is out of bounds
       
  2100         int exception = 0;
       
  2101         resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception);
       
  2102     }
       
  2103     
       
  2104     return resultRange.release();
       
  2105 }
       
  2106 
       
  2107 // --------
       
  2108     
       
  2109 UChar* plainTextToMallocAllocatedBuffer(const Range* r, unsigned& bufferLength, bool isDisplayString)
       
  2110 {
       
  2111     UChar* result = 0;
       
  2112 
       
  2113     // Do this in pieces to avoid massive reallocations if there is a large amount of text.
       
  2114     // Use system malloc for buffers since they can consume lots of memory and current TCMalloc is unable return it back to OS.
       
  2115     static const unsigned cMaxSegmentSize = 1 << 16;
       
  2116     bufferLength = 0;
       
  2117     typedef pair<UChar*, unsigned> TextSegment;
       
  2118     Vector<TextSegment>* textSegments = 0;
       
  2119     Vector<UChar> textBuffer;
       
  2120     textBuffer.reserveInitialCapacity(cMaxSegmentSize);
       
  2121     for (TextIterator it(r, isDisplayString ? TextIteratorDefaultBehavior : TextIteratorEmitsTextsWithoutTranscoding); !it.atEnd(); it.advance()) {
       
  2122         if (textBuffer.size() && textBuffer.size() + it.length() > cMaxSegmentSize) {
       
  2123             UChar* newSegmentBuffer = static_cast<UChar*>(malloc(textBuffer.size() * sizeof(UChar)));
       
  2124             if (!newSegmentBuffer)
       
  2125                 goto exit;
       
  2126             memcpy(newSegmentBuffer, textBuffer.data(), textBuffer.size() * sizeof(UChar));
       
  2127             if (!textSegments)
       
  2128                 textSegments = new Vector<TextSegment>;
       
  2129             textSegments->append(make_pair(newSegmentBuffer, (unsigned)textBuffer.size()));
       
  2130             textBuffer.clear();
       
  2131         }
       
  2132         textBuffer.append(it.characters(), it.length());
       
  2133         bufferLength += it.length();
       
  2134     }
       
  2135 
       
  2136     if (!bufferLength)
       
  2137         return 0;
       
  2138 
       
  2139     // Since we know the size now, we can make a single buffer out of the pieces with one big alloc
       
  2140     result = static_cast<UChar*>(malloc(bufferLength * sizeof(UChar)));
       
  2141     if (!result)
       
  2142         goto exit;
       
  2143 
       
  2144     {
       
  2145         UChar* resultPos = result;
       
  2146         if (textSegments) {
       
  2147             unsigned size = textSegments->size();
       
  2148             for (unsigned i = 0; i < size; ++i) {
       
  2149                 const TextSegment& segment = textSegments->at(i);
       
  2150                 memcpy(resultPos, segment.first, segment.second * sizeof(UChar));
       
  2151                 resultPos += segment.second;
       
  2152             }
       
  2153         }
       
  2154         memcpy(resultPos, textBuffer.data(), textBuffer.size() * sizeof(UChar));
       
  2155     }
       
  2156 
       
  2157 exit:
       
  2158     if (textSegments) {
       
  2159         unsigned size = textSegments->size();
       
  2160         for (unsigned i = 0; i < size; ++i)
       
  2161             free(textSegments->at(i).first);
       
  2162         delete textSegments;
       
  2163     }
       
  2164     
       
  2165     if (isDisplayString && r->ownerDocument())
       
  2166         r->ownerDocument()->displayBufferModifiedByEncoding(result, bufferLength);
       
  2167 
       
  2168     return result;
       
  2169 }
       
  2170 
       
  2171 String plainText(const Range* r)
       
  2172 {
       
  2173     unsigned length;
       
  2174     UChar* buf = plainTextToMallocAllocatedBuffer(r, length, false);
       
  2175     if (!buf)
       
  2176         return "";
       
  2177     String result(buf, length);
       
  2178     free(buf);
       
  2179     return result;
       
  2180 }
       
  2181 
       
  2182 static inline bool isAllCollapsibleWhitespace(const String& string)
       
  2183 {
       
  2184     const UChar* characters = string.characters();
       
  2185     unsigned length = string.length();
       
  2186     for (unsigned i = 0; i < length; ++i) {
       
  2187         if (!isCollapsibleWhitespace(characters[i]))
       
  2188             return false;
       
  2189     }
       
  2190     return true;
       
  2191 }
       
  2192 
       
  2193 static PassRefPtr<Range> collapsedToBoundary(const Range* range, bool forward)
       
  2194 {
       
  2195     ExceptionCode ec = 0;
       
  2196     RefPtr<Range> result = range->cloneRange(ec);
       
  2197     ASSERT(!ec);
       
  2198     result->collapse(!forward, ec);
       
  2199     ASSERT(!ec);
       
  2200     return result.release();
       
  2201 }
       
  2202 
       
  2203 static size_t findPlainText(CharacterIterator& it, const String& target, bool forward, bool caseSensitive, size_t& matchStart)
       
  2204 {
       
  2205     matchStart = 0;
       
  2206     size_t matchLength = 0;
       
  2207 
       
  2208     SearchBuffer buffer(target, caseSensitive);
       
  2209 
       
  2210     while (!it.atEnd()) {
       
  2211         it.advance(buffer.append(it.characters(), it.length()));
       
  2212 tryAgain:
       
  2213         size_t matchStartOffset;
       
  2214         if (size_t newMatchLength = buffer.search(matchStartOffset)) {
       
  2215             // Note that we found a match, and where we found it.
       
  2216             size_t lastCharacterInBufferOffset = it.characterOffset();
       
  2217             ASSERT(lastCharacterInBufferOffset >= matchStartOffset);
       
  2218             matchStart = lastCharacterInBufferOffset - matchStartOffset;
       
  2219             matchLength = newMatchLength;
       
  2220             // If searching forward, stop on the first match.
       
  2221             // If searching backward, don't stop, so we end up with the last match.
       
  2222             if (forward)
       
  2223                 break;
       
  2224             goto tryAgain;
       
  2225         }
       
  2226         if (it.atBreak() && !buffer.atBreak()) {
       
  2227             buffer.reachedBreak();
       
  2228             goto tryAgain;
       
  2229         }
       
  2230     }
       
  2231 
       
  2232     return matchLength;
       
  2233 }
       
  2234 
       
  2235 PassRefPtr<Range> findPlainText(const Range* range, const String& target, bool forward, bool caseSensitive)
       
  2236 {
       
  2237     // First, find the text.
       
  2238     size_t matchStart;
       
  2239     size_t matchLength;
       
  2240     {
       
  2241         CharacterIterator findIterator(range, TextIteratorEntersTextControls);
       
  2242         matchLength = findPlainText(findIterator, target, forward, caseSensitive, matchStart);
       
  2243         if (!matchLength)
       
  2244             return collapsedToBoundary(range, forward);
       
  2245     }
       
  2246 
       
  2247     // Then, find the document position of the start and the end of the text.
       
  2248     CharacterIterator computeRangeIterator(range, TextIteratorEntersTextControls);
       
  2249     return characterSubrange(computeRangeIterator, matchStart, matchLength);
       
  2250 }
       
  2251 
       
  2252 }