src/corelib/tools/qtextboundaryfinder.cpp
changeset 0 1918ee327afb
child 4 3b1da2848fc7
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 /****************************************************************************
       
     2 **
       
     3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     4 ** All rights reserved.
       
     5 ** Contact: Nokia Corporation (qt-info@nokia.com)
       
     6 **
       
     7 ** This file is part of the QtCore module of the Qt Toolkit.
       
     8 **
       
     9 ** $QT_BEGIN_LICENSE:LGPL$
       
    10 ** No Commercial Usage
       
    11 ** This file contains pre-release code and may not be distributed.
       
    12 ** You may use this file in accordance with the terms and conditions
       
    13 ** contained in the Technology Preview License Agreement accompanying
       
    14 ** this package.
       
    15 **
       
    16 ** GNU Lesser General Public License Usage
       
    17 ** Alternatively, this file may be used under the terms of the GNU Lesser
       
    18 ** General Public License version 2.1 as published by the Free Software
       
    19 ** Foundation and appearing in the file LICENSE.LGPL included in the
       
    20 ** packaging of this file.  Please review the following information to
       
    21 ** ensure the GNU Lesser General Public License version 2.1 requirements
       
    22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
       
    23 **
       
    24 ** In addition, as a special exception, Nokia gives you certain additional
       
    25 ** rights.  These rights are described in the Nokia Qt LGPL Exception
       
    26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
       
    27 **
       
    28 ** If you have questions regarding the use of this file, please contact
       
    29 ** Nokia at qt-info@nokia.com.
       
    30 **
       
    31 **
       
    32 **
       
    33 **
       
    34 **
       
    35 **
       
    36 **
       
    37 **
       
    38 ** $QT_END_LICENSE$
       
    39 **
       
    40 ****************************************************************************/
       
    41 #include <QtCore/qtextboundaryfinder.h>
       
    42 #include <QtCore/qvarlengtharray.h>
       
    43 #include <private/qunicodetables_p.h>
       
    44 #include <qdebug.h>
       
    45 #include "private/qharfbuzz_p.h"
       
    46 
       
    47 QT_BEGIN_NAMESPACE
       
    48 
       
    49 class QTextBoundaryFinderPrivate
       
    50 {
       
    51 public:
       
    52     HB_CharAttributes attributes[1];
       
    53 };
       
    54 
       
    55 static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int length, HB_CharAttributes *attributes)
       
    56 {
       
    57     QVarLengthArray<HB_ScriptItem> scriptItems;
       
    58 
       
    59     const ushort *string = reinterpret_cast<const ushort *>(chars);
       
    60     const ushort *unicode = string;
       
    61     // correctly assign script, isTab and isObject to the script analysis
       
    62     const ushort *uc = unicode;
       
    63     const ushort *e = uc + length;
       
    64     int script = QUnicodeTables::Common;
       
    65     int lastScript = QUnicodeTables::Common;
       
    66     const ushort *start = uc;
       
    67     while (uc < e) {
       
    68         int s = QUnicodeTables::script(*uc);
       
    69         if (s != QUnicodeTables::Inherited)
       
    70             script = s;
       
    71         if (*uc == QChar::ObjectReplacementCharacter || *uc == QChar::LineSeparator || *uc == 9) 
       
    72             script = QUnicodeTables::Common;
       
    73         if (script != lastScript) {
       
    74             if (uc != start) {
       
    75                 HB_ScriptItem item;
       
    76                 item.pos = start - string;
       
    77                 item.length = uc - start;
       
    78                 item.script = (HB_Script)lastScript;
       
    79                 item.bidiLevel = 0; // ### what's the proper value?
       
    80                 scriptItems.append(item);
       
    81                 start = uc;
       
    82             }
       
    83             lastScript = script;
       
    84         }
       
    85         ++uc;
       
    86     }
       
    87     if (uc != start) {
       
    88         HB_ScriptItem item;
       
    89         item.pos = start - string;
       
    90         item.length = uc - start;
       
    91         item.script = (HB_Script)lastScript;
       
    92         item.bidiLevel = 0; // ### what's the proper value?
       
    93         scriptItems.append(item);
       
    94     }
       
    95 
       
    96     qGetCharAttributes(string, length, scriptItems.data(), scriptItems.count(), attributes);
       
    97     if (type == QTextBoundaryFinder::Word)
       
    98         HB_GetWordBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes);
       
    99     else if (type == QTextBoundaryFinder::Sentence)
       
   100         HB_GetSentenceBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes);
       
   101 }
       
   102 
       
   103 /*! \class QTextBoundaryFinder
       
   104 
       
   105     \brief The QTextBoundaryFinder class provides a way of finding Unicode text boundaries in a string.
       
   106 
       
   107     \since 4.4
       
   108     \ingroup tools
       
   109     \ingroup shared
       
   110     \ingroup string-processing
       
   111     \reentrant
       
   112 
       
   113     QTextBoundaryFinder allows you to find Unicode text boundaries in a
       
   114     string, similar to the Unicode text boundary specification (see
       
   115     http://www.unicode.org/reports/tr29/tr29-11.html).
       
   116 
       
   117     QTextBoundaryFinder can operate on a QString in four possible
       
   118     modes depending on the value of \a BoundaryType.
       
   119 
       
   120     Units of Unicode characters that make up what the user thinks of
       
   121     as a character or basic unit of the language are here called
       
   122     Grapheme clusters. The two unicode characters 'A' + diaeresis do
       
   123     for example form one grapheme cluster as the user thinks of them
       
   124     as one character, yet it is in this case represented by two
       
   125     unicode code points.
       
   126 
       
   127     Word boundaries are there to locate the start and end of what a
       
   128     language considers to be a word.
       
   129 
       
   130     Line break boundaries give possible places where a line break
       
   131     might happen and sentence boundaries will show the beginning and
       
   132     end of whole sentences.
       
   133 */
       
   134 
       
   135 /*!
       
   136     \enum QTextBoundaryFinder::BoundaryType
       
   137 
       
   138     \value Grapheme Finds a grapheme which is the smallest boundary. It
       
   139     includes letters, punctuation marks, numerals and more.
       
   140     \value Word Finds a word.
       
   141     \value Line Finds possible positions for breaking the text into multiple
       
   142     lines.
       
   143     \value Sentence Finds sentence boundaries. These include periods, question
       
   144     marks etc.
       
   145 */
       
   146 
       
   147 /*!
       
   148   \enum QTextBoundaryFinder::BoundaryReason
       
   149 
       
   150   \value NotAtBoundary  The boundary finder is not at a boundary position.
       
   151   \value StartWord  The boundary finder is at the start of a word.
       
   152   \value EndWord  The boundary finder is at the end of a word.
       
   153 */
       
   154 
       
   155 /*!
       
   156   Constructs an invalid QTextBoundaryFinder object.
       
   157 */
       
   158 QTextBoundaryFinder::QTextBoundaryFinder()
       
   159     : t(Grapheme)
       
   160     , chars(0)
       
   161     , length(0)
       
   162     , freePrivate(true)
       
   163     , d(0)
       
   164 {
       
   165 }
       
   166 
       
   167 /*!
       
   168   Copies the QTextBoundaryFinder object, \a other.
       
   169 */
       
   170 QTextBoundaryFinder::QTextBoundaryFinder(const QTextBoundaryFinder &other)
       
   171     : t(other.t)
       
   172     , s(other.s)
       
   173     , chars(other.chars)
       
   174     , length(other.length)
       
   175     , pos(other.pos)
       
   176     , freePrivate(true)
       
   177 {
       
   178     d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
       
   179     Q_CHECK_PTR(d);
       
   180     memcpy(d, other.d, length*sizeof(HB_CharAttributes));
       
   181 }
       
   182 
       
   183 /*!
       
   184   Assigns the object, \a other, to another QTextBoundaryFinder object.
       
   185 */
       
   186 QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &other)
       
   187 {
       
   188     if (&other == this)
       
   189         return *this;
       
   190 
       
   191     t = other.t;
       
   192     s = other.s;
       
   193     chars = other.chars;
       
   194     length = other.length;
       
   195     pos = other.pos;
       
   196     freePrivate = true;
       
   197 
       
   198     QTextBoundaryFinderPrivate *newD = (QTextBoundaryFinderPrivate *)
       
   199         realloc(d, length*sizeof(HB_CharAttributes));
       
   200     Q_CHECK_PTR(newD);
       
   201     d = newD;
       
   202     memcpy(d, other.d, length*sizeof(HB_CharAttributes));
       
   203 
       
   204     return *this;
       
   205 }
       
   206 
       
   207 /*!
       
   208   Destructs the QTextBoundaryFinder object.
       
   209 */
       
   210 QTextBoundaryFinder::~QTextBoundaryFinder()
       
   211 {
       
   212     if (freePrivate)
       
   213         free(d);
       
   214 }
       
   215 
       
   216 /*!
       
   217   Creates a QTextBoundaryFinder object of \a type operating on \a string.
       
   218 */
       
   219 QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &string)
       
   220     : t(type)
       
   221     , s(string)
       
   222     , chars(string.unicode())
       
   223     , length(string.length())
       
   224     , pos(0)
       
   225     , freePrivate(true)
       
   226 {
       
   227     d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
       
   228     Q_CHECK_PTR(d);
       
   229     init(t, chars, length, d->attributes);
       
   230 }
       
   231 
       
   232 /*!
       
   233   Creates a QTextBoundaryFinder object of \a type operating on \a chars
       
   234   with \a length.
       
   235 
       
   236   \a buffer is an optional working buffer of size \a bufferSize you can pass to
       
   237   the QTextBoundaryFinder. If the buffer is large enough to hold the working
       
   238   data required, it will use this instead of allocating its own buffer.
       
   239 
       
   240   \warning QTextBoundaryFinder does not create a copy of \a chars. It is the
       
   241   application programmer's responsability to ensure the array is allocated for
       
   242   as long as the QTextBoundaryFinder object stays alive. The same applies to
       
   243   \a buffer.
       
   244 */
       
   245 QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars, int length, unsigned char *buffer, int bufferSize)
       
   246     : t(type)
       
   247     , chars(chars)
       
   248     , length(length)
       
   249     , pos(0)
       
   250 {
       
   251     if (buffer && (uint)bufferSize >= length*sizeof(HB_CharAttributes)) {
       
   252         d = (QTextBoundaryFinderPrivate *)buffer;
       
   253         freePrivate = false;
       
   254     } else {
       
   255         d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
       
   256         Q_CHECK_PTR(d);
       
   257         freePrivate = true;
       
   258     }
       
   259     init(t, chars, length, d->attributes);
       
   260 }
       
   261 
       
   262 /*!
       
   263   Moves the finder to the start of the string. This is equivalent to setPosition(0).
       
   264 
       
   265   \sa setPosition(), position()
       
   266 */
       
   267 void QTextBoundaryFinder::toStart()
       
   268 {
       
   269     pos = 0;
       
   270 }
       
   271 
       
   272 /*!
       
   273   Moves the finder to the end of the string. This is equivalent to setPosition(string.length()).
       
   274 
       
   275   \sa setPosition(), position()
       
   276 */
       
   277 void QTextBoundaryFinder::toEnd()
       
   278 {
       
   279     pos = length;
       
   280 }
       
   281 
       
   282 /*!
       
   283   Returns the current position of the QTextBoundaryFinder.
       
   284 
       
   285   The range is from 0 (the beginning of the string) to the length of
       
   286   the string inclusive.
       
   287 
       
   288   \sa setPosition()
       
   289 */
       
   290 int QTextBoundaryFinder::position() const
       
   291 {
       
   292     return pos;
       
   293 }
       
   294 
       
   295 /*!
       
   296   Sets the current position of the QTextBoundaryFinder to \a position.
       
   297 
       
   298   If \a position is out of bounds, it will be bound to only valid
       
   299   positions. In this case, valid positions are from 0 to the length of
       
   300   the string inclusive.
       
   301 
       
   302   \sa position()
       
   303 */
       
   304 void QTextBoundaryFinder::setPosition(int position)
       
   305 {
       
   306     pos = qBound(0, position, length);
       
   307 }
       
   308 
       
   309 /*! \fn QTextBoundaryFinder::BoundaryType QTextBoundaryFinder::type() const
       
   310 
       
   311   Returns the type of the QTextBoundaryFinder.
       
   312 */
       
   313 
       
   314 /*! \fn bool QTextBoundaryFinder::isValid() const
       
   315 
       
   316    Returns true if the text boundary finder is valid; otherwise returns false.
       
   317    A default QTextBoundaryFinder is invalid.
       
   318 */
       
   319 
       
   320 /*!
       
   321   Returns the string  the QTextBoundaryFinder object operates on.
       
   322 */
       
   323 QString QTextBoundaryFinder::string() const
       
   324 {
       
   325     if (chars == s.unicode() && length == s.length())
       
   326         return s;
       
   327     return QString(chars, length);
       
   328 }
       
   329 
       
   330 
       
   331 /*!
       
   332   Moves the QTextBoundaryFinder to the next boundary position and returns that position.
       
   333 
       
   334   Returns -1 is there is no next boundary.
       
   335 */
       
   336 int QTextBoundaryFinder::toNextBoundary()
       
   337 {
       
   338     if (!d) {
       
   339         pos = -1;
       
   340         return pos;
       
   341     }
       
   342 
       
   343     if (pos < 0 || pos >= length) {
       
   344         pos = -1;
       
   345         return pos;
       
   346     }
       
   347     ++pos;
       
   348     if (pos == length)
       
   349         return pos;
       
   350     
       
   351     switch(t) {
       
   352     case Grapheme:
       
   353         while (pos < length && !d->attributes[pos].charStop)
       
   354             ++pos;
       
   355         break;
       
   356     case Word:
       
   357         while (pos < length && !d->attributes[pos].wordBoundary)
       
   358             ++pos;
       
   359         break;
       
   360     case Sentence:
       
   361         while (pos < length && !d->attributes[pos].sentenceBoundary)
       
   362             ++pos;
       
   363         break;
       
   364     case Line:
       
   365         while (pos < length && d->attributes[pos].lineBreakType < HB_Break)
       
   366             ++pos;
       
   367         break;
       
   368     }
       
   369 
       
   370     return pos;
       
   371 }
       
   372 
       
   373 /*!
       
   374   Moves the QTextBoundaryFinder to the previous boundary position and returns that position.
       
   375 
       
   376   Returns -1 is there is no previous boundary.
       
   377 */
       
   378 int QTextBoundaryFinder::toPreviousBoundary()
       
   379 {
       
   380     if (!d) {
       
   381         pos = -1;
       
   382         return pos;
       
   383     }
       
   384 
       
   385     if (pos <= 0 || pos > length) {
       
   386         pos = -1;
       
   387         return pos;
       
   388     }
       
   389     --pos;
       
   390     if (pos == 0)
       
   391         return pos;
       
   392 
       
   393     switch(t) {
       
   394     case Grapheme:
       
   395         while (pos > 0 && !d->attributes[pos].charStop)
       
   396             --pos;
       
   397         break;
       
   398     case Word:
       
   399         while (pos > 0 && !d->attributes[pos].wordBoundary)
       
   400             --pos;
       
   401         break;
       
   402     case Sentence:
       
   403         while (pos > 0 && !d->attributes[pos].sentenceBoundary)
       
   404             --pos;
       
   405         break;
       
   406     case Line:
       
   407         while (pos > 0 && d->attributes[pos].lineBreakType < HB_Break)
       
   408             --pos;
       
   409         break;
       
   410     }
       
   411 
       
   412     return pos;
       
   413 }
       
   414 
       
   415 /*!
       
   416   Returns true if the object's position() is currently at a valid text boundary.
       
   417 */
       
   418 bool QTextBoundaryFinder::isAtBoundary() const
       
   419 {
       
   420     if (!d || pos < 0)
       
   421         return false;
       
   422 
       
   423     if (pos == length)
       
   424         return true;
       
   425 
       
   426     switch(t) {
       
   427     case Grapheme:
       
   428         return d->attributes[pos].charStop;
       
   429     case Word:
       
   430         return d->attributes[pos].wordBoundary;
       
   431     case Line:
       
   432         return d->attributes[pos].lineBreakType >= HB_Break;
       
   433     case Sentence:
       
   434         return d->attributes[pos].sentenceBoundary;
       
   435     }
       
   436     return false;
       
   437 }
       
   438 
       
   439 /*!
       
   440   Returns the reasons for the boundary finder to have chosen the current position as a boundary.
       
   441 */
       
   442 QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() const
       
   443 {
       
   444     if (!d)
       
   445         return NotAtBoundary;
       
   446     if (! isAtBoundary())
       
   447         return NotAtBoundary;
       
   448     if (pos == 0) {
       
   449         if (d->attributes[pos].whiteSpace)
       
   450             return NotAtBoundary;
       
   451         return StartWord;
       
   452     }
       
   453     if (pos >= length - 1) {
       
   454         if (d->attributes[length-1].whiteSpace)
       
   455             return NotAtBoundary;
       
   456         return EndWord;
       
   457     }
       
   458 
       
   459     BoundaryReasons answer;
       
   460     const bool nextIsSpace = d->attributes[pos + 1].whiteSpace;
       
   461     const bool prevIsSpace = d->attributes[pos - 1].whiteSpace;
       
   462 
       
   463     if (d->attributes[pos].whiteSpace)
       
   464         answer = EndWord;
       
   465     else if (!prevIsSpace) {
       
   466         answer = StartWord;
       
   467         answer |= EndWord;
       
   468     }
       
   469 
       
   470     if (prevIsSpace)
       
   471         answer |= StartWord;
       
   472     if (nextIsSpace)
       
   473         answer |= EndWord;
       
   474     if (answer == 0) {
       
   475         answer = StartWord;
       
   476         answer |= EndWord;
       
   477     }
       
   478 
       
   479     return answer;
       
   480 }
       
   481 
       
   482 QT_END_NAMESPACE