src/corelib/tools/qbytearraymatcher.cpp
changeset 0 1918ee327afb
child 4 3b1da2848fc7
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 /****************************************************************************
       
     2 **
       
     3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     4 ** All rights reserved.
       
     5 ** Contact: Nokia Corporation (qt-info@nokia.com)
       
     6 **
       
     7 ** This file is part of the QtCore module of the Qt Toolkit.
       
     8 **
       
     9 ** $QT_BEGIN_LICENSE:LGPL$
       
    10 ** No Commercial Usage
       
    11 ** This file contains pre-release code and may not be distributed.
       
    12 ** You may use this file in accordance with the terms and conditions
       
    13 ** contained in the Technology Preview License Agreement accompanying
       
    14 ** this package.
       
    15 **
       
    16 ** GNU Lesser General Public License Usage
       
    17 ** Alternatively, this file may be used under the terms of the GNU Lesser
       
    18 ** General Public License version 2.1 as published by the Free Software
       
    19 ** Foundation and appearing in the file LICENSE.LGPL included in the
       
    20 ** packaging of this file.  Please review the following information to
       
    21 ** ensure the GNU Lesser General Public License version 2.1 requirements
       
    22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
       
    23 **
       
    24 ** In addition, as a special exception, Nokia gives you certain additional
       
    25 ** rights.  These rights are described in the Nokia Qt LGPL Exception
       
    26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
       
    27 **
       
    28 ** If you have questions regarding the use of this file, please contact
       
    29 ** Nokia at qt-info@nokia.com.
       
    30 **
       
    31 **
       
    32 **
       
    33 **
       
    34 **
       
    35 **
       
    36 **
       
    37 **
       
    38 ** $QT_END_LICENSE$
       
    39 **
       
    40 ****************************************************************************/
       
    41 
       
    42 #include "qbytearraymatcher.h"
       
    43 
       
    44 #include <limits.h>
       
    45 
       
    46 QT_BEGIN_NAMESPACE
       
    47 
       
    48 static inline void bm_init_skiptable(const uchar *cc, int len, uchar *skiptable)
       
    49 {
       
    50     int l = qMin(len, 255);
       
    51     memset(skiptable, l, 256*sizeof(uchar));
       
    52     cc += len - l;
       
    53     while (l--)
       
    54         skiptable[*cc++] = l;
       
    55 }
       
    56 
       
    57 static inline int bm_find(const uchar *cc, int l, int index, const uchar *puc, uint pl,
       
    58                           const uchar *skiptable)
       
    59 {
       
    60     if (pl == 0)
       
    61         return index > l ? -1 : index;
       
    62     const uint pl_minus_one = pl - 1;
       
    63 
       
    64     register const uchar *current = cc + index + pl_minus_one;
       
    65     const uchar *end = cc + l;
       
    66     while (current < end) {
       
    67         uint skip = skiptable[*current];
       
    68         if (!skip) {
       
    69             // possible match
       
    70             while (skip < pl) {
       
    71                 if (*(current - skip) != puc[pl_minus_one - skip])
       
    72                     break;
       
    73                 skip++;
       
    74             }
       
    75             if (skip > pl_minus_one) // we have a match
       
    76                 return (current - cc) - skip + 1;
       
    77 
       
    78             // in case we don't have a match we are a bit inefficient as we only skip by one
       
    79             // when we have the non matching char in the string.
       
    80             if (skiptable[*(current - skip)] == pl)
       
    81                 skip = pl - skip;
       
    82             else
       
    83                 skip = 1;
       
    84         }
       
    85         if (current > end - skip)
       
    86             break;
       
    87         current += skip;
       
    88     }
       
    89     return -1; // not found
       
    90 }
       
    91 
       
    92 /*! \class QByteArrayMatcher
       
    93     \brief The QByteArrayMatcher class holds a sequence of bytes that
       
    94     can be quickly matched in a byte array.
       
    95 
       
    96     \ingroup tools
       
    97     \ingroup string-processing
       
    98 
       
    99     This class is useful when you have a sequence of bytes that you
       
   100     want to repeatedly match against some byte arrays (perhaps in a
       
   101     loop), or when you want to search for the same sequence of bytes
       
   102     multiple times in the same byte array. Using a matcher object and
       
   103     indexIn() is faster than matching a plain QByteArray with
       
   104     QByteArray::indexOf() if repeated matching takes place. This
       
   105     class offers no benefit if you are doing one-off byte array
       
   106     matches.
       
   107 
       
   108     Create the QByteArrayMatcher with the QByteArray you want to
       
   109     search for. Then call indexIn() on the QByteArray that you want to
       
   110     search.
       
   111 
       
   112     \sa QByteArray, QStringMatcher
       
   113 */
       
   114 
       
   115 /*!
       
   116     Constructs an empty byte array matcher that won't match anything.
       
   117     Call setPattern() to give it a pattern to match.
       
   118 */
       
   119 QByteArrayMatcher::QByteArrayMatcher()
       
   120     : d(0)
       
   121 {
       
   122     p.p = 0;
       
   123     p.l = 0;
       
   124     qMemSet(p.q_skiptable, 0, sizeof(p.q_skiptable));
       
   125 }
       
   126 
       
   127 /*!
       
   128   Constructs a byte array matcher from \a pattern. \a pattern
       
   129   has the given \a length. \a pattern must remain in scope, but
       
   130   the destructor does not delete \a pattern.
       
   131  */
       
   132 QByteArrayMatcher::QByteArrayMatcher(const char *pattern, int length)
       
   133     : d(0)
       
   134 {
       
   135     p.p = reinterpret_cast<const uchar *>(pattern);
       
   136     p.l = length;
       
   137     bm_init_skiptable(p.p, p.l, p.q_skiptable);
       
   138 }
       
   139 
       
   140 /*!
       
   141     Constructs a byte array matcher that will search for \a pattern.
       
   142     Call indexIn() to perform a search.
       
   143 */
       
   144 QByteArrayMatcher::QByteArrayMatcher(const QByteArray &pattern)
       
   145     : d(0), q_pattern(pattern)
       
   146 {
       
   147     p.p = reinterpret_cast<const uchar *>(pattern.constData());
       
   148     p.l = pattern.size();
       
   149     bm_init_skiptable(p.p, p.l, p.q_skiptable);
       
   150 }
       
   151 
       
   152 /*!
       
   153     Copies the \a other byte array matcher to this byte array matcher.
       
   154 */
       
   155 QByteArrayMatcher::QByteArrayMatcher(const QByteArrayMatcher &other)
       
   156     : d(0)
       
   157 {
       
   158     operator=(other);
       
   159 }
       
   160 
       
   161 /*!
       
   162     Destroys the byte array matcher.
       
   163 */
       
   164 QByteArrayMatcher::~QByteArrayMatcher()
       
   165 {
       
   166 }
       
   167 
       
   168 /*!
       
   169     Assigns the \a other byte array matcher to this byte array matcher.
       
   170 */
       
   171 QByteArrayMatcher &QByteArrayMatcher::operator=(const QByteArrayMatcher &other)
       
   172 {
       
   173     q_pattern = other.q_pattern;
       
   174     qMemCopy(&p, &other.p, sizeof(p));
       
   175     return *this;
       
   176 }
       
   177 
       
   178 /*!
       
   179     Sets the byte array that this byte array matcher will search for
       
   180     to \a pattern.
       
   181 
       
   182     \sa pattern(), indexIn()
       
   183 */
       
   184 void QByteArrayMatcher::setPattern(const QByteArray &pattern)
       
   185 {
       
   186     q_pattern = pattern;
       
   187     p.p = reinterpret_cast<const uchar *>(pattern.constData());
       
   188     p.l = pattern.size();
       
   189     bm_init_skiptable(p.p, p.l, p.q_skiptable);
       
   190 }
       
   191 
       
   192 /*!
       
   193     Searches the byte array \a ba, from byte position \a from (default
       
   194     0, i.e. from the first byte), for the byte array pattern() that
       
   195     was set in the constructor or in the most recent call to
       
   196     setPattern(). Returns the position where the pattern() matched in
       
   197     \a ba, or -1 if no match was found.
       
   198 */
       
   199 int QByteArrayMatcher::indexIn(const QByteArray &ba, int from) const
       
   200 {
       
   201     if (from < 0)
       
   202         from = 0;
       
   203     return bm_find(reinterpret_cast<const uchar *>(ba.constData()), ba.size(), from,
       
   204                    p.p, p.l, p.q_skiptable);
       
   205 }
       
   206 
       
   207 /*!
       
   208     Searches the char string \a str, which has length \a len, from
       
   209     byte position \a from (default 0, i.e. from the first byte), for
       
   210     the byte array pattern() that was set in the constructor or in the
       
   211     most recent call to setPattern(). Returns the position where the
       
   212     pattern() matched in \a str, or -1 if no match was found.
       
   213 */
       
   214 int QByteArrayMatcher::indexIn(const char *str, int len, int from) const
       
   215 {
       
   216     if (from < 0)
       
   217         from = 0;
       
   218     return bm_find(reinterpret_cast<const uchar *>(str), len, from,
       
   219                    p.p, p.l, p.q_skiptable);
       
   220 }
       
   221 
       
   222 /*!
       
   223     \fn QByteArray QByteArrayMatcher::pattern() const
       
   224 
       
   225     Returns the byte array pattern that this byte array matcher will
       
   226     search for.
       
   227 
       
   228     \sa setPattern()
       
   229 */
       
   230 
       
   231 
       
   232 static int findChar(const char *str, int len, char ch, int from)
       
   233 {
       
   234     const uchar *s = (const uchar *)str;
       
   235     uchar c = (uchar)ch;
       
   236     if (from < 0)
       
   237         from = qMax(from + len, 0);
       
   238     if (from < len) {
       
   239         const uchar *n = s + from - 1;
       
   240         const uchar *e = s + len;
       
   241         while (++n != e)
       
   242             if (*n == c)
       
   243                 return  n - s;
       
   244     }
       
   245     return -1;
       
   246 }
       
   247 
       
   248 /*! \internal
       
   249  */
       
   250 static int qFindByteArrayBoyerMoore(
       
   251     const char *haystack, int haystackLen, int haystackOffset,
       
   252     const char *needle, int needleLen)
       
   253 {
       
   254     uchar skiptable[256];
       
   255     bm_init_skiptable((const uchar *)needle, needleLen, skiptable);
       
   256     if (haystackOffset < 0)
       
   257         haystackOffset = 0;
       
   258     return bm_find((const uchar *)haystack, haystackLen, haystackOffset,
       
   259                    (const uchar *)needle, needleLen, skiptable);
       
   260 }
       
   261 
       
   262 #define REHASH(a) \
       
   263     if (sl_minus_1 < sizeof(uint) * CHAR_BIT) \
       
   264         hashHaystack -= (a) << sl_minus_1; \
       
   265     hashHaystack <<= 1
       
   266 
       
   267 /*! \internal
       
   268  */
       
   269 int qFindByteArray(
       
   270     const char *haystack0, int haystackLen, int from,
       
   271     const char *needle, int needleLen)
       
   272 {
       
   273     const int l = haystackLen;
       
   274     const int sl = needleLen;
       
   275     if (from < 0)
       
   276         from += l;
       
   277     if (uint(sl + from) > (uint)l)
       
   278         return -1;
       
   279     if (!sl)
       
   280         return from;
       
   281     if (!l)
       
   282         return -1;
       
   283 
       
   284     if (sl == 1)
       
   285         return findChar(haystack0, haystackLen, needle[0], from);
       
   286 
       
   287     /*
       
   288       We use the Boyer-Moore algorithm in cases where the overhead
       
   289       for the skip table should pay off, otherwise we use a simple
       
   290       hash function.
       
   291     */
       
   292     if (l > 500 && sl > 5)
       
   293         return qFindByteArrayBoyerMoore(haystack0, haystackLen, from,
       
   294                                         needle, needleLen);
       
   295 
       
   296     /*
       
   297       We use some hashing for efficiency's sake. Instead of
       
   298       comparing strings, we compare the hash value of str with that
       
   299       of a part of this QString. Only if that matches, we call memcmp().
       
   300     */
       
   301     const char *haystack = haystack0 + from;
       
   302     const char *end = haystack0 + (l - sl);
       
   303     const uint sl_minus_1 = sl - 1;
       
   304     uint hashNeedle = 0, hashHaystack = 0;
       
   305     int idx;
       
   306     for (idx = 0; idx < sl; ++idx) {
       
   307         hashNeedle = ((hashNeedle<<1) + needle[idx]);
       
   308         hashHaystack = ((hashHaystack<<1) + haystack[idx]);
       
   309     }
       
   310     hashHaystack -= *(haystack + sl_minus_1);
       
   311 
       
   312     while (haystack <= end) {
       
   313         hashHaystack += *(haystack + sl_minus_1);
       
   314         if (hashHaystack == hashNeedle && *needle == *haystack
       
   315              && memcmp(needle, haystack, sl) == 0)
       
   316             return haystack - haystack0;
       
   317 
       
   318         REHASH(*haystack);
       
   319         ++haystack;
       
   320     }
       
   321     return -1;
       
   322 }
       
   323 
       
   324 QT_END_NAMESPACE