|
1 /* stringlib: fastsearch implementation */ |
|
2 |
|
3 #ifndef STRINGLIB_FASTSEARCH_H |
|
4 #define STRINGLIB_FASTSEARCH_H |
|
5 |
|
6 /* fast search/count implementation, based on a mix between boyer- |
|
7 moore and horspool, with a few more bells and whistles on the top. |
|
8 for some more background, see: http://effbot.org/stringlib */ |
|
9 |
|
10 /* note: fastsearch may access s[n], which isn't a problem when using |
|
11 Python's ordinary string types, but may cause problems if you're |
|
12 using this code in other contexts. also, the count mode returns -1 |
|
13 if there cannot possible be a match in the target string, and 0 if |
|
14 it has actually checked for matches, but didn't find any. callers |
|
15 beware! */ |
|
16 |
|
17 #define FAST_COUNT 0 |
|
18 #define FAST_SEARCH 1 |
|
19 |
|
20 Py_LOCAL_INLINE(Py_ssize_t) |
|
21 fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, |
|
22 const STRINGLIB_CHAR* p, Py_ssize_t m, |
|
23 int mode) |
|
24 { |
|
25 long mask; |
|
26 Py_ssize_t skip, count = 0; |
|
27 Py_ssize_t i, j, mlast, w; |
|
28 |
|
29 w = n - m; |
|
30 |
|
31 if (w < 0) |
|
32 return -1; |
|
33 |
|
34 /* look for special cases */ |
|
35 if (m <= 1) { |
|
36 if (m <= 0) |
|
37 return -1; |
|
38 /* use special case for 1-character strings */ |
|
39 if (mode == FAST_COUNT) { |
|
40 for (i = 0; i < n; i++) |
|
41 if (s[i] == p[0]) |
|
42 count++; |
|
43 return count; |
|
44 } else { |
|
45 for (i = 0; i < n; i++) |
|
46 if (s[i] == p[0]) |
|
47 return i; |
|
48 } |
|
49 return -1; |
|
50 } |
|
51 |
|
52 mlast = m - 1; |
|
53 |
|
54 /* create compressed boyer-moore delta 1 table */ |
|
55 skip = mlast - 1; |
|
56 /* process pattern[:-1] */ |
|
57 for (mask = i = 0; i < mlast; i++) { |
|
58 mask |= (1 << (p[i] & 0x1F)); |
|
59 if (p[i] == p[mlast]) |
|
60 skip = mlast - i - 1; |
|
61 } |
|
62 /* process pattern[-1] outside the loop */ |
|
63 mask |= (1 << (p[mlast] & 0x1F)); |
|
64 |
|
65 for (i = 0; i <= w; i++) { |
|
66 /* note: using mlast in the skip path slows things down on x86 */ |
|
67 if (s[i+m-1] == p[m-1]) { |
|
68 /* candidate match */ |
|
69 for (j = 0; j < mlast; j++) |
|
70 if (s[i+j] != p[j]) |
|
71 break; |
|
72 if (j == mlast) { |
|
73 /* got a match! */ |
|
74 if (mode != FAST_COUNT) |
|
75 return i; |
|
76 count++; |
|
77 i = i + mlast; |
|
78 continue; |
|
79 } |
|
80 /* miss: check if next character is part of pattern */ |
|
81 if (!(mask & (1 << (s[i+m] & 0x1F)))) |
|
82 i = i + m; |
|
83 else |
|
84 i = i + skip; |
|
85 } else { |
|
86 /* skip: check if next character is part of pattern */ |
|
87 if (!(mask & (1 << (s[i+m] & 0x1F)))) |
|
88 i = i + m; |
|
89 } |
|
90 } |
|
91 |
|
92 if (mode != FAST_COUNT) |
|
93 return -1; |
|
94 return count; |
|
95 } |
|
96 |
|
97 #endif |
|
98 |
|
99 /* |
|
100 Local variables: |
|
101 c-basic-offset: 4 |
|
102 indent-tabs-mode: nil |
|
103 End: |
|
104 */ |