|
1 // |
|
2 // © Portions Copyright (c) Symbian Software Ltd 2007. All rights reserved. |
|
3 // |
|
4 /* |
|
5 * pattern.c - pattern matching |
|
6 * |
|
7 * This file is part of zsh, the Z shell. |
|
8 * |
|
9 * Copyright (c) 1999 Peter Stephenson |
|
10 * All rights reserved. |
|
11 * |
|
12 * Permission is hereby granted, without written agreement and without |
|
13 * license or royalty fees, to use, copy, modify, and distribute this |
|
14 * software and to distribute modified versions of this software for any |
|
15 * purpose, provided that the above copyright notice and the following |
|
16 * two paragraphs appear in all copies of this software. |
|
17 * |
|
18 * In no event shall Peter Stephenson or the Zsh Development Group be liable |
|
19 * to any party for direct, indirect, special, incidental, or consequential |
|
20 * damages arising out of the use of this software and its documentation, |
|
21 * even if Peter Stephenson and the Zsh Development Group have been advised of |
|
22 * the possibility of such damage. |
|
23 * |
|
24 * Peter Stephenson and the Zsh Development Group specifically disclaim any |
|
25 * warranties, including, but not limited to, the implied warranties of |
|
26 * merchantability and fitness for a particular purpose. The software |
|
27 * provided hereunder is on an "as is" basis, and Peter Stephenson and the |
|
28 * Zsh Development Group have no obligation to provide maintenance, |
|
29 * support, updates, enhancements, or modifications. |
|
30 * |
|
31 * Pattern matching code derived from the regexp library by Henry |
|
32 * Spencer, which has the following copyright. |
|
33 * |
|
34 * Copyright (c) 1986 by University of Toronto. |
|
35 * Written by Henry Spencer. Not derived from licensed software. |
|
36 * |
|
37 * Permission is granted to anyone to use this software for any |
|
38 * purpose on any computer system, and to redistribute it freely, |
|
39 * subject to the following restrictions: |
|
40 * |
|
41 * 1. The author is not responsible for the consequences of use of |
|
42 * this software, no matter how awful, even if they arise |
|
43 * from defects in it. |
|
44 * |
|
45 * 2. The origin of this software must not be misrepresented, either |
|
46 * by explicit claim or by omission. |
|
47 * |
|
48 * 3. Altered versions must be plainly marked as such, and must not |
|
49 * be misrepresented as being the original software. |
|
50 * |
|
51 * Eagle-eyed readers will notice this is an altered version. Incredibly |
|
52 * sharp-eyed readers might even find bits that weren't altered. |
|
53 * |
|
54 * |
|
55 * And I experienced a sense that, like certain regular |
|
56 * expressions, seemed to match the day from beginning to end, so |
|
57 * that I did not need to identify the parenthesised subexpression |
|
58 * that told of dawn, nor the group of characters that indicated |
|
59 * the moment when my grandfather returned home with news of |
|
60 * Swann's departure for Paris; and the whole length of the month |
|
61 * of May, as if matched by a closure, fitted into the buffer of my |
|
62 * life with no sign of overflowing, turning the days, like a |
|
63 * procession of insects that could consist of this or that |
|
64 * species, into a random and unstructured repetition of different |
|
65 * sequences, anchored from the first day of the month to the last |
|
66 * in the same fashion as the weeks when I knew I would not see |
|
67 * Gilberte and would search in vain for any occurrences of the |
|
68 * string in the avenue of hawthorns by Tansonville, without my |
|
69 * having to delimit explicitly the start or finish of the pattern. |
|
70 * |
|
71 * M. Proust, "In Search of Lost Files", |
|
72 * bk I, "The Walk by Bourne's Place". |
|
73 */ |
|
74 |
|
75 #include "zsh.mdh" |
|
76 |
|
77 /* |
|
78 * The following union is used mostly for alignment purposes. |
|
79 * Normal nodes are longs, while certain nodes take a char * as an argument; |
|
80 * here we make sure that they both work out to the same length. |
|
81 * The compiled regexp we construct consists of upats stuck together; |
|
82 * anything else to be added (strings, numbers) is stuck after and |
|
83 * then aligned to a whole number of upat units. |
|
84 * |
|
85 * Note also that offsets are in terms of the sizes of these things. |
|
86 */ |
|
87 union upat { |
|
88 long l; |
|
89 unsigned char *p; |
|
90 }; |
|
91 |
|
92 typedef union upat *Upat; |
|
93 |
|
94 #include "pattern.pro" |
|
95 |
|
96 /* Number of active parenthesized expressions allowed in backreferencing */ |
|
97 #define NSUBEXP 9 |
|
98 |
|
99 /* definition number opnd? meaning */ |
|
100 #define P_END 0x00 /* no End of program. */ |
|
101 #define P_EXCSYNC 0x01 /* no Test if following exclude already failed */ |
|
102 #define P_EXCEND 0x02 /* no Test if exclude matched orig branch */ |
|
103 #define P_BACK 0x03 /* no Match "", "next" ptr points backward. */ |
|
104 #define P_EXACTLY 0x04 /* lstr Match this string. */ |
|
105 #define P_NOTHING 0x05 /* no Match empty string. */ |
|
106 #define P_ONEHASH 0x06 /* node Match this (simple) thing 0 or more times. */ |
|
107 #define P_TWOHASH 0x07 /* node Match this (simple) thing 1 or more times. */ |
|
108 #define P_GFLAGS 0x08 /* long Match nothing and set globbing flags */ |
|
109 #define P_ISSTART 0x09 /* no Match start of string. */ |
|
110 #define P_ISEND 0x0a /* no Match end of string. */ |
|
111 /* numbered so we can test bit 5 for a branch */ |
|
112 #define P_BRANCH 0x20 /* node Match this alternative, or the next... */ |
|
113 #define P_WBRANCH 0x21 /* uc* node P_BRANCH, but match at least 1 char */ |
|
114 /* excludes are also branches, but have bit 4 set, too */ |
|
115 #define P_EXCLUDE 0x30 /* uc* node Exclude this from previous branch */ |
|
116 #define P_EXCLUDP 0x31 /* uc* node Exclude, using full file path so far */ |
|
117 /* numbered so we can test bit 6 so as not to match initial '.' */ |
|
118 #define P_ANY 0x40 /* no Match any one character. */ |
|
119 #define P_ANYOF 0x41 /* str Match any character in this string. */ |
|
120 #define P_ANYBUT 0x42 /* str Match any character not in this string. */ |
|
121 #define P_STAR 0x43 /* no Match any set of characters. */ |
|
122 #define P_NUMRNG 0x44 /* zr, zr Match a numeric range. */ |
|
123 #define P_NUMFROM 0x45 /* zr Match a number >= X */ |
|
124 #define P_NUMTO 0x46 /* zr Match a number <= X */ |
|
125 #define P_NUMANY 0x47 /* no Match any set of decimal digits */ |
|
126 /* spaces left for P_OPEN+n,... for backreferences */ |
|
127 #define P_OPEN 0x80 /* no Mark this point in input as start of n. */ |
|
128 #define P_CLOSE 0x90 /* no Analogous to OPEN. */ |
|
129 /* |
|
130 * no no argument |
|
131 * zr the range type zrange_t: may be zlong or unsigned long |
|
132 * char a single char |
|
133 * uc* a pointer to unsigned char, used at run time and initialised |
|
134 * to NULL. |
|
135 * str null-terminated, metafied string |
|
136 * lstr length as long then string, not null-terminated, unmetafied. |
|
137 */ |
|
138 |
|
139 /* |
|
140 * Notes on usage: |
|
141 * P_WBRANCH: This works like a branch and is used in complex closures, |
|
142 * to ensure we don't succeed on a zero-length match of the pattern, |
|
143 * since that would cause an infinite loop. We do this by recording |
|
144 * the positions where we have already tried to match. See the |
|
145 * P_WBRANCH test in patmatch(). |
|
146 * |
|
147 * P_ANY, P_ANYOF: the operand is a null terminated |
|
148 * string. Normal characters match as expected. Characters |
|
149 * in the range Meta+PP_ALPHA..Meta+PP_UNKNWN do the appropriate |
|
150 * Posix range tests. This relies on imeta returning true for these |
|
151 * characters. We treat unknown POSIX ranges as never matching. |
|
152 * PP_RANGE means the next two (possibly metafied) characters form |
|
153 * the limits of a range to test; it's too much like hard work to |
|
154 * expand the range. |
|
155 * |
|
156 * P_EXCLUDE, P_EXCSYNC, PEXCEND: P_EXCLUDE appears in the pattern like |
|
157 * P_BRANCH, but applies to the immediately preceding branch. The code in |
|
158 * the corresponding branch is followed by a P_EXCSYNC, which simply |
|
159 * acts as a marker that a P_EXCLUDE comes next. The P_EXCLUDE |
|
160 * has a pointer to char embeded in it, which works |
|
161 * like P_WBRANCH: if we get to the P_EXCSYNC, and we already matched |
|
162 * up to the same position, fail. Thus we are forced to backtrack |
|
163 * on closures in the P_BRANCH if the first attempt was excluded. |
|
164 * Corresponding to P_EXCSYNC in the original branch, there is a |
|
165 * P_EXCEND in the exclusion. If we get to this point, and we did |
|
166 * *not* match in the original branch, the exclusion itself fails, |
|
167 * otherwise it succeeds since we know the tail already matches, |
|
168 * so P_EXCEND is the end of the exclusion test. |
|
169 * The whole sorry mess looks like this, where the upper lines |
|
170 * show the linkage of the branches, and the lower shows the linkage |
|
171 * of their pattern arguments. |
|
172 * |
|
173 * --------------------- ---------------------- |
|
174 * ^ v ^ v |
|
175 * ( <BRANCH>:apat-><EXCSYNC> <EXCLUDE>:excpat-><EXCEND> ) tail |
|
176 * ^ |
|
177 * | | |
|
178 * -------------------------------------- |
|
179 * |
|
180 * P_EXCLUDP: this behaves exactly like P_EXCLUDE, with the sole exception |
|
181 * that we prepend the path so far to the exclude pattern. This is |
|
182 * for top level file globs, e.g. ** / *.c~*foo.c |
|
183 * ^ I had to leave this space |
|
184 * P_NUM*: zl is a zlong if that is 64-bit, else an unsigned long. |
|
185 */ |
|
186 #define PP_ALPHA 1 |
|
187 #define PP_ALNUM 2 |
|
188 #define PP_ASCII 3 |
|
189 #define PP_BLANK 4 |
|
190 #define PP_CNTRL 5 |
|
191 #define PP_DIGIT 6 |
|
192 #define PP_GRAPH 7 |
|
193 #define PP_LOWER 8 |
|
194 #define PP_PRINT 9 |
|
195 #define PP_PUNCT 10 |
|
196 #define PP_SPACE 11 |
|
197 #define PP_UPPER 12 |
|
198 #define PP_XDIGIT 13 |
|
199 #define PP_UNKWN 14 |
|
200 #define PP_RANGE 15 |
|
201 |
|
202 #define P_OP(p) ((p)->l & 0xff) |
|
203 #define P_NEXT(p) ((p)->l >> 8) |
|
204 #define P_OPERAND(p) ((p) + 1) |
|
205 #define P_ISBRANCH(p) ((p)->l & 0x20) |
|
206 #define P_ISEXCLUDE(p) (((p)->l & 0x30) == 0x30) |
|
207 #define P_NOTDOT(p) ((p)->l & 0x40) |
|
208 |
|
209 /* Specific to lstr type, i.e. P_EXACTLY. */ |
|
210 #define P_LS_LEN(p) ((p)[1].l) /* can be used as lvalue */ |
|
211 #define P_LS_STR(p) ((char *)((p) + 2)) |
|
212 |
|
213 /* Flags needed when pattern is executed */ |
|
214 #define P_SIMPLE 0x01 /* Simple enough to be #/## operand. */ |
|
215 #define P_HSTART 0x02 /* Starts with # or ##'d pattern. */ |
|
216 #define P_PURESTR 0x04 /* Can be matched with a strcmp */ |
|
217 |
|
218 /* |
|
219 * Increment pointer which may be on a Meta (x is a pointer variable), |
|
220 * returning the incremented value (i.e. like pre-increment). |
|
221 * |
|
222 * In future the following will need to refer to metafied multibyte |
|
223 * characters. References to invidual characters are not turned |
|
224 * into a macro when the characters is metafied (c.f. CHARREF() |
|
225 * below then the character is not metafied) and will need tracking |
|
226 * down. |
|
227 */ |
|
228 #define METAINC(x) ((x) += (*(x) == Meta) ? 2 : 1) |
|
229 /* |
|
230 * Return unmetafied char from string (x is any char *) |
|
231 */ |
|
232 #define UNMETA(x) (*(x) == Meta ? (x)[1] ^ 32 : *(x)) |
|
233 |
|
234 #if defined(ZSH_64_BIT_TYPE) || defined(LONG_IS_64_BIT) |
|
235 typedef zlong zrange_t; |
|
236 #define ZRANGE_T_IS_SIGNED (1) |
|
237 #else |
|
238 typedef unsigned long zrange_t; |
|
239 #endif |
|
240 |
|
241 /* |
|
242 * Characters which terminate a pattern segment. We actually use |
|
243 * a pointer patendseg which skips the first character if we are not |
|
244 * parsing a file pattern. |
|
245 * Note that the size of this and the next array are hard-wired |
|
246 * via the definitions. |
|
247 */ |
|
248 |
|
249 static char endseg[] = { |
|
250 '/', /* file only */ |
|
251 '\0', Bar, Outpar, /* all patterns */ |
|
252 Tilde /* extended glob only */ |
|
253 }; |
|
254 |
|
255 #define PATENDSEGLEN_NORM 4 |
|
256 #define PATENDSEGLEN_EXT 5 |
|
257 |
|
258 /* Characters which terminate a simple string */ |
|
259 |
|
260 static char endstr[] = { |
|
261 '/', /* file only */ |
|
262 '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang, |
|
263 /* all patterns */ |
|
264 Tilde, Hat, Pound /* extended glob only */ |
|
265 }; |
|
266 |
|
267 #define PATENDSTRLEN_NORM 9 |
|
268 #define PATENDSTRLEN_EXT 12 |
|
269 |
|
270 |
|
271 /* Default size for pattern buffer */ |
|
272 #define P_DEF_ALLOC 256 |
|
273 |
|
274 /* Flags used in compilation */ |
|
275 static char *patstart, *patparse; /* input pointers */ |
|
276 static int patnpar; /* () count */ |
|
277 static char *patcode; /* point of code emission */ |
|
278 static long patsize; /* size of code */ |
|
279 static char *patout; /* start of code emission string */ |
|
280 static long patalloc; /* size allocated for same */ |
|
281 static char *patendseg; /* characters ending segment */ |
|
282 static int patendseglen; /* length of same */ |
|
283 static char *patendstr; /* characters ending plain string */ |
|
284 static int patendstrlen; /* length of sameo */ |
|
285 |
|
286 /* Flags used in both compilation and execution */ |
|
287 static int patflags; /* flags passed down to patcompile */ |
|
288 static int patglobflags; /* globbing flags & approx */ |
|
289 |
|
290 /* Add n more characters, ensuring there is enough space. */ |
|
291 |
|
292 enum { |
|
293 PA_NOALIGN = 1, |
|
294 PA_UNMETA = 2 |
|
295 }; |
|
296 |
|
297 /**/ |
|
298 static void |
|
299 patadd(char *add, int ch, long n, int paflags) |
|
300 { |
|
301 /* Make sure everything gets aligned unless we get PA_NOALIGN. */ |
|
302 long newpatsize = patsize + n; |
|
303 if (!(paflags & PA_NOALIGN)) |
|
304 newpatsize = (newpatsize + sizeof(union upat) - 1) & |
|
305 ~(sizeof(union upat) - 1); |
|
306 if (patalloc < newpatsize) { |
|
307 long newpatalloc = |
|
308 2*(newpatsize > patalloc ? newpatsize : patalloc); |
|
309 patout = (char *)zrealloc((char *)patout, newpatalloc); |
|
310 patcode = patout + patsize; |
|
311 patalloc = newpatalloc; |
|
312 } |
|
313 patsize = newpatsize; |
|
314 if (add) { |
|
315 if (paflags & PA_UNMETA) { |
|
316 /* |
|
317 * Unmetafy and untokenize the string as we go. |
|
318 * The Meta characters in add aren't counted in n. |
|
319 */ |
|
320 while (n--) { |
|
321 if (itok(*add)) |
|
322 *patcode++ = ztokens[*add++ - Pound]; |
|
323 else if (*add == Meta) { |
|
324 add++; |
|
325 *patcode++ = *add++ ^ 32; |
|
326 } else { |
|
327 *patcode++ = *add++; |
|
328 } |
|
329 } |
|
330 } else { |
|
331 while (n--) |
|
332 *patcode++ = *add++; |
|
333 } |
|
334 } else |
|
335 *patcode++ = ch; |
|
336 patcode = patout + patsize; |
|
337 } |
|
338 |
|
339 static long rn_offs; |
|
340 /* operates on pointers to union upat, returns a pointer */ |
|
341 #define PATNEXT(p) ((rn_offs = P_NEXT(p)) ? \ |
|
342 (P_OP(p) == P_BACK) ? \ |
|
343 ((p)-rn_offs) : ((p)+rn_offs) : NULL) |
|
344 |
|
345 /* Called before parsing a set of file matchs to initialize flags */ |
|
346 |
|
347 /**/ |
|
348 void |
|
349 patcompstart(void) |
|
350 { |
|
351 if (isset(CASEGLOB)) |
|
352 patglobflags = 0; |
|
353 else |
|
354 patglobflags = GF_IGNCASE; |
|
355 } |
|
356 |
|
357 /* |
|
358 * Top level pattern compilation subroutine |
|
359 * exp is a null-terminated, metafied string. |
|
360 * inflags is an or of some PAT_* flags. |
|
361 * endexp, if non-null, is set to a pointer to the end of the |
|
362 * part of exp which was compiled. This is used when |
|
363 * compiling patterns for directories which must be |
|
364 * matched recursively. |
|
365 */ |
|
366 |
|
367 /**/ |
|
368 mod_export Patprog |
|
369 patcompile(char *exp, int inflags, char **endexp) |
|
370 { |
|
371 int flags = 0; |
|
372 long len = 0; |
|
373 long startoff; |
|
374 Upat pscan; |
|
375 Upat ptemp; |
|
376 char *lng, *strp = NULL; |
|
377 Patprog p; |
|
378 |
|
379 startoff = sizeof(struct patprog); |
|
380 /* Ensure alignment of start of program string */ |
|
381 startoff = (startoff + sizeof(union upat) - 1) & ~(sizeof(union upat) - 1); |
|
382 |
|
383 /* Allocate reasonable sized chunk if none, reduce size if too big */ |
|
384 if (patalloc != P_DEF_ALLOC) |
|
385 patout = (char *)zrealloc(patout, patalloc = P_DEF_ALLOC); |
|
386 patcode = patout + startoff; |
|
387 patsize = patcode - patout; |
|
388 patstart = patparse = exp; |
|
389 /* |
|
390 * Note global patnpar numbers parentheses 1..9, while patnpar |
|
391 * in struct is actual count of parentheses. |
|
392 */ |
|
393 patnpar = 1; |
|
394 patflags = inflags & ~(PAT_PURES|PAT_HAS_EXCLUDP); |
|
395 |
|
396 patendseg = endseg; |
|
397 patendseglen = isset(EXTENDEDGLOB) ? PATENDSEGLEN_EXT : PATENDSEGLEN_NORM; |
|
398 patendstr = endstr; |
|
399 patendstrlen = isset(EXTENDEDGLOB) ? PATENDSTRLEN_EXT : PATENDSTRLEN_NORM; |
|
400 |
|
401 if (!(patflags & PAT_FILE)) { |
|
402 patendseg++; |
|
403 patendstr++; |
|
404 patendseglen--; |
|
405 patendstrlen--; |
|
406 remnulargs(patparse); |
|
407 patglobflags = 0; |
|
408 } |
|
409 /* |
|
410 * Have to be set now, since they get updated during compilation. |
|
411 */ |
|
412 ((Patprog)patout)->globflags = patglobflags; |
|
413 |
|
414 if (!(patflags & PAT_ANY)) { |
|
415 /* Look for a really pure string, with no tokens at all. */ |
|
416 if (!patglobflags |
|
417 #ifdef __CYGWIN__ |
|
418 /* |
|
419 * If the OS treats files case-insensitively and we |
|
420 * are looking at files, we don't need to use pattern |
|
421 * matching to find the file. |
|
422 */ |
|
423 || (!(patglobflags & ~GF_IGNCASE) && (patflags & PAT_FILE)) |
|
424 #endif |
|
425 ) |
|
426 { |
|
427 /* |
|
428 * Waah! I wish I understood this. |
|
429 * Empty metafied strings have an initial Nularg. |
|
430 * This never corresponds to a real character in |
|
431 * a glob pattern or string, so skip it. |
|
432 */ |
|
433 if (*exp == Nularg) |
|
434 exp++; |
|
435 for (strp = exp; *strp && |
|
436 (!(patflags & PAT_FILE) || *strp != '/') && !itok(*strp); |
|
437 strp++) |
|
438 ; |
|
439 } |
|
440 if (!strp || (*strp && *strp != '/')) { |
|
441 /* No, do normal compilation. */ |
|
442 strp = NULL; |
|
443 if (patcompswitch(0, &flags) == 0) |
|
444 return NULL; |
|
445 } else { |
|
446 /* |
|
447 * Yes, copy the string, and skip compilation altogether. |
|
448 * Null terminate for the benefit of globbing. |
|
449 * Leave metafied both for globbing and for our own |
|
450 * efficiency. |
|
451 */ |
|
452 patparse = strp; |
|
453 len = strp - exp; |
|
454 patadd(exp, 0, len + 1, 0); |
|
455 patout[startoff + len] = '\0'; |
|
456 patflags |= PAT_PURES; |
|
457 } |
|
458 } |
|
459 |
|
460 /* end of compilation: safe to use pointers */ |
|
461 p = (Patprog)patout; |
|
462 p->startoff = startoff; |
|
463 p->patstartch = '\0'; |
|
464 p->globend = patglobflags; |
|
465 p->flags = patflags; |
|
466 p->mustoff = 0; |
|
467 p->size = patsize; |
|
468 p->patmlen = len; |
|
469 p->patnpar = patnpar-1; |
|
470 |
|
471 if (!strp) { |
|
472 pscan = (Upat)(patout + startoff); |
|
473 |
|
474 ptemp=PATNEXT(pscan); |
|
475 if (!(patflags & PAT_ANY) && (P_OP(ptemp) == P_END)) { |
|
476 /* only one top level choice */ |
|
477 pscan = P_OPERAND(pscan); |
|
478 |
|
479 if (flags & P_PURESTR) { |
|
480 /* |
|
481 * The pattern can be matched with a simple strncmp/strcmp. |
|
482 * Careful in case we've overwritten the node for the next ptr. |
|
483 */ |
|
484 char *dst = patout + startoff; |
|
485 Upat next; |
|
486 p->flags |= PAT_PURES; |
|
487 for (; pscan; pscan = next) { |
|
488 next = PATNEXT(pscan); |
|
489 if (P_OP(pscan) == P_EXACTLY) { |
|
490 char *opnd = P_LS_STR(pscan), *mtest; |
|
491 long oplen = P_LS_LEN(pscan), ilen; |
|
492 int nmeta = 0; |
|
493 /* |
|
494 * Unfortunately we unmetafied the string |
|
495 * and we need to put any metacharacters |
|
496 * back now we know it's a pure string. |
|
497 * This shouldn't happen too often, it's |
|
498 * just that there are some cases such |
|
499 * as . and .. in files where we really |
|
500 * need a pure string even if there are |
|
501 * pattern characters flying around. |
|
502 */ |
|
503 for (mtest = opnd, ilen = oplen; ilen; |
|
504 mtest++, ilen--) |
|
505 if (imeta(*mtest)) |
|
506 nmeta++; |
|
507 if (nmeta) { |
|
508 char *oldpatout = patout; |
|
509 patadd(NULL, 0, nmeta, 0); |
|
510 /* |
|
511 * Yuk. |
|
512 */ |
|
513 p = (Patprog)patout; |
|
514 opnd = patout + (opnd - oldpatout); |
|
515 dst = patout + startoff; |
|
516 } |
|
517 |
|
518 while (oplen--) { |
|
519 if (imeta(*opnd)) { |
|
520 *dst++ = Meta; |
|
521 *dst++ = *opnd ^ 32; |
|
522 } else { |
|
523 *dst++ = *opnd++; |
|
524 } |
|
525 } |
|
526 } |
|
527 } |
|
528 p->size = dst - patout; |
|
529 /* patmlen is really strlen. We don't need a null. */ |
|
530 p->patmlen = p->size - startoff; |
|
531 } else { |
|
532 /* starting point info */ |
|
533 if (P_OP(pscan) == P_EXACTLY && !p->globflags && |
|
534 P_LS_LEN(pscan)) |
|
535 p->patstartch = *P_LS_STR(pscan); |
|
536 /* |
|
537 * Find the longest literal string in something expensive. |
|
538 * This is itself not all that cheap if we have |
|
539 * case-insensitive matching or approximation, so don't. |
|
540 */ |
|
541 if ((flags & P_HSTART) && !p->globflags) { |
|
542 lng = NULL; |
|
543 len = 0; |
|
544 for (; pscan; pscan = PATNEXT(pscan)) |
|
545 if (P_OP(pscan) == P_EXACTLY && |
|
546 P_LS_LEN(pscan) >= len) { |
|
547 lng = P_LS_STR(pscan); |
|
548 len = P_LS_LEN(pscan); |
|
549 } |
|
550 if (lng) { |
|
551 p->mustoff = lng - patout; |
|
552 p->patmlen = len; |
|
553 } |
|
554 } |
|
555 } |
|
556 } |
|
557 } |
|
558 |
|
559 /* |
|
560 * The pattern was compiled in a fixed buffer: unless told otherwise, |
|
561 * we stick the compiled pattern on the heap. This is necessary |
|
562 * for files where we will often be compiling multiple segments at once. |
|
563 * But if we get the ZDUP flag we always put it in zalloc()ed memory. |
|
564 */ |
|
565 if (patflags & PAT_ZDUP) { |
|
566 Patprog newp = (Patprog)zalloc(patsize); |
|
567 memcpy((char *)newp, (char *)p, patsize); |
|
568 p = newp; |
|
569 } else if (!(patflags & PAT_STATIC)) { |
|
570 Patprog newp = (Patprog)zhalloc(patsize); |
|
571 memcpy((char *)newp, (char *)p, patsize); |
|
572 p = newp; |
|
573 } |
|
574 |
|
575 if (endexp) |
|
576 *endexp = patparse; |
|
577 return p; |
|
578 } |
|
579 |
|
580 /* |
|
581 * Main body or parenthesized subexpression in pattern |
|
582 * Parenthesis (and any ksh_glob gubbins) will have been removed. |
|
583 */ |
|
584 |
|
585 /**/ |
|
586 static long |
|
587 patcompswitch(int paren, int *flagp) |
|
588 { |
|
589 long starter, br, ender, excsync = 0; |
|
590 int parno = 0; |
|
591 int flags, gfchanged = 0, savglobflags = patglobflags; |
|
592 Upat ptr; |
|
593 |
|
594 *flagp = 0; |
|
595 |
|
596 if (paren && (patglobflags & GF_BACKREF) && patnpar <= NSUBEXP) { |
|
597 /* |
|
598 * parenthesized: make an open node. |
|
599 * We can only refer to the first nine parentheses. |
|
600 * For any others, we just use P_OPEN on its own; there's |
|
601 * no gain in arbitrarily limiting the number of parentheses. |
|
602 */ |
|
603 parno = patnpar++; |
|
604 starter = patnode(P_OPEN + parno); |
|
605 } else |
|
606 starter = 0; |
|
607 |
|
608 br = patnode(P_BRANCH); |
|
609 if (!patcompbranch(&flags)) |
|
610 return 0; |
|
611 if (patglobflags != savglobflags) |
|
612 gfchanged++; |
|
613 if (starter) |
|
614 pattail(starter, br); |
|
615 else |
|
616 starter = br; |
|
617 |
|
618 *flagp |= flags & (P_HSTART|P_PURESTR); |
|
619 |
|
620 while (*patparse == Bar || |
|
621 (isset(EXTENDEDGLOB) && *patparse == Tilde && |
|
622 (patparse[1] == '/' || |
|
623 !memchr(patendseg, patparse[1], patendseglen)))) { |
|
624 int tilde = *patparse++ == Tilde; |
|
625 long gfnode = 0, newbr; |
|
626 |
|
627 *flagp &= ~P_PURESTR; |
|
628 |
|
629 if (tilde) { |
|
630 union upat up; |
|
631 /* excsync remembers the P_EXCSYNC node before a chain of |
|
632 * exclusions: all point back to this. only the |
|
633 * original (non-excluded) branch gets a trailing P_EXCSYNC. |
|
634 */ |
|
635 if (!excsync) { |
|
636 excsync = patnode(P_EXCSYNC); |
|
637 patoptail(br, excsync); |
|
638 } |
|
639 /* |
|
640 * By default, approximations are turned off in exclusions: |
|
641 * we need to do this here as otherwise the code compiling |
|
642 * the exclusion doesn't know if the flags have really |
|
643 * changed if the error count gets restored. |
|
644 */ |
|
645 patglobflags &= ~0xff; |
|
646 if (!(patflags & PAT_FILET) || paren) { |
|
647 br = patnode(P_EXCLUDE); |
|
648 } else { |
|
649 /* |
|
650 * At top level (paren == 0) in a file glob !(patflags |
|
651 * &PAT_FILET) do the exclusion prepending the file path |
|
652 * so far. We need to flag this to avoid unnecessarily |
|
653 * copying the path. |
|
654 */ |
|
655 br = patnode(P_EXCLUDP); |
|
656 patflags |= PAT_HAS_EXCLUDP; |
|
657 } |
|
658 up.p = NULL; |
|
659 patadd((char *)&up, 0, sizeof(up), 0); |
|
660 /* / is not treated as special if we are at top level */ |
|
661 if (!paren && *patendseg == '/') { |
|
662 tilde++; |
|
663 patendseg++; |
|
664 patendseglen--; |
|
665 patendstr++; |
|
666 patendstrlen--; |
|
667 } |
|
668 } else { |
|
669 excsync = 0; |
|
670 br = patnode(P_BRANCH); |
|
671 /* |
|
672 * The position of the following statements means globflags |
|
673 * set in the main branch carry over to the exclusion. |
|
674 */ |
|
675 if (!paren) { |
|
676 patglobflags = 0; |
|
677 if (((Patprog)patout)->globflags) { |
|
678 /* |
|
679 * If at top level, we need to reinitialize flags to zero, |
|
680 * since (#i)foo|bar only applies to foo and we stuck |
|
681 * the #i into the global flags. |
|
682 * We could have done it so that they only got set in the |
|
683 * first branch, but it's quite convenient having any |
|
684 * global flags set in the header and not buried in the |
|
685 * pattern. (Or maybe it isn't and we should |
|
686 * forget this bit and always stick in an explicit GFLAGS |
|
687 * statement instead of using the header.) |
|
688 * Also, this can't happen for file globs where there are |
|
689 * no top-level |'s. |
|
690 * |
|
691 * No gfchanged, as nothing to follow branch at top |
|
692 * level. |
|
693 */ |
|
694 union upat up; |
|
695 gfnode = patnode(P_GFLAGS); |
|
696 up.l = patglobflags; |
|
697 patadd((char *)&up, 0, sizeof(union upat), 0); |
|
698 } |
|
699 } else { |
|
700 patglobflags = savglobflags; |
|
701 } |
|
702 } |
|
703 newbr = patcompbranch(&flags); |
|
704 if (tilde == 2) { |
|
705 /* restore special treatment of / */ |
|
706 patendseg--; |
|
707 patendseglen++; |
|
708 patendstr--; |
|
709 patendstrlen++; |
|
710 } |
|
711 if (!newbr) |
|
712 return 0; |
|
713 if (gfnode) |
|
714 pattail(gfnode, newbr); |
|
715 if (!tilde && patglobflags != savglobflags) |
|
716 gfchanged++; |
|
717 pattail(starter, br); |
|
718 if (excsync) |
|
719 patoptail(br, patnode(P_EXCEND)); |
|
720 *flagp |= flags & P_HSTART; |
|
721 } |
|
722 |
|
723 /* |
|
724 * Make a closing node, hooking it to the end. |
|
725 * Note that we can't optimize P_NOTHING out here, since another |
|
726 * branch at that point would indicate the current choices continue, |
|
727 * which they don't. |
|
728 */ |
|
729 ender = patnode(paren ? parno ? P_CLOSE+parno : P_NOTHING : P_END); |
|
730 pattail(starter, ender); |
|
731 |
|
732 /* |
|
733 * Hook the tails of the branches to the closing node, |
|
734 * except for exclusions which terminate where they are. |
|
735 */ |
|
736 for (ptr = (Upat)patout + starter; ptr; ptr = PATNEXT(ptr)) |
|
737 if (!P_ISEXCLUDE(ptr)) |
|
738 patoptail(ptr-(Upat)patout, ender); |
|
739 |
|
740 /* check for proper termination */ |
|
741 if ((paren && *patparse++ != Outpar) || |
|
742 (!paren && *patparse && |
|
743 !((patflags & PAT_FILE) && *patparse == '/'))) |
|
744 return 0; |
|
745 |
|
746 if (paren && gfchanged) { |
|
747 /* |
|
748 * Restore old values of flags when leaving parentheses. |
|
749 * gfchanged detects a change in any branch (except exclusions |
|
750 * which are separate), since we need to emit this even if |
|
751 * a later branch happened to put the flags back. |
|
752 */ |
|
753 pattail(ender, patnode(P_GFLAGS)); |
|
754 patglobflags = savglobflags; |
|
755 patadd((char *)&savglobflags, 0, sizeof(long), 0); |
|
756 } |
|
757 |
|
758 return starter; |
|
759 } |
|
760 |
|
761 /* |
|
762 * Compile something ended by Bar, Outpar, Tilde, or end of string. |
|
763 * Note the BRANCH or EXCLUDE tag must already have been omitted: |
|
764 * this returns the position of the operand of that. |
|
765 */ |
|
766 |
|
767 /**/ |
|
768 static long |
|
769 patcompbranch(int *flagp) |
|
770 { |
|
771 long chain, latest = 0, starter; |
|
772 int flags = 0; |
|
773 |
|
774 *flagp = P_PURESTR; |
|
775 |
|
776 starter = chain = 0; |
|
777 while (!memchr(patendseg, *patparse, patendseglen) || |
|
778 (*patparse == Tilde && patparse[1] != '/' && |
|
779 memchr(patendseg, patparse[1], patendseglen))) { |
|
780 if (isset(EXTENDEDGLOB) && |
|
781 ((!isset(SHGLOB) && |
|
782 (*patparse == Inpar && patparse[1] == Pound)) || |
|
783 (isset(KSHGLOB) && *patparse == '@' && patparse[1] == Inpar && |
|
784 patparse[2] == Pound))) { |
|
785 /* Globbing flags. */ |
|
786 char *pp1 = patparse; |
|
787 int oldglobflags = patglobflags, ignore; |
|
788 long assert; |
|
789 patparse += (*patparse == '@') ? 3 : 2; |
|
790 if (!patgetglobflags(&patparse, &assert, &ignore)) |
|
791 return 0; |
|
792 if (!ignore) { |
|
793 if (assert) { |
|
794 /* |
|
795 * Start/end assertion looking like flags, but |
|
796 * actually handled as a normal node |
|
797 */ |
|
798 latest = patnode(assert); |
|
799 flags = 0; |
|
800 } else { |
|
801 if (pp1 == patstart) { |
|
802 /* Right at start of pattern, the simplest case. |
|
803 * Put them into the flags and don't emit anything. |
|
804 */ |
|
805 ((Patprog)patout)->globflags = patglobflags; |
|
806 continue; |
|
807 } else if (!*patparse) { |
|
808 /* Right at the end, so just leave the flags for |
|
809 * the next Patprog in the chain to pick up. |
|
810 */ |
|
811 break; |
|
812 } |
|
813 /* |
|
814 * Otherwise, we have to stick them in as a pattern |
|
815 * matching nothing. |
|
816 */ |
|
817 if (oldglobflags != patglobflags) { |
|
818 /* Flags changed */ |
|
819 union upat up; |
|
820 latest = patnode(P_GFLAGS); |
|
821 up.l = patglobflags; |
|
822 patadd((char *)&up, 0, sizeof(union upat), 0); |
|
823 } else { |
|
824 /* No effect. */ |
|
825 continue; |
|
826 } |
|
827 } |
|
828 } else if (!*patparse) |
|
829 break; |
|
830 else |
|
831 continue; |
|
832 } else if (isset(EXTENDEDGLOB) && *patparse == Hat) { |
|
833 /* |
|
834 * ^pat: anything but pat. For proper backtracking, |
|
835 * etc., we turn this into (*~pat), except without the |
|
836 * parentheses. |
|
837 */ |
|
838 patparse++; |
|
839 latest = patcompnot(0, &flags); |
|
840 } else |
|
841 latest = patcomppiece(&flags); |
|
842 if (!latest) |
|
843 return 0; |
|
844 if (!starter) |
|
845 starter = latest; |
|
846 if (!(flags & P_PURESTR)) |
|
847 *flagp &= ~P_PURESTR; |
|
848 if (!chain) |
|
849 *flagp |= flags & P_HSTART; |
|
850 else |
|
851 pattail(chain, latest); |
|
852 chain = latest; |
|
853 } |
|
854 /* check if there was nothing in the loop, i.e. () */ |
|
855 if (!chain) |
|
856 starter = patnode(P_NOTHING); |
|
857 |
|
858 return starter; |
|
859 } |
|
860 |
|
861 /* get glob flags, return 1 for success, 0 for failure */ |
|
862 |
|
863 /**/ |
|
864 int |
|
865 patgetglobflags(char **strp, long *assertp, int *ignore) |
|
866 { |
|
867 char *nptr, *ptr = *strp; |
|
868 zlong ret; |
|
869 |
|
870 *assertp = 0; |
|
871 *ignore = 1; |
|
872 /* (#X): assumes we are still positioned on the first X */ |
|
873 for (; *ptr && *ptr != Outpar; ptr++) { |
|
874 if (*ptr == 'q') { |
|
875 /* Glob qualifiers, ignored in pattern code */ |
|
876 while (*ptr && *ptr != Outpar) |
|
877 ptr++; |
|
878 break; |
|
879 } else { |
|
880 *ignore = 0; |
|
881 switch (*ptr) { |
|
882 case 'a': |
|
883 /* Approximate matching, max no. of errors follows */ |
|
884 ret = zstrtol(++ptr, &nptr, 10); |
|
885 /* |
|
886 * We can't have more than 254, because we need 255 to |
|
887 * mark 254 errors in wbranch and exclude sync strings |
|
888 * (hypothetically --- hope no-one tries it). |
|
889 */ |
|
890 if (ret < 0 || ret > 254 || ptr == nptr) |
|
891 return 0; |
|
892 patglobflags = (patglobflags & ~0xff) | (ret & 0xff); |
|
893 ptr = nptr-1; |
|
894 break; |
|
895 |
|
896 case 'l': |
|
897 /* Lowercase in pattern matches lower or upper in target */ |
|
898 patglobflags = (patglobflags & ~GF_IGNCASE) | GF_LCMATCHUC; |
|
899 break; |
|
900 |
|
901 case 'i': |
|
902 /* Fully case insensitive */ |
|
903 patglobflags = (patglobflags & ~GF_LCMATCHUC) | GF_IGNCASE; |
|
904 break; |
|
905 |
|
906 case 'I': |
|
907 /* Restore case sensitivity */ |
|
908 patglobflags &= ~(GF_LCMATCHUC|GF_IGNCASE); |
|
909 break; |
|
910 |
|
911 case 'b': |
|
912 /* Make backreferences */ |
|
913 patglobflags |= GF_BACKREF; |
|
914 break; |
|
915 |
|
916 case 'B': |
|
917 /* Don't make backreferences */ |
|
918 patglobflags &= ~GF_BACKREF; |
|
919 break; |
|
920 |
|
921 case 'm': |
|
922 /* Make references to complete match */ |
|
923 patglobflags |= GF_MATCHREF; |
|
924 break; |
|
925 |
|
926 case 'M': |
|
927 /* Don't */ |
|
928 patglobflags &= ~GF_MATCHREF; |
|
929 break; |
|
930 |
|
931 case 's': |
|
932 *assertp = P_ISSTART; |
|
933 break; |
|
934 |
|
935 case 'e': |
|
936 *assertp = P_ISEND; |
|
937 break; |
|
938 |
|
939 default: |
|
940 return 0; |
|
941 } |
|
942 } |
|
943 } |
|
944 if (*ptr != Outpar) |
|
945 return 0; |
|
946 /* Start/end assertions must appear on their own. */ |
|
947 if (*assertp && (*strp)[1] != Outpar) |
|
948 return 0; |
|
949 *strp = ptr + 1; |
|
950 return 1; |
|
951 } |
|
952 |
|
953 /* |
|
954 * compile a chunk such as a literal string or a [...] followed |
|
955 * by a possible hash operator |
|
956 */ |
|
957 |
|
958 /**/ |
|
959 static long |
|
960 patcomppiece(int *flagp) |
|
961 { |
|
962 long starter = 0, next, pound, op; |
|
963 int flags, flags2, kshchar, len, ch, patch, nmeta; |
|
964 union upat up; |
|
965 char *nptr, *str0, *ptr, cbuf[2]; |
|
966 zrange_t from, to; |
|
967 |
|
968 flags = 0; |
|
969 str0 = patparse; |
|
970 for (;;) { |
|
971 /* |
|
972 * Check if we have a string. First, we need to make sure |
|
973 * the string doesn't introduce a ksh-like parenthesized expression. |
|
974 */ |
|
975 kshchar = '\0'; |
|
976 if (isset(KSHGLOB) && *patparse && patparse[1] == Inpar) { |
|
977 if (strchr("?*+!@", *patparse)) |
|
978 kshchar = STOUC(*patparse); |
|
979 else if (*patparse == Star || *patparse == Quest) |
|
980 kshchar = STOUC(ztokens[*patparse - Pound]); |
|
981 } |
|
982 |
|
983 /* |
|
984 * End of string (or no string at all) if ksh-type parentheses, |
|
985 * or special character, unless that character is a tilde and |
|
986 * the character following is an end-of-segment character. Thus |
|
987 * tildes are not special if there is nothing following to |
|
988 * be excluded. |
|
989 */ |
|
990 if (kshchar || (memchr(patendstr, *patparse, patendstrlen) && |
|
991 (*patparse != Tilde || |
|
992 patparse[1] == '/' || |
|
993 !memchr(patendseg, patparse[1], patendseglen)))) |
|
994 break; |
|
995 |
|
996 METAINC(patparse); |
|
997 } |
|
998 |
|
999 if (patparse > str0) { |
|
1000 long slen = patparse - str0; |
|
1001 int morelen; |
|
1002 |
|
1003 /* Ordinary string: cancel kshchar lookahead */ |
|
1004 kshchar = '\0'; |
|
1005 /* |
|
1006 * Assume it matches a simple string until we find otherwise. |
|
1007 */ |
|
1008 flags |= P_PURESTR; |
|
1009 DPUTS(patparse == str0, "BUG: matched nothing in patcomppiece."); |
|
1010 /* more than one character matched? */ |
|
1011 morelen = str0 + (*str0 == Meta ? 2 : 1) < patparse; |
|
1012 /* |
|
1013 * If we have more than one character, a following hash only |
|
1014 * applies to the last, so decrement. |
|
1015 */ |
|
1016 if (isset(EXTENDEDGLOB) && *patparse == Pound && morelen) |
|
1017 patparse -= (patparse > str0 + 1 && patparse[-2] == Meta) ? 2 : 1; |
|
1018 /* |
|
1019 * If len is 1, we can't have an active # following, so doesn't |
|
1020 * matter that we don't make X in `XX#' simple. |
|
1021 */ |
|
1022 if (!morelen) |
|
1023 flags |= P_SIMPLE; |
|
1024 starter = patnode(P_EXACTLY); |
|
1025 |
|
1026 /* Get length of string without metafication. */ |
|
1027 nmeta = 0; |
|
1028 /* inherited from domatch, but why, exactly? */ |
|
1029 if (*str0 == Nularg) |
|
1030 str0++; |
|
1031 for (ptr = str0; ptr < patparse; ptr++) { |
|
1032 if (*ptr == Meta) { |
|
1033 nmeta++; |
|
1034 ptr++; |
|
1035 } |
|
1036 } |
|
1037 slen = (patparse - str0) - nmeta; |
|
1038 /* First add length, which is a long */ |
|
1039 patadd((char *)&slen, 0, sizeof(long), 0); |
|
1040 /* |
|
1041 * Then the string, not null terminated. |
|
1042 * Unmetafy and untokenize; pass the final length, |
|
1043 * which is what we need to allocate, i.e. not including |
|
1044 * a count for each Meta in the string. |
|
1045 */ |
|
1046 patadd(str0, 0, slen, PA_UNMETA); |
|
1047 nptr = P_LS_STR((Upat)patout + starter); |
|
1048 /* |
|
1049 * It's much simpler to turn off pure string mode for |
|
1050 * any case-insensitive or approximate matching; usually, |
|
1051 * that is correct, or they wouldn't have been turned on. |
|
1052 * However, we need to make sure we match a "." or ".." |
|
1053 * in a file name as a pure string. There's a minor bug |
|
1054 * that this will also apply to something like |
|
1055 * ..(#a1).. (i.e. the (#a1) has no effect), but if you're |
|
1056 * going to write funny patterns, you get no sympathy from me. |
|
1057 */ |
|
1058 if (patglobflags) { |
|
1059 if (!(patflags & PAT_FILE)) |
|
1060 flags &= ~P_PURESTR; |
|
1061 else if (!(nptr[0] == '.' && |
|
1062 (slen == 1 || (nptr[1] == '.' && slen == 2)))) |
|
1063 flags &= ~P_PURESTR; |
|
1064 } |
|
1065 } else { |
|
1066 if (kshchar) |
|
1067 patparse++; |
|
1068 |
|
1069 patch = *patparse; |
|
1070 METAINC(patparse); |
|
1071 switch(patch) { |
|
1072 case Quest: |
|
1073 flags |= P_SIMPLE; |
|
1074 starter = patnode(P_ANY); |
|
1075 break; |
|
1076 case Star: |
|
1077 /* kshchar is used as a sign that we can't have #'s. */ |
|
1078 kshchar = -1; |
|
1079 starter = patnode(P_STAR); |
|
1080 break; |
|
1081 case Inbrack: |
|
1082 flags |= P_SIMPLE; |
|
1083 if (*patparse == Hat || *patparse == '^' || *patparse == '!') { |
|
1084 patparse++; |
|
1085 starter = patnode(P_ANYBUT); |
|
1086 } else |
|
1087 starter = patnode(P_ANYOF); |
|
1088 if (*patparse == Outbrack) { |
|
1089 patparse++; |
|
1090 patadd(NULL, ']', 1, PA_NOALIGN); |
|
1091 } |
|
1092 while (*patparse && *patparse != Outbrack) { |
|
1093 /* Meta is not a token */ |
|
1094 if (*patparse == Inbrack && patparse[1] == ':' && |
|
1095 (nptr = strchr(patparse+2, ':')) && |
|
1096 nptr[1] == Outbrack) { |
|
1097 /* Posix range. */ |
|
1098 patparse += 2; |
|
1099 len = nptr - patparse; |
|
1100 if (!strncmp(patparse, "alpha", len)) |
|
1101 ch = PP_ALPHA; |
|
1102 else if (!strncmp(patparse, "alnum", len)) |
|
1103 ch = PP_ALNUM; |
|
1104 else if (!strncmp(patparse, "ascii", len)) |
|
1105 ch = PP_ASCII; |
|
1106 else if (!strncmp(patparse, "blank", len)) |
|
1107 ch = PP_BLANK; |
|
1108 else if (!strncmp(patparse, "cntrl", len)) |
|
1109 ch = PP_CNTRL; |
|
1110 else if (!strncmp(patparse, "digit", len)) |
|
1111 ch = PP_DIGIT; |
|
1112 else if (!strncmp(patparse, "graph", len)) |
|
1113 ch = PP_GRAPH; |
|
1114 else if (!strncmp(patparse, "lower", len)) |
|
1115 ch = PP_LOWER; |
|
1116 else if (!strncmp(patparse, "print", len)) |
|
1117 ch = PP_PRINT; |
|
1118 else if (!strncmp(patparse, "punct", len)) |
|
1119 ch = PP_PUNCT; |
|
1120 else if (!strncmp(patparse, "space", len)) |
|
1121 ch = PP_SPACE; |
|
1122 else if (!strncmp(patparse, "upper", len)) |
|
1123 ch = PP_UPPER; |
|
1124 else if (!strncmp(patparse, "xdigit", len)) |
|
1125 ch = PP_XDIGIT; |
|
1126 else |
|
1127 ch = PP_UNKWN; |
|
1128 patparse = nptr + 2; |
|
1129 if (ch != PP_UNKWN) |
|
1130 patadd(NULL, STOUC(Meta+ch), 1, PA_NOALIGN); |
|
1131 continue; |
|
1132 } |
|
1133 if (itok(*patparse)) { |
|
1134 cbuf[0] = ztokens[*patparse - Pound]; |
|
1135 } else if (*patparse == Meta) { |
|
1136 cbuf[0] = Meta; |
|
1137 cbuf[1] = *++patparse; |
|
1138 } else |
|
1139 cbuf[0] = *patparse; |
|
1140 patparse++; |
|
1141 |
|
1142 if (*patparse == '-' && patparse[1] != Outbrack) { |
|
1143 patadd(NULL, STOUC(Meta+PP_RANGE), 1, PA_NOALIGN); |
|
1144 patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, PA_NOALIGN); |
|
1145 if (itok(*++patparse)) { |
|
1146 patadd(0, STOUC(ztokens[*patparse - Pound]), 1, |
|
1147 PA_NOALIGN); |
|
1148 } else |
|
1149 patadd(patparse, 0, (*patparse == Meta) ? 2 : 1, |
|
1150 PA_NOALIGN); |
|
1151 METAINC(patparse); |
|
1152 } else |
|
1153 patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, PA_NOALIGN); |
|
1154 } |
|
1155 if (*patparse != Outbrack) |
|
1156 return 0; |
|
1157 patparse++; |
|
1158 /* terminate null string and fix alignment */ |
|
1159 patadd(NULL, 0, 1, 0); |
|
1160 break; |
|
1161 case Inpar: |
|
1162 /* is this how to treat parentheses in SHGLOB? */ |
|
1163 if (isset(SHGLOB) && !kshchar) |
|
1164 return 0; |
|
1165 if (kshchar == '!') { |
|
1166 /* This is nasty, we should really either handle all |
|
1167 * kshglobbing below or here. But most of the |
|
1168 * others look like non-ksh patterns, while this one |
|
1169 * doesn't, so we handle it here and leave the rest. |
|
1170 * We treat it like an extendedglob ^, except that |
|
1171 * it goes into parentheses. |
|
1172 * |
|
1173 * If we did do kshglob here, we could support |
|
1174 * the old behaviour that things like !(foo)## |
|
1175 * work, but it makes the code more complicated at |
|
1176 * the expense of allowing the user to do things |
|
1177 * they shouldn't. |
|
1178 */ |
|
1179 if (!(starter = patcompnot(1, &flags2))) |
|
1180 return 0; |
|
1181 } else if (!(starter = patcompswitch(1, &flags2))) |
|
1182 return 0; |
|
1183 flags |= flags2 & P_HSTART; |
|
1184 break; |
|
1185 case Inang: |
|
1186 /* Numeric glob */ |
|
1187 len = 0; /* beginning present 1, end present 2 */ |
|
1188 if (idigit(*patparse)) { |
|
1189 from = (zrange_t) zstrtol((char *)patparse, |
|
1190 (char **)&nptr, 10); |
|
1191 patparse = nptr; |
|
1192 len |= 1; |
|
1193 } |
|
1194 DPUTS(*patparse != '-', "BUG: - missing from numeric glob"); |
|
1195 patparse++; |
|
1196 if (idigit(*patparse)) { |
|
1197 to = (zrange_t) zstrtol((char *)patparse, |
|
1198 (char **)&nptr, 10); |
|
1199 patparse = nptr; |
|
1200 len |= 2; |
|
1201 } |
|
1202 if (*patparse != Outang) |
|
1203 return 0; |
|
1204 patparse++; |
|
1205 switch(len) { |
|
1206 case 3: |
|
1207 starter = patnode(P_NUMRNG); |
|
1208 patadd((char *)&from, 0, sizeof(from), 0); |
|
1209 patadd((char *)&to, 0, sizeof(to), 0); |
|
1210 break; |
|
1211 case 2: |
|
1212 starter = patnode(P_NUMTO); |
|
1213 patadd((char *)&to, 0, sizeof(to), 0); |
|
1214 break; |
|
1215 case 1: |
|
1216 starter = patnode(P_NUMFROM); |
|
1217 patadd((char *)&from, 0, sizeof(from), 0); |
|
1218 break; |
|
1219 case 0: |
|
1220 starter = patnode(P_NUMANY); |
|
1221 break; |
|
1222 } |
|
1223 /* This can't be simple, because it isn't. |
|
1224 * Mention in manual that matching digits with [...] |
|
1225 * is more efficient. |
|
1226 */ |
|
1227 break; |
|
1228 case Pound: |
|
1229 DPUTS(!isset(EXTENDEDGLOB), "BUG: # not treated as string"); |
|
1230 /* |
|
1231 * A hash here is an error; it should follow something |
|
1232 * repeatable. |
|
1233 */ |
|
1234 return 0; |
|
1235 break; |
|
1236 #ifdef DEBUG |
|
1237 default: |
|
1238 dputs("BUG: character not handled in patcomppiece"); |
|
1239 return 0; |
|
1240 break; |
|
1241 #endif |
|
1242 } |
|
1243 } |
|
1244 |
|
1245 if (!(pound = (*patparse == Pound && isset(EXTENDEDGLOB))) && |
|
1246 (kshchar <= 0 || kshchar == '@' || kshchar == '!')) { |
|
1247 *flagp = flags; |
|
1248 return starter; |
|
1249 } |
|
1250 |
|
1251 /* too much at once doesn't currently work */ |
|
1252 if (kshchar && pound) |
|
1253 return 0; |
|
1254 |
|
1255 if (kshchar == '*') { |
|
1256 op = P_ONEHASH; |
|
1257 *flagp = P_HSTART; |
|
1258 } else if (kshchar == '+') { |
|
1259 op = P_TWOHASH; |
|
1260 *flagp = P_HSTART; |
|
1261 } else if (kshchar == '?') { |
|
1262 op = 0; |
|
1263 *flagp = 0; |
|
1264 } else if (*++patparse == Pound) { |
|
1265 op = P_TWOHASH; |
|
1266 patparse++; |
|
1267 *flagp = P_HSTART; |
|
1268 } else { |
|
1269 op = P_ONEHASH; |
|
1270 *flagp = P_HSTART; |
|
1271 } |
|
1272 |
|
1273 /* |
|
1274 * Note optimizations with pointers into P_NOTHING branches: some |
|
1275 * should logically point to next node after current piece. |
|
1276 * |
|
1277 * Backtracking is also encoded in a slightly obscure way: the |
|
1278 * code emitted ensures we test the non-empty branch of complex |
|
1279 * patterns before the empty branch on each repetition. Hence |
|
1280 * each time we fail on a non-empty branch, we try the empty branch, |
|
1281 * which is equivalent to backtracking. |
|
1282 */ |
|
1283 if ((flags & P_SIMPLE) && (op == P_ONEHASH || op == P_TWOHASH) && |
|
1284 P_OP((Upat)patout+starter) == P_ANY) { |
|
1285 /* Optimize ?# to *. Silly thing to do, since who would use |
|
1286 * use ?# ? But it makes the later code shorter. |
|
1287 */ |
|
1288 Upat uptr = (Upat)patout + starter; |
|
1289 if (op == P_TWOHASH) { |
|
1290 /* ?## becomes ?* */ |
|
1291 uptr->l = (uptr->l & ~0xff) | P_ANY; |
|
1292 pattail(starter, patnode(P_STAR)); |
|
1293 } else { |
|
1294 uptr->l = (uptr->l & ~0xff) | P_STAR; |
|
1295 } |
|
1296 } else if ((flags & P_SIMPLE) && op && !(patglobflags & 0xff)) { |
|
1297 /* Don't simplify if we need to look for approximations. */ |
|
1298 patinsert(op, starter, NULL, 0); |
|
1299 } else if (op == P_ONEHASH) { |
|
1300 /* Emit x# as (x&|), where & means "self". */ |
|
1301 up.p = NULL; |
|
1302 patinsert(P_WBRANCH, starter, (char *)&up, sizeof(up)); |
|
1303 /* Either x */ |
|
1304 patoptail(starter, patnode(P_BACK)); /* and loop */ |
|
1305 patoptail(starter, starter); /* back */ |
|
1306 pattail(starter, patnode(P_BRANCH)); /* or */ |
|
1307 pattail(starter, patnode(P_NOTHING)); /* null. */ |
|
1308 } else if (op == P_TWOHASH) { |
|
1309 /* Emit x## as x(&|) where & means "self". */ |
|
1310 next = patnode(P_WBRANCH); /* Either */ |
|
1311 up.p = NULL; |
|
1312 patadd((char *)&up, 0, sizeof(up), 0); |
|
1313 pattail(starter, next); |
|
1314 pattail(patnode(P_BACK), starter); /* loop back */ |
|
1315 pattail(next, patnode(P_BRANCH)); /* or */ |
|
1316 pattail(starter, patnode(P_NOTHING)); /* null. */ |
|
1317 } else if (kshchar == '?') { |
|
1318 /* Emit ?(x) as (x|) */ |
|
1319 patinsert(P_BRANCH, starter, NULL, 0); /* Either x */ |
|
1320 pattail(starter, patnode(P_BRANCH)); /* or */ |
|
1321 next = patnode(P_NOTHING); /* null */ |
|
1322 pattail(starter, next); |
|
1323 patoptail(starter, next); |
|
1324 } |
|
1325 if (*patparse == Pound) |
|
1326 return 0; |
|
1327 |
|
1328 return starter; |
|
1329 } |
|
1330 |
|
1331 /* |
|
1332 * Turn a ^foo (paren = 0) or !(foo) (paren = 1) into *~foo with |
|
1333 * parentheses if necessary. As you see, that's really quite easy. |
|
1334 */ |
|
1335 |
|
1336 /**/ |
|
1337 static long |
|
1338 patcompnot(int paren, int *flagsp) |
|
1339 { |
|
1340 union upat up; |
|
1341 long excsync, br, excl, n, starter; |
|
1342 int dummy; |
|
1343 |
|
1344 /* Here, we're matching a star at the start. */ |
|
1345 *flagsp = P_HSTART; |
|
1346 |
|
1347 starter = patnode(P_BRANCH); |
|
1348 br = patnode(P_STAR); |
|
1349 excsync = patnode(P_EXCSYNC); |
|
1350 pattail(br, excsync); |
|
1351 pattail(starter, excl = patnode(P_EXCLUDE)); |
|
1352 up.p = NULL; |
|
1353 patadd((char *)&up, 0, sizeof(up), 0); |
|
1354 if (!(br = (paren ? patcompswitch(1, &dummy) : patcompbranch(&dummy)))) |
|
1355 return 0; |
|
1356 pattail(br, patnode(P_EXCEND)); |
|
1357 n = patnode(P_NOTHING); /* just so much easier */ |
|
1358 pattail(excsync, n); |
|
1359 pattail(excl, n); |
|
1360 |
|
1361 return starter; |
|
1362 } |
|
1363 |
|
1364 /* Emit a node */ |
|
1365 |
|
1366 /**/ |
|
1367 static long |
|
1368 patnode(long op) |
|
1369 { |
|
1370 long starter = (Upat)patcode - (Upat)patout; |
|
1371 union upat up; |
|
1372 |
|
1373 up.l = op; |
|
1374 patadd((char *)&up, 0, sizeof(union upat), 0); |
|
1375 return starter; |
|
1376 } |
|
1377 |
|
1378 /* |
|
1379 * insert an operator in front of an already emitted operand: |
|
1380 * we relocate the operand. there had better be nothing else after. |
|
1381 */ |
|
1382 |
|
1383 /**/ |
|
1384 static void |
|
1385 patinsert(long op, int opnd, char *xtra, int sz) |
|
1386 { |
|
1387 char *src, *dst, *opdst; |
|
1388 union upat buf, *lptr; |
|
1389 |
|
1390 buf.l = 0; |
|
1391 patadd((char *)&buf, 0, sizeof(buf), 0); |
|
1392 if (sz) |
|
1393 patadd(xtra, 0, sz, 0); |
|
1394 src = patcode - sizeof(union upat) - sz; |
|
1395 dst = patcode; |
|
1396 opdst = patout + opnd * sizeof(union upat); |
|
1397 while (src > opdst) |
|
1398 *--dst = *--src; |
|
1399 |
|
1400 /* A cast can't be an lvalue */ |
|
1401 lptr = (Upat)opdst; |
|
1402 lptr->l = op; |
|
1403 opdst += sizeof(union upat); |
|
1404 while (sz--) |
|
1405 *opdst++ = *xtra++; |
|
1406 } |
|
1407 |
|
1408 /* set the 'next' pointer at the end of a node chain */ |
|
1409 |
|
1410 /**/ |
|
1411 static void |
|
1412 pattail(long p, long val) |
|
1413 { |
|
1414 Upat scan, temp; |
|
1415 long offset; |
|
1416 |
|
1417 scan = (Upat)patout + p; |
|
1418 for (;;) { |
|
1419 if (!(temp = PATNEXT(scan))) |
|
1420 break; |
|
1421 scan = temp; |
|
1422 } |
|
1423 |
|
1424 offset = (P_OP(scan) == P_BACK) |
|
1425 ? (scan - (Upat)patout) - val : val - (scan - (Upat)patout); |
|
1426 |
|
1427 scan->l |= offset << 8; |
|
1428 } |
|
1429 |
|
1430 /* do pattail, but on operand of first argument; nop if operandless */ |
|
1431 |
|
1432 /**/ |
|
1433 static void patoptail(long p, long val) |
|
1434 { |
|
1435 Upat ptr = (Upat)patout + p; |
|
1436 int op = P_OP(ptr); |
|
1437 if (!p || !P_ISBRANCH(ptr)) |
|
1438 return; |
|
1439 if (op == P_BRANCH) |
|
1440 pattail(P_OPERAND(p), val); |
|
1441 else |
|
1442 pattail(P_OPERAND(p) + 1, val); |
|
1443 } |
|
1444 |
|
1445 |
|
1446 /* |
|
1447 * Run a pattern. |
|
1448 */ |
|
1449 static char *patinstart; /* Start of input string */ |
|
1450 static char *patinend; /* End of input string */ |
|
1451 static char *patinput; /* String input pointer */ |
|
1452 static char *patinpath; /* Full path for use with ~ exclusions */ |
|
1453 static int patinlen; /* Length of last successful match. |
|
1454 * Includes count of Meta characters. |
|
1455 */ |
|
1456 |
|
1457 static char *patbeginp[NSUBEXP]; /* Pointer to backref beginnings */ |
|
1458 static char *patendp[NSUBEXP]; /* Pointer to backref ends */ |
|
1459 static int parsfound; /* parentheses (with backrefs) found */ |
|
1460 |
|
1461 static int globdots; /* Glob initial dots? */ |
|
1462 |
|
1463 /* |
|
1464 * Macros which are currently trivial but are likely to be less |
|
1465 * so when we handle multibyte characters. They operate on |
|
1466 * umetafied strings. |
|
1467 */ |
|
1468 |
|
1469 /* Get a character from the start point in a string */ |
|
1470 #define CHARREF(x) (STOUC(*x)) |
|
1471 /* Get a pointer to the next character */ |
|
1472 #define CHARNEXT(x) (x+1) |
|
1473 /* Increment a pointer past the current character. */ |
|
1474 #define CHARINC(x) (x++) |
|
1475 /* Counter the number of characters between two pointers, largest first */ |
|
1476 #define CHARSUB(x,y) (x-y) |
|
1477 |
|
1478 /* |
|
1479 * The following need to be accessed in the globbing scanner for |
|
1480 * a multi-component file path. See horror story in glob.c. |
|
1481 */ |
|
1482 /**/ |
|
1483 int errsfound; /* Total error count so far */ |
|
1484 |
|
1485 /**/ |
|
1486 int forceerrs; /* Forced maximum error count */ |
|
1487 |
|
1488 /**/ |
|
1489 void |
|
1490 pattrystart(void) |
|
1491 { |
|
1492 forceerrs = -1; |
|
1493 errsfound = 0; |
|
1494 } |
|
1495 |
|
1496 /* |
|
1497 * Test prog against null-terminated, metafied string. |
|
1498 */ |
|
1499 |
|
1500 /**/ |
|
1501 mod_export int |
|
1502 pattry(Patprog prog, char *string) |
|
1503 { |
|
1504 return pattryrefs(prog, string, -1, -1, 0, NULL, NULL, NULL); |
|
1505 } |
|
1506 |
|
1507 /* |
|
1508 * Test prog against string of given length, no null termination |
|
1509 * but still metafied at this point. offset gives an offset |
|
1510 * to include in reported match indices |
|
1511 */ |
|
1512 |
|
1513 /**/ |
|
1514 mod_export int |
|
1515 pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset) |
|
1516 { |
|
1517 return pattryrefs(prog, string, len, unmetalen, offset, NULL, NULL, NULL); |
|
1518 } |
|
1519 |
|
1520 /* |
|
1521 * Test prog against string with given lengths. The input |
|
1522 * string is metafied; stringlen is the raw string length, and |
|
1523 * unmetalen the number of characters in the original string (some |
|
1524 * of which may now be metafied). Either value may be -1 |
|
1525 * to indicate a null-terminated string which will be counted. Note |
|
1526 * there may be a severe penalty for this if a lot of matching is done |
|
1527 * on one string. |
|
1528 * |
|
1529 * offset is the position in the original string (not seen by |
|
1530 * the pattern module) at which we are trying to match. |
|
1531 * This is added in to the positions recorded in patbeginp and patendp |
|
1532 * when we are looking for substrings. Currently this only happens |
|
1533 * in the parameter substitution code. |
|
1534 * |
|
1535 * Note this is a character offset, i.e. a metafied character |
|
1536 * counts as 1. |
|
1537 * |
|
1538 * The last three arguments are used to report the positions for the |
|
1539 * backreferences. On entry, *nump should contain the maximum number |
|
1540 * of positions to report. In this case the match, mbegin, mend |
|
1541 * arrays are not altered. |
|
1542 */ |
|
1543 |
|
1544 /**/ |
|
1545 mod_export int |
|
1546 pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, |
|
1547 int patoffset, |
|
1548 int *nump, int *begp, int *endp) |
|
1549 { |
|
1550 int i, maxnpos = 0, ret, needfullpath, unmetalenp; |
|
1551 int origlen; |
|
1552 char **sp, **ep, *tryalloced, *ptr; |
|
1553 char *progstr = (char *)prog + prog->startoff; |
|
1554 |
|
1555 if (nump) { |
|
1556 maxnpos = *nump; |
|
1557 *nump = 0; |
|
1558 } |
|
1559 /* inherited from domatch, but why, exactly? */ |
|
1560 if (*string == Nularg) { |
|
1561 string++; |
|
1562 unmetalen--; |
|
1563 } |
|
1564 |
|
1565 if (stringlen < 0) |
|
1566 stringlen = strlen(string); |
|
1567 origlen = stringlen; |
|
1568 |
|
1569 patflags = prog->flags; |
|
1570 /* |
|
1571 * For a top-level ~-exclusion, we will need the full |
|
1572 * path to exclude, so copy the path so far and append the |
|
1573 * current test string. |
|
1574 */ |
|
1575 needfullpath = (patflags & PAT_HAS_EXCLUDP) && pathpos; |
|
1576 |
|
1577 /* Get the length of the full string when unmetafied. */ |
|
1578 if (unmetalen < 0) |
|
1579 unmetalen = ztrsub(string + stringlen, string); |
|
1580 if (needfullpath) |
|
1581 unmetalenp = ztrsub(pathbuf + pathpos, pathbuf); |
|
1582 else |
|
1583 unmetalenp = 0; |
|
1584 |
|
1585 DPUTS(needfullpath && (patflags & (PAT_PURES|PAT_ANY)), |
|
1586 "rum sort of file exclusion"); |
|
1587 /* |
|
1588 * Partly for efficiency, and partly for the convenience of |
|
1589 * globbing, we don't unmetafy pure string patterns, and |
|
1590 * there's no reason to if the pattern is just a *. |
|
1591 */ |
|
1592 if (!(patflags & (PAT_PURES|PAT_ANY)) |
|
1593 && (needfullpath || unmetalen != stringlen)) { |
|
1594 /* |
|
1595 * We need to copy if we need to prepend the path so far |
|
1596 * (in which case we copy both chunks), or if we have |
|
1597 * Meta characters. |
|
1598 */ |
|
1599 char *dst; |
|
1600 int icopy, ncopy; |
|
1601 |
|
1602 dst = tryalloced = zalloc(unmetalen + unmetalenp); |
|
1603 |
|
1604 if (needfullpath) { |
|
1605 /* loop twice, copy path buffer first time */ |
|
1606 ptr = pathbuf; |
|
1607 ncopy = unmetalenp; |
|
1608 } else { |
|
1609 /* just loop once, copy string with unmetafication */ |
|
1610 ptr = string; |
|
1611 ncopy = unmetalen; |
|
1612 } |
|
1613 for (icopy = 0; icopy < 2; icopy++) { |
|
1614 for (i = 0; i < ncopy; i++) { |
|
1615 if (*ptr == Meta) { |
|
1616 ptr++; |
|
1617 *dst++ = *ptr++ ^ 32; |
|
1618 } else { |
|
1619 *dst++ = *ptr++; |
|
1620 } |
|
1621 } |
|
1622 if (!needfullpath) |
|
1623 break; |
|
1624 /* next time append test string to path so far */ |
|
1625 ptr = string; |
|
1626 ncopy = unmetalen; |
|
1627 } |
|
1628 |
|
1629 if (needfullpath) { |
|
1630 patinstart = tryalloced + unmetalenp; |
|
1631 patinpath = tryalloced; |
|
1632 } else { |
|
1633 patinstart = tryalloced; |
|
1634 patinpath = NULL; |
|
1635 } |
|
1636 stringlen = unmetalen; |
|
1637 } else { |
|
1638 patinstart = string; |
|
1639 tryalloced = patinpath = NULL; |
|
1640 } |
|
1641 |
|
1642 patinend = patinstart + stringlen; |
|
1643 /* |
|
1644 * From now on we do not require NULL termination of |
|
1645 * the test string. There should also be no more references |
|
1646 * to the variable string. |
|
1647 */ |
|
1648 |
|
1649 if (prog->flags & (PAT_PURES|PAT_ANY)) { |
|
1650 /* |
|
1651 * Either we are testing against a pure string, |
|
1652 * or we can match anything at all. |
|
1653 */ |
|
1654 int ret; |
|
1655 if (prog->flags & PAT_ANY) { |
|
1656 /* |
|
1657 * Optimisation for a single "*": always matches |
|
1658 * (except for no_glob_dots, see below). |
|
1659 */ |
|
1660 ret = 1; |
|
1661 } else { |
|
1662 /* |
|
1663 * Testing a pure string. See if initial |
|
1664 * components match. |
|
1665 */ |
|
1666 int lendiff = stringlen - prog->patmlen; |
|
1667 if (lendiff < 0) { |
|
1668 /* No, the pattern string is too long. */ |
|
1669 ret = 0; |
|
1670 } else if (!memcmp(progstr, patinstart, prog->patmlen)) { |
|
1671 /* |
|
1672 * Initial component matches. Matches either |
|
1673 * if lengths are the same or we are not anchored |
|
1674 * to the end of the string. |
|
1675 */ |
|
1676 ret = !lendiff || (prog->flags & PAT_NOANCH); |
|
1677 } else { |
|
1678 /* No match. */ |
|
1679 ret = 0; |
|
1680 } |
|
1681 } |
|
1682 if (ret) { |
|
1683 /* |
|
1684 * For files, we won't match initial "."s unless |
|
1685 * glob_dots is set. |
|
1686 */ |
|
1687 if ((prog->flags & PAT_NOGLD) && *patinstart == '.') { |
|
1688 ret = 0; |
|
1689 } else { |
|
1690 /* |
|
1691 * Remember the length in case used for ${..#..} etc. |
|
1692 * In this case, we didn't unmetafy the string. |
|
1693 */ |
|
1694 patinlen = (int)prog->patmlen; |
|
1695 /* if matching files, must update globbing flags */ |
|
1696 patglobflags = prog->globend; |
|
1697 } |
|
1698 } |
|
1699 |
|
1700 if (tryalloced) |
|
1701 zfree(tryalloced, unmetalen + unmetalenp); |
|
1702 |
|
1703 return ret; |
|
1704 } else { |
|
1705 /* |
|
1706 * Test for a `must match' string, unless we're scanning for a match |
|
1707 * in which case we don't need to do this each time. |
|
1708 */ |
|
1709 ret = 1; |
|
1710 if (!(prog->flags & PAT_SCAN) && prog->mustoff) |
|
1711 { |
|
1712 char *testptr; /* start pointer into test string */ |
|
1713 char *teststop; /* last point from which we can match */ |
|
1714 char *patptr = (char *)prog + prog->mustoff; |
|
1715 int patlen = prog->patmlen; |
|
1716 int found = 0; |
|
1717 |
|
1718 if (patlen > stringlen) { |
|
1719 /* Too long, can't match. */ |
|
1720 ret = 0; |
|
1721 } else { |
|
1722 teststop = patinend - patlen; |
|
1723 |
|
1724 for (testptr = patinstart; testptr <= teststop; testptr++) |
|
1725 { |
|
1726 if (!memcmp(testptr, patptr, patlen)) { |
|
1727 found = 1; |
|
1728 break; |
|
1729 } |
|
1730 } |
|
1731 |
|
1732 if (!found) |
|
1733 ret = 0; |
|
1734 } |
|
1735 } |
|
1736 if (!ret) { |
|
1737 if (tryalloced) |
|
1738 zfree(tryalloced, unmetalen + unmetalenp); |
|
1739 return 0; |
|
1740 } |
|
1741 |
|
1742 patglobflags = prog->globflags; |
|
1743 if (!(patflags & PAT_FILE)) { |
|
1744 forceerrs = -1; |
|
1745 errsfound = 0; |
|
1746 } |
|
1747 globdots = !(patflags & PAT_NOGLD); |
|
1748 parsfound = 0; |
|
1749 |
|
1750 patinput = patinstart; |
|
1751 |
|
1752 if (patmatch((Upat)progstr)) { |
|
1753 /* |
|
1754 * we were lazy and didn't save the globflags if an exclusion |
|
1755 * failed, so set it now |
|
1756 */ |
|
1757 patglobflags = prog->globend; |
|
1758 |
|
1759 /* |
|
1760 * Record length of successful match, including Meta |
|
1761 * characters. Do it here so that patmatchlen() can return |
|
1762 * it even if we delete the pattern strings. |
|
1763 */ |
|
1764 patinlen = patinput - patinstart; |
|
1765 /* |
|
1766 * Optimization: if we didn't find any Meta characters |
|
1767 * to begin with, we don't need to look for them now. |
|
1768 */ |
|
1769 if (unmetalen != origlen) { |
|
1770 for (ptr = patinstart; ptr < patinput; ptr++) |
|
1771 if (imeta(*ptr)) |
|
1772 patinlen++; |
|
1773 } |
|
1774 |
|
1775 /* |
|
1776 * Should we clear backreferences and matches on a failed |
|
1777 * match? |
|
1778 */ |
|
1779 if ((patglobflags & GF_MATCHREF) && !(patflags & PAT_FILE)) { |
|
1780 /* |
|
1781 * m flag: for global match. This carries no overhead |
|
1782 * in the pattern matching part. |
|
1783 * |
|
1784 * Remember the test pattern is already unmetafied. |
|
1785 */ |
|
1786 char *str; |
|
1787 int mlen = CHARSUB(patinput, patinstart); |
|
1788 |
|
1789 str = metafy(patinstart, patinput - patinstart, META_DUP); |
|
1790 setsparam("MATCH", str); |
|
1791 setiparam("MBEGIN", (zlong)(patoffset + !isset(KSHARRAYS))); |
|
1792 setiparam("MEND", |
|
1793 (zlong)(mlen + patoffset + |
|
1794 !isset(KSHARRAYS) - 1)); |
|
1795 } |
|
1796 if (prog->patnpar && nump) { |
|
1797 /* |
|
1798 * b flag: for backreferences using parentheses. Reported |
|
1799 * directly. |
|
1800 */ |
|
1801 *nump = prog->patnpar; |
|
1802 |
|
1803 sp = patbeginp; |
|
1804 ep = patendp; |
|
1805 |
|
1806 for (i = 0; i < prog->patnpar && i < maxnpos; i++) { |
|
1807 if (parsfound & (1 << i)) { |
|
1808 if (begp) |
|
1809 *begp++ = CHARSUB(*sp, patinstart) + patoffset; |
|
1810 if (endp) |
|
1811 *endp++ = CHARSUB(*ep, patinstart) + patoffset |
|
1812 - 1; |
|
1813 } else { |
|
1814 if (begp) |
|
1815 *begp++ = -1; |
|
1816 if (endp) |
|
1817 *endp++ = -1; |
|
1818 } |
|
1819 |
|
1820 sp++; |
|
1821 ep++; |
|
1822 } |
|
1823 } else if (prog->patnpar && !(patflags & PAT_FILE)) { |
|
1824 /* |
|
1825 * b flag: for backreferences using parentheses. |
|
1826 */ |
|
1827 int palen = prog->patnpar+1; |
|
1828 char **matcharr, **mbeginarr, **mendarr; |
|
1829 char numbuf[DIGBUFSIZE]; |
|
1830 |
|
1831 matcharr = zshcalloc(palen*sizeof(char *)); |
|
1832 mbeginarr = zshcalloc(palen*sizeof(char *)); |
|
1833 mendarr = zshcalloc(palen*sizeof(char *)); |
|
1834 |
|
1835 sp = patbeginp; |
|
1836 ep = patendp; |
|
1837 |
|
1838 for (i = 0; i < prog->patnpar; i++) { |
|
1839 if (parsfound & (1 << i)) { |
|
1840 matcharr[i] = metafy(*sp, *ep - *sp, META_DUP); |
|
1841 /* |
|
1842 * mbegin and mend give indexes into the string |
|
1843 * in the standard notation, i.e. respecting |
|
1844 * KSHARRAYS, and with the end index giving |
|
1845 * the last character, not one beyond. |
|
1846 * For example, foo=foo; [[ $foo = (f)oo ]] gives |
|
1847 * (without KSHARRAYS) indexes 1 and 1, which |
|
1848 * corresponds to indexing as ${foo[1,1]}. |
|
1849 */ |
|
1850 sprintf(numbuf, "%ld", |
|
1851 (long)(CHARSUB(*sp, patinstart) + |
|
1852 patoffset + |
|
1853 !isset(KSHARRAYS))); |
|
1854 mbeginarr[i] = ztrdup(numbuf); |
|
1855 sprintf(numbuf, "%ld", |
|
1856 (long)(CHARSUB(*ep, patinstart) + |
|
1857 patoffset + |
|
1858 !isset(KSHARRAYS) - 1)); |
|
1859 mendarr[i] = ztrdup(numbuf); |
|
1860 } else { |
|
1861 /* Pattern wasn't set: either it was in an |
|
1862 * unmatched branch, or a hashed parenthesis |
|
1863 * that didn't match at all. |
|
1864 */ |
|
1865 matcharr[i] = ztrdup(""); |
|
1866 mbeginarr[i] = ztrdup("-1"); |
|
1867 mendarr[i] = ztrdup("-1"); |
|
1868 } |
|
1869 sp++; |
|
1870 ep++; |
|
1871 } |
|
1872 setaparam("match", matcharr); |
|
1873 setaparam("mbegin", mbeginarr); |
|
1874 setaparam("mend", mendarr); |
|
1875 } |
|
1876 |
|
1877 ret = 1; |
|
1878 } else |
|
1879 ret = 0; |
|
1880 |
|
1881 if (tryalloced) |
|
1882 zfree(tryalloced, unmetalen + unmetalenp); |
|
1883 |
|
1884 return ret; |
|
1885 } |
|
1886 } |
|
1887 |
|
1888 /* |
|
1889 * Return length of previous succesful match. This is |
|
1890 * in metafied bytes, i.e. includes a count of Meta characters. |
|
1891 * Unusual and futile attempt at modular encapsulation. |
|
1892 */ |
|
1893 |
|
1894 /**/ |
|
1895 int |
|
1896 patmatchlen(void) |
|
1897 { |
|
1898 return patinlen; |
|
1899 } |
|
1900 |
|
1901 /* |
|
1902 * Match literal characters with case insensitivity test: the first |
|
1903 * comes from the input string, the second the current pattern. |
|
1904 */ |
|
1905 #define CHARMATCH(chin, chpa) (chin == chpa || \ |
|
1906 ((patglobflags & GF_IGNCASE) ? \ |
|
1907 ((isupper(chin) ? tolower(chin) : chin) == \ |
|
1908 (isupper(chpa) ? tolower(chpa) : chpa)) : \ |
|
1909 (patglobflags & GF_LCMATCHUC) ? \ |
|
1910 (islower(chpa) && toupper(chpa) == chin) : 0)) |
|
1911 /* |
|
1912 * The same but caching an expression from the first argument, |
|
1913 * Requires local charmatch_cache definition. |
|
1914 */ |
|
1915 #define CHARMATCH_EXPR(expr, chpa) \ |
|
1916 (charmatch_cache = (expr), CHARMATCH(charmatch_cache, chpa)) |
|
1917 |
|
1918 /* |
|
1919 * exactpos is used to remember how far down an exact string we have |
|
1920 * matched, if we are doing approximation and can therefore redo from |
|
1921 * the same point; we never need to otherwise. |
|
1922 * |
|
1923 * exactend is a pointer to the end of the string, which isn't |
|
1924 * null-terminated. |
|
1925 */ |
|
1926 static char *exactpos, *exactend; |
|
1927 |
|
1928 /* |
|
1929 * Main matching routine. |
|
1930 * |
|
1931 * Testing the tail end of a match is usually done by recursion, but |
|
1932 * we try to eliminate that in favour of looping for simple cases. |
|
1933 */ |
|
1934 |
|
1935 /**/ |
|
1936 static int |
|
1937 patmatch(Upat prog) |
|
1938 { |
|
1939 /* Current and next nodes */ |
|
1940 Upat scan = prog, next, opnd; |
|
1941 char *start, *save, *chrop, *chrend, *compend; |
|
1942 int savglobflags, op, no, min, nextch, fail = 0, saverrsfound; |
|
1943 zrange_t from, to, comp; |
|
1944 |
|
1945 while (scan) { |
|
1946 next = PATNEXT(scan); |
|
1947 |
|
1948 if (!globdots && P_NOTDOT(scan) && patinput == patinstart && |
|
1949 patinput < patinend && *patinput == '.') |
|
1950 return 0; |
|
1951 |
|
1952 switch (P_OP(scan)) { |
|
1953 case P_ANY: |
|
1954 if (patinput == patinend) |
|
1955 fail = 1; |
|
1956 else |
|
1957 CHARINC(patinput); |
|
1958 break; |
|
1959 case P_EXACTLY: |
|
1960 /* |
|
1961 * acts as nothing if *chrop is null: this is used by |
|
1962 * approx code. |
|
1963 */ |
|
1964 if (exactpos) { |
|
1965 chrop = exactpos; |
|
1966 chrend = exactend; |
|
1967 } else { |
|
1968 chrop = P_LS_STR(scan); |
|
1969 chrend = chrop + P_LS_LEN(scan); |
|
1970 } |
|
1971 exactpos = NULL; |
|
1972 while (chrop < chrend && patinput < patinend) { |
|
1973 int chin = CHARREF(patinput); |
|
1974 int chpa = CHARREF(chrop); |
|
1975 if (!CHARMATCH(chin, chpa)) { |
|
1976 fail = 1; |
|
1977 break; |
|
1978 } |
|
1979 CHARINC(chrop); |
|
1980 CHARINC(patinput); |
|
1981 } |
|
1982 if (chrop < chrend) { |
|
1983 exactpos = chrop; |
|
1984 exactend = chrend; |
|
1985 fail = 1; |
|
1986 } |
|
1987 break; |
|
1988 case P_ANYOF: |
|
1989 if (patinput == patinend || |
|
1990 !patmatchrange((char *)P_OPERAND(scan), |
|
1991 CHARREF(patinput))) |
|
1992 fail = 1; |
|
1993 else |
|
1994 CHARINC(patinput); |
|
1995 break; |
|
1996 case P_ANYBUT: |
|
1997 if (patinput == patinend || |
|
1998 patmatchrange((char *)P_OPERAND(scan), |
|
1999 CHARREF(patinput))) |
|
2000 fail = 1; |
|
2001 else |
|
2002 CHARINC(patinput); |
|
2003 break; |
|
2004 case P_NUMRNG: |
|
2005 case P_NUMFROM: |
|
2006 case P_NUMTO: |
|
2007 /* |
|
2008 * To do this properly, we really have to treat numbers as |
|
2009 * closures: that's so things like <1-1000>33 will |
|
2010 * match 633 (they didn't up to 3.1.6). To avoid making this |
|
2011 * too inefficient, we see if there's an exact match next: |
|
2012 * if there is, and it's not a digit, we return 1 after |
|
2013 * the first attempt. |
|
2014 */ |
|
2015 op = P_OP(scan); |
|
2016 start = (char *)P_OPERAND(scan); |
|
2017 from = to = 0; |
|
2018 if (op != P_NUMTO) { |
|
2019 #ifdef ZSH_64_BIT_TYPE |
|
2020 /* We can't rely on pointer alignment being good enough. */ |
|
2021 memcpy((char *)&from, start, sizeof(zrange_t)); |
|
2022 #else |
|
2023 from = *((zrange_t *) start); |
|
2024 #endif |
|
2025 start += sizeof(zrange_t); |
|
2026 } |
|
2027 if (op != P_NUMFROM) { |
|
2028 #ifdef ZSH_64_BIT_TYPE |
|
2029 memcpy((char *)&to, start, sizeof(zrange_t)); |
|
2030 #else |
|
2031 to = *((zrange_t *) start); |
|
2032 #endif |
|
2033 } |
|
2034 start = compend = patinput; |
|
2035 comp = 0; |
|
2036 while (patinput < patinend && idigit(*patinput)) { |
|
2037 if (comp) |
|
2038 comp *= 10; |
|
2039 comp += *patinput - '0'; |
|
2040 patinput++; |
|
2041 compend++; |
|
2042 |
|
2043 if (comp & ((zrange_t)1 << (sizeof(comp)*8 - |
|
2044 #ifdef ZRANGE_T_IS_SIGNED |
|
2045 2 |
|
2046 #else |
|
2047 1 |
|
2048 #endif |
|
2049 ))) { |
|
2050 /* |
|
2051 * Out of range (allowing for signedness, which |
|
2052 * we need if we are using zlongs). |
|
2053 * This is as far as we can go. |
|
2054 * If we're doing a range "from", skip all the |
|
2055 * remaining numbers. Otherwise, we can't |
|
2056 * match beyond the previous point anyway. |
|
2057 * Leave the pointer to the last calculated |
|
2058 * position (compend) where it was before. |
|
2059 */ |
|
2060 if (op == P_NUMFROM) { |
|
2061 while (patinput < patinend && idigit(*patinput)) |
|
2062 patinput++; |
|
2063 } |
|
2064 } |
|
2065 } |
|
2066 save = patinput; |
|
2067 no = 0; |
|
2068 while (patinput > start) { |
|
2069 /* if already too small, no power on earth can save it */ |
|
2070 if (comp < from && patinput <= compend) |
|
2071 break; |
|
2072 if ((op == P_NUMFROM || comp <= to) && patmatch(next)) { |
|
2073 return 1; |
|
2074 } |
|
2075 if (!no && P_OP(next) == P_EXACTLY && |
|
2076 (!P_LS_LEN(next) || |
|
2077 !idigit(STOUC(*P_LS_STR(next)))) && |
|
2078 !(patglobflags & 0xff)) |
|
2079 return 0; |
|
2080 patinput = --save; |
|
2081 no++; |
|
2082 /* |
|
2083 * With a range start and an unrepresentable test |
|
2084 * number, we just back down the test string without |
|
2085 * changing the number until we get to a representable |
|
2086 * one. |
|
2087 */ |
|
2088 if (patinput < compend) |
|
2089 comp /= 10; |
|
2090 } |
|
2091 patinput = start; |
|
2092 fail = 1; |
|
2093 break; |
|
2094 case P_NUMANY: |
|
2095 /* This is <->: any old set of digits, don't bother comparing */ |
|
2096 start = patinput; |
|
2097 while (patinput < patinend && idigit(CHARREF(patinput))) |
|
2098 patinput++; |
|
2099 save = patinput; |
|
2100 no = 0; |
|
2101 while (patinput > start) { |
|
2102 if (patmatch(next)) |
|
2103 return 1; |
|
2104 if (!no && P_OP(next) == P_EXACTLY && |
|
2105 (!P_LS_LEN(next) || |
|
2106 !idigit(CHARREF(P_LS_STR(next)))) && |
|
2107 !(patglobflags & 0xff)) |
|
2108 return 0; |
|
2109 patinput = --save; |
|
2110 no++; |
|
2111 } |
|
2112 patinput = start; |
|
2113 fail = 1; |
|
2114 break; |
|
2115 case P_NOTHING: |
|
2116 break; |
|
2117 case P_BACK: |
|
2118 break; |
|
2119 case P_GFLAGS: |
|
2120 patglobflags = P_OPERAND(scan)->l; |
|
2121 break; |
|
2122 case P_OPEN: |
|
2123 case P_OPEN+1: |
|
2124 case P_OPEN+2: |
|
2125 case P_OPEN+3: |
|
2126 case P_OPEN+4: |
|
2127 case P_OPEN+5: |
|
2128 case P_OPEN+6: |
|
2129 case P_OPEN+7: |
|
2130 case P_OPEN+8: |
|
2131 case P_OPEN+9: |
|
2132 no = P_OP(scan) - P_OPEN; |
|
2133 save = patinput; |
|
2134 |
|
2135 if (patmatch(next)) { |
|
2136 /* |
|
2137 * Don't set patbeginp if some later invocation of |
|
2138 * the same parentheses already has. |
|
2139 */ |
|
2140 if (no && !(parsfound & (1 << (no - 1)))) { |
|
2141 patbeginp[no-1] = save; |
|
2142 parsfound |= 1 << (no - 1); |
|
2143 } |
|
2144 return 1; |
|
2145 } else |
|
2146 return 0; |
|
2147 break; |
|
2148 case P_CLOSE: |
|
2149 case P_CLOSE+1: |
|
2150 case P_CLOSE+2: |
|
2151 case P_CLOSE+3: |
|
2152 case P_CLOSE+4: |
|
2153 case P_CLOSE+5: |
|
2154 case P_CLOSE+6: |
|
2155 case P_CLOSE+7: |
|
2156 case P_CLOSE+8: |
|
2157 case P_CLOSE+9: |
|
2158 no = P_OP(scan) - P_CLOSE; |
|
2159 save = patinput; |
|
2160 |
|
2161 if (patmatch(next)) { |
|
2162 DPUTS(!patendp, "patendp not set for backreferencing"); |
|
2163 if (no && !(parsfound & (1 << (no + 15)))) { |
|
2164 patendp[no-1] = save; |
|
2165 parsfound |= 1 << (no + 15); |
|
2166 } |
|
2167 return 1; |
|
2168 } else |
|
2169 return 0; |
|
2170 break; |
|
2171 case P_EXCSYNC: |
|
2172 /* See the P_EXCLUDE code below for where syncptr comes from */ |
|
2173 { |
|
2174 unsigned char *syncptr; |
|
2175 Upat after; |
|
2176 after = P_OPERAND(scan); |
|
2177 DPUTS(!P_ISEXCLUDE(after), |
|
2178 "BUG: EXCSYNC not followed by EXCLUDE."); |
|
2179 DPUTS(!P_OPERAND(after)->p, |
|
2180 "BUG: EXCSYNC not handled by EXCLUDE"); |
|
2181 syncptr = P_OPERAND(after)->p + (patinput - patinstart); |
|
2182 /* |
|
2183 * If we already matched from here, this time we fail. |
|
2184 * See WBRANCH code for story about error count. |
|
2185 */ |
|
2186 if (*syncptr && errsfound + 1 >= *syncptr) |
|
2187 return 0; |
|
2188 /* |
|
2189 * Else record that we (possibly) matched this time. |
|
2190 * No harm if we don't: then the previous test will just |
|
2191 * short cut the attempted match that is bound to fail. |
|
2192 * We never try to exclude something that has already |
|
2193 * failed anyway. |
|
2194 */ |
|
2195 *syncptr = errsfound + 1; |
|
2196 } |
|
2197 break; |
|
2198 case P_EXCEND: |
|
2199 /* |
|
2200 * This is followed by a P_EXCSYNC, but only in the P_EXCLUDE |
|
2201 * branch. Actually, we don't bother following it: all we |
|
2202 * need to know is that we successfully matched so far up |
|
2203 * to the end of the asserted pattern; the endpoint |
|
2204 * in the target string is nulled out. |
|
2205 */ |
|
2206 if (!(fail = (patinput < patinend))) |
|
2207 return 1; |
|
2208 break; |
|
2209 case P_BRANCH: |
|
2210 case P_WBRANCH: |
|
2211 /* P_EXCLUDE shouldn't occur without a P_BRANCH */ |
|
2212 if (!P_ISBRANCH(next)) { |
|
2213 /* no choice, avoid recursion */ |
|
2214 DPUTS(P_OP(scan) == P_WBRANCH, |
|
2215 "BUG: WBRANCH with no alternative."); |
|
2216 next = P_OPERAND(scan); |
|
2217 } else { |
|
2218 do { |
|
2219 save = patinput; |
|
2220 savglobflags = patglobflags; |
|
2221 saverrsfound = errsfound; |
|
2222 if (P_ISEXCLUDE(next)) { |
|
2223 /* |
|
2224 * The strategy is to test the asserted pattern, |
|
2225 * recording via P_EXCSYNC how far the part to |
|
2226 * be excluded matched. We then set the |
|
2227 * length of the test string to that |
|
2228 * point and see if the exclusion as far as |
|
2229 * P_EXCEND also matches that string. |
|
2230 * We need to keep testing the asserted pattern |
|
2231 * by backtracking, since the first attempt |
|
2232 * may be excluded while a later attempt may not. |
|
2233 * For this we keep a pointer just after |
|
2234 * the P_EXCLUDE which is tested by the P_EXCSYNC |
|
2235 * to see if we matched there last time, in which |
|
2236 * case we fail. If there is nothing to backtrack |
|
2237 * over, that doesn't matter: we should fail anyway. |
|
2238 * The pointer also tells us where the asserted |
|
2239 * pattern matched for use by the exclusion. |
|
2240 * |
|
2241 * It's hard to allocate space for this |
|
2242 * beforehand since we may need to do it |
|
2243 * recursively. |
|
2244 * |
|
2245 * P.S. in case you were wondering, this code |
|
2246 * is horrible. |
|
2247 */ |
|
2248 Upat syncstrp; |
|
2249 char *origpatinend; |
|
2250 unsigned char *oldsyncstr; |
|
2251 char *matchpt = NULL; |
|
2252 int ret, savglobdots, matchederrs = 0; |
|
2253 int savparsfound = parsfound; |
|
2254 DPUTS(P_OP(scan) == P_WBRANCH, |
|
2255 "BUG: excluded WBRANCH"); |
|
2256 syncstrp = P_OPERAND(next); |
|
2257 /* |
|
2258 * Unlike WBRANCH, each test at the same exclude |
|
2259 * sync point (due to an external loop) is separate, |
|
2260 * i.e testing (foo~bar)# is no different from |
|
2261 * (foo~bar)(foo~bar)... from the exclusion point |
|
2262 * of view, so we use a different sync string. |
|
2263 */ |
|
2264 oldsyncstr = syncstrp->p; |
|
2265 syncstrp->p = (unsigned char *) |
|
2266 zshcalloc((patinend - patinstart) + 1); |
|
2267 origpatinend = patinend; |
|
2268 while ((ret = patmatch(P_OPERAND(scan)))) { |
|
2269 unsigned char *syncpt; |
|
2270 char *savpatinstart; |
|
2271 int savforce = forceerrs; |
|
2272 int savpatflags = patflags, synclen; |
|
2273 forceerrs = -1; |
|
2274 savglobdots = globdots; |
|
2275 matchederrs = errsfound; |
|
2276 matchpt = patinput; /* may not be end */ |
|
2277 globdots = 1; /* OK to match . first */ |
|
2278 /* Find the point where the scan |
|
2279 * matched the part to be excluded: because |
|
2280 * of backtracking, the one |
|
2281 * most recently matched will be the first. |
|
2282 * (Luckily, backtracking is done after all |
|
2283 * possibilities for approximation have been |
|
2284 * checked.) |
|
2285 */ |
|
2286 for (syncpt = syncstrp->p; !*syncpt; syncpt++) |
|
2287 ; |
|
2288 synclen = syncpt - syncstrp->p; |
|
2289 if (patinstart + synclen != patinend) { |
|
2290 /* |
|
2291 * Temporarily mark the string as |
|
2292 * ending at this point. |
|
2293 */ |
|
2294 DPUTS(patinstart + synclen > matchpt, |
|
2295 "BUG: EXCSYNC failed"); |
|
2296 |
|
2297 patinend = patinstart + synclen; |
|
2298 /* |
|
2299 * If this isn't really the end of the string, |
|
2300 * remember this for the (#e) assertion. |
|
2301 */ |
|
2302 patflags |= PAT_NOTEND; |
|
2303 } |
|
2304 savpatinstart = patinstart; |
|
2305 next = PATNEXT(scan); |
|
2306 while (next && P_ISEXCLUDE(next)) { |
|
2307 patinput = save; |
|
2308 /* |
|
2309 * turn off approximations in exclusions: |
|
2310 * note we keep remaining patglobflags |
|
2311 * set by asserted branch (or previous |
|
2312 * excluded branches, for consistency). |
|
2313 */ |
|
2314 patglobflags &= ~0xff; |
|
2315 errsfound = 0; |
|
2316 opnd = P_OPERAND(next) + 1; |
|
2317 if (P_OP(next) == P_EXCLUDP && patinpath) { |
|
2318 /* |
|
2319 * Top level exclusion with a file, |
|
2320 * applies to whole path so add the |
|
2321 * segments already matched. |
|
2322 * We copied these in front of the |
|
2323 * test pattern, so patinend doesn't |
|
2324 * need moving. |
|
2325 */ |
|
2326 DPUTS(patinput != patinstart, |
|
2327 "BUG: not at start excluding path"); |
|
2328 patinput = patinstart = patinpath; |
|
2329 } |
|
2330 if (patmatch(opnd)) { |
|
2331 ret = 0; |
|
2332 /* |
|
2333 * Another subtlety: if we exclude the |
|
2334 * match, any parentheses just found |
|
2335 * become invalidated. |
|
2336 */ |
|
2337 parsfound = savparsfound; |
|
2338 } |
|
2339 if (patinpath) { |
|
2340 patinput = savpatinstart + |
|
2341 (patinput - patinstart); |
|
2342 patinstart = savpatinstart; |
|
2343 } |
|
2344 if (!ret) |
|
2345 break; |
|
2346 next = PATNEXT(next); |
|
2347 } |
|
2348 /* |
|
2349 * Restore original end position. |
|
2350 */ |
|
2351 patinend = origpatinend; |
|
2352 patflags = savpatflags; |
|
2353 globdots = savglobdots; |
|
2354 forceerrs = savforce; |
|
2355 if (ret) |
|
2356 break; |
|
2357 patinput = save; |
|
2358 patglobflags = savglobflags; |
|
2359 errsfound = saverrsfound; |
|
2360 } |
|
2361 zfree((char *)syncstrp->p, |
|
2362 (patinend - patinstart) + 1); |
|
2363 syncstrp->p = oldsyncstr; |
|
2364 if (ret) { |
|
2365 patinput = matchpt; |
|
2366 errsfound = matchederrs; |
|
2367 return 1; |
|
2368 } |
|
2369 while ((scan = PATNEXT(scan)) && |
|
2370 P_ISEXCLUDE(scan)) |
|
2371 ; |
|
2372 } else { |
|
2373 int ret = 1, pfree = 0; |
|
2374 Upat ptrp = NULL; |
|
2375 unsigned char *ptr; |
|
2376 if (P_OP(scan) == P_WBRANCH) { |
|
2377 /* |
|
2378 * This is where we make sure that we are not |
|
2379 * repeatedly matching zero-length strings in |
|
2380 * a closure, which would cause an infinite loop, |
|
2381 * and also remove exponential behaviour in |
|
2382 * backtracking nested closures. |
|
2383 * The P_WBRANCH operator leaves a space for a |
|
2384 * uchar *, initialized to NULL, which is |
|
2385 * turned into a string the same length as the |
|
2386 * target string. Every time we match from a |
|
2387 * particular point in the target string, we |
|
2388 * stick a 1 at the corresponding point here. |
|
2389 * If we come round to the same branch again, and |
|
2390 * there is already a 1, then the test fails. |
|
2391 */ |
|
2392 opnd = P_OPERAND(scan); |
|
2393 ptrp = opnd++; |
|
2394 if (!ptrp->p) { |
|
2395 ptrp->p = (unsigned char *) |
|
2396 zshcalloc((patinend - patinstart) + 1); |
|
2397 pfree = 1; |
|
2398 } |
|
2399 ptr = ptrp->p + (patinput - patinstart); |
|
2400 |
|
2401 /* |
|
2402 * Without approximation, this is just a |
|
2403 * single bit test. With approximation, we |
|
2404 * need to know how many errors there were |
|
2405 * last time we made the test. If errsfound |
|
2406 * is now smaller than it was, hence we can |
|
2407 * make more approximations in the remaining |
|
2408 * code, we continue with the test. |
|
2409 * (This is why the max number of errors is |
|
2410 * 254, not 255.) |
|
2411 */ |
|
2412 if (*ptr && errsfound + 1 >= *ptr) |
|
2413 ret = 0; |
|
2414 *ptr = errsfound + 1; |
|
2415 } else |
|
2416 opnd = P_OPERAND(scan); |
|
2417 if (ret) |
|
2418 ret = patmatch(opnd); |
|
2419 if (pfree) { |
|
2420 zfree((char *)ptrp->p, |
|
2421 (patinend - patinstart) + 1); |
|
2422 ptrp->p = NULL; |
|
2423 } |
|
2424 if (ret) |
|
2425 return 1; |
|
2426 scan = PATNEXT(scan); |
|
2427 } |
|
2428 patinput = save; |
|
2429 patglobflags = savglobflags; |
|
2430 errsfound = saverrsfound; |
|
2431 DPUTS(P_OP(scan) == P_WBRANCH, |
|
2432 "BUG: WBRANCH not first choice."); |
|
2433 next = PATNEXT(scan); |
|
2434 } while (scan && P_ISBRANCH(scan)); |
|
2435 return 0; |
|
2436 } |
|
2437 break; |
|
2438 case P_STAR: |
|
2439 /* Handle specially for speed, although really P_ONEHASH+P_ANY */ |
|
2440 case P_ONEHASH: |
|
2441 case P_TWOHASH: |
|
2442 /* |
|
2443 * This is just simple cases, matching one character. |
|
2444 * With approximations, we still handle * this way, since |
|
2445 * no approximation is ever necessary, but other closures |
|
2446 * are handled by the more complicated branching method |
|
2447 */ |
|
2448 op = P_OP(scan); |
|
2449 /* Note that no counts possibly metafied characters */ |
|
2450 start = patinput; |
|
2451 if (op == P_STAR) { |
|
2452 for (no = 0; patinput < patinend; CHARINC(patinput)) |
|
2453 no++; |
|
2454 /* simple optimization for reasonably common case */ |
|
2455 if (P_OP(next) == P_END) |
|
2456 return 1; |
|
2457 } else { |
|
2458 DPUTS(patglobflags & 0xff, |
|
2459 "BUG: wrong backtracking with approximation."); |
|
2460 if (!globdots && P_NOTDOT(P_OPERAND(scan)) && |
|
2461 patinput == patinstart && patinput < patinend && |
|
2462 CHARREF(patinput) == '.') |
|
2463 return 0; |
|
2464 no = patrepeat(P_OPERAND(scan)); |
|
2465 } |
|
2466 min = (op == P_TWOHASH) ? 1 : 0; |
|
2467 /* |
|
2468 * Lookahead to avoid useless matches. This is not possible |
|
2469 * with approximation. |
|
2470 */ |
|
2471 if (P_OP(next) == P_EXACTLY && P_LS_LEN(next) && |
|
2472 !(patglobflags & 0xff)) { |
|
2473 char *nextop = P_LS_STR(next); |
|
2474 /* |
|
2475 * If that P_EXACTLY is last (common in simple patterns, |
|
2476 * such as *.c), then it can be only be matched at one |
|
2477 * point in the test string, so record that. |
|
2478 */ |
|
2479 Upat ptemp= PATNEXT(next); |
|
2480 if ( P_OP(ptemp)== P_END && |
|
2481 !(patflags & PAT_NOANCH)) { |
|
2482 int ptlen = patinend - patinput; |
|
2483 int lenmatch = patinend - (min ? CHARNEXT(start) : start); |
|
2484 /* Are we in the right range? */ |
|
2485 if (P_LS_LEN(next) > lenmatch || P_LS_LEN(next) < ptlen) |
|
2486 return 0; |
|
2487 /* Yes, just position appropriately and test. */ |
|
2488 patinput += ptlen - P_LS_LEN(next); |
|
2489 /* |
|
2490 * Here we will need to be careful that patinput is not |
|
2491 * in the middle of a multibyte character. |
|
2492 */ |
|
2493 /* Continue loop with P_EXACTLY test. */ |
|
2494 break; |
|
2495 } |
|
2496 nextch = CHARREF(nextop); |
|
2497 } else |
|
2498 nextch = -1; |
|
2499 save = patinput; |
|
2500 savglobflags = patglobflags; |
|
2501 saverrsfound = errsfound; |
|
2502 while (no >= min) { |
|
2503 int charmatch_cache; |
|
2504 if (nextch < 0 || |
|
2505 (patinput < patinend && |
|
2506 CHARMATCH_EXPR(CHARREF(patinput), nextch))) { |
|
2507 if (patmatch(next)) |
|
2508 return 1; |
|
2509 } |
|
2510 no--; |
|
2511 save--; |
|
2512 /* |
|
2513 * Here we will need to make sure save is |
|
2514 * decremented properly to the start of |
|
2515 * the preceeding multibyte character. |
|
2516 */ |
|
2517 patinput = save; |
|
2518 patglobflags = savglobflags; |
|
2519 errsfound = saverrsfound; |
|
2520 } |
|
2521 /* |
|
2522 * As with branches, the patmatch(next) stuff for * |
|
2523 * handles approximation, so we don't need to try |
|
2524 * anything here. |
|
2525 */ |
|
2526 return 0; |
|
2527 case P_ISSTART: |
|
2528 if (patinput != patinstart || (patflags & PAT_NOTSTART)) |
|
2529 fail = 1; |
|
2530 break; |
|
2531 case P_ISEND: |
|
2532 if (patinput < patinend || (patflags & PAT_NOTEND)) |
|
2533 fail = 1; |
|
2534 break; |
|
2535 case P_END: |
|
2536 if (!(fail = (patinput < patinend && !(patflags & PAT_NOANCH)))) |
|
2537 return 1; |
|
2538 break; |
|
2539 #ifdef DEBUG |
|
2540 default: |
|
2541 dputs("BUG: bad operand in patmatch."); |
|
2542 return 0; |
|
2543 break; |
|
2544 #endif |
|
2545 } |
|
2546 |
|
2547 if (fail) { |
|
2548 if (errsfound < (patglobflags & 0xff) && |
|
2549 (forceerrs == -1 || errsfound < forceerrs)) { |
|
2550 /* |
|
2551 * Approximation code. There are four possibilities |
|
2552 * |
|
2553 * 1. omit character from input string |
|
2554 * 2. transpose characters in input and pattern strings |
|
2555 * 3. omit character in both input and pattern strings |
|
2556 * 4. omit character from pattern string. |
|
2557 * |
|
2558 * which we try in that order. |
|
2559 * |
|
2560 * Of these, 2, 3 and 4 require an exact match string |
|
2561 * (P_EXACTLY) while 1, 2 and 3 require that we not |
|
2562 * have reached the end of the input string. |
|
2563 * |
|
2564 * Note in each case after making the approximation we |
|
2565 * need to retry the *same* pattern; this is what |
|
2566 * requires exactpos, a slightly doleful way of |
|
2567 * communicating with the exact character matcher. |
|
2568 */ |
|
2569 char *savexact = exactpos; |
|
2570 save = patinput; |
|
2571 savglobflags = patglobflags; |
|
2572 saverrsfound = ++errsfound; |
|
2573 fail = 0; |
|
2574 |
|
2575 DPUTS(P_OP(scan) != P_EXACTLY && exactpos, |
|
2576 "BUG: non-exact match has set exactpos"); |
|
2577 |
|
2578 /* Try omitting a character from the input string */ |
|
2579 if (patinput < patinend) { |
|
2580 CHARINC(patinput); |
|
2581 /* If we are not on an exact match, then this is |
|
2582 * our last gasp effort, so we can optimize out |
|
2583 * the recursive call. |
|
2584 */ |
|
2585 if (P_OP(scan) != P_EXACTLY) |
|
2586 continue; |
|
2587 if (patmatch(scan)) |
|
2588 return 1; |
|
2589 } |
|
2590 |
|
2591 if (P_OP(scan) == P_EXACTLY) { |
|
2592 char *nextexact = savexact; |
|
2593 DPUTS(!savexact, |
|
2594 "BUG: exact match has not set exactpos"); |
|
2595 CHARINC(nextexact); |
|
2596 |
|
2597 if (save < patinend) { |
|
2598 char *nextin = save; |
|
2599 CHARINC(nextin); |
|
2600 patglobflags = savglobflags; |
|
2601 errsfound = saverrsfound; |
|
2602 exactpos = savexact; |
|
2603 |
|
2604 /* |
|
2605 * Try swapping two characters in patinput and |
|
2606 * exactpos |
|
2607 */ |
|
2608 if (save < patinend && nextin < patinend && |
|
2609 nextexact < exactend) { |
|
2610 int cin0 = CHARREF(save); |
|
2611 int cpa0 = CHARREF(exactpos); |
|
2612 int cin1 = CHARREF(nextin); |
|
2613 int cpa1 = CHARREF(nextexact); |
|
2614 |
|
2615 if (CHARMATCH(cin0, cpa1) && |
|
2616 CHARMATCH(cin1, cpa0)) { |
|
2617 patinput = nextin; |
|
2618 CHARINC(patinput); |
|
2619 exactpos = nextexact; |
|
2620 CHARINC(exactpos); |
|
2621 if (patmatch(scan)) |
|
2622 return 1; |
|
2623 |
|
2624 patglobflags = savglobflags; |
|
2625 errsfound = saverrsfound; |
|
2626 } |
|
2627 } |
|
2628 |
|
2629 /* |
|
2630 * Try moving up both strings. |
|
2631 */ |
|
2632 patinput = nextin; |
|
2633 exactpos = nextexact; |
|
2634 if (patmatch(scan)) |
|
2635 return 1; |
|
2636 |
|
2637 patinput = save; |
|
2638 patglobflags = savglobflags; |
|
2639 errsfound = saverrsfound; |
|
2640 exactpos = savexact; |
|
2641 } |
|
2642 |
|
2643 DPUTS(exactpos == exactend, "approximating too far"); |
|
2644 /* |
|
2645 * Try moving up the exact match pattern. |
|
2646 * This must be the last attempt, so just loop |
|
2647 * instead of calling recursively. |
|
2648 */ |
|
2649 CHARINC(exactpos); |
|
2650 continue; |
|
2651 } |
|
2652 } |
|
2653 exactpos = NULL; |
|
2654 return 0; |
|
2655 } |
|
2656 |
|
2657 scan = next; |
|
2658 } |
|
2659 |
|
2660 return 0; |
|
2661 } |
|
2662 |
|
2663 /**/ |
|
2664 static int |
|
2665 patmatchrange(char *range, int ch) |
|
2666 { |
|
2667 int r1, r2; |
|
2668 |
|
2669 /* |
|
2670 * Careful here: unlike other strings, range is a NULL-terminated, |
|
2671 * metafied string, because we need to treat the Posix and hyphenated |
|
2672 * ranges specially. |
|
2673 */ |
|
2674 for (; *range; range++) { |
|
2675 if (imeta(STOUC(*range))) { |
|
2676 switch (STOUC(*range)-STOUC(Meta)) { |
|
2677 case 0: |
|
2678 if (STOUC(*++range ^ 32) == ch) |
|
2679 return 1; |
|
2680 break; |
|
2681 case PP_ALPHA: |
|
2682 if (isalpha(ch)) |
|
2683 return 1; |
|
2684 break; |
|
2685 case PP_ALNUM: |
|
2686 if (isalnum(ch)) |
|
2687 return 1; |
|
2688 break; |
|
2689 case PP_ASCII: |
|
2690 if ((ch & ~0x7f) == 0) |
|
2691 return 1; |
|
2692 break; |
|
2693 case PP_BLANK: |
|
2694 if (ch == ' ' || ch == '\t') |
|
2695 return 1; |
|
2696 break; |
|
2697 case PP_CNTRL: |
|
2698 if (iscntrl(ch)) |
|
2699 return 1; |
|
2700 break; |
|
2701 case PP_DIGIT: |
|
2702 if (isdigit(ch)) |
|
2703 return 1; |
|
2704 break; |
|
2705 case PP_GRAPH: |
|
2706 if (isgraph(ch)) |
|
2707 return 1; |
|
2708 break; |
|
2709 case PP_LOWER: |
|
2710 if (islower(ch)) |
|
2711 return 1; |
|
2712 break; |
|
2713 case PP_PRINT: |
|
2714 if (isprint(ch)) |
|
2715 return 1; |
|
2716 break; |
|
2717 case PP_PUNCT: |
|
2718 if (ispunct(ch)) |
|
2719 return 1; |
|
2720 break; |
|
2721 case PP_SPACE: |
|
2722 if (isspace(ch)) |
|
2723 return 1; |
|
2724 break; |
|
2725 case PP_UPPER: |
|
2726 if (isupper(ch)) |
|
2727 return 1; |
|
2728 break; |
|
2729 case PP_XDIGIT: |
|
2730 if (isxdigit(ch)) |
|
2731 return 1; |
|
2732 break; |
|
2733 case PP_RANGE: |
|
2734 range++; |
|
2735 r1 = STOUC(UNMETA(range)); |
|
2736 METAINC(range); |
|
2737 r2 = STOUC(UNMETA(range)); |
|
2738 if (*range == Meta) |
|
2739 range++; |
|
2740 if (r1 <= ch && ch <= r2) |
|
2741 return 1; |
|
2742 break; |
|
2743 case PP_UNKWN: |
|
2744 DPUTS(1, "BUG: unknown posix range passed through.\n"); |
|
2745 break; |
|
2746 default: |
|
2747 DPUTS(1, "BUG: unknown metacharacter in range."); |
|
2748 break; |
|
2749 } |
|
2750 } else if (STOUC(*range) == ch) |
|
2751 return 1; |
|
2752 } |
|
2753 return 0; |
|
2754 } |
|
2755 |
|
2756 /* repeatedly match something simple and say how many times */ |
|
2757 |
|
2758 /**/ |
|
2759 static int patrepeat(Upat p) |
|
2760 { |
|
2761 int count = 0, tch, charmatch_cache; |
|
2762 char *scan, *opnd; |
|
2763 |
|
2764 scan = patinput; |
|
2765 opnd = (char *)P_OPERAND(p); |
|
2766 |
|
2767 switch(P_OP(p)) { |
|
2768 #ifdef DEBUG |
|
2769 case P_ANY: |
|
2770 dputs("BUG: ?# did not get optimized to *"); |
|
2771 return 0; |
|
2772 break; |
|
2773 #endif |
|
2774 case P_EXACTLY: |
|
2775 DPUTS(P_LS_LEN(p) != 1, "closure following more than one character"); |
|
2776 tch = CHARREF(P_LS_STR(p)); |
|
2777 while (scan < patinend && |
|
2778 CHARMATCH_EXPR(CHARREF(scan), tch)) { |
|
2779 count++; |
|
2780 CHARINC(scan); |
|
2781 } |
|
2782 break; |
|
2783 case P_ANYOF: |
|
2784 while (scan < patinend && patmatchrange(opnd, CHARREF(scan))) { |
|
2785 count++; |
|
2786 CHARINC(scan); |
|
2787 } |
|
2788 break; |
|
2789 case P_ANYBUT: |
|
2790 while (scan < patinend && !patmatchrange(opnd, CHARREF(scan))) { |
|
2791 count++; |
|
2792 CHARINC(scan); |
|
2793 } |
|
2794 break; |
|
2795 #ifdef DEBUG |
|
2796 default: |
|
2797 dputs("BUG: something very strange is happening in patrepeat"); |
|
2798 return 0; |
|
2799 break; |
|
2800 #endif |
|
2801 } |
|
2802 |
|
2803 patinput = scan; |
|
2804 return count; |
|
2805 } |
|
2806 |
|
2807 /* Free a patprog. */ |
|
2808 |
|
2809 /**/ |
|
2810 mod_export void |
|
2811 freepatprog(Patprog prog) |
|
2812 { |
|
2813 if (prog && prog != dummy_patprog1 && prog != dummy_patprog2) |
|
2814 zfree(prog, prog->size); |
|
2815 } |
|
2816 |
|
2817 /**/ |
|
2818 #ifdef ZSH_PAT_DEBUG |
|
2819 |
|
2820 /* Debugging stuff: print and test a regular expression */ |
|
2821 |
|
2822 /* Dump a regexp onto stdout in vaguely comprehensible form */ |
|
2823 |
|
2824 /**/ |
|
2825 static void |
|
2826 patdump(Patprog r) |
|
2827 { |
|
2828 char *s, *base, op = P_EXACTLY; |
|
2829 Upat up, codestart, next; |
|
2830 |
|
2831 base = (char *)r; |
|
2832 s = base + r->startoff; |
|
2833 |
|
2834 if (r->flags & PAT_PURES) { |
|
2835 printf("STRING:%s\n", (char *)s); |
|
2836 } else { |
|
2837 codestart = (Upat)s; |
|
2838 while (op != P_END) { |
|
2839 up = (Upat)s; |
|
2840 op = P_OP(up); |
|
2841 printf("%2d%s", up-codestart, patprop(up)); |
|
2842 next = PATNEXT(up); |
|
2843 printf("(%d)", next ? next-codestart : 0); |
|
2844 s += sizeof(union upat); |
|
2845 if (op == P_EXACTLY) { |
|
2846 long llen = *(long *)s; |
|
2847 s += sizeof(long); |
|
2848 while (llen--) { |
|
2849 putchar(CHARREF(s)); |
|
2850 CHARINC(s); |
|
2851 } |
|
2852 } else if (op == P_ANYOF || op == P_ANYBUT) { |
|
2853 while (*s != '\0') { |
|
2854 if (itok(*s)) { |
|
2855 if (*s == Meta + PP_RANGE) { |
|
2856 s++; |
|
2857 printf("<RANGE:%c-", UNMETA(s)); |
|
2858 METAINC(s); |
|
2859 printf("%c>", UNMETA(s)); |
|
2860 } else { |
|
2861 printf("<TYPE:%d>", *s - Meta); |
|
2862 s++; |
|
2863 continue; |
|
2864 } |
|
2865 } else |
|
2866 putchar(UNMETA(s)); |
|
2867 METAINC(s); |
|
2868 } |
|
2869 } else if (op == P_NUMRNG || op == P_NUMFROM || op == P_NUMTO) { |
|
2870 printf("%lu", (unsigned long)*(zrange_t *)s); |
|
2871 s += sizeof(zrange_t); |
|
2872 if (op == P_NUMRNG) { |
|
2873 printf("-%lu", (unsigned long)*(zrange_t *)s); |
|
2874 s += sizeof(zrange_t); |
|
2875 } |
|
2876 } else if (op == P_GFLAGS) { |
|
2877 printf("%ld, %ld", (++up)->l & ~0xff, (++up)->l & 0xff); |
|
2878 s += sizeof(union upat); |
|
2879 } else if (op == P_WBRANCH || op == P_EXCLUDE || |
|
2880 op == P_EXCLUDP) { |
|
2881 s += sizeof(union upat); |
|
2882 } |
|
2883 putchar('\n'); |
|
2884 s = base + (((s - base) + sizeof(union upat) - 1) & |
|
2885 ~(sizeof(union upat) - 1)); |
|
2886 } |
|
2887 } |
|
2888 |
|
2889 printf("Total size = %ld\n", r->size); |
|
2890 if (r->patstartch) |
|
2891 printf("start `%c' ", r->patstartch); |
|
2892 if (!(r->flags & PAT_NOANCH)) |
|
2893 printf("EOL-anchor "); |
|
2894 if (r->patnpar) |
|
2895 printf("%d active backreferences ", r->patnpar); |
|
2896 if (r->mustoff) |
|
2897 printf("must have \"%s\"", (char *)r + r->mustoff); |
|
2898 printf("\n"); |
|
2899 if (r->globflags) { |
|
2900 printf("Globbing flags: "); |
|
2901 if (r->globflags & GF_LCMATCHUC) |
|
2902 printf("LC matches UC "); |
|
2903 if (r->globflags & GF_IGNCASE) |
|
2904 printf("Ignore case"); |
|
2905 printf("\n"); |
|
2906 if (r->globflags & 0xff) |
|
2907 printf("Max errors = %d\n", r->globflags & 0xff); |
|
2908 } |
|
2909 } |
|
2910 |
|
2911 /**/ |
|
2912 static char * |
|
2913 patprop(Upat op) |
|
2914 { |
|
2915 char *p = NULL; |
|
2916 static char buf[50]; |
|
2917 |
|
2918 strcpy(buf, ":"); |
|
2919 |
|
2920 switch(P_OP(op)) { |
|
2921 case P_ANY: |
|
2922 p = "ANY"; |
|
2923 break; |
|
2924 case P_ANYOF: |
|
2925 p = "ANYOF"; |
|
2926 break; |
|
2927 case P_ANYBUT: |
|
2928 p = "ANYBUT"; |
|
2929 break; |
|
2930 case P_BRANCH: |
|
2931 p = "BRANCH"; |
|
2932 break; |
|
2933 case P_WBRANCH: |
|
2934 p = "WBRANCH"; |
|
2935 break; |
|
2936 case P_EXCLUDE: |
|
2937 p = "EXCLUDE"; |
|
2938 break; |
|
2939 case P_EXCLUDP: |
|
2940 p = "EXCLUDP"; |
|
2941 break; |
|
2942 case P_EXCSYNC: |
|
2943 p = "EXCSYNC"; |
|
2944 break; |
|
2945 case P_EXCEND: |
|
2946 p = "EXCEND"; |
|
2947 break; |
|
2948 case P_EXACTLY: |
|
2949 p = "EXACTLY"; |
|
2950 break; |
|
2951 case P_GFLAGS: |
|
2952 p = "GFLAGS"; |
|
2953 break; |
|
2954 case P_ISSTART: |
|
2955 p = "ISSTART"; |
|
2956 break; |
|
2957 case P_ISEND: |
|
2958 p = "ISEND"; |
|
2959 break; |
|
2960 case P_NOTHING: |
|
2961 p = "NOTHING"; |
|
2962 break; |
|
2963 case P_BACK: |
|
2964 p = "BACK"; |
|
2965 break; |
|
2966 case P_END: |
|
2967 p = "END"; |
|
2968 break; |
|
2969 case P_OPEN: |
|
2970 case P_OPEN+1: |
|
2971 case P_OPEN+2: |
|
2972 case P_OPEN+3: |
|
2973 case P_OPEN+4: |
|
2974 case P_OPEN+5: |
|
2975 case P_OPEN+6: |
|
2976 case P_OPEN+7: |
|
2977 case P_OPEN+8: |
|
2978 case P_OPEN+9: |
|
2979 sprintf(buf+strlen(buf), "OPEN%ld", P_OP(op)-P_OPEN); |
|
2980 p = NULL; |
|
2981 break; |
|
2982 case P_CLOSE: |
|
2983 case P_CLOSE+1: |
|
2984 case P_CLOSE+2: |
|
2985 case P_CLOSE+3: |
|
2986 case P_CLOSE+4: |
|
2987 case P_CLOSE+5: |
|
2988 case P_CLOSE+6: |
|
2989 case P_CLOSE+7: |
|
2990 case P_CLOSE+8: |
|
2991 case P_CLOSE+9: |
|
2992 sprintf(buf+strlen(buf), "CLOSE%ld", P_OP(op)-P_CLOSE); |
|
2993 p = NULL; |
|
2994 break; |
|
2995 case P_STAR: |
|
2996 p = "STAR"; |
|
2997 break; |
|
2998 case P_ONEHASH: |
|
2999 p = "ONEHASH"; |
|
3000 break; |
|
3001 case P_TWOHASH: |
|
3002 p = "TWOHASH"; |
|
3003 break; |
|
3004 case P_NUMRNG: |
|
3005 p = "NUMRNG"; |
|
3006 break; |
|
3007 case P_NUMFROM: |
|
3008 p = "NUMFROM"; |
|
3009 break; |
|
3010 case P_NUMTO: |
|
3011 p = "NUMTO"; |
|
3012 break; |
|
3013 case P_NUMANY: |
|
3014 p = "NUMANY"; |
|
3015 break; |
|
3016 default: |
|
3017 fprintf(stderr, "Bad opcode\n"); |
|
3018 p = NULL; |
|
3019 break; |
|
3020 } |
|
3021 if (p) |
|
3022 strcat(buf, p); |
|
3023 return buf; |
|
3024 } |
|
3025 #ifndef __SYMBIAN32__ |
|
3026 /**/ |
|
3027 int |
|
3028 bin_patdebug(char *name, char **args, char *ops, int func) |
|
3029 { |
|
3030 Patprog prog; |
|
3031 int ret = 0; |
|
3032 |
|
3033 tokenize(*args); |
|
3034 |
|
3035 if (!(prog = patcompile((char *)*args, 0, 0))) |
|
3036 return 1; |
|
3037 if (ops['p'] || !args[1]) { |
|
3038 patdump(prog); |
|
3039 } |
|
3040 |
|
3041 while (*++args) { |
|
3042 if (!pattry(prog, (char *)*args)) |
|
3043 ret++; |
|
3044 } |
|
3045 return ret; |
|
3046 } |
|
3047 #endif //#ifndef __SYMBIAN32__ |
|
3048 /**/ |
|
3049 #endif /* ZSH_PAT_DEBUG */ |