openenvutils/commandshell/shell/src/pattern.c
changeset 0 2e3d3ce01487
child 4 0fdb7f6b0309
equal deleted inserted replaced
-1:000000000000 0:2e3d3ce01487
       
     1 //
       
     2 // © Portions Copyright (c) Symbian Software Ltd 2007. All rights reserved.
       
     3 //
       
     4 /*
       
     5  * pattern.c - pattern matching
       
     6  *
       
     7  * This file is part of zsh, the Z shell.
       
     8  *
       
     9  * Copyright (c) 1999 Peter Stephenson
       
    10  * All rights reserved.
       
    11  *
       
    12  * Permission is hereby granted, without written agreement and without
       
    13  * license or royalty fees, to use, copy, modify, and distribute this
       
    14  * software and to distribute modified versions of this software for any
       
    15  * purpose, provided that the above copyright notice and the following
       
    16  * two paragraphs appear in all copies of this software.
       
    17  *
       
    18  * In no event shall Peter Stephenson or the Zsh Development Group be liable
       
    19  * to any party for direct, indirect, special, incidental, or consequential
       
    20  * damages arising out of the use of this software and its documentation,
       
    21  * even if Peter Stephenson and the Zsh Development Group have been advised of
       
    22  * the possibility of such damage.
       
    23  *
       
    24  * Peter Stephenson and the Zsh Development Group specifically disclaim any
       
    25  * warranties, including, but not limited to, the implied warranties of
       
    26  * merchantability and fitness for a particular purpose.  The software
       
    27  * provided hereunder is on an "as is" basis, and Peter Stephenson and the
       
    28  * Zsh Development Group have no obligation to provide maintenance,
       
    29  * support, updates, enhancements, or modifications.
       
    30  *
       
    31  * Pattern matching code derived from the regexp library by Henry
       
    32  * Spencer, which has the following copyright.
       
    33  *
       
    34  *	Copyright (c) 1986 by University of Toronto.
       
    35  *	Written by Henry Spencer.  Not derived from licensed software.
       
    36  *
       
    37  *	Permission is granted to anyone to use this software for any
       
    38  *	purpose on any computer system, and to redistribute it freely,
       
    39  *	subject to the following restrictions:
       
    40  *
       
    41  *	1. The author is not responsible for the consequences of use of
       
    42  *		this software, no matter how awful, even if they arise
       
    43  *		from defects in it.
       
    44  *
       
    45  *	2. The origin of this software must not be misrepresented, either
       
    46  *		by explicit claim or by omission.
       
    47  *
       
    48  *	3. Altered versions must be plainly marked as such, and must not
       
    49  *		be misrepresented as being the original software.
       
    50  *
       
    51  * Eagle-eyed readers will notice this is an altered version.  Incredibly
       
    52  * sharp-eyed readers might even find bits that weren't altered.
       
    53  *
       
    54  *
       
    55  *      And I experienced a sense that, like certain regular
       
    56  *      expressions, seemed to match the day from beginning to end, so
       
    57  *      that I did not need to identify the parenthesised subexpression
       
    58  *      that told of dawn, nor the group of characters that indicated
       
    59  *      the moment when my grandfather returned home with news of
       
    60  *      Swann's departure for Paris; and the whole length of the month
       
    61  *      of May, as if matched by a closure, fitted into the buffer of my
       
    62  *      life with no sign of overflowing, turning the days, like a
       
    63  *      procession of insects that could consist of this or that
       
    64  *      species, into a random and unstructured repetition of different
       
    65  *      sequences, anchored from the first day of the month to the last
       
    66  *      in the same fashion as the weeks when I knew I would not see
       
    67  *      Gilberte and would search in vain for any occurrences of the
       
    68  *      string in the avenue of hawthorns by Tansonville, without my
       
    69  *      having to delimit explicitly the start or finish of the pattern.
       
    70  *
       
    71  *                                 M. Proust, "In Search of Lost Files",
       
    72  *                                 bk I, "The Walk by Bourne's Place".
       
    73  */
       
    74 
       
    75 #include "zsh.mdh"
       
    76 
       
    77 /*
       
    78  * The following union is used mostly for alignment purposes.
       
    79  * Normal nodes are longs, while certain nodes take a char * as an argument;
       
    80  * here we make sure that they both work out to the same length.
       
    81  * The compiled regexp we construct consists of upats stuck together;
       
    82  * anything else to be added (strings, numbers) is stuck after and
       
    83  * then aligned to a whole number of upat units.
       
    84  *
       
    85  * Note also that offsets are in terms of the sizes of these things.
       
    86  */
       
    87 union upat {
       
    88     long l;
       
    89     unsigned char *p;
       
    90 };
       
    91 
       
    92 typedef union upat *Upat;
       
    93 
       
    94 #include "pattern.pro"
       
    95 
       
    96 /* Number of active parenthesized expressions allowed in backreferencing */
       
    97 #define NSUBEXP  9
       
    98 
       
    99 /* definition	number	opnd?	meaning */
       
   100 #define	P_END	  0x00	/* no	End of program. */
       
   101 #define P_EXCSYNC 0x01	/* no   Test if following exclude already failed */
       
   102 #define P_EXCEND  0x02	/* no   Test if exclude matched orig branch */
       
   103 #define	P_BACK	  0x03	/* no	Match "", "next" ptr points backward. */
       
   104 #define	P_EXACTLY 0x04	/* lstr	Match this string. */
       
   105 #define	P_NOTHING 0x05	/* no	Match empty string. */
       
   106 #define	P_ONEHASH 0x06	/* node	Match this (simple) thing 0 or more times. */
       
   107 #define	P_TWOHASH 0x07	/* node	Match this (simple) thing 1 or more times. */
       
   108 #define P_GFLAGS  0x08	/* long Match nothing and set globbing flags */
       
   109 #define P_ISSTART 0x09  /* no   Match start of string. */
       
   110 #define P_ISEND   0x0a  /* no   Match end of string. */
       
   111 /* numbered so we can test bit 5 for a branch */
       
   112 #define	P_BRANCH  0x20	/* node	Match this alternative, or the next... */
       
   113 #define	P_WBRANCH 0x21	/* uc* node P_BRANCH, but match at least 1 char */
       
   114 /* excludes are also branches, but have bit 4 set, too */
       
   115 #define P_EXCLUDE 0x30	/* uc* node Exclude this from previous branch */
       
   116 #define P_EXCLUDP 0x31	/* uc* node Exclude, using full file path so far */
       
   117 /* numbered so we can test bit 6 so as not to match initial '.' */
       
   118 #define	P_ANY	  0x40	/* no	Match any one character. */
       
   119 #define	P_ANYOF	  0x41	/* str  Match any character in this string. */
       
   120 #define	P_ANYBUT  0x42	/* str  Match any character not in this string. */
       
   121 #define P_STAR    0x43	/* no   Match any set of characters. */
       
   122 #define P_NUMRNG  0x44	/* zr, zr Match a numeric range. */
       
   123 #define P_NUMFROM 0x45	/* zr   Match a number >= X */
       
   124 #define P_NUMTO   0x46	/* zr   Match a number <= X */
       
   125 #define P_NUMANY  0x47	/* no   Match any set of decimal digits */
       
   126 /* spaces left for P_OPEN+n,... for backreferences */
       
   127 #define	P_OPEN	  0x80	/* no	Mark this point in input as start of n. */
       
   128 #define	P_CLOSE	  0x90	/* no	Analogous to OPEN. */
       
   129 /*
       
   130  * no    no argument
       
   131  * zr    the range type zrange_t:  may be zlong or unsigned long
       
   132  * char  a single char
       
   133  * uc*   a pointer to unsigned char, used at run time and initialised
       
   134  *       to NULL.
       
   135  * str   null-terminated, metafied string
       
   136  * lstr  length as long then string, not null-terminated, unmetafied.
       
   137  */
       
   138 
       
   139 /*
       
   140  * Notes on usage:
       
   141  * P_WBRANCH:  This works like a branch and is used in complex closures,
       
   142  *    to ensure we don't succeed on a zero-length match of the pattern,
       
   143  *    since that would cause an infinite loop.  We do this by recording
       
   144  *    the positions where we have already tried to match.   See the
       
   145  *    P_WBRANCH test in patmatch().
       
   146  *
       
   147  *  P_ANY, P_ANYOF:  the operand is a null terminated
       
   148  *    string.  Normal characters match as expected.  Characters
       
   149  *    in the range Meta+PP_ALPHA..Meta+PP_UNKNWN do the appropriate
       
   150  *    Posix range tests.  This relies on imeta returning true for these
       
   151  *    characters.  We treat unknown POSIX ranges as never matching.
       
   152  *    PP_RANGE means the next two (possibly metafied) characters form
       
   153  *    the limits of a range to test; it's too much like hard work to
       
   154  *    expand the range.
       
   155  *
       
   156  *  P_EXCLUDE, P_EXCSYNC, PEXCEND:  P_EXCLUDE appears in the pattern like
       
   157  *    P_BRANCH, but applies to the immediately preceding branch.  The code in
       
   158  *    the corresponding branch is followed by a P_EXCSYNC, which simply
       
   159  *    acts as a marker that a P_EXCLUDE comes next.  The P_EXCLUDE
       
   160  *    has a pointer to char embeded in it, which works
       
   161  *    like P_WBRANCH:  if we get to the P_EXCSYNC, and we already matched
       
   162  *    up to the same position, fail.  Thus we are forced to backtrack
       
   163  *    on closures in the P_BRANCH if the first attempt was excluded.
       
   164  *    Corresponding to P_EXCSYNC in the original branch, there is a
       
   165  *    P_EXCEND in the exclusion.  If we get to this point, and we did
       
   166  *    *not* match in the original branch, the exclusion itself fails,
       
   167  *    otherwise it succeeds since we know the tail already matches,
       
   168  *    so P_EXCEND is the end of the exclusion test.
       
   169  *    The whole sorry mess looks like this, where the upper lines
       
   170  *    show the linkage of the branches, and the lower shows the linkage
       
   171  *    of their pattern arguments.
       
   172  *
       
   173  *     	        ---------------------      ----------------------
       
   174  *              ^      	       	     v    ^      	         v
       
   175  *      ( <BRANCH>:apat-><EXCSYNC> <EXCLUDE>:excpat-><EXCEND> ) tail
       
   176  *                               	                         ^
       
   177  *		       	  |                                      |
       
   178  *			   --------------------------------------
       
   179  *
       
   180  * P_EXCLUDP: this behaves exactly like P_EXCLUDE, with the sole exception
       
   181  *   that we prepend the path so far to the exclude pattern.   This is
       
   182  *   for top level file globs, e.g. ** / *.c~*foo.c
       
   183  *                                    ^ I had to leave this space
       
   184  * P_NUM*: zl is a zlong if that is 64-bit, else an unsigned long.
       
   185  */
       
   186 #define PP_ALPHA  1
       
   187 #define PP_ALNUM  2
       
   188 #define PP_ASCII  3
       
   189 #define PP_BLANK  4
       
   190 #define PP_CNTRL  5
       
   191 #define PP_DIGIT  6
       
   192 #define PP_GRAPH  7
       
   193 #define PP_LOWER  8
       
   194 #define PP_PRINT  9
       
   195 #define PP_PUNCT  10
       
   196 #define PP_SPACE  11
       
   197 #define PP_UPPER  12
       
   198 #define PP_XDIGIT 13
       
   199 #define PP_UNKWN  14
       
   200 #define PP_RANGE  15
       
   201 
       
   202 #define	P_OP(p)		((p)->l & 0xff)
       
   203 #define	P_NEXT(p)	((p)->l >> 8)
       
   204 #define	P_OPERAND(p)	((p) + 1)
       
   205 #define P_ISBRANCH(p)   ((p)->l & 0x20)
       
   206 #define P_ISEXCLUDE(p)	(((p)->l & 0x30) == 0x30)
       
   207 #define P_NOTDOT(p)	((p)->l & 0x40)
       
   208 
       
   209 /* Specific to lstr type, i.e. P_EXACTLY. */
       
   210 #define P_LS_LEN(p)	((p)[1].l) /* can be used as lvalue */
       
   211 #define P_LS_STR(p)	((char *)((p) + 2))
       
   212 
       
   213 /* Flags needed when pattern is executed */
       
   214 #define P_SIMPLE        0x01	/* Simple enough to be #/## operand. */
       
   215 #define P_HSTART        0x02	/* Starts with # or ##'d pattern. */
       
   216 #define P_PURESTR	0x04	/* Can be matched with a strcmp */
       
   217 
       
   218 /*
       
   219  * Increment pointer which may be on a Meta (x is a pointer variable),
       
   220  * returning the incremented value (i.e. like pre-increment).
       
   221  *
       
   222  * In future the following will need to refer to metafied multibyte
       
   223  * characters.  References to invidual characters are not turned
       
   224  * into a macro when the characters is metafied (c.f. CHARREF()
       
   225  * below then the character is not metafied) and will need tracking
       
   226  * down.
       
   227  */
       
   228 #define METAINC(x)	((x) += (*(x) == Meta) ? 2 : 1)
       
   229 /*
       
   230  * Return unmetafied char from string (x is any char *)
       
   231  */
       
   232 #define UNMETA(x)	(*(x) == Meta ? (x)[1] ^ 32 : *(x))
       
   233 
       
   234 #if defined(ZSH_64_BIT_TYPE) || defined(LONG_IS_64_BIT)
       
   235 typedef zlong zrange_t;
       
   236 #define ZRANGE_T_IS_SIGNED	(1)
       
   237 #else
       
   238 typedef unsigned long zrange_t;
       
   239 #endif
       
   240 
       
   241 /*
       
   242  * Characters which terminate a pattern segment.  We actually use
       
   243  * a pointer patendseg which skips the first character if we are not
       
   244  * parsing a file pattern.
       
   245  * Note that the size of this and the next array are hard-wired
       
   246  * via the definitions.
       
   247  */
       
   248 
       
   249 static char endseg[] = {
       
   250     '/',			/* file only */
       
   251     '\0', Bar, Outpar,		/* all patterns */
       
   252     Tilde			/* extended glob only */
       
   253 };
       
   254 
       
   255 #define PATENDSEGLEN_NORM 4
       
   256 #define PATENDSEGLEN_EXT  5
       
   257 
       
   258 /* Characters which terminate a simple string */
       
   259 
       
   260 static char endstr[] = {
       
   261     '/',			/* file only */
       
   262     '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang,
       
   263 				/* all patterns */
       
   264     Tilde, Hat, Pound		/* extended glob only */
       
   265 };
       
   266 
       
   267 #define PATENDSTRLEN_NORM 9
       
   268 #define PATENDSTRLEN_EXT  12
       
   269 
       
   270 
       
   271 /* Default size for pattern buffer */
       
   272 #define P_DEF_ALLOC 256
       
   273 
       
   274 /* Flags used in compilation */
       
   275 static char *patstart, *patparse;	/* input pointers */
       
   276 static int patnpar;		/* () count */
       
   277 static char *patcode;		/* point of code emission */
       
   278 static long patsize;		/* size of code */
       
   279 static char *patout;		/* start of code emission string */
       
   280 static long patalloc;		/* size allocated for same */
       
   281 static char *patendseg;		/* characters ending segment */
       
   282 static int patendseglen;	/* length of same */
       
   283 static char *patendstr;		/* characters ending plain string */
       
   284 static int patendstrlen;	/* length of sameo */
       
   285 
       
   286 /* Flags used in both compilation and execution */
       
   287 static int patflags;		    /* flags passed down to patcompile */
       
   288 static int patglobflags;  /* globbing flags & approx */
       
   289 
       
   290 /* Add n more characters, ensuring there is enough space. */
       
   291 
       
   292 enum {
       
   293     PA_NOALIGN = 1,
       
   294     PA_UNMETA  = 2
       
   295 };
       
   296 
       
   297 /**/
       
   298 static void
       
   299 patadd(char *add, int ch, long n, int paflags)
       
   300 {
       
   301     /* Make sure everything gets aligned unless we get PA_NOALIGN. */
       
   302     long newpatsize = patsize + n;
       
   303     if (!(paflags & PA_NOALIGN))
       
   304 	newpatsize = (newpatsize + sizeof(union upat) - 1) &
       
   305 		      ~(sizeof(union upat) - 1);
       
   306     if (patalloc < newpatsize) {
       
   307 	long newpatalloc =
       
   308 	    2*(newpatsize > patalloc ? newpatsize : patalloc);
       
   309 	patout = (char *)zrealloc((char *)patout, newpatalloc);
       
   310 	patcode = patout + patsize;
       
   311 	patalloc = newpatalloc;
       
   312     }
       
   313     patsize = newpatsize;
       
   314     if (add) {
       
   315 	if (paflags & PA_UNMETA) {
       
   316 	    /*
       
   317 	     * Unmetafy and untokenize the string as we go.
       
   318 	     * The Meta characters in add aren't counted in n.
       
   319 	     */
       
   320 	    while (n--) {
       
   321 		if (itok(*add))
       
   322 		    *patcode++ = ztokens[*add++ - Pound];
       
   323 		else if (*add == Meta) {
       
   324 		    add++;
       
   325 		    *patcode++ = *add++ ^ 32;
       
   326 		} else {
       
   327 		    *patcode++ = *add++;
       
   328 		}
       
   329 	    }
       
   330 	} else {
       
   331 	    while (n--)
       
   332 		*patcode++ = *add++;
       
   333 	}
       
   334     } else
       
   335 	*patcode++ = ch;
       
   336     patcode = patout + patsize;
       
   337 }
       
   338 
       
   339 static long rn_offs;
       
   340 /* operates on pointers to union upat, returns a pointer */
       
   341 #define PATNEXT(p) ((rn_offs = P_NEXT(p)) ? \
       
   342 		    (P_OP(p) == P_BACK) ? \
       
   343 		    ((p)-rn_offs) : ((p)+rn_offs) : NULL)
       
   344 
       
   345 /* Called before parsing a set of file matchs to initialize flags */
       
   346 
       
   347 /**/
       
   348 void
       
   349 patcompstart(void)
       
   350 {
       
   351     if (isset(CASEGLOB))
       
   352 	patglobflags = 0;
       
   353     else
       
   354 	patglobflags = GF_IGNCASE;
       
   355 }
       
   356 
       
   357 /*
       
   358  * Top level pattern compilation subroutine
       
   359  * exp is a null-terminated, metafied string.
       
   360  * inflags is an or of some PAT_* flags.
       
   361  * endexp, if non-null, is set to a pointer to the end of the
       
   362  *   part of exp which was compiled.  This is used when
       
   363  *   compiling patterns for directories which must be
       
   364  *   matched recursively.
       
   365  */
       
   366 
       
   367 /**/
       
   368 mod_export Patprog
       
   369 patcompile(char *exp, int inflags, char **endexp)
       
   370 {
       
   371     int flags = 0;
       
   372     long len = 0;
       
   373     long startoff;
       
   374     Upat pscan;
       
   375     Upat ptemp;
       
   376     char *lng, *strp = NULL;
       
   377     Patprog p;
       
   378 
       
   379     startoff = sizeof(struct patprog);
       
   380     /* Ensure alignment of start of program string */
       
   381     startoff = (startoff + sizeof(union upat) - 1) & ~(sizeof(union upat) - 1);
       
   382 
       
   383     /* Allocate reasonable sized chunk if none, reduce size if too big */
       
   384     if (patalloc != P_DEF_ALLOC)
       
   385 	patout = (char *)zrealloc(patout, patalloc = P_DEF_ALLOC);
       
   386     patcode = patout + startoff;
       
   387     patsize = patcode - patout;
       
   388     patstart = patparse = exp;
       
   389     /*
       
   390      * Note global patnpar numbers parentheses 1..9, while patnpar
       
   391      * in struct is actual count of parentheses.
       
   392      */
       
   393     patnpar = 1;
       
   394     patflags = inflags & ~(PAT_PURES|PAT_HAS_EXCLUDP);
       
   395 
       
   396     patendseg = endseg;
       
   397     patendseglen = isset(EXTENDEDGLOB) ? PATENDSEGLEN_EXT : PATENDSEGLEN_NORM;
       
   398     patendstr = endstr;
       
   399     patendstrlen = isset(EXTENDEDGLOB) ? PATENDSTRLEN_EXT : PATENDSTRLEN_NORM;
       
   400 
       
   401     if (!(patflags & PAT_FILE)) {
       
   402 	patendseg++;
       
   403 	patendstr++;
       
   404 	patendseglen--;
       
   405 	patendstrlen--;
       
   406 	remnulargs(patparse);
       
   407 	patglobflags = 0;
       
   408     }
       
   409     /*
       
   410      * Have to be set now, since they get updated during compilation.
       
   411      */
       
   412     ((Patprog)patout)->globflags = patglobflags;
       
   413 
       
   414     if (!(patflags & PAT_ANY)) {
       
   415 	/* Look for a really pure string, with no tokens at all. */
       
   416 	if (!patglobflags
       
   417 #ifdef __CYGWIN__
       
   418 	    /*
       
   419 	     * If the OS treats files case-insensitively and we
       
   420 	     * are looking at files, we don't need to use pattern
       
   421 	     * matching to find the file.
       
   422 	     */
       
   423 	    || (!(patglobflags & ~GF_IGNCASE) && (patflags & PAT_FILE))
       
   424 #endif
       
   425 	    )
       
   426 	{
       
   427 	    /*
       
   428 	     * Waah!  I wish I understood this.
       
   429 	     * Empty metafied strings have an initial Nularg.
       
   430 	     * This never corresponds to a real character in
       
   431 	     * a glob pattern or string, so skip it.
       
   432 	     */
       
   433 	    if (*exp == Nularg)
       
   434 		exp++;
       
   435 	    for (strp = exp; *strp &&
       
   436 		     (!(patflags & PAT_FILE) || *strp != '/') && !itok(*strp);
       
   437 		 strp++)
       
   438 		;
       
   439 	}
       
   440 	if (!strp || (*strp && *strp != '/')) {
       
   441 	    /* No, do normal compilation. */
       
   442 	    strp = NULL;
       
   443 	    if (patcompswitch(0, &flags) == 0)
       
   444 		return NULL;
       
   445 	} else {
       
   446 	    /*
       
   447 	     * Yes, copy the string, and skip compilation altogether.
       
   448 	     * Null terminate for the benefit of globbing.
       
   449 	     * Leave metafied both for globbing and for our own
       
   450 	     * efficiency.
       
   451 	     */
       
   452 	    patparse = strp;
       
   453 	    len = strp - exp;
       
   454 	    patadd(exp, 0, len + 1, 0);
       
   455 	    patout[startoff + len] = '\0';
       
   456 	    patflags |= PAT_PURES;
       
   457 	}
       
   458     }
       
   459 
       
   460     /* end of compilation: safe to use pointers */
       
   461     p = (Patprog)patout;
       
   462     p->startoff = startoff;
       
   463     p->patstartch = '\0';
       
   464     p->globend = patglobflags;
       
   465     p->flags = patflags;
       
   466     p->mustoff = 0;
       
   467     p->size = patsize;
       
   468     p->patmlen = len;
       
   469     p->patnpar = patnpar-1;
       
   470 
       
   471     if (!strp) {
       
   472 	pscan = (Upat)(patout + startoff);
       
   473     
       
   474 	ptemp=PATNEXT(pscan);	
       
   475 	if (!(patflags & PAT_ANY) && (P_OP(ptemp) == P_END)) {
       
   476 	    /* only one top level choice */
       
   477 	    pscan = P_OPERAND(pscan);
       
   478 
       
   479 	    if (flags & P_PURESTR) {
       
   480 		/*
       
   481 		 * The pattern can be matched with a simple strncmp/strcmp.
       
   482 		 * Careful in case we've overwritten the node for the next ptr.
       
   483 		 */
       
   484 		char *dst = patout + startoff;
       
   485 		Upat next;
       
   486 		p->flags |= PAT_PURES;
       
   487 		for (; pscan; pscan = next) {
       
   488 		    next = PATNEXT(pscan);
       
   489 		    if (P_OP(pscan) == P_EXACTLY) {
       
   490 			char *opnd = P_LS_STR(pscan), *mtest;
       
   491 			long oplen = P_LS_LEN(pscan), ilen;
       
   492 			int nmeta = 0;
       
   493 			/*
       
   494 			 * Unfortunately we unmetafied the string
       
   495 			 * and we need to put any metacharacters
       
   496 			 * back now we know it's a pure string.
       
   497 			 * This shouldn't happen too often, it's
       
   498 			 * just that there are some cases such
       
   499 			 * as . and .. in files where we really
       
   500 			 * need a pure string even if there are
       
   501 			 * pattern characters flying around.
       
   502 			 */
       
   503 			for (mtest = opnd, ilen = oplen; ilen;
       
   504 			     mtest++, ilen--)
       
   505 			    if (imeta(*mtest))
       
   506 				nmeta++;
       
   507 			if (nmeta) {
       
   508 			    char *oldpatout = patout;
       
   509 			    patadd(NULL, 0, nmeta, 0);
       
   510 			    /*
       
   511 			     * Yuk.
       
   512 			     */
       
   513 			    p = (Patprog)patout;
       
   514 			    opnd = patout + (opnd - oldpatout);
       
   515 			    dst = patout + startoff;
       
   516 			}
       
   517 
       
   518 			while (oplen--) {
       
   519 			    if (imeta(*opnd)) {
       
   520 				*dst++ = Meta;
       
   521 				*dst++ = *opnd ^ 32;
       
   522 			    } else {
       
   523 				*dst++ = *opnd++;
       
   524 			    }
       
   525 			}
       
   526 		    }
       
   527 		}
       
   528 		p->size = dst - patout;
       
   529 		/* patmlen is really strlen.  We don't need a null. */
       
   530 		p->patmlen = p->size - startoff;
       
   531 	    } else {
       
   532 		/* starting point info */
       
   533 		if (P_OP(pscan) == P_EXACTLY && !p->globflags &&
       
   534 		    P_LS_LEN(pscan))
       
   535 		    p->patstartch = *P_LS_STR(pscan);
       
   536 		/*
       
   537 		 * Find the longest literal string in something expensive.
       
   538 		 * This is itself not all that cheap if we have
       
   539 		 * case-insensitive matching or approximation, so don't.
       
   540 		 */
       
   541 		if ((flags & P_HSTART) && !p->globflags) {
       
   542 		    lng = NULL;
       
   543 		    len = 0;
       
   544 		    for (; pscan; pscan = PATNEXT(pscan))
       
   545 			if (P_OP(pscan) == P_EXACTLY &&
       
   546 			    P_LS_LEN(pscan) >= len) {
       
   547 			    lng = P_LS_STR(pscan);
       
   548 			    len = P_LS_LEN(pscan);
       
   549 			}
       
   550 		    if (lng) {
       
   551 			p->mustoff = lng - patout;
       
   552 			p->patmlen = len;
       
   553 		    }
       
   554 		}
       
   555 	    }
       
   556 	}
       
   557     }
       
   558 
       
   559     /*
       
   560      * The pattern was compiled in a fixed buffer:  unless told otherwise,
       
   561      * we stick the compiled pattern on the heap.  This is necessary
       
   562      * for files where we will often be compiling multiple segments at once.
       
   563      * But if we get the ZDUP flag we always put it in zalloc()ed memory.
       
   564      */
       
   565     if (patflags & PAT_ZDUP) {
       
   566 	Patprog newp = (Patprog)zalloc(patsize);
       
   567 	memcpy((char *)newp, (char *)p, patsize);
       
   568 	p = newp;
       
   569     } else if (!(patflags & PAT_STATIC)) {
       
   570 	Patprog newp = (Patprog)zhalloc(patsize);
       
   571 	memcpy((char *)newp, (char *)p, patsize);
       
   572 	p = newp;
       
   573     }
       
   574 
       
   575     if (endexp)
       
   576 	*endexp = patparse;
       
   577     return p;
       
   578 }
       
   579 
       
   580 /*
       
   581  * Main body or parenthesized subexpression in pattern
       
   582  * Parenthesis (and any ksh_glob gubbins) will have been removed.
       
   583  */
       
   584 
       
   585 /**/
       
   586 static long
       
   587 patcompswitch(int paren, int *flagp)
       
   588 {
       
   589     long starter, br, ender, excsync = 0;
       
   590     int parno = 0;
       
   591     int flags, gfchanged = 0, savglobflags = patglobflags;
       
   592     Upat ptr;
       
   593 
       
   594     *flagp = 0;
       
   595 
       
   596     if (paren && (patglobflags & GF_BACKREF) && patnpar <= NSUBEXP) {
       
   597 	/*
       
   598 	 * parenthesized:  make an open node.
       
   599 	 * We can only refer to the first nine parentheses.
       
   600 	 * For any others, we just use P_OPEN on its own; there's
       
   601 	 * no gain in arbitrarily limiting the number of parentheses.
       
   602 	 */
       
   603 	parno = patnpar++;
       
   604 	starter = patnode(P_OPEN + parno);
       
   605     } else
       
   606 	starter = 0;
       
   607 
       
   608     br = patnode(P_BRANCH);
       
   609     if (!patcompbranch(&flags))
       
   610 	return 0;
       
   611     if (patglobflags != savglobflags)
       
   612 	gfchanged++;
       
   613     if (starter)
       
   614 	pattail(starter, br);
       
   615     else
       
   616 	starter = br;
       
   617 
       
   618     *flagp |= flags & (P_HSTART|P_PURESTR);
       
   619 
       
   620     while (*patparse == Bar ||
       
   621 	   (isset(EXTENDEDGLOB) && *patparse == Tilde &&
       
   622 	    (patparse[1] == '/' ||
       
   623 	     !memchr(patendseg, patparse[1], patendseglen)))) {
       
   624 	int tilde = *patparse++ == Tilde;
       
   625 	long gfnode = 0, newbr;
       
   626 
       
   627 	*flagp &= ~P_PURESTR;
       
   628 
       
   629 	if (tilde) {
       
   630 	    union upat up;
       
   631 	    /* excsync remembers the P_EXCSYNC node before a chain of
       
   632 	     * exclusions:  all point back to this.  only the
       
   633 	     * original (non-excluded) branch gets a trailing P_EXCSYNC.
       
   634 	     */
       
   635 	    if (!excsync) {
       
   636 		excsync = patnode(P_EXCSYNC);
       
   637 		patoptail(br, excsync);
       
   638 	    }
       
   639 	    /*
       
   640 	     * By default, approximations are turned off in exclusions:
       
   641 	     * we need to do this here as otherwise the code compiling
       
   642 	     * the exclusion doesn't know if the flags have really
       
   643 	     * changed if the error count gets restored.
       
   644 	     */
       
   645 	    patglobflags &= ~0xff;
       
   646 	    if (!(patflags & PAT_FILET) || paren) {
       
   647 		br = patnode(P_EXCLUDE);
       
   648 	    } else {
       
   649 		/*
       
   650 		 * At top level (paren == 0) in a file glob !(patflags
       
   651 		 * &PAT_FILET) do the exclusion prepending the file path
       
   652 		 * so far.  We need to flag this to avoid unnecessarily
       
   653 		 * copying the path.
       
   654 		 */
       
   655 		br = patnode(P_EXCLUDP);
       
   656 		patflags |= PAT_HAS_EXCLUDP;
       
   657 	    }
       
   658 	    up.p = NULL;
       
   659 	    patadd((char *)&up, 0, sizeof(up), 0);
       
   660 	    /* / is not treated as special if we are at top level */
       
   661 	    if (!paren && *patendseg == '/') {
       
   662 		tilde++;
       
   663 		patendseg++;
       
   664 		patendseglen--;
       
   665 		patendstr++;
       
   666 		patendstrlen--;
       
   667 	    }
       
   668 	} else {
       
   669 	    excsync = 0;
       
   670 	    br = patnode(P_BRANCH);
       
   671 	    /*
       
   672 	     * The position of the following statements means globflags
       
   673 	     * set in the main branch carry over to the exclusion.
       
   674 	     */
       
   675 	    if (!paren) {
       
   676 		patglobflags = 0;
       
   677 		if (((Patprog)patout)->globflags) {
       
   678 		    /*
       
   679 		     * If at top level, we need to reinitialize flags to zero,
       
   680 		     * since (#i)foo|bar only applies to foo and we stuck
       
   681 		     * the #i into the global flags.
       
   682 		     * We could have done it so that they only got set in the
       
   683 		     * first branch, but it's quite convenient having any
       
   684 		     * global flags set in the header and not buried in the
       
   685 		     * pattern.  (Or maybe it isn't and we should
       
   686 		     * forget this bit and always stick in an explicit GFLAGS
       
   687 		     * statement instead of using the header.)
       
   688 		     * Also, this can't happen for file globs where there are
       
   689 		     * no top-level |'s.
       
   690 		     *
       
   691 		     * No gfchanged, as nothing to follow branch at top
       
   692 		     * level. 
       
   693 		     */
       
   694 		    union upat up;
       
   695 		    gfnode = patnode(P_GFLAGS);
       
   696 		    up.l = patglobflags;
       
   697 		    patadd((char *)&up, 0, sizeof(union upat), 0);
       
   698 		}
       
   699 	    } else {
       
   700 		patglobflags = savglobflags;
       
   701 	    }
       
   702 	}
       
   703 	newbr = patcompbranch(&flags);
       
   704 	if (tilde == 2) {
       
   705 	    /* restore special treatment of / */
       
   706 	    patendseg--;
       
   707 	    patendseglen++;
       
   708 	    patendstr--;
       
   709 	    patendstrlen++;
       
   710 	}
       
   711 	if (!newbr)
       
   712 	    return 0;
       
   713 	if (gfnode)
       
   714 	    pattail(gfnode, newbr);
       
   715 	if (!tilde && patglobflags != savglobflags)
       
   716 	    gfchanged++;
       
   717 	pattail(starter, br);
       
   718 	if (excsync)
       
   719 	    patoptail(br, patnode(P_EXCEND));
       
   720 	*flagp |= flags & P_HSTART;
       
   721     }
       
   722 
       
   723     /*
       
   724      * Make a closing node, hooking it to the end.
       
   725      * Note that we can't optimize P_NOTHING out here, since another
       
   726      * branch at that point would indicate the current choices continue,
       
   727      * which they don't.
       
   728      */
       
   729     ender = patnode(paren ? parno ? P_CLOSE+parno : P_NOTHING : P_END);
       
   730     pattail(starter, ender);
       
   731 
       
   732     /*
       
   733      * Hook the tails of the branches to the closing node,
       
   734      * except for exclusions which terminate where they are.
       
   735      */
       
   736     for (ptr = (Upat)patout + starter; ptr; ptr = PATNEXT(ptr))
       
   737 	if (!P_ISEXCLUDE(ptr))
       
   738 	    patoptail(ptr-(Upat)patout, ender);
       
   739 
       
   740     /* check for proper termination */
       
   741     if ((paren && *patparse++ != Outpar) ||
       
   742 	(!paren && *patparse &&
       
   743 	 !((patflags & PAT_FILE) && *patparse == '/')))
       
   744 	return 0;
       
   745 
       
   746     if (paren && gfchanged) {
       
   747 	/*
       
   748 	 * Restore old values of flags when leaving parentheses.
       
   749 	 * gfchanged detects a change in any branch (except exclusions
       
   750 	 * which are separate), since we need to emit this even if
       
   751 	 * a later branch happened to put the flags back.
       
   752 	 */
       
   753 	pattail(ender, patnode(P_GFLAGS));
       
   754 	patglobflags = savglobflags;
       
   755 	patadd((char *)&savglobflags, 0, sizeof(long), 0);
       
   756     }
       
   757 
       
   758     return starter;
       
   759 }
       
   760 
       
   761 /*
       
   762  * Compile something ended by Bar, Outpar, Tilde, or end of string.
       
   763  * Note the BRANCH or EXCLUDE tag must already have been omitted:
       
   764  * this returns the position of the operand of that.
       
   765  */
       
   766 
       
   767 /**/
       
   768 static long
       
   769 patcompbranch(int *flagp)
       
   770 {
       
   771     long chain, latest = 0, starter;
       
   772     int flags = 0;
       
   773 
       
   774     *flagp = P_PURESTR;
       
   775 
       
   776     starter = chain = 0;
       
   777     while (!memchr(patendseg, *patparse, patendseglen) ||
       
   778 	   (*patparse == Tilde && patparse[1] != '/' &&
       
   779 	    memchr(patendseg, patparse[1], patendseglen))) {
       
   780 	if (isset(EXTENDEDGLOB) &&
       
   781 	    ((!isset(SHGLOB) &&
       
   782 	      (*patparse == Inpar && patparse[1] == Pound)) ||
       
   783 	     (isset(KSHGLOB) && *patparse == '@' && patparse[1] == Inpar &&
       
   784 	      patparse[2] == Pound))) {
       
   785 	    /* Globbing flags. */
       
   786 	    char *pp1 = patparse;
       
   787 	    int oldglobflags = patglobflags, ignore;
       
   788 	    long assert;
       
   789 	    patparse += (*patparse == '@') ? 3 : 2;
       
   790 	    if (!patgetglobflags(&patparse, &assert, &ignore))
       
   791 		return 0;
       
   792 	    if (!ignore) {
       
   793 		if (assert) {
       
   794 		    /*
       
   795 		     * Start/end assertion looking like flags, but
       
   796 		     * actually handled as a normal node
       
   797 		     */
       
   798 		    latest = patnode(assert);
       
   799 		    flags = 0;
       
   800 		} else {
       
   801 		    if (pp1 == patstart) {
       
   802 			/* Right at start of pattern, the simplest case.
       
   803 			 * Put them into the flags and don't emit anything.
       
   804 			 */
       
   805 			((Patprog)patout)->globflags = patglobflags;
       
   806 			continue;
       
   807 		    } else if (!*patparse) {
       
   808 			/* Right at the end, so just leave the flags for
       
   809 			 * the next Patprog in the chain to pick up.
       
   810 			 */
       
   811 			break;
       
   812 		    }
       
   813 		    /*
       
   814 		     * Otherwise, we have to stick them in as a pattern
       
   815 		     * matching nothing.
       
   816 		     */
       
   817 		    if (oldglobflags != patglobflags) {
       
   818 			/* Flags changed */
       
   819 			union upat up;
       
   820 			latest = patnode(P_GFLAGS);
       
   821 			up.l = patglobflags;
       
   822 			patadd((char *)&up, 0, sizeof(union upat), 0);
       
   823 		    } else {
       
   824 			/* No effect. */
       
   825 			continue;
       
   826 		    }
       
   827 		}
       
   828 	    } else if (!*patparse)
       
   829 		break;
       
   830 	    else
       
   831 		continue;
       
   832 	} else if (isset(EXTENDEDGLOB) && *patparse == Hat) {
       
   833 	    /*
       
   834 	     * ^pat:  anything but pat.  For proper backtracking,
       
   835 	     * etc., we turn this into (*~pat), except without the
       
   836 	     * parentheses.
       
   837 	     */
       
   838 	    patparse++;
       
   839 	    latest = patcompnot(0, &flags);
       
   840 	} else
       
   841 	    latest = patcomppiece(&flags);
       
   842 	if (!latest)
       
   843 	    return 0;
       
   844 	if (!starter)
       
   845 	    starter = latest;
       
   846 	if (!(flags & P_PURESTR))
       
   847 	    *flagp &= ~P_PURESTR;
       
   848 	if (!chain)
       
   849 	    *flagp |= flags & P_HSTART;
       
   850 	else
       
   851 	    pattail(chain, latest);
       
   852 	chain = latest;
       
   853     }
       
   854     /* check if there was nothing in the loop, i.e. () */
       
   855     if (!chain)
       
   856 	starter = patnode(P_NOTHING);
       
   857 
       
   858     return starter;
       
   859 }
       
   860 
       
   861 /* get glob flags, return 1 for success, 0 for failure */
       
   862 
       
   863 /**/
       
   864 int
       
   865 patgetglobflags(char **strp, long *assertp, int *ignore)
       
   866 {
       
   867     char *nptr, *ptr = *strp;
       
   868     zlong ret;
       
   869 
       
   870     *assertp = 0;
       
   871     *ignore = 1;
       
   872     /* (#X): assumes we are still positioned on the first X */
       
   873     for (; *ptr && *ptr != Outpar; ptr++) {
       
   874 	if (*ptr == 'q') { 
       
   875 	    /* Glob qualifiers, ignored in pattern code */
       
   876 	    while (*ptr && *ptr != Outpar)
       
   877 		ptr++;
       
   878 	    break;
       
   879 	} else {
       
   880 	    *ignore = 0;
       
   881 	    switch (*ptr) {
       
   882 	    case 'a':
       
   883 		/* Approximate matching, max no. of errors follows */
       
   884 		ret = zstrtol(++ptr, &nptr, 10);
       
   885 		/*
       
   886 		 * We can't have more than 254, because we need 255 to
       
   887 		 * mark 254 errors in wbranch and exclude sync strings
       
   888 		 * (hypothetically --- hope no-one tries it).
       
   889 		 */
       
   890 		if (ret < 0 || ret > 254 || ptr == nptr)
       
   891 		    return 0;
       
   892 		patglobflags = (patglobflags & ~0xff) | (ret & 0xff);
       
   893 		ptr = nptr-1;
       
   894 		break;
       
   895 
       
   896 	    case 'l':
       
   897 		/* Lowercase in pattern matches lower or upper in target */
       
   898 		patglobflags = (patglobflags & ~GF_IGNCASE) | GF_LCMATCHUC;
       
   899 		break;
       
   900 
       
   901 	    case 'i':
       
   902 		/* Fully case insensitive */
       
   903 		patglobflags = (patglobflags & ~GF_LCMATCHUC) | GF_IGNCASE;
       
   904 		break;
       
   905 
       
   906 	    case 'I':
       
   907 		/* Restore case sensitivity */
       
   908 		patglobflags &= ~(GF_LCMATCHUC|GF_IGNCASE);
       
   909 		break;
       
   910 
       
   911 	    case 'b':
       
   912 		/* Make backreferences */
       
   913 		patglobflags |= GF_BACKREF;
       
   914 		break;
       
   915 
       
   916 	    case 'B':
       
   917 		/* Don't make backreferences */
       
   918 		patglobflags &= ~GF_BACKREF;
       
   919 		break;
       
   920 
       
   921 	    case 'm':
       
   922 		/* Make references to complete match */
       
   923 		patglobflags |= GF_MATCHREF;
       
   924 		break;
       
   925 
       
   926 	    case 'M':
       
   927 		/* Don't */
       
   928 		patglobflags &= ~GF_MATCHREF;
       
   929 		break;
       
   930 
       
   931 	    case 's':
       
   932 		*assertp = P_ISSTART;
       
   933 		break;
       
   934 
       
   935 	    case 'e':
       
   936 		*assertp = P_ISEND;
       
   937 		break;
       
   938 
       
   939 	    default:
       
   940 		return 0;
       
   941 	    }
       
   942 	}
       
   943     }
       
   944     if (*ptr != Outpar)
       
   945 	return 0;
       
   946     /* Start/end assertions must appear on their own. */
       
   947     if (*assertp && (*strp)[1] != Outpar)
       
   948 	return 0;
       
   949     *strp = ptr + 1;
       
   950     return 1;
       
   951 }
       
   952 
       
   953 /*
       
   954  * compile a chunk such as a literal string or a [...] followed
       
   955  * by a possible hash operator
       
   956  */
       
   957 
       
   958 /**/
       
   959 static long
       
   960 patcomppiece(int *flagp)
       
   961 {
       
   962     long starter = 0, next, pound, op;
       
   963     int flags, flags2, kshchar, len, ch, patch, nmeta;
       
   964     union upat up;
       
   965     char *nptr, *str0, *ptr, cbuf[2];
       
   966     zrange_t from, to;
       
   967 
       
   968     flags = 0;
       
   969     str0 = patparse;
       
   970     for (;;) {
       
   971 	/*
       
   972 	 * Check if we have a string. First, we need to make sure
       
   973 	 * the string doesn't introduce a ksh-like parenthesized expression.
       
   974 	 */
       
   975 	kshchar = '\0';
       
   976 	if (isset(KSHGLOB) && *patparse && patparse[1] == Inpar) {
       
   977 	    if (strchr("?*+!@", *patparse))
       
   978 		kshchar = STOUC(*patparse);
       
   979 	    else if (*patparse == Star || *patparse == Quest)
       
   980 		kshchar = STOUC(ztokens[*patparse - Pound]);
       
   981 	}
       
   982 
       
   983 	/*
       
   984 	 * End of string (or no string at all) if ksh-type parentheses,
       
   985 	 * or special character, unless that character is a tilde and
       
   986 	 * the character following is an end-of-segment character.  Thus
       
   987 	 * tildes are not special if there is nothing following to
       
   988 	 * be excluded.
       
   989 	 */
       
   990 	if (kshchar || (memchr(patendstr, *patparse, patendstrlen) &&
       
   991 			(*patparse != Tilde ||
       
   992 			 patparse[1] == '/' ||
       
   993 			 !memchr(patendseg, patparse[1], patendseglen))))
       
   994 	    break;
       
   995 
       
   996 	METAINC(patparse);
       
   997     }
       
   998 
       
   999     if (patparse > str0) {
       
  1000 	long slen = patparse - str0;
       
  1001 	int morelen;
       
  1002 
       
  1003 	/* Ordinary string: cancel kshchar lookahead */
       
  1004 	kshchar = '\0';
       
  1005 	/*
       
  1006 	 * Assume it matches a simple string until we find otherwise.
       
  1007 	 */
       
  1008 	flags |= P_PURESTR;
       
  1009 	DPUTS(patparse == str0, "BUG: matched nothing in patcomppiece.");
       
  1010 	/* more than one character matched? */
       
  1011 	morelen = str0 + (*str0 == Meta ? 2 : 1) < patparse;
       
  1012 	/*
       
  1013 	 * If we have more than one character, a following hash only
       
  1014 	 * applies to the last, so decrement.
       
  1015 	 */
       
  1016 	if (isset(EXTENDEDGLOB) && *patparse == Pound && morelen)
       
  1017 	    patparse -= (patparse > str0 + 1 && patparse[-2] == Meta) ? 2 : 1;
       
  1018 	/*
       
  1019 	 * If len is 1, we can't have an active # following, so doesn't
       
  1020 	 * matter that we don't make X in `XX#' simple.
       
  1021 	 */
       
  1022 	if (!morelen)
       
  1023 	    flags |= P_SIMPLE;
       
  1024 	starter = patnode(P_EXACTLY);
       
  1025 
       
  1026 	/* Get length of string without metafication. */
       
  1027 	nmeta = 0;
       
  1028 	/* inherited from domatch, but why, exactly? */
       
  1029 	if (*str0 == Nularg)
       
  1030 	    str0++;
       
  1031 	for (ptr = str0; ptr < patparse; ptr++) {
       
  1032 	    if (*ptr == Meta) {
       
  1033 		nmeta++;
       
  1034 		ptr++;
       
  1035 	    }
       
  1036 	}
       
  1037 	slen = (patparse - str0) - nmeta;
       
  1038 	/* First add length, which is a long */
       
  1039 	patadd((char *)&slen, 0, sizeof(long), 0);
       
  1040 	/*
       
  1041 	 * Then the string, not null terminated.
       
  1042 	 * Unmetafy and untokenize; pass the final length,
       
  1043 	 * which is what we need to allocate, i.e. not including
       
  1044 	 * a count for each Meta in the string.
       
  1045 	 */
       
  1046 	patadd(str0, 0, slen, PA_UNMETA);
       
  1047 	nptr = P_LS_STR((Upat)patout + starter);
       
  1048 	/*
       
  1049 	 * It's much simpler to turn off pure string mode for
       
  1050 	 * any case-insensitive or approximate matching; usually,
       
  1051 	 * that is correct, or they wouldn't have been turned on.
       
  1052 	 * However, we need to make sure we match a "." or ".."
       
  1053 	 * in a file name as a pure string.  There's a minor bug
       
  1054 	 * that this will also apply to something like
       
  1055 	 * ..(#a1).. (i.e. the (#a1) has no effect), but if you're
       
  1056 	 * going to write funny patterns, you get no sympathy from me.
       
  1057 	 */
       
  1058 	if (patglobflags) {
       
  1059 	    if (!(patflags & PAT_FILE))
       
  1060 		flags &= ~P_PURESTR;
       
  1061 	    else if (!(nptr[0] == '.' &&
       
  1062 		       (slen == 1 || (nptr[1] == '.' && slen == 2))))
       
  1063 		flags &= ~P_PURESTR;
       
  1064 	}
       
  1065     } else {
       
  1066 	if (kshchar)
       
  1067 	    patparse++;
       
  1068 
       
  1069 	patch = *patparse;
       
  1070 	METAINC(patparse);
       
  1071 	switch(patch) {
       
  1072 	case Quest:
       
  1073 	    flags |= P_SIMPLE;
       
  1074 	    starter = patnode(P_ANY);
       
  1075 	    break;
       
  1076 	case Star:
       
  1077 	    /* kshchar is used as a sign that we can't have #'s. */
       
  1078 	    kshchar = -1;
       
  1079 	    starter = patnode(P_STAR);
       
  1080 	    break;
       
  1081 	case Inbrack:
       
  1082 	    flags |= P_SIMPLE;
       
  1083 	    if (*patparse == Hat || *patparse == '^' || *patparse == '!') {
       
  1084 		patparse++;
       
  1085 		starter = patnode(P_ANYBUT);
       
  1086 	    } else
       
  1087 		starter = patnode(P_ANYOF);
       
  1088 	    if (*patparse == Outbrack) {
       
  1089 		patparse++;
       
  1090 		patadd(NULL, ']', 1, PA_NOALIGN);
       
  1091 	    }
       
  1092 	    while (*patparse && *patparse != Outbrack) {
       
  1093 		/* Meta is not a token */
       
  1094 		if (*patparse == Inbrack && patparse[1] == ':' &&
       
  1095 			(nptr = strchr(patparse+2, ':')) &&
       
  1096 			nptr[1] == Outbrack) {
       
  1097 			/* Posix range. */
       
  1098 			patparse += 2;
       
  1099 			len = nptr - patparse;
       
  1100 			if (!strncmp(patparse, "alpha", len))
       
  1101 			    ch = PP_ALPHA;
       
  1102 			else if (!strncmp(patparse, "alnum", len))
       
  1103 			    ch = PP_ALNUM;
       
  1104 			else if (!strncmp(patparse, "ascii", len))
       
  1105 			    ch = PP_ASCII;
       
  1106 			else if (!strncmp(patparse, "blank", len))
       
  1107 			    ch = PP_BLANK;
       
  1108 			else if (!strncmp(patparse, "cntrl", len))
       
  1109 			    ch = PP_CNTRL;
       
  1110 			else if (!strncmp(patparse, "digit", len))
       
  1111 			    ch = PP_DIGIT;
       
  1112 			else if (!strncmp(patparse, "graph", len))
       
  1113 			    ch = PP_GRAPH;
       
  1114 			else if (!strncmp(patparse, "lower", len))
       
  1115 			    ch = PP_LOWER;
       
  1116 			else if (!strncmp(patparse, "print", len))
       
  1117 			    ch = PP_PRINT;
       
  1118 			else if (!strncmp(patparse, "punct", len))
       
  1119 			    ch = PP_PUNCT;
       
  1120 			else if (!strncmp(patparse, "space", len))
       
  1121 			    ch = PP_SPACE;
       
  1122 			else if (!strncmp(patparse, "upper", len))
       
  1123 			    ch = PP_UPPER;
       
  1124 			else if (!strncmp(patparse, "xdigit", len))
       
  1125 			    ch = PP_XDIGIT;
       
  1126 			else
       
  1127 			    ch = PP_UNKWN;
       
  1128 			patparse = nptr + 2;
       
  1129 			if (ch != PP_UNKWN)
       
  1130 			    patadd(NULL, STOUC(Meta+ch), 1, PA_NOALIGN);
       
  1131 			continue;
       
  1132 		}
       
  1133 		if (itok(*patparse)) {
       
  1134 		    cbuf[0] = ztokens[*patparse - Pound];
       
  1135 		} else if (*patparse == Meta) {
       
  1136 		    cbuf[0] = Meta;
       
  1137 		    cbuf[1] = *++patparse;
       
  1138 		} else
       
  1139 		    cbuf[0] = *patparse;
       
  1140 		patparse++;
       
  1141 
       
  1142 		if (*patparse == '-' && patparse[1] != Outbrack) {
       
  1143 		    patadd(NULL, STOUC(Meta+PP_RANGE), 1, PA_NOALIGN);
       
  1144 		    patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, PA_NOALIGN);
       
  1145 		    if (itok(*++patparse)) {
       
  1146 			patadd(0, STOUC(ztokens[*patparse - Pound]), 1,
       
  1147 			       PA_NOALIGN);
       
  1148 		    } else
       
  1149 			patadd(patparse, 0, (*patparse == Meta) ? 2 : 1,
       
  1150 			       PA_NOALIGN);
       
  1151 		    METAINC(patparse);
       
  1152 		} else
       
  1153 		    patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, PA_NOALIGN);
       
  1154 	    }
       
  1155 	    if (*patparse != Outbrack)
       
  1156 		return 0;
       
  1157 	    patparse++;
       
  1158 	    /* terminate null string and fix alignment */
       
  1159 	    patadd(NULL, 0, 1, 0);
       
  1160 	    break;
       
  1161 	case Inpar:
       
  1162 	    /* is this how to treat parentheses in SHGLOB? */
       
  1163 	    if (isset(SHGLOB) && !kshchar)
       
  1164 		return 0;
       
  1165 	    if (kshchar == '!') {
       
  1166 		/* This is nasty, we should really either handle all
       
  1167 		 * kshglobbing below or here.  But most of the
       
  1168 		 * others look like non-ksh patterns, while this one
       
  1169 		 * doesn't, so we handle it here and leave the rest.
       
  1170 		 * We treat it like an extendedglob ^, except that
       
  1171 		 * it goes into parentheses.
       
  1172 		 *
       
  1173 		 * If we did do kshglob here, we could support
       
  1174 		 * the old behaviour that things like !(foo)##
       
  1175 		 * work, but it makes the code more complicated at
       
  1176 		 * the expense of allowing the user to do things
       
  1177 		 * they shouldn't.
       
  1178 		 */
       
  1179 		if (!(starter = patcompnot(1, &flags2)))
       
  1180 		    return 0;
       
  1181 	    } else if (!(starter = patcompswitch(1, &flags2)))
       
  1182 		return 0;
       
  1183 	    flags |= flags2 & P_HSTART;
       
  1184 	    break;
       
  1185 	case Inang:
       
  1186 	    /* Numeric glob */
       
  1187 	    len = 0;		/* beginning present 1, end present 2 */
       
  1188 	    if (idigit(*patparse)) {
       
  1189 		from = (zrange_t) zstrtol((char *)patparse,
       
  1190 					 (char **)&nptr, 10);
       
  1191 		patparse = nptr;
       
  1192 		len |= 1;
       
  1193 	    }
       
  1194 	    DPUTS(*patparse != '-', "BUG: - missing from numeric glob");
       
  1195 	    patparse++;
       
  1196 	    if (idigit(*patparse)) {
       
  1197 		to = (zrange_t) zstrtol((char *)patparse,
       
  1198 					  (char **)&nptr, 10);
       
  1199 		patparse = nptr;
       
  1200 		len |= 2;
       
  1201 	    }
       
  1202 	    if (*patparse != Outang)
       
  1203 		return 0;
       
  1204 	    patparse++;
       
  1205 	    switch(len) {
       
  1206 	    case 3:
       
  1207 		starter = patnode(P_NUMRNG);
       
  1208 		patadd((char *)&from, 0, sizeof(from), 0);
       
  1209 		patadd((char *)&to, 0, sizeof(to), 0);
       
  1210 		break;
       
  1211 	    case 2:
       
  1212 		starter = patnode(P_NUMTO);
       
  1213 		patadd((char *)&to, 0, sizeof(to), 0);
       
  1214 		break;
       
  1215 	    case 1:
       
  1216 		starter = patnode(P_NUMFROM);
       
  1217 		patadd((char *)&from, 0, sizeof(from), 0);
       
  1218 		break;
       
  1219 	    case 0:
       
  1220 		starter = patnode(P_NUMANY);
       
  1221 		break;
       
  1222 	    }
       
  1223 	    /* This can't be simple, because it isn't.
       
  1224 	     * Mention in manual that matching digits with [...]
       
  1225 	     * is more efficient.
       
  1226 	     */
       
  1227 	    break;
       
  1228 	case Pound:
       
  1229 	    DPUTS(!isset(EXTENDEDGLOB), "BUG: # not treated as string");
       
  1230 	    /*
       
  1231 	     * A hash here is an error; it should follow something
       
  1232 	     * repeatable.
       
  1233 	     */
       
  1234 	    return 0;
       
  1235 	    break;
       
  1236 #ifdef DEBUG
       
  1237 	default:
       
  1238 	    dputs("BUG: character not handled in patcomppiece");
       
  1239 	    return 0;
       
  1240 	    break;
       
  1241 #endif
       
  1242 	}
       
  1243     }
       
  1244 
       
  1245     if (!(pound = (*patparse == Pound && isset(EXTENDEDGLOB))) &&
       
  1246 	(kshchar <= 0 || kshchar == '@' || kshchar == '!')) {
       
  1247 	*flagp = flags;
       
  1248 	return starter;
       
  1249     }
       
  1250 
       
  1251     /* too much at once doesn't currently work */
       
  1252     if (kshchar && pound)
       
  1253 	return 0;
       
  1254 
       
  1255     if (kshchar == '*') {
       
  1256 	op = P_ONEHASH;
       
  1257 	*flagp = P_HSTART;
       
  1258     } else if (kshchar == '+') {
       
  1259 	op = P_TWOHASH;
       
  1260 	*flagp = P_HSTART;
       
  1261     } else if (kshchar == '?') {
       
  1262 	op = 0;
       
  1263 	*flagp = 0;
       
  1264     } else if (*++patparse == Pound) {
       
  1265 	op = P_TWOHASH;
       
  1266 	patparse++;
       
  1267 	*flagp = P_HSTART;
       
  1268     } else {
       
  1269 	op = P_ONEHASH;
       
  1270 	*flagp = P_HSTART;
       
  1271     }
       
  1272 
       
  1273     /*
       
  1274      * Note optimizations with pointers into P_NOTHING branches:  some
       
  1275      * should logically point to next node after current piece.
       
  1276      *
       
  1277      * Backtracking is also encoded in a slightly obscure way:  the
       
  1278      * code emitted ensures we test the non-empty branch of complex
       
  1279      * patterns before the empty branch on each repetition.  Hence
       
  1280      * each time we fail on a non-empty branch, we try the empty branch,
       
  1281      * which is equivalent to backtracking.
       
  1282      */
       
  1283     if ((flags & P_SIMPLE) && (op == P_ONEHASH || op == P_TWOHASH) &&
       
  1284 	P_OP((Upat)patout+starter) == P_ANY) {
       
  1285 	/* Optimize ?# to *.  Silly thing to do, since who would use
       
  1286 	 * use ?# ? But it makes the later code shorter.
       
  1287 	 */
       
  1288 	Upat uptr = (Upat)patout + starter;
       
  1289 	if (op == P_TWOHASH) {
       
  1290 	    /* ?## becomes ?* */
       
  1291 	    uptr->l = (uptr->l & ~0xff) | P_ANY;
       
  1292 	    pattail(starter, patnode(P_STAR));
       
  1293 	} else {
       
  1294 	    uptr->l = (uptr->l & ~0xff) | P_STAR;
       
  1295 	}
       
  1296     } else if ((flags & P_SIMPLE) && op && !(patglobflags & 0xff)) {
       
  1297 	/* Don't simplify if we need to look for approximations. */
       
  1298 	patinsert(op, starter, NULL, 0);
       
  1299     } else if (op == P_ONEHASH) {
       
  1300 	/* Emit x# as (x&|), where & means "self". */
       
  1301 	up.p = NULL;
       
  1302 	patinsert(P_WBRANCH, starter, (char *)&up, sizeof(up));
       
  1303 	                                      /* Either x */
       
  1304 	patoptail(starter, patnode(P_BACK));  /* and loop */
       
  1305 	patoptail(starter, starter);	      /* back */
       
  1306 	pattail(starter, patnode(P_BRANCH));  /* or */
       
  1307 	pattail(starter, patnode(P_NOTHING)); /* null. */
       
  1308     } else if (op == P_TWOHASH) {
       
  1309 	/* Emit x## as x(&|) where & means "self". */
       
  1310 	next = patnode(P_WBRANCH);	      /* Either */
       
  1311 	up.p = NULL;
       
  1312 	patadd((char *)&up, 0, sizeof(up), 0);
       
  1313 	pattail(starter, next);
       
  1314 	pattail(patnode(P_BACK), starter);    /* loop back */
       
  1315 	pattail(next, patnode(P_BRANCH));     /* or */
       
  1316 	pattail(starter, patnode(P_NOTHING)); /* null. */
       
  1317     } else if (kshchar == '?') {
       
  1318 	/* Emit ?(x) as (x|) */
       
  1319 	patinsert(P_BRANCH, starter, NULL, 0); /* Either x */
       
  1320 	pattail(starter, patnode(P_BRANCH));   /* or */
       
  1321 	next = patnode(P_NOTHING);	       /* null */
       
  1322 	pattail(starter, next);
       
  1323 	patoptail(starter, next);
       
  1324     }
       
  1325     if (*patparse == Pound)
       
  1326 	return 0;
       
  1327 
       
  1328     return starter;
       
  1329 }
       
  1330 
       
  1331 /*
       
  1332  * Turn a ^foo (paren = 0) or !(foo) (paren = 1) into *~foo with
       
  1333  * parentheses if necessary.   As you see, that's really quite easy.
       
  1334  */
       
  1335 
       
  1336 /**/
       
  1337 static long
       
  1338 patcompnot(int paren, int *flagsp)
       
  1339 {
       
  1340     union upat up;
       
  1341     long excsync, br, excl, n, starter;
       
  1342     int dummy;
       
  1343 
       
  1344     /* Here, we're matching a star at the start. */
       
  1345     *flagsp = P_HSTART;
       
  1346 
       
  1347     starter = patnode(P_BRANCH);
       
  1348     br = patnode(P_STAR);
       
  1349     excsync = patnode(P_EXCSYNC);
       
  1350     pattail(br, excsync);
       
  1351     pattail(starter, excl = patnode(P_EXCLUDE));
       
  1352     up.p = NULL;
       
  1353     patadd((char *)&up, 0, sizeof(up), 0);
       
  1354     if (!(br = (paren ? patcompswitch(1, &dummy) : patcompbranch(&dummy))))
       
  1355 	return 0;
       
  1356     pattail(br, patnode(P_EXCEND));
       
  1357     n = patnode(P_NOTHING); /* just so much easier */
       
  1358     pattail(excsync, n);
       
  1359     pattail(excl, n);
       
  1360 
       
  1361     return starter;
       
  1362 }
       
  1363 
       
  1364 /* Emit a node */
       
  1365 
       
  1366 /**/
       
  1367 static long
       
  1368 patnode(long op)
       
  1369 {
       
  1370     long starter = (Upat)patcode - (Upat)patout;
       
  1371     union upat up;
       
  1372 
       
  1373     up.l = op;
       
  1374     patadd((char *)&up, 0, sizeof(union upat), 0);
       
  1375     return starter;
       
  1376 }
       
  1377 
       
  1378 /*
       
  1379  * insert an operator in front of an already emitted operand:
       
  1380  * we relocate the operand.  there had better be nothing else after.
       
  1381  */
       
  1382 
       
  1383 /**/
       
  1384 static void
       
  1385 patinsert(long op, int opnd, char *xtra, int sz)
       
  1386 {
       
  1387     char *src, *dst, *opdst;
       
  1388     union upat buf, *lptr;
       
  1389 
       
  1390     buf.l = 0;
       
  1391     patadd((char *)&buf, 0, sizeof(buf), 0);
       
  1392     if (sz)
       
  1393 	patadd(xtra, 0, sz, 0);
       
  1394     src = patcode - sizeof(union upat) - sz;
       
  1395     dst = patcode;
       
  1396     opdst = patout + opnd * sizeof(union upat);
       
  1397     while (src > opdst)
       
  1398 	*--dst = *--src;
       
  1399 
       
  1400     /* A cast can't be an lvalue */
       
  1401     lptr = (Upat)opdst;
       
  1402     lptr->l = op;
       
  1403     opdst += sizeof(union upat);
       
  1404     while (sz--)
       
  1405 	*opdst++ = *xtra++;
       
  1406 }
       
  1407 
       
  1408 /* set the 'next' pointer at the end of a node chain */
       
  1409 
       
  1410 /**/
       
  1411 static void
       
  1412 pattail(long p, long val)
       
  1413 {
       
  1414     Upat scan, temp;
       
  1415     long offset;
       
  1416 
       
  1417     scan = (Upat)patout + p;
       
  1418     for (;;) {
       
  1419 	if (!(temp = PATNEXT(scan)))
       
  1420 	    break;
       
  1421 	scan = temp;
       
  1422     }
       
  1423 
       
  1424     offset = (P_OP(scan) == P_BACK)
       
  1425 	? (scan - (Upat)patout) - val : val - (scan - (Upat)patout);
       
  1426 
       
  1427     scan->l |= offset << 8;
       
  1428 }
       
  1429 
       
  1430 /* do pattail, but on operand of first argument; nop if operandless */
       
  1431 
       
  1432 /**/
       
  1433 static void patoptail(long p, long val)
       
  1434 {
       
  1435     Upat ptr = (Upat)patout + p;
       
  1436     int op = P_OP(ptr);
       
  1437     if (!p || !P_ISBRANCH(ptr))
       
  1438 	return;
       
  1439     if (op == P_BRANCH)
       
  1440 	pattail(P_OPERAND(p), val);
       
  1441     else
       
  1442 	pattail(P_OPERAND(p) + 1, val);
       
  1443 }
       
  1444 
       
  1445 
       
  1446 /*
       
  1447  * Run a pattern.
       
  1448  */
       
  1449 static char *patinstart;	/* Start of input string */
       
  1450 static char *patinend;		/* End of input string */
       
  1451 static char *patinput;		/* String input pointer */
       
  1452 static char *patinpath;		/* Full path for use with ~ exclusions */
       
  1453 static int   patinlen;		/* Length of last successful match.
       
  1454 				 * Includes count of Meta characters.
       
  1455 				 */
       
  1456 
       
  1457 static char *patbeginp[NSUBEXP];	/* Pointer to backref beginnings */
       
  1458 static char *patendp[NSUBEXP];		/* Pointer to backref ends */
       
  1459 static int parsfound;			/* parentheses (with backrefs) found */
       
  1460 
       
  1461 static int globdots;			/* Glob initial dots? */
       
  1462 
       
  1463 /*
       
  1464  * Macros which are currently trivial but are likely to be less
       
  1465  * so when we handle multibyte characters.  They operate on
       
  1466  * umetafied strings.
       
  1467  */
       
  1468 
       
  1469 /* Get a character from the start point in a string */
       
  1470 #define CHARREF(x)	(STOUC(*x))
       
  1471 /* Get  a pointer to the next character */
       
  1472 #define CHARNEXT(x)	(x+1)
       
  1473 /* Increment a pointer past the current character. */
       
  1474 #define CHARINC(x)	(x++)
       
  1475 /* Counter the number of characters between two pointers, largest first */
       
  1476 #define CHARSUB(x,y)	(x-y)
       
  1477 
       
  1478 /*
       
  1479  * The following need to be accessed in the globbing scanner for
       
  1480  * a multi-component file path.  See horror story in glob.c.
       
  1481  */
       
  1482 /**/
       
  1483 int errsfound;				/* Total error count so far */
       
  1484 
       
  1485 /**/
       
  1486 int forceerrs;				/* Forced maximum error count */
       
  1487 
       
  1488 /**/
       
  1489 void
       
  1490 pattrystart(void)
       
  1491 {
       
  1492     forceerrs = -1;
       
  1493     errsfound = 0;
       
  1494 }
       
  1495 
       
  1496 /*
       
  1497  * Test prog against null-terminated, metafied string.
       
  1498  */
       
  1499 
       
  1500 /**/
       
  1501 mod_export int
       
  1502 pattry(Patprog prog, char *string)
       
  1503 {
       
  1504     return pattryrefs(prog, string, -1, -1, 0, NULL, NULL, NULL);
       
  1505 }
       
  1506 
       
  1507 /*
       
  1508  * Test prog against string of given length, no null termination
       
  1509  * but still metafied at this point.  offset gives an offset
       
  1510  * to include in reported match indices
       
  1511  */
       
  1512 
       
  1513 /**/
       
  1514 mod_export int
       
  1515 pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset)
       
  1516 {
       
  1517     return pattryrefs(prog, string, len, unmetalen, offset, NULL, NULL, NULL);
       
  1518 }
       
  1519 
       
  1520 /*
       
  1521  * Test prog against string with given lengths.  The input
       
  1522  * string is metafied; stringlen is the raw string length, and
       
  1523  * unmetalen the number of characters in the original string (some
       
  1524  * of which may now be metafied).  Either value may be -1
       
  1525  * to indicate a null-terminated string which will be counted.  Note
       
  1526  * there may be a severe penalty for this if a lot of matching is done
       
  1527  * on one string.
       
  1528  *
       
  1529  * offset is the position in the original string (not seen by
       
  1530  * the pattern module) at which we are trying to match.
       
  1531  * This is added in to the positions recorded in patbeginp and patendp
       
  1532  * when we are looking for substrings.  Currently this only happens
       
  1533  * in the parameter substitution code.
       
  1534  *
       
  1535  * Note this is a character offset, i.e. a metafied character
       
  1536  * counts as 1.
       
  1537  *
       
  1538  * The last three arguments are used to report the positions for the
       
  1539  * backreferences. On entry, *nump should contain the maximum number
       
  1540  * of positions to report.  In this case the match, mbegin, mend
       
  1541  * arrays are not altered.
       
  1542  */
       
  1543 
       
  1544 /**/
       
  1545 mod_export int
       
  1546 pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen,
       
  1547 	   int patoffset,
       
  1548 	   int *nump, int *begp, int *endp)
       
  1549 {
       
  1550     int i, maxnpos = 0, ret, needfullpath, unmetalenp;
       
  1551     int origlen;
       
  1552     char **sp, **ep, *tryalloced, *ptr;
       
  1553     char *progstr = (char *)prog + prog->startoff;
       
  1554 
       
  1555     if (nump) {
       
  1556 	maxnpos = *nump;
       
  1557 	*nump = 0;
       
  1558     }
       
  1559     /* inherited from domatch, but why, exactly? */
       
  1560     if (*string == Nularg) {
       
  1561 	string++;
       
  1562 	unmetalen--;
       
  1563     }
       
  1564 
       
  1565     if (stringlen < 0)
       
  1566 	stringlen = strlen(string);
       
  1567     origlen = stringlen;
       
  1568 
       
  1569     patflags = prog->flags;
       
  1570     /*
       
  1571      * For a top-level ~-exclusion, we will need the full
       
  1572      * path to exclude, so copy the path so far and append the
       
  1573      * current test string.
       
  1574      */
       
  1575     needfullpath = (patflags & PAT_HAS_EXCLUDP) && pathpos;
       
  1576 
       
  1577     /* Get the length of the full string when unmetafied. */
       
  1578     if (unmetalen < 0)
       
  1579 	unmetalen = ztrsub(string + stringlen, string);
       
  1580     if (needfullpath)
       
  1581 	unmetalenp = ztrsub(pathbuf + pathpos, pathbuf);
       
  1582     else
       
  1583 	unmetalenp = 0;
       
  1584 
       
  1585     DPUTS(needfullpath && (patflags & (PAT_PURES|PAT_ANY)),
       
  1586 	  "rum sort of file exclusion");
       
  1587     /*
       
  1588      * Partly for efficiency, and partly for the convenience of
       
  1589      * globbing, we don't unmetafy pure string patterns, and
       
  1590      * there's no reason to if the pattern is just a *.
       
  1591      */
       
  1592     if (!(patflags & (PAT_PURES|PAT_ANY))
       
  1593 	&& (needfullpath || unmetalen != stringlen)) {
       
  1594 	/*
       
  1595 	 * We need to copy if we need to prepend the path so far
       
  1596 	 * (in which case we copy both chunks), or if we have
       
  1597 	 * Meta characters.
       
  1598 	 */
       
  1599 	char *dst;
       
  1600 	int icopy, ncopy;
       
  1601 
       
  1602 	dst = tryalloced = zalloc(unmetalen + unmetalenp);
       
  1603 
       
  1604 	if (needfullpath) {
       
  1605 	    /* loop twice, copy path buffer first time */
       
  1606 	    ptr = pathbuf;
       
  1607 	    ncopy = unmetalenp;
       
  1608 	} else {
       
  1609 	    /* just loop once, copy string with unmetafication */
       
  1610 	    ptr = string;
       
  1611 	    ncopy = unmetalen;
       
  1612 	}
       
  1613 	for (icopy = 0; icopy < 2; icopy++) {
       
  1614 	    for (i = 0; i < ncopy; i++) {
       
  1615 		if (*ptr == Meta) {
       
  1616 		    ptr++;
       
  1617 		    *dst++ = *ptr++ ^ 32;
       
  1618 		} else {
       
  1619 		    *dst++ = *ptr++;
       
  1620 		}
       
  1621 	    }
       
  1622 	    if (!needfullpath)
       
  1623 		break;
       
  1624 	    /* next time append test string to path so far */
       
  1625 	    ptr = string;
       
  1626 	    ncopy = unmetalen;
       
  1627 	}
       
  1628 
       
  1629 	if (needfullpath) {
       
  1630 	    patinstart = tryalloced + unmetalenp;
       
  1631 	    patinpath = tryalloced;
       
  1632 	} else {
       
  1633 	    patinstart = tryalloced;
       
  1634 	    patinpath = NULL;
       
  1635 	}
       
  1636 	stringlen = unmetalen;
       
  1637     } else {
       
  1638 	patinstart = string;
       
  1639 	tryalloced = patinpath = NULL;
       
  1640     }
       
  1641 
       
  1642     patinend = patinstart + stringlen;
       
  1643     /*
       
  1644      * From now on we do not require NULL termination of
       
  1645      * the test string.  There should also be no more references
       
  1646      * to the variable string.
       
  1647      */
       
  1648 
       
  1649     if (prog->flags & (PAT_PURES|PAT_ANY)) {
       
  1650 	/*
       
  1651 	 * Either we are testing against a pure string,
       
  1652 	 * or we can match anything at all.
       
  1653 	 */
       
  1654 	int ret;
       
  1655 	if (prog->flags & PAT_ANY) {
       
  1656 	    /*
       
  1657 	     * Optimisation for a single "*": always matches
       
  1658 	     * (except for no_glob_dots, see below).
       
  1659 	     */
       
  1660 	    ret = 1;
       
  1661 	} else {
       
  1662 	    /*
       
  1663 	     * Testing a pure string.  See if initial
       
  1664 	     * components match.
       
  1665 	     */
       
  1666 	    int lendiff = stringlen - prog->patmlen;
       
  1667 	    if (lendiff < 0) {
       
  1668 		/* No, the pattern string is too long. */
       
  1669 		ret = 0;
       
  1670 	    } else if (!memcmp(progstr, patinstart, prog->patmlen)) {
       
  1671 		/*
       
  1672 		 * Initial component matches.  Matches either
       
  1673 		 * if lengths are the same or we are not anchored
       
  1674 		 * to the end of the string.
       
  1675 		 */
       
  1676 		ret = !lendiff || (prog->flags & PAT_NOANCH);
       
  1677 	    } else {
       
  1678 		/* No match. */
       
  1679 		ret = 0;
       
  1680 	    }
       
  1681 	}
       
  1682 	if (ret) {
       
  1683 	    /*
       
  1684 	     * For files, we won't match initial "."s unless
       
  1685 	     * glob_dots is set.
       
  1686 	     */
       
  1687 	    if ((prog->flags & PAT_NOGLD) && *patinstart == '.') {
       
  1688 		ret = 0;
       
  1689 	    } else {
       
  1690 		/*
       
  1691 		 * Remember the length in case used for ${..#..} etc.
       
  1692 		 * In this case, we didn't unmetafy the string.
       
  1693 		 */
       
  1694 		patinlen = (int)prog->patmlen;
       
  1695 		/* if matching files, must update globbing flags */
       
  1696 		patglobflags = prog->globend;
       
  1697 	    }
       
  1698 	}
       
  1699 
       
  1700 	if (tryalloced)
       
  1701 	    zfree(tryalloced, unmetalen + unmetalenp);
       
  1702 
       
  1703 	return ret;
       
  1704     } else {
       
  1705 	/*
       
  1706 	 * Test for a `must match' string, unless we're scanning for a match
       
  1707 	 * in which case we don't need to do this each time.
       
  1708 	 */
       
  1709 	ret = 1;
       
  1710 	if (!(prog->flags & PAT_SCAN) && prog->mustoff)
       
  1711 	{
       
  1712 	    char *testptr;	/* start pointer into test string */
       
  1713 	    char *teststop;	/* last point from which we can match */
       
  1714 	    char *patptr = (char *)prog + prog->mustoff;
       
  1715 	    int patlen = prog->patmlen;
       
  1716 	    int found = 0;
       
  1717 
       
  1718 	    if (patlen > stringlen) {
       
  1719 		/* Too long, can't match. */
       
  1720 		ret = 0;
       
  1721 	    } else {
       
  1722 		teststop = patinend - patlen;
       
  1723 
       
  1724 		for (testptr = patinstart; testptr <= teststop; testptr++)
       
  1725 		{
       
  1726 		    if (!memcmp(testptr, patptr, patlen)) {
       
  1727 			found = 1;
       
  1728 			break;
       
  1729 		    }
       
  1730 		}
       
  1731 
       
  1732 		if (!found)
       
  1733 		    ret = 0;
       
  1734 	    }
       
  1735 	}
       
  1736 	if (!ret) {
       
  1737 	    if (tryalloced)
       
  1738 		zfree(tryalloced, unmetalen + unmetalenp);
       
  1739 	    return 0;
       
  1740 	}
       
  1741 
       
  1742 	patglobflags = prog->globflags;
       
  1743 	if (!(patflags & PAT_FILE)) {
       
  1744 	    forceerrs = -1;
       
  1745 	    errsfound = 0;
       
  1746 	}
       
  1747 	globdots = !(patflags & PAT_NOGLD);
       
  1748 	parsfound = 0;
       
  1749 
       
  1750 	patinput = patinstart;
       
  1751 
       
  1752 	if (patmatch((Upat)progstr)) {
       
  1753 	    /*
       
  1754 	     * we were lazy and didn't save the globflags if an exclusion
       
  1755 	     * failed, so set it now
       
  1756 	     */
       
  1757 	    patglobflags = prog->globend;
       
  1758 
       
  1759 	    /*
       
  1760 	     * Record length of successful match, including Meta
       
  1761 	     * characters.  Do it here so that patmatchlen() can return
       
  1762 	     * it even if we delete the pattern strings.
       
  1763 	     */
       
  1764 	    patinlen = patinput - patinstart;
       
  1765 	    /*
       
  1766 	     * Optimization: if we didn't find any Meta characters
       
  1767 	     * to begin with, we don't need to look for them now.
       
  1768 	     */
       
  1769 	    if (unmetalen != origlen) {
       
  1770 		for (ptr = patinstart; ptr < patinput; ptr++)
       
  1771 		    if (imeta(*ptr))
       
  1772 			patinlen++;
       
  1773 	    }
       
  1774 
       
  1775 	    /*
       
  1776 	     * Should we clear backreferences and matches on a failed
       
  1777 	     * match?
       
  1778 	     */
       
  1779 	    if ((patglobflags & GF_MATCHREF) && !(patflags & PAT_FILE)) {
       
  1780 		/*
       
  1781 		 * m flag: for global match.  This carries no overhead
       
  1782 		 * in the pattern matching part.
       
  1783 		 *
       
  1784 		 * Remember the test pattern is already unmetafied.
       
  1785 		 */
       
  1786 		char *str;
       
  1787 		int mlen = CHARSUB(patinput, patinstart);
       
  1788 
       
  1789 		str = metafy(patinstart, patinput - patinstart, META_DUP);
       
  1790 		setsparam("MATCH", str);
       
  1791 		setiparam("MBEGIN", (zlong)(patoffset + !isset(KSHARRAYS)));
       
  1792 		setiparam("MEND",
       
  1793 			  (zlong)(mlen + patoffset +
       
  1794 				  !isset(KSHARRAYS) - 1));
       
  1795 	    }
       
  1796 	    if (prog->patnpar && nump) {
       
  1797 		/*
       
  1798 		 * b flag: for backreferences using parentheses. Reported
       
  1799 		 * directly.
       
  1800 		 */
       
  1801 		*nump = prog->patnpar;
       
  1802 
       
  1803 		sp = patbeginp;
       
  1804 		ep = patendp;
       
  1805 
       
  1806 		for (i = 0; i < prog->patnpar && i < maxnpos; i++) {
       
  1807 		    if (parsfound & (1 << i)) {
       
  1808 			if (begp)
       
  1809 			    *begp++ = CHARSUB(*sp, patinstart) + patoffset;
       
  1810 			if (endp)
       
  1811 			    *endp++ = CHARSUB(*ep, patinstart) + patoffset
       
  1812 				- 1;
       
  1813 		    } else {
       
  1814 			if (begp)
       
  1815 			    *begp++ = -1;
       
  1816 			if (endp)
       
  1817 			    *endp++ = -1;
       
  1818 		    }
       
  1819 
       
  1820 		    sp++;
       
  1821 		    ep++;
       
  1822 		}
       
  1823 	    } else if (prog->patnpar && !(patflags & PAT_FILE)) {
       
  1824 		/*
       
  1825 		 * b flag: for backreferences using parentheses.
       
  1826 		 */
       
  1827 		int palen = prog->patnpar+1;
       
  1828 		char **matcharr, **mbeginarr, **mendarr;
       
  1829 		char numbuf[DIGBUFSIZE];
       
  1830 
       
  1831 		matcharr = zshcalloc(palen*sizeof(char *));
       
  1832 		mbeginarr = zshcalloc(palen*sizeof(char *));
       
  1833 		mendarr = zshcalloc(palen*sizeof(char *));
       
  1834 
       
  1835 		sp = patbeginp;
       
  1836 		ep = patendp;
       
  1837 
       
  1838 		for (i = 0; i < prog->patnpar; i++) {
       
  1839 		    if (parsfound & (1 << i)) {
       
  1840 			matcharr[i] = metafy(*sp, *ep - *sp, META_DUP);
       
  1841 			/*
       
  1842 			 * mbegin and mend give indexes into the string
       
  1843 			 * in the standard notation, i.e. respecting
       
  1844 			 * KSHARRAYS, and with the end index giving
       
  1845 			 * the last character, not one beyond.
       
  1846 			 * For example, foo=foo; [[ $foo = (f)oo ]] gives
       
  1847 			 * (without KSHARRAYS) indexes 1 and 1, which
       
  1848 			 * corresponds to indexing as ${foo[1,1]}.
       
  1849 			 */
       
  1850 			sprintf(numbuf, "%ld",
       
  1851 				(long)(CHARSUB(*sp, patinstart) +
       
  1852 				       patoffset +
       
  1853 				       !isset(KSHARRAYS)));
       
  1854 			mbeginarr[i] = ztrdup(numbuf);
       
  1855 			sprintf(numbuf, "%ld",
       
  1856 				(long)(CHARSUB(*ep, patinstart) +
       
  1857 				       patoffset +
       
  1858 				       !isset(KSHARRAYS) - 1));
       
  1859 			mendarr[i] = ztrdup(numbuf);
       
  1860 		    } else {
       
  1861 			/* Pattern wasn't set: either it was in an
       
  1862 			 * unmatched branch, or a hashed parenthesis
       
  1863 			 * that didn't match at all.
       
  1864 			 */
       
  1865 			matcharr[i] = ztrdup("");
       
  1866 			mbeginarr[i] = ztrdup("-1");
       
  1867 			mendarr[i] = ztrdup("-1");
       
  1868 		    }
       
  1869 		    sp++;
       
  1870 		    ep++;
       
  1871 		}
       
  1872 		setaparam("match", matcharr);
       
  1873 		setaparam("mbegin", mbeginarr);
       
  1874 		setaparam("mend", mendarr);
       
  1875 	    }
       
  1876 
       
  1877 	    ret = 1;
       
  1878 	} else
       
  1879 	    ret = 0;
       
  1880 
       
  1881 	if (tryalloced)
       
  1882 	    zfree(tryalloced, unmetalen + unmetalenp);
       
  1883 
       
  1884 	return ret;
       
  1885     }
       
  1886 }
       
  1887 
       
  1888 /*
       
  1889  * Return length of previous succesful match.  This is
       
  1890  * in metafied bytes, i.e. includes a count of Meta characters.
       
  1891  * Unusual and futile attempt at modular encapsulation.
       
  1892  */
       
  1893 
       
  1894 /**/
       
  1895 int
       
  1896 patmatchlen(void)
       
  1897 {
       
  1898     return patinlen;
       
  1899 }
       
  1900 
       
  1901 /*
       
  1902  * Match literal characters with case insensitivity test:  the first
       
  1903  * comes from the input string, the second the current pattern.
       
  1904  */
       
  1905 #define CHARMATCH(chin, chpa) (chin == chpa || \
       
  1906         ((patglobflags & GF_IGNCASE) ? \
       
  1907 	 ((isupper(chin) ? tolower(chin) : chin) == \
       
  1908 	  (isupper(chpa) ? tolower(chpa) : chpa)) : \
       
  1909 	 (patglobflags & GF_LCMATCHUC) ? \
       
  1910 	 (islower(chpa) && toupper(chpa) == chin) : 0))
       
  1911 /*
       
  1912  * The same but caching an expression from the first argument,
       
  1913  * Requires local charmatch_cache definition.
       
  1914  */
       
  1915 #define CHARMATCH_EXPR(expr, chpa) \
       
  1916 	(charmatch_cache = (expr), CHARMATCH(charmatch_cache, chpa))
       
  1917 
       
  1918 /*
       
  1919  * exactpos is used to remember how far down an exact string we have
       
  1920  * matched, if we are doing approximation and can therefore redo from
       
  1921  * the same point; we never need to otherwise.
       
  1922  *
       
  1923  * exactend is a pointer to the end of the string, which isn't
       
  1924  * null-terminated.
       
  1925  */
       
  1926 static char *exactpos, *exactend;
       
  1927 
       
  1928 /*
       
  1929  * Main matching routine.
       
  1930  *
       
  1931  * Testing the tail end of a match is usually done by recursion, but
       
  1932  * we try to eliminate that in favour of looping for simple cases.
       
  1933  */
       
  1934 
       
  1935 /**/
       
  1936 static int
       
  1937 patmatch(Upat prog)
       
  1938 {
       
  1939     /* Current and next nodes */
       
  1940     Upat scan = prog, next, opnd;
       
  1941     char *start, *save, *chrop, *chrend, *compend;
       
  1942     int savglobflags, op, no, min, nextch, fail = 0, saverrsfound;
       
  1943     zrange_t from, to, comp;
       
  1944 
       
  1945     while  (scan) {
       
  1946 	next = PATNEXT(scan);
       
  1947 
       
  1948 	if (!globdots && P_NOTDOT(scan) && patinput == patinstart &&
       
  1949 	    patinput < patinend && *patinput == '.')
       
  1950 	    return 0;
       
  1951 
       
  1952 	switch (P_OP(scan)) {
       
  1953 	case P_ANY:
       
  1954 	    if (patinput == patinend)
       
  1955 		fail = 1;
       
  1956 	    else
       
  1957 		CHARINC(patinput);
       
  1958 	    break;
       
  1959 	case P_EXACTLY:
       
  1960 	    /*
       
  1961 	     * acts as nothing if *chrop is null:  this is used by
       
  1962 	     * approx code.
       
  1963 	     */
       
  1964 	    if (exactpos) {
       
  1965 		chrop = exactpos;
       
  1966 		chrend = exactend;
       
  1967 	    } else {
       
  1968 		chrop = P_LS_STR(scan);
       
  1969 		chrend = chrop + P_LS_LEN(scan);
       
  1970 	    }
       
  1971 	    exactpos = NULL;
       
  1972 	    while (chrop < chrend && patinput < patinend) {
       
  1973 		int chin = CHARREF(patinput);
       
  1974 		int chpa = CHARREF(chrop);
       
  1975 		if (!CHARMATCH(chin, chpa)) {
       
  1976 		    fail = 1;
       
  1977 		    break;
       
  1978 		}
       
  1979 		CHARINC(chrop);
       
  1980 		CHARINC(patinput);
       
  1981 	    }
       
  1982 	    if (chrop < chrend) {
       
  1983 		exactpos = chrop;
       
  1984 		exactend = chrend;
       
  1985 		fail = 1;
       
  1986 	    }
       
  1987 	    break;
       
  1988 	case P_ANYOF:
       
  1989 	    if (patinput == patinend ||
       
  1990 		!patmatchrange((char *)P_OPERAND(scan),
       
  1991 			       CHARREF(patinput)))
       
  1992 		fail = 1;
       
  1993 	    else
       
  1994 		CHARINC(patinput);
       
  1995 	    break;
       
  1996 	case P_ANYBUT:
       
  1997 	    if (patinput == patinend ||
       
  1998 		patmatchrange((char *)P_OPERAND(scan),
       
  1999 			      CHARREF(patinput)))
       
  2000 		fail = 1;
       
  2001 	    else
       
  2002 		CHARINC(patinput);
       
  2003 	    break;
       
  2004 	case P_NUMRNG:
       
  2005 	case P_NUMFROM:
       
  2006 	case P_NUMTO:
       
  2007 	    /*
       
  2008 	     * To do this properly, we really have to treat numbers as
       
  2009 	     * closures:  that's so things like <1-1000>33 will
       
  2010 	     * match 633 (they didn't up to 3.1.6).  To avoid making this
       
  2011 	     * too inefficient, we see if there's an exact match next:
       
  2012 	     * if there is, and it's not a digit, we return 1 after
       
  2013 	     * the first attempt.
       
  2014 	     */
       
  2015 	    op = P_OP(scan);
       
  2016 	    start = (char *)P_OPERAND(scan);
       
  2017 	    from = to = 0;
       
  2018 	    if (op != P_NUMTO) {
       
  2019 #ifdef ZSH_64_BIT_TYPE
       
  2020 		/* We can't rely on pointer alignment being good enough. */
       
  2021 		memcpy((char *)&from, start, sizeof(zrange_t));
       
  2022 #else
       
  2023 		from = *((zrange_t *) start);
       
  2024 #endif
       
  2025 		start += sizeof(zrange_t);
       
  2026 	    }
       
  2027 	    if (op != P_NUMFROM) {
       
  2028 #ifdef ZSH_64_BIT_TYPE
       
  2029 		memcpy((char *)&to, start, sizeof(zrange_t));
       
  2030 #else
       
  2031 		to = *((zrange_t *) start);
       
  2032 #endif
       
  2033 	    }
       
  2034 	    start = compend = patinput;
       
  2035 	    comp = 0;
       
  2036 	    while (patinput < patinend && idigit(*patinput)) {
       
  2037 		if (comp)
       
  2038 		    comp *= 10;
       
  2039 		comp += *patinput - '0';
       
  2040 		patinput++;
       
  2041 		compend++;
       
  2042 
       
  2043 		if (comp & ((zrange_t)1 << (sizeof(comp)*8 -
       
  2044 #ifdef ZRANGE_T_IS_SIGNED
       
  2045 					    2
       
  2046 #else
       
  2047 					    1
       
  2048 #endif
       
  2049 				))) {
       
  2050 		    /*
       
  2051 		     * Out of range (allowing for signedness, which
       
  2052 		     * we need if we are using zlongs).
       
  2053 		     * This is as far as we can go.
       
  2054 		     * If we're doing a range "from", skip all the
       
  2055 		     * remaining numbers.  Otherwise, we can't
       
  2056 		     * match beyond the previous point anyway.
       
  2057 		     * Leave the pointer to the last calculated
       
  2058 		     * position (compend) where it was before.
       
  2059 		     */
       
  2060 		    if (op == P_NUMFROM) {
       
  2061 			while (patinput < patinend && idigit(*patinput))
       
  2062 			    patinput++;
       
  2063 		    }
       
  2064 		}
       
  2065 	    }
       
  2066 	    save = patinput;
       
  2067 	    no = 0;
       
  2068 	    while (patinput > start) {
       
  2069 		/* if already too small, no power on earth can save it */
       
  2070 		if (comp < from && patinput <= compend)
       
  2071 		    break;
       
  2072 		if ((op == P_NUMFROM || comp <= to) && patmatch(next)) {
       
  2073 		    return 1;
       
  2074 		}
       
  2075 		if (!no && P_OP(next) == P_EXACTLY &&
       
  2076 		    (!P_LS_LEN(next) ||
       
  2077 		     !idigit(STOUC(*P_LS_STR(next)))) &&
       
  2078 		    !(patglobflags & 0xff))
       
  2079 		    return 0;
       
  2080 		patinput = --save;
       
  2081 		no++;
       
  2082 		/*
       
  2083 		 * With a range start and an unrepresentable test
       
  2084 		 * number, we just back down the test string without
       
  2085 		 * changing the number until we get to a representable
       
  2086 		 * one.
       
  2087 		 */
       
  2088 		if (patinput < compend)
       
  2089 		    comp /= 10;
       
  2090 	    }
       
  2091 	    patinput = start;
       
  2092 	    fail = 1;
       
  2093 	    break;
       
  2094 	case P_NUMANY:
       
  2095 	    /* This is <->: any old set of digits, don't bother comparing */
       
  2096 	    start = patinput;
       
  2097 	    while (patinput < patinend && idigit(CHARREF(patinput)))
       
  2098 		patinput++;
       
  2099 	    save = patinput;
       
  2100 	    no = 0;
       
  2101 	    while (patinput > start) {
       
  2102 		if (patmatch(next))
       
  2103 		    return 1;
       
  2104 		if (!no && P_OP(next) == P_EXACTLY &&
       
  2105 		    (!P_LS_LEN(next) ||
       
  2106 		     !idigit(CHARREF(P_LS_STR(next)))) &&
       
  2107 		    !(patglobflags & 0xff))
       
  2108 		    return 0;
       
  2109 		patinput = --save;
       
  2110 		no++;
       
  2111 	    }
       
  2112 	    patinput = start;
       
  2113 	    fail = 1;
       
  2114 	    break;
       
  2115 	case P_NOTHING:
       
  2116 	    break;
       
  2117 	case P_BACK:
       
  2118 	    break;
       
  2119 	case P_GFLAGS:
       
  2120 	    patglobflags = P_OPERAND(scan)->l;
       
  2121 	    break;
       
  2122 	case P_OPEN:
       
  2123 	case P_OPEN+1:
       
  2124 	case P_OPEN+2:
       
  2125 	case P_OPEN+3:
       
  2126 	case P_OPEN+4:
       
  2127 	case P_OPEN+5:
       
  2128 	case P_OPEN+6:
       
  2129 	case P_OPEN+7:
       
  2130 	case P_OPEN+8:
       
  2131 	case P_OPEN+9:
       
  2132 	    no = P_OP(scan) - P_OPEN;
       
  2133 	    save = patinput;
       
  2134 
       
  2135 	    if (patmatch(next)) {
       
  2136 		/*
       
  2137 		 * Don't set patbeginp if some later invocation of
       
  2138 		 * the same parentheses already has.
       
  2139 		 */
       
  2140 		if (no && !(parsfound & (1 << (no - 1)))) {
       
  2141 		    patbeginp[no-1] = save;
       
  2142 		    parsfound |= 1 << (no - 1);
       
  2143 		}
       
  2144 		return 1;
       
  2145 	    } else
       
  2146 		return 0;
       
  2147 	    break;
       
  2148 	case P_CLOSE:
       
  2149 	case P_CLOSE+1:
       
  2150 	case P_CLOSE+2:
       
  2151 	case P_CLOSE+3:
       
  2152 	case P_CLOSE+4:
       
  2153 	case P_CLOSE+5:
       
  2154 	case P_CLOSE+6:
       
  2155 	case P_CLOSE+7:
       
  2156 	case P_CLOSE+8:
       
  2157 	case P_CLOSE+9:
       
  2158 	    no = P_OP(scan) - P_CLOSE;
       
  2159 	    save = patinput;
       
  2160 
       
  2161 	    if (patmatch(next)) {
       
  2162 		DPUTS(!patendp, "patendp not set for backreferencing");
       
  2163 		if (no && !(parsfound & (1 << (no + 15)))) {
       
  2164 		    patendp[no-1] = save;
       
  2165 		    parsfound |= 1 << (no + 15);
       
  2166 		}
       
  2167 		return 1;
       
  2168 	    } else
       
  2169 		return 0;
       
  2170 	    break;
       
  2171 	case P_EXCSYNC:
       
  2172 	    /* See the P_EXCLUDE code below for where syncptr comes from */
       
  2173 	    {
       
  2174 		unsigned char *syncptr;
       
  2175 		Upat after;
       
  2176 		after = P_OPERAND(scan);
       
  2177 		DPUTS(!P_ISEXCLUDE(after),
       
  2178 		      "BUG: EXCSYNC not followed by EXCLUDE.");
       
  2179 		DPUTS(!P_OPERAND(after)->p,
       
  2180 		      "BUG: EXCSYNC not handled by EXCLUDE");
       
  2181 		syncptr = P_OPERAND(after)->p + (patinput - patinstart);
       
  2182 		/*
       
  2183 		 * If we already matched from here, this time we fail.
       
  2184 		 * See WBRANCH code for story about error count.
       
  2185 		 */
       
  2186 		if (*syncptr && errsfound + 1 >= *syncptr)
       
  2187 		    return 0;
       
  2188 		/*
       
  2189 		 * Else record that we (possibly) matched this time.
       
  2190 		 * No harm if we don't:  then the previous test will just
       
  2191 		 * short cut the attempted match that is bound to fail.
       
  2192 		 * We never try to exclude something that has already
       
  2193 		 * failed anyway.
       
  2194 		 */
       
  2195 		*syncptr = errsfound + 1;
       
  2196 	    }
       
  2197 	    break;
       
  2198 	case P_EXCEND:
       
  2199 	    /*
       
  2200 	     * This is followed by a P_EXCSYNC, but only in the P_EXCLUDE
       
  2201 	     * branch.  Actually, we don't bother following it:  all we
       
  2202 	     * need to know is that we successfully matched so far up
       
  2203 	     * to the end of the asserted pattern; the endpoint
       
  2204 	     * in the target string is nulled out.
       
  2205 	     */
       
  2206 	    if (!(fail = (patinput < patinend)))
       
  2207 		return 1;
       
  2208 	    break;
       
  2209 	case P_BRANCH:
       
  2210 	case P_WBRANCH:
       
  2211 	    /* P_EXCLUDE shouldn't occur without a P_BRANCH */
       
  2212 	    if (!P_ISBRANCH(next)) {
       
  2213 		/* no choice, avoid recursion */
       
  2214 		DPUTS(P_OP(scan) == P_WBRANCH,
       
  2215 		      "BUG: WBRANCH with no alternative.");
       
  2216 		next = P_OPERAND(scan);
       
  2217 	    } else {
       
  2218 		do {
       
  2219 		    save = patinput;
       
  2220 		    savglobflags = patglobflags;
       
  2221 		    saverrsfound = errsfound;
       
  2222 		    if (P_ISEXCLUDE(next)) {
       
  2223 			/*
       
  2224 			 * The strategy is to test the asserted pattern,
       
  2225 			 * recording via P_EXCSYNC how far the part to
       
  2226 			 * be excluded matched.  We then set the
       
  2227 			 * length of the test string to that
       
  2228 			 * point and see if the exclusion as far as
       
  2229 			 * P_EXCEND also matches that string.
       
  2230 			 * We need to keep testing the asserted pattern
       
  2231 			 * by backtracking, since the first attempt
       
  2232 			 * may be excluded while a later attempt may not.
       
  2233 			 * For this we keep a pointer just after
       
  2234 			 * the P_EXCLUDE which is tested by the P_EXCSYNC
       
  2235 			 * to see if we matched there last time, in which
       
  2236 			 * case we fail.  If there is nothing to backtrack
       
  2237 			 * over, that doesn't matter:  we should fail anyway.
       
  2238 			 * The pointer also tells us where the asserted
       
  2239 			 * pattern matched for use by the exclusion.
       
  2240 			 *
       
  2241 			 * It's hard to allocate space for this
       
  2242 			 * beforehand since we may need to do it
       
  2243 			 * recursively.
       
  2244 			 *
       
  2245 			 * P.S. in case you were wondering, this code
       
  2246 			 * is horrible.
       
  2247 			 */
       
  2248 			Upat syncstrp;
       
  2249 			char *origpatinend;
       
  2250 			unsigned char *oldsyncstr;
       
  2251 			char *matchpt = NULL;
       
  2252 			int ret, savglobdots, matchederrs = 0;
       
  2253 			int savparsfound = parsfound;
       
  2254 			DPUTS(P_OP(scan) == P_WBRANCH,
       
  2255 			      "BUG: excluded WBRANCH");
       
  2256 			syncstrp = P_OPERAND(next);
       
  2257 			/*
       
  2258 			 * Unlike WBRANCH, each test at the same exclude
       
  2259 			 * sync point (due to an external loop) is separate,
       
  2260 			 * i.e testing (foo~bar)# is no different from
       
  2261 			 * (foo~bar)(foo~bar)... from the exclusion point
       
  2262 			 * of view, so we use a different sync string.
       
  2263 			 */
       
  2264 			oldsyncstr = syncstrp->p;
       
  2265 			syncstrp->p = (unsigned char *)
       
  2266 			    zshcalloc((patinend - patinstart) + 1);
       
  2267 			origpatinend = patinend;
       
  2268 			while ((ret = patmatch(P_OPERAND(scan)))) {
       
  2269 			    unsigned char *syncpt;
       
  2270 			    char *savpatinstart;
       
  2271 			    int savforce = forceerrs;
       
  2272 			    int savpatflags = patflags, synclen;
       
  2273 			    forceerrs = -1;
       
  2274 			    savglobdots = globdots;
       
  2275 			    matchederrs = errsfound;
       
  2276 			    matchpt = patinput;    /* may not be end */
       
  2277 			    globdots = 1;	   /* OK to match . first */
       
  2278 			    /* Find the point where the scan
       
  2279 			     * matched the part to be excluded: because
       
  2280 			     * of backtracking, the one
       
  2281 			     * most recently matched will be the first.
       
  2282 			     * (Luckily, backtracking is done after all
       
  2283 			     * possibilities for approximation have been
       
  2284 			     * checked.)
       
  2285 			     */
       
  2286 			    for (syncpt = syncstrp->p; !*syncpt; syncpt++)
       
  2287 				;
       
  2288 			    synclen = syncpt - syncstrp->p;
       
  2289 			    if (patinstart + synclen != patinend) {
       
  2290 				/*
       
  2291 				 * Temporarily mark the string as
       
  2292 				 * ending at this point.
       
  2293 				 */
       
  2294 				DPUTS(patinstart + synclen > matchpt,
       
  2295 				      "BUG: EXCSYNC failed");
       
  2296 
       
  2297 				patinend = patinstart + synclen;
       
  2298 				/*
       
  2299 				 * If this isn't really the end of the string,
       
  2300 				 * remember this for the (#e) assertion.
       
  2301 				 */
       
  2302 				patflags |= PAT_NOTEND;
       
  2303 			    }
       
  2304 			    savpatinstart = patinstart;
       
  2305 			    next = PATNEXT(scan);
       
  2306 			    while (next && P_ISEXCLUDE(next)) {
       
  2307 				patinput = save;
       
  2308 				/*
       
  2309 				 * turn off approximations in exclusions:
       
  2310 				 * note we keep remaining patglobflags
       
  2311 				 * set by asserted branch (or previous
       
  2312 				 * excluded branches, for consistency).
       
  2313 				 */
       
  2314 				patglobflags &= ~0xff;
       
  2315 				errsfound = 0;
       
  2316 				opnd = P_OPERAND(next) + 1;
       
  2317 				if (P_OP(next) == P_EXCLUDP && patinpath) {
       
  2318 				    /*
       
  2319 				     * Top level exclusion with a file,
       
  2320 				     * applies to whole path so add the
       
  2321 				     * segments already matched.
       
  2322 				     * We copied these in front of the
       
  2323 				     * test pattern, so patinend doesn't
       
  2324 				     * need moving.
       
  2325 				     */
       
  2326 				    DPUTS(patinput != patinstart,
       
  2327 					  "BUG: not at start excluding path");
       
  2328 				    patinput = patinstart = patinpath;
       
  2329 				}
       
  2330 				if (patmatch(opnd)) {
       
  2331 				    ret = 0;
       
  2332 				    /*
       
  2333 				     * Another subtlety: if we exclude the
       
  2334 				     * match, any parentheses just found
       
  2335 				     * become invalidated.
       
  2336 				     */
       
  2337 				    parsfound = savparsfound;
       
  2338 				}
       
  2339 				if (patinpath) {
       
  2340 				    patinput = savpatinstart +
       
  2341 					(patinput - patinstart);
       
  2342 				    patinstart = savpatinstart;
       
  2343 				}
       
  2344 				if (!ret)
       
  2345 				    break;
       
  2346 				next = PATNEXT(next);
       
  2347 			    }
       
  2348 			    /*
       
  2349 			     * Restore original end position.
       
  2350 			     */
       
  2351 			    patinend = origpatinend;
       
  2352 			    patflags = savpatflags;
       
  2353 			    globdots = savglobdots;
       
  2354 			    forceerrs = savforce;
       
  2355 			    if (ret)
       
  2356 				break;
       
  2357 			    patinput = save;
       
  2358 			    patglobflags = savglobflags;
       
  2359 			    errsfound = saverrsfound;
       
  2360 			}
       
  2361 			zfree((char *)syncstrp->p,
       
  2362 			      (patinend - patinstart) + 1);
       
  2363 			syncstrp->p = oldsyncstr;
       
  2364 			if (ret) {
       
  2365 			    patinput = matchpt;
       
  2366 			    errsfound = matchederrs;
       
  2367 			    return 1;
       
  2368 			}
       
  2369 			while ((scan = PATNEXT(scan)) &&
       
  2370 			       P_ISEXCLUDE(scan))
       
  2371 			    ;
       
  2372 		    } else {
       
  2373 			int ret = 1, pfree = 0;
       
  2374 			Upat ptrp = NULL;
       
  2375 			unsigned char *ptr;
       
  2376 			if (P_OP(scan) == P_WBRANCH) {
       
  2377 			    /*
       
  2378 			     * This is where we make sure that we are not
       
  2379 			     * repeatedly matching zero-length strings in
       
  2380 			     * a closure, which would cause an infinite loop,
       
  2381 			     * and also remove exponential behaviour in
       
  2382 			     * backtracking nested closures.
       
  2383 			     * The P_WBRANCH operator leaves a space for a
       
  2384 			     * uchar *, initialized to NULL, which is
       
  2385 			     * turned into a string the same length as the
       
  2386 			     * target string.  Every time we match from a
       
  2387 			     * particular point in the target string, we
       
  2388 			     * stick a 1 at the corresponding point here.
       
  2389 			     * If we come round to the same branch again, and
       
  2390 			     * there is already a 1, then the test fails.
       
  2391 			     */
       
  2392 			    opnd = P_OPERAND(scan);
       
  2393 			    ptrp = opnd++;
       
  2394 			    if (!ptrp->p) {
       
  2395 				ptrp->p = (unsigned char *)
       
  2396 				    zshcalloc((patinend - patinstart) + 1);
       
  2397 				pfree = 1;
       
  2398 			    }
       
  2399 			    ptr = ptrp->p + (patinput - patinstart);
       
  2400 
       
  2401 			    /*
       
  2402 			     * Without approximation, this is just a
       
  2403 			     * single bit test.  With approximation, we
       
  2404 			     * need to know how many errors there were
       
  2405 			     * last time we made the test.  If errsfound
       
  2406 			     * is now smaller than it was, hence we can
       
  2407 			     * make more approximations in the remaining
       
  2408 			     * code, we continue with the test.
       
  2409 			     * (This is why the max number of errors is
       
  2410 			     * 254, not 255.)
       
  2411 			     */
       
  2412 			    if (*ptr && errsfound + 1 >= *ptr)
       
  2413 				ret = 0;
       
  2414 			    *ptr = errsfound + 1;
       
  2415 			} else
       
  2416 			    opnd = P_OPERAND(scan);
       
  2417 			if (ret)
       
  2418 			    ret = patmatch(opnd);
       
  2419 			if (pfree) {
       
  2420 			    zfree((char *)ptrp->p,
       
  2421 				  (patinend - patinstart) + 1);
       
  2422 			    ptrp->p = NULL;
       
  2423 			}
       
  2424 			if (ret)
       
  2425 			    return 1;
       
  2426 			scan = PATNEXT(scan);
       
  2427 		    }
       
  2428 		    patinput = save;
       
  2429 		    patglobflags = savglobflags;
       
  2430 		    errsfound = saverrsfound;
       
  2431 		    DPUTS(P_OP(scan) == P_WBRANCH,
       
  2432 			  "BUG: WBRANCH not first choice.");
       
  2433 		    next = PATNEXT(scan);
       
  2434 		} while (scan && P_ISBRANCH(scan));
       
  2435 		return 0;
       
  2436 	    }
       
  2437 	    break;
       
  2438 	case P_STAR:
       
  2439 	    /* Handle specially for speed, although really P_ONEHASH+P_ANY */
       
  2440 	case P_ONEHASH:
       
  2441 	case P_TWOHASH:
       
  2442 	    /*
       
  2443 	     * This is just simple cases, matching one character.
       
  2444 	     * With approximations, we still handle * this way, since
       
  2445 	     * no approximation is ever necessary, but other closures
       
  2446 	     * are handled by the more complicated branching method
       
  2447 	     */
       
  2448 	    op = P_OP(scan);
       
  2449 	    /* Note that no counts possibly metafied characters */
       
  2450 	    start = patinput;
       
  2451 	    if (op == P_STAR) {
       
  2452 		for (no = 0; patinput < patinend; CHARINC(patinput))
       
  2453 		    no++;
       
  2454 		/* simple optimization for reasonably common case */
       
  2455 		if (P_OP(next) == P_END)
       
  2456 		    return 1;
       
  2457 	    } else {
       
  2458 		DPUTS(patglobflags & 0xff,
       
  2459 		      "BUG: wrong backtracking with approximation.");
       
  2460 		if (!globdots && P_NOTDOT(P_OPERAND(scan)) &&
       
  2461 		    patinput == patinstart && patinput < patinend &&
       
  2462 		    CHARREF(patinput) == '.')
       
  2463 		    return 0;
       
  2464 		no = patrepeat(P_OPERAND(scan));
       
  2465 	    }
       
  2466 	    min = (op == P_TWOHASH) ? 1 : 0;
       
  2467 	    /*
       
  2468 	     * Lookahead to avoid useless matches. This is not possible
       
  2469 	     * with approximation.
       
  2470 	     */
       
  2471 	    if (P_OP(next) == P_EXACTLY && P_LS_LEN(next) &&
       
  2472 		!(patglobflags & 0xff)) {
       
  2473 		char *nextop = P_LS_STR(next);
       
  2474 		/*
       
  2475 		 * If that P_EXACTLY is last (common in simple patterns,
       
  2476 		 * such as *.c), then it can be only be matched at one
       
  2477 		 * point in the test string, so record that.
       
  2478 		 */
       
  2479 		Upat ptemp= PATNEXT(next);
       
  2480 		if ( P_OP(ptemp)== P_END &&
       
  2481 		    !(patflags & PAT_NOANCH)) {
       
  2482 		    int ptlen = patinend - patinput;
       
  2483 		    int lenmatch = patinend - (min ? CHARNEXT(start) : start);
       
  2484 		    /* Are we in the right range? */
       
  2485 		    if (P_LS_LEN(next) > lenmatch || P_LS_LEN(next) < ptlen)
       
  2486 			return 0;
       
  2487 		    /* Yes, just position appropriately and test. */
       
  2488 		    patinput += ptlen - P_LS_LEN(next);
       
  2489 		    /*
       
  2490 		     * Here we will need to be careful that patinput is not
       
  2491 		     * in the middle of a multibyte character.
       
  2492 		     */
       
  2493 		    /* Continue loop with P_EXACTLY test. */
       
  2494 		    break;
       
  2495 		}
       
  2496 		nextch = CHARREF(nextop);
       
  2497 	    } else
       
  2498 		nextch = -1;
       
  2499 	    save = patinput;
       
  2500 	    savglobflags = patglobflags;
       
  2501 	    saverrsfound = errsfound;
       
  2502 	    while (no >= min) {
       
  2503 		int charmatch_cache;
       
  2504 		if (nextch < 0 ||
       
  2505 		    (patinput < patinend &&
       
  2506 		     CHARMATCH_EXPR(CHARREF(patinput), nextch))) {
       
  2507 		    if (patmatch(next))
       
  2508 			return 1;
       
  2509 		}
       
  2510 		no--;
       
  2511 		save--;
       
  2512 		/*
       
  2513 		 * Here we will need to make sure save is
       
  2514 		 * decremented properly to the start of
       
  2515 		 * the preceeding multibyte character.
       
  2516 		 */
       
  2517 		patinput = save;
       
  2518 		patglobflags = savglobflags;
       
  2519 		errsfound = saverrsfound;
       
  2520 	    }
       
  2521 	    /*
       
  2522 	     * As with branches, the patmatch(next) stuff for *
       
  2523 	     * handles approximation, so we don't need to try
       
  2524 	     * anything here.
       
  2525 	     */
       
  2526 	    return 0;
       
  2527 	case P_ISSTART:
       
  2528 	    if (patinput != patinstart || (patflags & PAT_NOTSTART))
       
  2529 		fail = 1;
       
  2530 	    break;
       
  2531 	case P_ISEND:
       
  2532 	    if (patinput < patinend || (patflags & PAT_NOTEND))
       
  2533 		fail = 1;
       
  2534 	    break;
       
  2535 	case P_END:
       
  2536 	    if (!(fail = (patinput < patinend && !(patflags & PAT_NOANCH))))
       
  2537 		return 1;
       
  2538 	    break;
       
  2539 #ifdef DEBUG
       
  2540 	default:
       
  2541 	    dputs("BUG: bad operand in patmatch.");
       
  2542 	    return 0;
       
  2543 	    break;
       
  2544 #endif
       
  2545 	}
       
  2546 
       
  2547 	if (fail) {
       
  2548 	    if (errsfound < (patglobflags & 0xff) &&
       
  2549 		(forceerrs == -1 || errsfound < forceerrs)) {
       
  2550 		/*
       
  2551 		 * Approximation code.  There are four possibilities
       
  2552 		 *
       
  2553 		 * 1. omit character from input string
       
  2554 		 * 2. transpose characters in input and pattern strings
       
  2555 		 * 3. omit character in both input and pattern strings
       
  2556 		 * 4. omit character from pattern string.
       
  2557 		 *
       
  2558 		 * which we try in that order.
       
  2559 		 *
       
  2560 		 * Of these, 2, 3 and 4 require an exact match string
       
  2561 		 * (P_EXACTLY) while 1, 2 and 3 require that we not
       
  2562 		 * have reached the end of the input string.
       
  2563 		 *
       
  2564 		 * Note in each case after making the approximation we
       
  2565 		 * need to retry the *same* pattern; this is what
       
  2566 		 * requires exactpos, a slightly doleful way of
       
  2567 		 * communicating with the exact character matcher.
       
  2568 		 */
       
  2569 		char *savexact = exactpos;
       
  2570 		save = patinput;
       
  2571 		savglobflags = patglobflags;
       
  2572 		saverrsfound = ++errsfound;
       
  2573 		fail = 0;
       
  2574 
       
  2575 		DPUTS(P_OP(scan) != P_EXACTLY && exactpos,
       
  2576 		      "BUG: non-exact match has set exactpos");
       
  2577 
       
  2578 		/* Try omitting a character from the input string */
       
  2579 		if (patinput < patinend) {
       
  2580 		    CHARINC(patinput);
       
  2581 		    /* If we are not on an exact match, then this is
       
  2582 		     * our last gasp effort, so we can optimize out
       
  2583 		     * the recursive call.
       
  2584 		     */
       
  2585 		    if (P_OP(scan) != P_EXACTLY)
       
  2586 			continue;
       
  2587 		    if (patmatch(scan))
       
  2588 			return 1;
       
  2589 		}
       
  2590 
       
  2591 		if (P_OP(scan) == P_EXACTLY) {
       
  2592 		    char *nextexact = savexact;
       
  2593 		    DPUTS(!savexact,
       
  2594 			  "BUG: exact match has not set exactpos");
       
  2595 		    CHARINC(nextexact);
       
  2596 
       
  2597 		    if (save < patinend) {
       
  2598 			char *nextin = save;
       
  2599 			CHARINC(nextin);
       
  2600 			patglobflags = savglobflags;
       
  2601 			errsfound = saverrsfound;
       
  2602 			exactpos = savexact;
       
  2603 
       
  2604 			/*
       
  2605 			 * Try swapping two characters in patinput and
       
  2606 			 * exactpos
       
  2607 			 */
       
  2608 			if (save < patinend && nextin < patinend &&
       
  2609 			    nextexact < exactend) {
       
  2610 			    int cin0 = CHARREF(save);
       
  2611 			    int cpa0 = CHARREF(exactpos);
       
  2612 			    int cin1 = CHARREF(nextin);
       
  2613 			    int cpa1 = CHARREF(nextexact);
       
  2614 
       
  2615 			    if (CHARMATCH(cin0, cpa1) &&
       
  2616 				CHARMATCH(cin1, cpa0)) {
       
  2617 				patinput = nextin;
       
  2618 				CHARINC(patinput);
       
  2619 				exactpos = nextexact;
       
  2620 				CHARINC(exactpos);
       
  2621 				if (patmatch(scan))
       
  2622 				    return 1;
       
  2623 
       
  2624 				patglobflags = savglobflags;
       
  2625 				errsfound = saverrsfound;
       
  2626 			    }
       
  2627 			}
       
  2628 
       
  2629 			/*
       
  2630 			 * Try moving up both strings.
       
  2631 			 */
       
  2632 			patinput = nextin;
       
  2633 			exactpos = nextexact;
       
  2634 			if (patmatch(scan))
       
  2635 			    return 1;
       
  2636 
       
  2637 			patinput = save;
       
  2638 			patglobflags = savglobflags;
       
  2639 			errsfound = saverrsfound;
       
  2640 			exactpos = savexact;
       
  2641 		    }
       
  2642 
       
  2643 		    DPUTS(exactpos == exactend, "approximating too far");
       
  2644 		    /*
       
  2645 		     * Try moving up the exact match pattern.
       
  2646 		     * This must be the last attempt, so just loop
       
  2647 		     * instead of calling recursively.
       
  2648 		     */
       
  2649 		    CHARINC(exactpos);
       
  2650 		    continue;
       
  2651 		}
       
  2652 	    }
       
  2653 	    exactpos = NULL;
       
  2654 	    return 0;
       
  2655 	}
       
  2656 
       
  2657 	scan = next;
       
  2658     }
       
  2659 
       
  2660     return 0;
       
  2661 }
       
  2662 
       
  2663 /**/
       
  2664 static int
       
  2665 patmatchrange(char *range, int ch)
       
  2666 {
       
  2667     int r1, r2;
       
  2668 
       
  2669     /*
       
  2670      * Careful here: unlike other strings, range is a NULL-terminated,
       
  2671      * metafied string, because we need to treat the Posix and hyphenated
       
  2672      * ranges specially.
       
  2673      */
       
  2674     for (; *range; range++) {
       
  2675 	if (imeta(STOUC(*range))) {
       
  2676 	    switch (STOUC(*range)-STOUC(Meta)) {
       
  2677 	    case 0:
       
  2678 		if (STOUC(*++range ^ 32) == ch)
       
  2679 		    return 1;
       
  2680 		break;
       
  2681 	    case PP_ALPHA:
       
  2682 		if (isalpha(ch))
       
  2683 		    return 1;
       
  2684 		break;
       
  2685 	    case PP_ALNUM:
       
  2686 		if (isalnum(ch))
       
  2687 		    return 1;
       
  2688 		break;
       
  2689 	    case PP_ASCII:
       
  2690 		if ((ch & ~0x7f) == 0)
       
  2691 		    return 1;
       
  2692 		break;
       
  2693 	    case PP_BLANK:
       
  2694 		if (ch == ' ' || ch == '\t')
       
  2695 		    return 1;
       
  2696 		break;
       
  2697 	    case PP_CNTRL:
       
  2698 		if (iscntrl(ch))
       
  2699 		    return 1;
       
  2700 		break;
       
  2701 	    case PP_DIGIT:
       
  2702 		if (isdigit(ch))
       
  2703 		    return 1;
       
  2704 		break;
       
  2705 	    case PP_GRAPH:
       
  2706 		if (isgraph(ch))
       
  2707 		    return 1;
       
  2708 		break;
       
  2709 	    case PP_LOWER:
       
  2710 		if (islower(ch))
       
  2711 		    return 1;
       
  2712 		break;
       
  2713 	    case PP_PRINT:
       
  2714 		if (isprint(ch))
       
  2715 		    return 1;
       
  2716 		break;
       
  2717 	    case PP_PUNCT:
       
  2718 		if (ispunct(ch))
       
  2719 		    return 1;
       
  2720 		break;
       
  2721 	    case PP_SPACE:
       
  2722 		if (isspace(ch))
       
  2723 		    return 1;
       
  2724 		break;
       
  2725 	    case PP_UPPER:
       
  2726 		if (isupper(ch))
       
  2727 		    return 1;
       
  2728 		break;
       
  2729 	    case PP_XDIGIT:
       
  2730 		if (isxdigit(ch))
       
  2731 		    return 1;
       
  2732 		break;
       
  2733 	    case PP_RANGE:
       
  2734 		range++;
       
  2735 		r1 = STOUC(UNMETA(range));
       
  2736 		METAINC(range);
       
  2737 		r2 = STOUC(UNMETA(range));
       
  2738 		if (*range == Meta)
       
  2739 		    range++;
       
  2740 		if (r1 <= ch && ch <= r2)
       
  2741 		    return 1;
       
  2742 		break;
       
  2743 	    case PP_UNKWN:
       
  2744 		DPUTS(1, "BUG: unknown posix range passed through.\n");
       
  2745 		break;
       
  2746 	    default:
       
  2747 		DPUTS(1, "BUG: unknown metacharacter in range.");
       
  2748 		break;
       
  2749 	    }
       
  2750 	} else if (STOUC(*range) == ch)
       
  2751 	    return 1;
       
  2752     }
       
  2753     return 0;
       
  2754 }
       
  2755 
       
  2756 /* repeatedly match something simple and say how many times */
       
  2757 
       
  2758 /**/
       
  2759 static int patrepeat(Upat p)
       
  2760 {
       
  2761     int count = 0, tch, charmatch_cache;
       
  2762     char *scan, *opnd;
       
  2763 
       
  2764     scan = patinput;
       
  2765     opnd = (char *)P_OPERAND(p);
       
  2766 
       
  2767     switch(P_OP(p)) {
       
  2768 #ifdef DEBUG
       
  2769     case P_ANY:
       
  2770 	dputs("BUG: ?# did not get optimized to *");
       
  2771 	return 0;
       
  2772 	break;
       
  2773 #endif
       
  2774     case P_EXACTLY:
       
  2775 	DPUTS(P_LS_LEN(p) != 1, "closure following more than one character");
       
  2776 	tch = CHARREF(P_LS_STR(p));
       
  2777 	while (scan < patinend &&
       
  2778 	       CHARMATCH_EXPR(CHARREF(scan), tch)) {
       
  2779 	    count++;
       
  2780 	    CHARINC(scan);
       
  2781 	}
       
  2782 	break;
       
  2783     case P_ANYOF:
       
  2784 	while (scan < patinend && patmatchrange(opnd, CHARREF(scan))) {
       
  2785 	    count++;
       
  2786 	    CHARINC(scan);
       
  2787     	}
       
  2788 	break;
       
  2789     case P_ANYBUT:
       
  2790 	while (scan < patinend && !patmatchrange(opnd, CHARREF(scan))) {
       
  2791 	    count++;
       
  2792 	    CHARINC(scan);
       
  2793     	}
       
  2794 	break;
       
  2795 #ifdef DEBUG
       
  2796     default:
       
  2797 	dputs("BUG: something very strange is happening in patrepeat");
       
  2798 	return 0;
       
  2799 	break;
       
  2800 #endif
       
  2801     }
       
  2802 
       
  2803     patinput = scan;
       
  2804     return count;
       
  2805 }
       
  2806 
       
  2807 /* Free a patprog. */
       
  2808 
       
  2809 /**/
       
  2810 mod_export void
       
  2811 freepatprog(Patprog prog)
       
  2812 {
       
  2813     if (prog && prog != dummy_patprog1 && prog != dummy_patprog2)
       
  2814 	zfree(prog, prog->size);
       
  2815 }
       
  2816 
       
  2817 /**/
       
  2818 #ifdef ZSH_PAT_DEBUG
       
  2819 
       
  2820 /* Debugging stuff: print and test a regular expression */
       
  2821 
       
  2822 /* Dump a regexp onto stdout in vaguely comprehensible form */
       
  2823 
       
  2824 /**/
       
  2825 static void
       
  2826 patdump(Patprog r)
       
  2827 {
       
  2828     char *s, *base, op = P_EXACTLY;
       
  2829     Upat up, codestart, next;
       
  2830 
       
  2831     base = (char *)r;
       
  2832     s = base + r->startoff;
       
  2833 
       
  2834     if (r->flags & PAT_PURES) {
       
  2835 	printf("STRING:%s\n", (char *)s);
       
  2836     } else {
       
  2837 	codestart = (Upat)s;
       
  2838 	while (op != P_END) {
       
  2839 	    up = (Upat)s;
       
  2840 	    op = P_OP(up);
       
  2841 	    printf("%2d%s", up-codestart, patprop(up));
       
  2842 	    next = PATNEXT(up);
       
  2843 	    printf("(%d)", next ? next-codestart : 0);
       
  2844 	    s += sizeof(union upat);
       
  2845 	    if (op == P_EXACTLY) {
       
  2846 		long llen = *(long *)s;
       
  2847 		s += sizeof(long);
       
  2848 		while (llen--) {
       
  2849 		    putchar(CHARREF(s));
       
  2850 		    CHARINC(s);
       
  2851 		}
       
  2852 	    } else if (op == P_ANYOF || op == P_ANYBUT) {
       
  2853 		while (*s != '\0') {
       
  2854 		    if (itok(*s)) {
       
  2855 			if (*s == Meta + PP_RANGE) {
       
  2856 			    s++;
       
  2857 			    printf("<RANGE:%c-", UNMETA(s));
       
  2858 			    METAINC(s);
       
  2859 			    printf("%c>", UNMETA(s));
       
  2860 			} else {
       
  2861 			    printf("<TYPE:%d>", *s - Meta);
       
  2862 			    s++;
       
  2863 			    continue;
       
  2864 			}
       
  2865 		    } else
       
  2866 			putchar(UNMETA(s));
       
  2867 		    METAINC(s);
       
  2868 		}
       
  2869 	    } else if (op == P_NUMRNG || op == P_NUMFROM || op == P_NUMTO) {
       
  2870 		printf("%lu", (unsigned long)*(zrange_t *)s);
       
  2871 		s += sizeof(zrange_t);
       
  2872 		if (op == P_NUMRNG) {
       
  2873 		    printf("-%lu", (unsigned long)*(zrange_t *)s);
       
  2874 		    s += sizeof(zrange_t);
       
  2875 		}
       
  2876 	    } else if (op == P_GFLAGS) {
       
  2877 		printf("%ld, %ld", (++up)->l & ~0xff, (++up)->l & 0xff);
       
  2878 		s += sizeof(union upat);
       
  2879 	    } else if (op == P_WBRANCH || op == P_EXCLUDE ||
       
  2880 		       op == P_EXCLUDP) {
       
  2881 		s += sizeof(union upat);
       
  2882 	    }
       
  2883 	    putchar('\n');
       
  2884 	    s = base + (((s - base) + sizeof(union upat) - 1) &
       
  2885 			~(sizeof(union upat) - 1));
       
  2886 	}
       
  2887     }
       
  2888 
       
  2889     printf("Total size = %ld\n", r->size);
       
  2890     if (r->patstartch)
       
  2891 	printf("start `%c' ", r->patstartch);
       
  2892     if (!(r->flags & PAT_NOANCH))
       
  2893 	printf("EOL-anchor ");
       
  2894     if (r->patnpar)
       
  2895 	printf("%d active backreferences ", r->patnpar);
       
  2896     if (r->mustoff)
       
  2897 	printf("must have \"%s\"", (char *)r + r->mustoff);
       
  2898     printf("\n");
       
  2899     if (r->globflags) {
       
  2900 	printf("Globbing flags: ");
       
  2901 	if (r->globflags & GF_LCMATCHUC)
       
  2902 	    printf("LC matches UC ");
       
  2903 	if (r->globflags & GF_IGNCASE)
       
  2904 	    printf("Ignore case");
       
  2905 	printf("\n");
       
  2906 	if (r->globflags & 0xff)
       
  2907 	    printf("Max errors = %d\n", r->globflags & 0xff);
       
  2908     }
       
  2909 }
       
  2910 
       
  2911 /**/
       
  2912 static char *
       
  2913 patprop(Upat op)
       
  2914 {
       
  2915     char *p = NULL;
       
  2916     static char buf[50];
       
  2917 
       
  2918     strcpy(buf, ":");
       
  2919 
       
  2920     switch(P_OP(op)) {
       
  2921     case P_ANY:
       
  2922 	p = "ANY";
       
  2923 	break;
       
  2924     case P_ANYOF:
       
  2925 	p = "ANYOF";
       
  2926 	break;
       
  2927     case P_ANYBUT:
       
  2928 	p = "ANYBUT";
       
  2929 	break;
       
  2930     case P_BRANCH:
       
  2931 	p = "BRANCH";
       
  2932 	break;
       
  2933     case P_WBRANCH:
       
  2934 	p = "WBRANCH";
       
  2935 	break;
       
  2936     case P_EXCLUDE:
       
  2937 	p = "EXCLUDE";
       
  2938 	break;
       
  2939     case P_EXCLUDP:
       
  2940 	p = "EXCLUDP";
       
  2941 	break;
       
  2942     case P_EXCSYNC:
       
  2943 	p = "EXCSYNC";
       
  2944 	break;
       
  2945     case P_EXCEND:
       
  2946 	p = "EXCEND";
       
  2947 	break;
       
  2948     case P_EXACTLY:
       
  2949 	p = "EXACTLY";
       
  2950 	break;
       
  2951     case P_GFLAGS:
       
  2952 	p = "GFLAGS";
       
  2953 	break;
       
  2954     case P_ISSTART:
       
  2955 	p = "ISSTART";
       
  2956 	break;
       
  2957     case P_ISEND:
       
  2958 	p = "ISEND";
       
  2959 	break;
       
  2960     case P_NOTHING:
       
  2961 	p = "NOTHING";
       
  2962 	break;
       
  2963     case P_BACK:
       
  2964 	p = "BACK";
       
  2965 	break;
       
  2966     case P_END:
       
  2967 	p = "END";
       
  2968 	break;
       
  2969     case P_OPEN:
       
  2970     case P_OPEN+1:
       
  2971     case P_OPEN+2:
       
  2972     case P_OPEN+3:
       
  2973     case P_OPEN+4:
       
  2974     case P_OPEN+5:
       
  2975     case P_OPEN+6:
       
  2976     case P_OPEN+7:
       
  2977     case P_OPEN+8:
       
  2978     case P_OPEN+9:
       
  2979 	sprintf(buf+strlen(buf), "OPEN%ld", P_OP(op)-P_OPEN);
       
  2980 	p = NULL;
       
  2981 	break;
       
  2982     case P_CLOSE:
       
  2983     case P_CLOSE+1:
       
  2984     case P_CLOSE+2:
       
  2985     case P_CLOSE+3:
       
  2986     case P_CLOSE+4:
       
  2987     case P_CLOSE+5:
       
  2988     case P_CLOSE+6:
       
  2989     case P_CLOSE+7:
       
  2990     case P_CLOSE+8:
       
  2991     case P_CLOSE+9:
       
  2992 	sprintf(buf+strlen(buf), "CLOSE%ld", P_OP(op)-P_CLOSE);
       
  2993 	p = NULL;
       
  2994 	break;
       
  2995     case P_STAR:
       
  2996 	p = "STAR";
       
  2997 	break;
       
  2998     case P_ONEHASH:
       
  2999 	p = "ONEHASH";
       
  3000 	break;
       
  3001     case P_TWOHASH:
       
  3002 	p = "TWOHASH";
       
  3003 	break;
       
  3004     case P_NUMRNG:
       
  3005 	p = "NUMRNG";
       
  3006 	break;
       
  3007     case P_NUMFROM:
       
  3008 	p = "NUMFROM";
       
  3009 	break;
       
  3010     case P_NUMTO:
       
  3011 	p = "NUMTO";
       
  3012 	break;
       
  3013     case P_NUMANY:
       
  3014 	p = "NUMANY";
       
  3015 	break;
       
  3016     default:
       
  3017 	fprintf(stderr, "Bad opcode\n");
       
  3018 	p = NULL;
       
  3019 	break;
       
  3020     }
       
  3021     if (p)
       
  3022 	strcat(buf, p);
       
  3023     return buf;
       
  3024 }
       
  3025 #ifndef __SYMBIAN32__
       
  3026 /**/
       
  3027 int
       
  3028 bin_patdebug(char *name, char **args, char *ops, int func)
       
  3029 {
       
  3030     Patprog prog;
       
  3031     int ret = 0;
       
  3032 
       
  3033     tokenize(*args);
       
  3034 
       
  3035     if (!(prog = patcompile((char *)*args, 0, 0)))
       
  3036 	return 1;
       
  3037     if (ops['p'] || !args[1]) {
       
  3038 	patdump(prog);
       
  3039     }
       
  3040 
       
  3041     while (*++args) {
       
  3042 	if (!pattry(prog, (char *)*args))
       
  3043 	    ret++;
       
  3044     }
       
  3045     return ret;
       
  3046 }
       
  3047 #endif //#ifndef __SYMBIAN32__
       
  3048 /**/
       
  3049 #endif /* ZSH_PAT_DEBUG */