genericopenlibs/openenvcore/libc/src/regex/src/regcomp.c
changeset 0 e4d67989cc36
child 72 403e7f6ed6c5
equal deleted inserted replaced
-1:000000000000 0:e4d67989cc36
       
     1 
       
     2 /*-
       
     3  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
       
     4  * Copyright (c) 1992, 1993, 1994
       
     5  *	The Regents of the University of California.  All rights reserved.
       
     6  *
       
     7  * This code is derived from software contributed to Berkeley by
       
     8  * Henry Spencer.
       
     9  *
       
    10  * Redistribution and use in source and binary forms, with or without
       
    11  * modification, are permitted provided that the following conditions
       
    12  * are met:
       
    13  * 1. Redistributions of source code must retain the above copyright
       
    14  *    notice, this list of conditions and the following disclaimer.
       
    15  * 2. Redistributions in binary form must reproduce the above copyright
       
    16  *    notice, this list of conditions and the following disclaimer in the
       
    17  *    documentation and/or other materials provided with the distribution.
       
    18  * 4. Neither the name of the University nor the names of its contributors
       
    19  *    may be used to endorse or promote products derived from this software
       
    20  *    without specific prior written permission.
       
    21  *
       
    22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       
    23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       
    26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       
    27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       
    28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       
    29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       
    30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       
    31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       
    32  * SUCH DAMAGE.
       
    33  * Portions Copyright (c) 2005-2007 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
       
    34  *	@(#)regcomp.c	8.5 (Berkeley) 3/20/94
       
    35  */
       
    36 
       
    37 #if defined(LIBC_SCCS) && !defined(lint)
       
    38 static char sccsid[] = "@(#)regcomp.c	8.5 (Berkeley) 3/20/94";
       
    39 #endif /* LIBC_SCCS and not lint */
       
    40 #include <sys/cdefs.h>
       
    41 
       
    42 #ifndef __SYMBIAN32__
       
    43 __FBSDID("$FreeBSD: src/lib/libc/regex/regcomp.c,v 1.34 2004/10/03 15:42:59 stefanf Exp $");
       
    44 #endif
       
    45 
       
    46 #ifdef __SYMBIAN32__
       
    47 #ifdef __cplusplus
       
    48 #include <e32def.h>
       
    49 #endif	
       
    50 #endif
       
    51 
       
    52 #include <sys/types.h>
       
    53 #include <stdio.h>
       
    54 #include <string.h>
       
    55 #include <ctype.h>
       
    56 #include <limits.h>
       
    57 #include <stdlib.h>
       
    58 #include <regex.h>
       
    59 #include <wchar.h>
       
    60 #include <wctype.h>
       
    61 
       
    62 #ifdef __SYMBIAN32__
       
    63 #include <langinfo.h> 
       
    64 #endif
       
    65 
       
    66 #ifndef __SYMBIAN32__
       
    67 #include "libc_collate.h"
       
    68 #endif
       
    69 
       
    70 //Warning:  #61-D: integer operation result is out of range
       
    71 #ifdef __ARMCC_3_1__
       
    72 #pragma diag_suppress 61
       
    73 #endif
       
    74 
       
    75 #include "utils.h"
       
    76 #include "regex2.h"
       
    77 
       
    78 #include "cname.h"
       
    79 
       
    80 #if (defined(__SYMBIAN32__) && (defined(__WINSCW__) || defined(__WINS__)))
       
    81 #include "libc_wsd_defs.h"
       
    82 #endif
       
    83 
       
    84 #ifdef EMULATOR
       
    85 #include "reent.h"
       
    86 #endif //EMULATOR
       
    87 
       
    88 /*
       
    89  * parse structure, passed up and down to avoid global variables and
       
    90  * other clumsinesses
       
    91  */
       
    92 struct parse {
       
    93 	char *next;		/* next character in RE */
       
    94 	char *end;		/* end of string (-> NUL normally) */
       
    95 	int error;		/* has an error been seen? */
       
    96 	sop *strip;		/* malloced strip */
       
    97 	sopno ssize;		/* malloced strip size (allocated) */
       
    98 	sopno slen;		/* malloced strip length (used) */
       
    99 	int ncsalloc;		/* number of csets allocated */
       
   100 	struct re_guts *g;
       
   101 #	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
       
   102 	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
       
   103 	sopno pend[NPAREN];	/* -> ) ([0] unused) */
       
   104 };
       
   105 
       
   106 /* ========= begin header generated by ./mkh ========= */
       
   107 #ifdef __cplusplus
       
   108 extern "C" {
       
   109 #endif
       
   110 #ifdef __SYMBIAN32__
       
   111 extern char LC_CTYPE_LocaleName[30];
       
   112 extern char LC_COLLATE_LocaleName[30];
       
   113 extern char LC_NUMERIC_LocaleName[30];
       
   114 extern char LC_MONETARY_LocaleName[30];
       
   115 extern char LC_TIME_LocaleName[30];
       
   116 extern char LC_ALL_LocaleName[30];
       
   117 
       
   118 #ifdef EMULATOR
       
   119 char * GET_WSD_VAR_NAME(LC_CTYPE_LocaleName, g)();
       
   120 char *GET_WSD_VAR_NAME(LC_COLLATE_LocaleName, g)();
       
   121 char * GET_WSD_VAR_NAME(LC_NUMERIC_LocaleName, g)();
       
   122 char * GET_WSD_VAR_NAME(LC_MONETARY_LocaleName, g)();
       
   123 char * GET_WSD_VAR_NAME(LC_TIME_LocaleName, g)();
       
   124 char * GET_WSD_VAR_NAME(LC_ALL_LocaleName, g)();
       
   125 GET_STATIC_ARRAY_FROM_TLS(cnames, struct cname)
       
   126 
       
   127 #define LC_CTYPE_LocaleName (GET_WSD_VAR_NAME(LC_CTYPE_LocaleName, g)())
       
   128 #define LC_COLLATE_LocaleName (GET_WSD_VAR_NAME(LC_COLLATE_LocaleName, g)())
       
   129 #define LC_NUMERIC_LocaleName (GET_WSD_VAR_NAME(LC_NUMERIC_LocaleName, g)())
       
   130 #define LC_MONETARY_LocaleName (GET_WSD_VAR_NAME(LC_MONETARY_LocaleName, g)())
       
   131 #define LC_TIME_LocaleName (GET_WSD_VAR_NAME(LC_TIME_LocaleName, g)())
       
   132 #define LC_ALL_LocaleName (GET_WSD_VAR_NAME(LC_ALL_LocaleName, g)())
       
   133 #define cnames (GET_WSD_VAR_NAME(cnames, s)())
       
   134 #endif //EMULATOR
       
   135 
       
   136 extern int __collate_range_cmp(int , int );
       
   137 
       
   138 #endif
       
   139 
       
   140 /* === regcomp.c === */
       
   141 static void p_ere(struct parse *p, wint_t stop);
       
   142 static void p_ere_exp(struct parse *p);
       
   143 static void p_str(struct parse *p);
       
   144 static void p_bre(struct parse *p, wint_t end1, wint_t end2);
       
   145 static int p_simp_re(struct parse *p, int starordinary);
       
   146 static int p_count(struct parse *p);
       
   147 static void p_bracket(struct parse *p);
       
   148 static void p_b_term(struct parse *p, cset *cs);
       
   149 static void p_b_cclass(struct parse *p, cset *cs);
       
   150 static void p_b_eclass(struct parse *p, cset *cs);
       
   151 static wint_t p_b_symbol(struct parse *p);
       
   152 static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
       
   153 static wint_t othercase(wint_t ch);
       
   154 static void bothcases(struct parse *p, wint_t ch);
       
   155 static void ordinary(struct parse *p, wint_t ch);
       
   156 static void nonnewline(struct parse *p);
       
   157 static void repeat(struct parse *p, sopno start, int from, int to);
       
   158 static int seterr(struct parse *p, int e);
       
   159 static cset *allocset(struct parse *p);
       
   160 static void freeset(struct parse *p, cset *cs);
       
   161 static void CHadd(struct parse *p, cset *cs, wint_t ch);
       
   162 static void CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max);
       
   163 static void CHaddtype(struct parse *p, cset *cs, wctype_t wct);
       
   164 static wint_t singleton(cset *cs);
       
   165 static sopno dupl(struct parse *p, sopno start, sopno finish);
       
   166 static void doemit(struct parse *p, sop op, size_t opnd);
       
   167 static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
       
   168 static void dofwd(struct parse *p, sopno pos, sop value);
       
   169 static void enlarge(struct parse *p, sopno size);
       
   170 static void stripsnug(struct parse *p, struct re_guts *g);
       
   171 static void findmust(struct parse *p, struct re_guts *g);
       
   172 static int altoffset(sop *scan, int offset);
       
   173 static void computejumps(struct parse *p, struct re_guts *g);
       
   174 static void computematchjumps(struct parse *p, struct re_guts *g);
       
   175 static sopno pluscount(struct parse *p, struct re_guts *g);
       
   176 static wint_t wgetnext(struct parse *p);
       
   177 
       
   178 #ifdef __cplusplus
       
   179 }
       
   180 #endif
       
   181 /* ========= end header generated by ./mkh ========= */
       
   182 
       
   183 #ifndef EMULATOR
       
   184 
       
   185 static char nuls[10];		/* place to point scanner in event of error */
       
   186 #else //EMULATOR
       
   187 
       
   188 GET_STATIC_ARRAY_FROM_TLS(nuls, char)
       
   189 #define nuls (GET_WSD_VAR_NAME(nuls, s)())
       
   190 #endif //EMULATOR
       
   191 
       
   192 /*
       
   193  * macros for use with parse structure
       
   194  * BEWARE:  these know that the parse structure is named `p' !!!
       
   195  */
       
   196 #define	PEEK()	(*p->next)
       
   197 #define	PEEK2()	(*(p->next+1))
       
   198 #define	MORE()	(p->next < p->end)
       
   199 #define	MORE2()	(p->next+1 < p->end)
       
   200 #define	SEE(c)	(MORE() && PEEK() == (c))
       
   201 #define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
       
   202 #define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
       
   203 #define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
       
   204 #define	NEXT()	(p->next++)
       
   205 #define	NEXT2()	(p->next += 2)
       
   206 #define	NEXTn(n)	(p->next += (n))
       
   207 #define	GETNEXT()	(*p->next++)
       
   208 #define	WGETNEXT()	wgetnext(p)
       
   209 #define	SETERROR(e)	seterr(p, (e))
       
   210 #define	REQUIRE(co, e)	((co) || SETERROR(e))
       
   211 #define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
       
   212 #define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
       
   213 #define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))
       
   214 #define	EMIT(op, sopnd)	doemit(p, (sop)(op), (size_t)(sopnd))
       
   215 #define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
       
   216 #define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
       
   217 #define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
       
   218 #define	HERE()		(p->slen)
       
   219 #define	THERE()		(p->slen - 1)
       
   220 #define	THERETHERE()	(p->slen - 2)
       
   221 #define	DROP(n)	(p->slen -= (n))
       
   222 
       
   223 #ifndef NDEBUG
       
   224 #ifndef EMULATOR
       
   225 
       
   226 static int never = 0;		/* for use in asserts; shuts lint up */
       
   227 #else //EMULATOR
       
   228 
       
   229 GET_STATIC_VAR_FROM_TLS(never, int)
       
   230 #define never (*GET_WSD_VAR_NAME(never, s)())
       
   231 #endif //EMULATOR
       
   232 #else
       
   233 #define	never	0		/* some <assert.h>s have bugs too */
       
   234 #endif
       
   235 
       
   236 /* Macro used by computejump()/computematchjump() */
       
   237 #define MIN(a,b)	((a)<(b)?(a):(b))
       
   238 
       
   239 /*
       
   240  - regcomp - interface for parser and compilation
       
   241  = extern int regcomp(regex_t *, const char *, int);
       
   242  = #define	REG_BASIC	0000
       
   243  = #define	REG_EXTENDED	0001
       
   244  = #define	REG_ICASE	0002
       
   245  = #define	REG_NOSUB	0004
       
   246  = #define	REG_NEWLINE	0010
       
   247  = #define	REG_NOSPEC	0020
       
   248  = #define	REG_PEND	0040
       
   249  = #define	REG_DUMP	0200
       
   250  */
       
   251 EXPORT_C int				/* 0 success, otherwise REG_something */
       
   252  regcomp(preg, pattern, cflags)
       
   253 regex_t * __restrict preg;
       
   254 const char * __restrict pattern;
       
   255 int cflags;
       
   256 {
       
   257 	struct parse pa;
       
   258 	struct re_guts *g;
       
   259 	struct parse *p = &pa;
       
   260 	int i;
       
   261 	size_t len;
       
   262 #ifdef REDEBUG
       
   263 #	define	GOODFLAGS(f)	(f)
       
   264 #else
       
   265 #	define	GOODFLAGS(f)	((f)&~REG_DUMP)
       
   266 #endif
       
   267 
       
   268 	cflags = GOODFLAGS(cflags);
       
   269 	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
       
   270 		return(REG_INVARG);
       
   271 
       
   272 	if (cflags&REG_PEND) {
       
   273 		if (preg->re_endp < pattern)
       
   274 			return(REG_INVARG);
       
   275 		len = preg->re_endp - pattern;
       
   276 	} else
       
   277 		len = strlen((char *)pattern);
       
   278 
       
   279 	/* do the mallocs early so failure handling is easy */
       
   280 	g = (struct re_guts *)malloc(sizeof(struct re_guts));
       
   281 	if (g == NULL)
       
   282 		return(REG_ESPACE);
       
   283 	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
       
   284 	p->strip = (sop *)malloc(p->ssize * sizeof(sop));
       
   285 	p->slen = 0;
       
   286 	if (p->strip == NULL) {
       
   287 		free((char *)g);
       
   288 		return(REG_ESPACE);
       
   289 	}
       
   290 
       
   291 	/* set things up */
       
   292 	p->g = g;
       
   293 	p->next = (char *)pattern;	/* convenience; we do not modify it */
       
   294 	p->end = p->next + len;
       
   295 	p->error = 0;
       
   296 	p->ncsalloc = 0;
       
   297 	for (i = 0; i < NPAREN; i++) {
       
   298 		p->pbegin[i] = 0;
       
   299 		p->pend[i] = 0;
       
   300 	}
       
   301 	g->sets = NULL;
       
   302 	g->ncsets = 0;
       
   303 	g->cflags = cflags;
       
   304 	g->iflags = 0;
       
   305 	g->nbol = 0;
       
   306 	g->neol = 0;
       
   307 	g->must = NULL;
       
   308 	g->moffset = -1;
       
   309 	g->charjump = NULL;
       
   310 	g->matchjump = NULL;
       
   311 	g->mlen = 0;
       
   312 	g->nsub = 0;
       
   313 	g->backrefs = 0;
       
   314 
       
   315 	/* do it */
       
   316 	EMIT(OEND, 0);
       
   317 	g->firststate = THERE();
       
   318 	if (cflags&REG_EXTENDED)
       
   319 		p_ere(p, OUT);
       
   320 	else if (cflags&REG_NOSPEC)
       
   321 		p_str(p);
       
   322 	else
       
   323 		p_bre(p, OUT, OUT);
       
   324 	EMIT(OEND, 0);
       
   325 	g->laststate = THERE();
       
   326 
       
   327 	/* tidy up loose ends and fill things in */
       
   328 	stripsnug(p, g);
       
   329 	findmust(p, g);
       
   330 	/* only use Boyer-Moore algorithm if the pattern is bigger
       
   331 	 * than three characters
       
   332 	 */
       
   333 	if(g->mlen > 3) {
       
   334 		computejumps(p, g);
       
   335 		computematchjumps(p, g);
       
   336 		if(g->matchjump == NULL && g->charjump != NULL) {
       
   337 			free(g->charjump);
       
   338 			g->charjump = NULL;
       
   339 		}
       
   340 	}
       
   341 	g->nplus = pluscount(p, g);
       
   342 	g->magic = MAGIC2;
       
   343 	preg->re_nsub = g->nsub;
       
   344 	preg->re_g = g;
       
   345 	preg->re_magic = MAGIC1;
       
   346 #ifndef REDEBUG
       
   347 	/* not debugging, so can't rely on the assert() in regexec() */
       
   348 	if (g->iflags&BAD)
       
   349 		SETERROR(REG_ASSERT);
       
   350 #endif
       
   351 
       
   352 	/* win or lose, we're done */
       
   353 	if (p->error != 0)	/* lose */
       
   354 		regfree(preg);
       
   355 	return(p->error);
       
   356 }
       
   357 
       
   358 /*
       
   359  - p_ere - ERE parser top level, concatenation and alternation
       
   360  == static void p_ere(struct parse *p, int stop);
       
   361  */
       
   362 static void
       
   363 p_ere(p, stop)
       
   364 struct parse *p;
       
   365 int stop;			/* character this ERE should end at */
       
   366 {
       
   367 	char c;
       
   368 	sopno prevback;
       
   369 	sopno prevfwd;
       
   370 	sopno conc;
       
   371 	int first = 1;		/* is this the first alternative? */
       
   372 
       
   373 	for (;;) {
       
   374 		/* do a bunch of concatenated expressions */
       
   375 		conc = HERE();
       
   376 		while (MORE() && (c = PEEK()) != '|' && c != stop)
       
   377 			p_ere_exp(p);
       
   378 		(void)REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
       
   379 
       
   380 		if (!EAT('|'))
       
   381 			break;		/* NOTE BREAK OUT */
       
   382 
       
   383 		if (first) {
       
   384 			INSERT(OCH_, conc);	/* offset is wrong */
       
   385 			prevfwd = conc;
       
   386 			prevback = conc;
       
   387 			first = 0;
       
   388 		}
       
   389 		ASTERN(OOR1, prevback);
       
   390 		prevback = THERE();
       
   391 		AHEAD(prevfwd);			/* fix previous offset */
       
   392 		prevfwd = HERE();
       
   393 		EMIT(OOR2, 0);			/* offset is very wrong */
       
   394 	}
       
   395 
       
   396 	if (!first) {		/* tail-end fixups */
       
   397 		AHEAD(prevfwd);
       
   398 		ASTERN(O_CH, prevback);
       
   399 	}
       
   400 
       
   401 	assert(!MORE() || SEE(stop));
       
   402 }
       
   403 
       
   404 /*
       
   405  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
       
   406  == static void p_ere_exp(struct parse *p);
       
   407  */
       
   408 static void
       
   409 p_ere_exp(p)
       
   410 struct parse *p;
       
   411 {
       
   412 	char c;
       
   413 	wint_t wc;
       
   414 	sopno pos;
       
   415 	int count;
       
   416 	int count2;
       
   417 	sopno subno;
       
   418 	int wascaret = 0;
       
   419 
       
   420 	assert(MORE());		/* caller should have ensured this */
       
   421 	c = GETNEXT();
       
   422 
       
   423 	pos = HERE();
       
   424 	switch (c) {
       
   425 	case '(':
       
   426 		(void)REQUIRE(MORE(), REG_EPAREN);
       
   427 		p->g->nsub++;
       
   428 		subno = p->g->nsub;
       
   429 		if (subno < NPAREN)
       
   430 			p->pbegin[subno] = HERE();
       
   431 		EMIT(OLPAREN, subno);
       
   432 		if (!SEE(')'))
       
   433 			p_ere(p, ')');
       
   434 		if (subno < NPAREN) {
       
   435 			p->pend[subno] = HERE();
       
   436 			assert(p->pend[subno] != 0);
       
   437 		}
       
   438 		EMIT(ORPAREN, subno);
       
   439 		(void)MUSTEAT(')', REG_EPAREN);
       
   440 		break;
       
   441 #ifndef POSIX_MISTAKE
       
   442 	case ')':		/* happens only if no current unmatched ( */
       
   443 		/*
       
   444 		 * You may ask, why the ifndef?  Because I didn't notice
       
   445 		 * this until slightly too late for 1003.2, and none of the
       
   446 		 * other 1003.2 regular-expression reviewers noticed it at
       
   447 		 * all.  So an unmatched ) is legal POSIX, at least until
       
   448 		 * we can get it fixed.
       
   449 		 */
       
   450 		SETERROR(REG_EPAREN);
       
   451 		break;
       
   452 #endif
       
   453 	case '^':
       
   454 		EMIT(OBOL, 0);
       
   455 		p->g->iflags |= USEBOL;
       
   456 		p->g->nbol++;
       
   457 		wascaret = 1;
       
   458 		break;
       
   459 	case '$':
       
   460 		EMIT(OEOL, 0);
       
   461 		p->g->iflags |= USEEOL;
       
   462 		p->g->neol++;
       
   463 		break;
       
   464 	case '|':
       
   465 		SETERROR(REG_EMPTY);
       
   466 		break;
       
   467 	case '*':
       
   468 	case '+':
       
   469 	case '?':
       
   470 		SETERROR(REG_BADRPT);
       
   471 		break;
       
   472 	case '.':
       
   473 		if (p->g->cflags&REG_NEWLINE)
       
   474 			nonnewline(p);
       
   475 		else
       
   476 			EMIT(OANY, 0);
       
   477 		break;
       
   478 	case '[':
       
   479 		p_bracket(p);
       
   480 		break;
       
   481 	case '\\':
       
   482 		(void)REQUIRE(MORE(), REG_EESCAPE);
       
   483 		wc = WGETNEXT();
       
   484 		ordinary(p, wc);
       
   485 		break;
       
   486 	case '{':		/* okay as ordinary except if digit follows */
       
   487 		(void)REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
       
   488 		/* FALLTHROUGH */
       
   489 	default:
       
   490 		p->next--;
       
   491 		wc = WGETNEXT();
       
   492 		ordinary(p, wc);
       
   493 		break;
       
   494 	}
       
   495 
       
   496 	if (!MORE())
       
   497 		return;
       
   498 	c = PEEK();
       
   499 	/* we call { a repetition if followed by a digit */
       
   500 	if (!( c == '*' || c == '+' || c == '?' ||
       
   501 				(c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
       
   502 		return;		/* no repetition, we're done */
       
   503 	NEXT();
       
   504 
       
   505 	(void)REQUIRE(!wascaret, REG_BADRPT);
       
   506 	switch (c) {
       
   507 	case '*':	/* implemented as +? */
       
   508 		/* this case does not require the (y|) trick, noKLUDGE */
       
   509 		INSERT(OPLUS_, pos);
       
   510 		ASTERN(O_PLUS, pos);
       
   511 		INSERT(OQUEST_, pos);
       
   512 		ASTERN(O_QUEST, pos);
       
   513 		break;
       
   514 	case '+':
       
   515 		INSERT(OPLUS_, pos);
       
   516 		ASTERN(O_PLUS, pos);
       
   517 		break;
       
   518 	case '?':
       
   519 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
       
   520 		INSERT(OCH_, pos);		/* offset slightly wrong */
       
   521 		ASTERN(OOR1, pos);		/* this one's right */
       
   522 		AHEAD(pos);			/* fix the OCH_ */
       
   523 		EMIT(OOR2, 0);			/* offset very wrong... */
       
   524 		AHEAD(THERE());			/* ...so fix it */
       
   525 		ASTERN(O_CH, THERETHERE());
       
   526 		break;
       
   527 	case '{':
       
   528 		count = p_count(p);
       
   529 		if (EAT(',')) {
       
   530 			if (isdigit((uch)PEEK())) {
       
   531 				count2 = p_count(p);
       
   532 				(void)REQUIRE(count <= count2, REG_BADBR);
       
   533 			} else		/* single number with comma */
       
   534 				count2 = INFINITY;
       
   535 		} else		/* just a single number */
       
   536 			count2 = count;
       
   537 		repeat(p, pos, count, count2);
       
   538 		if (!EAT('}')) {	/* error heuristics */
       
   539 			while (MORE() && PEEK() != '}')
       
   540 				NEXT();
       
   541 			(void)REQUIRE(MORE(), REG_EBRACE);
       
   542 			SETERROR(REG_BADBR);
       
   543 		}
       
   544 		break;
       
   545 	}
       
   546 
       
   547 	if (!MORE())
       
   548 		return;
       
   549 	c = PEEK();
       
   550 	if (!( c == '*' || c == '+' || c == '?' ||
       
   551 				(c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
       
   552 		return;
       
   553 	SETERROR(REG_BADRPT);
       
   554 }
       
   555 
       
   556 /*
       
   557  - p_str - string (no metacharacters) "parser"
       
   558  == static void p_str(struct parse *p);
       
   559  */
       
   560 static void
       
   561 p_str(p)
       
   562 struct parse *p;
       
   563 {
       
   564 	(void)REQUIRE(MORE(), REG_EMPTY);
       
   565 	while (MORE())
       
   566 		ordinary(p, WGETNEXT());
       
   567 }
       
   568 
       
   569 /*
       
   570  - p_bre - BRE parser top level, anchoring and concatenation
       
   571  == static void p_bre(struct parse *p, int end1, \
       
   572  ==	int end2);
       
   573  * Giving end1 as OUT essentially eliminates the end1/end2 check.
       
   574  *
       
   575  * This implementation is a bit of a kludge, in that a trailing $ is first
       
   576  * taken as an ordinary character and then revised to be an anchor.
       
   577  * The amount of lookahead needed to avoid this kludge is excessive.
       
   578  */
       
   579 static void
       
   580 p_bre(p, end1, end2)
       
   581 struct parse *p;
       
   582 int end1;			/* first terminating character */
       
   583 int end2;			/* second terminating character */
       
   584 {
       
   585 	sopno start = HERE();
       
   586 	int first = 1;			/* first subexpression? */
       
   587 	int wasdollar = 0;
       
   588 
       
   589 	if (EAT('^')) {
       
   590 		EMIT(OBOL, 0);
       
   591 		p->g->iflags |= USEBOL;
       
   592 		p->g->nbol++;
       
   593 	}
       
   594 	while (MORE() && !SEETWO(end1, end2)) {
       
   595 		wasdollar = p_simp_re(p, first);
       
   596 		first = 0;
       
   597 	}
       
   598 	if (wasdollar) {	/* oops, that was a trailing anchor */
       
   599 		DROP(1);
       
   600 		EMIT(OEOL, 0);
       
   601 		p->g->iflags |= USEEOL;
       
   602 		p->g->neol++;
       
   603 	}
       
   604 
       
   605 	(void)REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
       
   606 }
       
   607 
       
   608 /*
       
   609  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
       
   610  == static int p_simp_re(struct parse *p, int starordinary);
       
   611  */
       
   612 static int			/* was the simple RE an unbackslashed $? */
       
   613 p_simp_re(p, starordinary)
       
   614 struct parse *p;
       
   615 int starordinary;		/* is a leading * an ordinary character? */
       
   616 {
       
   617 	int c;
       
   618 	int count;
       
   619 	int count2;
       
   620 	sopno pos;
       
   621 	int i;
       
   622 	wint_t wc;
       
   623 	sopno subno;
       
   624 #	define	BACKSL	(1<<CHAR_BIT)
       
   625 
       
   626 	pos = HERE();		/* repetion op, if any, covers from here */
       
   627 
       
   628 	assert(MORE());		/* caller should have ensured this */
       
   629 	c = GETNEXT();
       
   630 	if (c == '\\') {
       
   631 		(void)REQUIRE(MORE(), REG_EESCAPE);
       
   632 		c = BACKSL | GETNEXT();
       
   633 	}
       
   634 	switch (c) {
       
   635 	case '.':
       
   636 		if (p->g->cflags&REG_NEWLINE)
       
   637 			nonnewline(p);
       
   638 		else
       
   639 			EMIT(OANY, 0);
       
   640 		break;
       
   641 	case '[':
       
   642 		p_bracket(p);
       
   643 		break;
       
   644 	case BACKSL|'{':
       
   645 		SETERROR(REG_BADRPT);
       
   646 		break;
       
   647 	case BACKSL|'(':
       
   648 		p->g->nsub++;
       
   649 		subno = p->g->nsub;
       
   650 		if (subno < NPAREN)
       
   651 			p->pbegin[subno] = HERE();
       
   652 		EMIT(OLPAREN, subno);
       
   653 		/* the MORE here is an error heuristic */
       
   654 		if (MORE() && !SEETWO('\\', ')'))
       
   655 			p_bre(p, '\\', ')');
       
   656 		if (subno < NPAREN) {
       
   657 			p->pend[subno] = HERE();
       
   658 			assert(p->pend[subno] != 0);
       
   659 		}
       
   660 		EMIT(ORPAREN, subno);
       
   661 		(void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
       
   662 		break;
       
   663 	case BACKSL|')':	/* should not get here -- must be user */
       
   664 	case BACKSL|'}':
       
   665 		SETERROR(REG_EPAREN);
       
   666 		break;
       
   667 	case BACKSL|'1':
       
   668 	case BACKSL|'2':
       
   669 	case BACKSL|'3':
       
   670 	case BACKSL|'4':
       
   671 	case BACKSL|'5':
       
   672 	case BACKSL|'6':
       
   673 	case BACKSL|'7':
       
   674 	case BACKSL|'8':
       
   675 	case BACKSL|'9':
       
   676 		i = (c&~BACKSL) - '0';
       
   677 		assert(i < NPAREN);
       
   678 		if (p->pend[i] != 0) {
       
   679 			assert(i <= p->g->nsub);
       
   680 			EMIT(OBACK_, i);
       
   681 			assert(p->pbegin[i] != 0);
       
   682 			assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
       
   683 			assert(OP(p->strip[p->pend[i]]) == ORPAREN);
       
   684 			(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
       
   685 			EMIT(O_BACK, i);
       
   686 		} else
       
   687 			SETERROR(REG_ESUBREG);
       
   688 		p->g->backrefs = 1;
       
   689 		break;
       
   690 	case '*':
       
   691 		(void)REQUIRE(starordinary, REG_BADRPT);
       
   692 		/* FALLTHROUGH */
       
   693 	default:
       
   694 		p->next--;
       
   695 		wc = WGETNEXT();
       
   696 		ordinary(p, wc);
       
   697 		break;
       
   698 	}
       
   699 
       
   700 	if (EAT('*')) {		/* implemented as +? */
       
   701 		/* this case does not require the (y|) trick, noKLUDGE */
       
   702 		INSERT(OPLUS_, pos);
       
   703 		ASTERN(O_PLUS, pos);
       
   704 		INSERT(OQUEST_, pos);
       
   705 		ASTERN(O_QUEST, pos);
       
   706 	} else if (EATTWO('\\', '{')) {
       
   707 		count = p_count(p);
       
   708 		if (EAT(',')) {
       
   709 			if (MORE() && isdigit((uch)PEEK())) {
       
   710 				count2 = p_count(p);
       
   711 				(void)REQUIRE(count <= count2, REG_BADBR);
       
   712 			} else		/* single number with comma */
       
   713 				count2 = INFINITY;
       
   714 		} else		/* just a single number */
       
   715 			count2 = count;
       
   716 		repeat(p, pos, count, count2);
       
   717 		if (!EATTWO('\\', '}')) {	/* error heuristics */
       
   718 			while (MORE() && !SEETWO('\\', '}'))
       
   719 				NEXT();
       
   720 			(void)REQUIRE(MORE(), REG_EBRACE);
       
   721 			SETERROR(REG_BADBR);
       
   722 		}
       
   723 	} else if (c == '$')     /* $ (but not \$) ends it */
       
   724 		return(1);
       
   725 
       
   726 	return(0);
       
   727 }
       
   728 
       
   729 /*
       
   730  - p_count - parse a repetition count
       
   731  == static int p_count(struct parse *p);
       
   732  */
       
   733 static int			/* the value */
       
   734 p_count(p)
       
   735 struct parse *p;
       
   736 {
       
   737 	int count = 0;
       
   738 	int ndigits = 0;
       
   739 
       
   740 	while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
       
   741 		count = count*10 + (GETNEXT() - '0');
       
   742 		ndigits++;
       
   743 	}
       
   744 
       
   745 	(void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
       
   746 	return(count);
       
   747 }
       
   748 
       
   749 /*
       
   750  - p_bracket - parse a bracketed character list
       
   751  == static void p_bracket(struct parse *p);
       
   752  */
       
   753 static void
       
   754 p_bracket(p)
       
   755 struct parse *p;
       
   756 {
       
   757 	cset *cs;
       
   758 	wint_t ch;
       
   759 
       
   760 	/* Dept of Truly Sickening Special-Case Kludges */
       
   761 	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
       
   762 		EMIT(OBOW, 0);
       
   763 		NEXTn(6);
       
   764 		return;
       
   765 	}
       
   766 	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
       
   767 		EMIT(OEOW, 0);
       
   768 		NEXTn(6);
       
   769 		return;
       
   770 	}
       
   771 
       
   772 	if ((cs = allocset(p)) == NULL)
       
   773 		return;
       
   774 
       
   775 	if (p->g->cflags&REG_ICASE)
       
   776 		cs->icase = 1;
       
   777 	if (EAT('^'))
       
   778 		cs->invert = 1;
       
   779 	if (EAT(']'))
       
   780 		CHadd(p, cs, ']');
       
   781 	else if (EAT('-'))
       
   782 		CHadd(p, cs, '-');
       
   783 	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
       
   784 		p_b_term(p, cs);
       
   785 	if (EAT('-'))
       
   786 		CHadd(p, cs, '-');
       
   787 	(void)MUSTEAT(']', REG_EBRACK);
       
   788 
       
   789 	if (p->error != 0)	/* don't mess things up further */
       
   790 		return;
       
   791 
       
   792 	if (cs->invert && p->g->cflags&REG_NEWLINE)
       
   793 		cs->bmp['\n' >> 3] |= 1 << ('\n' & 7);
       
   794 
       
   795 	if ((ch = singleton(cs)) != OUT) {	/* optimize singleton sets */
       
   796 		ordinary(p, ch);
       
   797 		freeset(p, cs);
       
   798 	} else
       
   799 		EMIT(OANYOF, (int)(cs - p->g->sets));
       
   800 }
       
   801 
       
   802 /*
       
   803  - p_b_term - parse one term of a bracketed character list
       
   804  == static void p_b_term(struct parse *p, cset *cs);
       
   805  */
       
   806 static void
       
   807 p_b_term(p, cs)
       
   808 struct parse *p;
       
   809 cset *cs;
       
   810 {
       
   811 	char c;
       
   812 	wint_t start, finish;
       
   813 	wint_t i;
       
   814 
       
   815 	/* classify what we've got */
       
   816 	switch ((MORE()) ? PEEK() : '\0') {
       
   817 	case '[':
       
   818 		c = (MORE2()) ? PEEK2() : '\0';
       
   819 		break;
       
   820 	case '-':
       
   821 		SETERROR(REG_ERANGE);
       
   822 		return;			/* NOTE RETURN */
       
   823 		break;
       
   824 	default:
       
   825 		c = '\0';
       
   826 		break;
       
   827 	}
       
   828 
       
   829 	switch (c) {
       
   830 	case ':':		/* character class */
       
   831 		NEXT2();
       
   832 		(void)REQUIRE(MORE(), REG_EBRACK);
       
   833 		c = PEEK();
       
   834 		(void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
       
   835 		p_b_cclass(p, cs);
       
   836 		(void)REQUIRE(MORE(), REG_EBRACK);
       
   837 		(void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
       
   838 		break;
       
   839 	case '=':		/* equivalence class */
       
   840 		NEXT2();
       
   841 		(void)REQUIRE(MORE(), REG_EBRACK);
       
   842 		c = PEEK();
       
   843 		(void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
       
   844 		p_b_eclass(p, cs);
       
   845 		(void)REQUIRE(MORE(), REG_EBRACK);
       
   846 		(void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
       
   847 		break;
       
   848 	default:		/* symbol, ordinary character, or range */
       
   849 		start = p_b_symbol(p);
       
   850 		if (SEE('-') && MORE2() && PEEK2() != ']') {
       
   851 			/* range */
       
   852 			NEXT();
       
   853 			if (EAT('-'))
       
   854 				finish = '-';
       
   855 			else
       
   856 				finish = p_b_symbol(p);
       
   857 		} else
       
   858 			finish = start;
       
   859 		if (start == finish)
       
   860 			CHadd(p, cs, start);
       
   861 		else {
       
   862 			#ifndef __SYMBIAN32__			
       
   863 			if (__collate_load_error )
       
   864 			#else				
       
   865 			if(((strcmp("C",(const char*) LC_COLLATE_LocaleName)==0) ||(strcmp("POSIX", (const char*) LC_COLLATE_LocaleName)==0))) 
       
   866 			#endif
       
   867 			   {
       
   868 			   	(void)REQUIRE((uch)start <= (uch)finish, REG_ERANGE);
       
   869 				CHaddrange(p, cs, start, finish);
       
   870 			} else {
       
   871 				#ifndef __SYMBIAN32__
       
   872 				
       
   873 								(void)REQUIRE(__collate_range_cmp(start, finish) <= 0, REG_ERANGE);
       
   874 				for (i = 0; i <= UCHAR_MAX; i++) {
       
   875 					if (   __collate_range_cmp(start, i) <= 0
       
   876 					    && __collate_range_cmp(i, finish) <= 0
       
   877 					   )
       
   878 						CHadd(p, cs, i);
       
   879 						
       
   880 				#else
       
   881 								
       
   882 				int i_tmp=0; 
       
   883 				(void)REQUIRE(__collate_range_cmp(start, finish) <= 0, REG_ERANGE);
       
   884 				for (i = start; i <= finish ; i++) {
       
   885 					i_tmp=i;
       
   886 					if (   __collate_range_cmp(start, i_tmp) <= 0
       
   887 					    && __collate_range_cmp(i_tmp, finish) <= 0
       
   888 					   )
       
   889 						 {
       
   890 						 	CHadd(p, cs, i_tmp);
       
   891 						 	if(__collate_range_cmp(i_tmp, finish) == 0)
       
   892 						 		break;
       
   893 						 }
       
   894 				#endif
       
   895 				}
       
   896 			}
       
   897 		}
       
   898 		break;
       
   899 	}
       
   900 }
       
   901 
       
   902 /*
       
   903  - p_b_cclass - parse a character-class name and deal with it
       
   904  == static void p_b_cclass(struct parse *p, cset *cs);
       
   905  */
       
   906 static void
       
   907 p_b_cclass(p, cs)
       
   908 struct parse *p;
       
   909 cset *cs;
       
   910 {
       
   911 	char *sp = p->next;
       
   912 	size_t len;
       
   913 	wctype_t wct;
       
   914 	char clname[16];
       
   915 
       
   916 	while (MORE() && isalpha((uch)PEEK()))
       
   917 		NEXT();
       
   918 	len = p->next - sp;
       
   919 	if (len >= sizeof(clname) - 1) {
       
   920 		SETERROR(REG_ECTYPE);
       
   921 		return;
       
   922 	}
       
   923 	memcpy(clname, sp, len);
       
   924 	clname[len] = '\0';
       
   925 	if ((wct = wctype(clname)) == 0) {
       
   926 		SETERROR(REG_ECTYPE);
       
   927 		return;
       
   928 	}
       
   929 	CHaddtype(p, cs, wct);
       
   930 }
       
   931 
       
   932 /*
       
   933  - p_b_eclass - parse an equivalence-class name and deal with it
       
   934  == static void p_b_eclass(struct parse *p, cset *cs);
       
   935  *
       
   936  * This implementation is incomplete. xxx
       
   937  */
       
   938 static void
       
   939 p_b_eclass(p, cs)
       
   940 struct parse *p;
       
   941 cset *cs;
       
   942 {
       
   943 	wint_t c;
       
   944 
       
   945 	c = p_b_coll_elem(p, '=');
       
   946 	CHadd(p, cs, c);
       
   947 }
       
   948 
       
   949 /*
       
   950  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
       
   951  == static char p_b_symbol(struct parse *p);
       
   952  */
       
   953 static wint_t			/* value of symbol */
       
   954 p_b_symbol(p)
       
   955 struct parse *p;
       
   956 {
       
   957 	wint_t value;
       
   958 
       
   959 	(void)REQUIRE(MORE(), REG_EBRACK);
       
   960 	if (!EATTWO('[', '.'))
       
   961 		return(WGETNEXT());
       
   962 
       
   963 	/* collating symbol */
       
   964 	value = p_b_coll_elem(p, '.');
       
   965 	(void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
       
   966 	return(value);
       
   967 }
       
   968 
       
   969 /*
       
   970  - p_b_coll_elem - parse a collating-element name and look it up
       
   971  == static char p_b_coll_elem(struct parse *p, int endc);
       
   972  */
       
   973 static wint_t			/* value of collating element */
       
   974 p_b_coll_elem(p, endc)
       
   975 struct parse *p;
       
   976 wint_t endc;			/* name ended by endc,']' */
       
   977 {
       
   978 	char *sp = p->next;
       
   979 	struct cname *cp;
       
   980 	int len;
       
   981 	mbstate_t mbs;
       
   982 	wchar_t wc;
       
   983 	size_t clen;
       
   984 
       
   985 	while (MORE() && !SEETWO(endc, ']'))
       
   986 		NEXT();
       
   987 	if (!MORE()) {
       
   988 		SETERROR(REG_EBRACK);
       
   989 		return(0);
       
   990 	}
       
   991 	len = p->next - sp;
       
   992 	for (cp = cnames; cp->name != NULL; cp++)
       
   993 		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
       
   994 			return(cp->code);	/* known name */
       
   995 	memset(&mbs, 0, sizeof(mbs));
       
   996 	if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
       
   997 		return (wc);			/* single character */
       
   998 	else if (clen == (size_t)-1 || clen == (size_t)-2)
       
   999 		SETERROR(REG_ILLSEQ);
       
  1000 	else
       
  1001 		SETERROR(REG_ECOLLATE);		/* neither */
       
  1002 	return(0);
       
  1003 }
       
  1004 
       
  1005 /*
       
  1006  - othercase - return the case counterpart of an alphabetic
       
  1007  == static char othercase(int ch);
       
  1008  */
       
  1009 static wint_t			/* if no counterpart, return ch */
       
  1010 othercase(ch)
       
  1011 wint_t ch;
       
  1012 {
       
  1013 	assert(iswalpha(ch));
       
  1014 	if (iswupper(ch))
       
  1015 		return(towlower(ch));
       
  1016 	else if (iswlower(ch))
       
  1017 		return(towupper(ch));
       
  1018 	else			/* peculiar, but could happen */
       
  1019 		return(ch);
       
  1020 }
       
  1021 
       
  1022 /*
       
  1023  - bothcases - emit a dualcase version of a two-case character
       
  1024  == static void bothcases(struct parse *p, int ch);
       
  1025  *
       
  1026  * Boy, is this implementation ever a kludge...
       
  1027  */
       
  1028 static void
       
  1029 bothcases(p, ch)
       
  1030 struct parse *p;
       
  1031 wint_t ch;
       
  1032 {
       
  1033 	char *oldnext = p->next;
       
  1034 	char *oldend = p->end;
       
  1035 	char bracket[3 + MB_LEN_MAX];
       
  1036 	size_t n;
       
  1037 	mbstate_t mbs;
       
  1038 
       
  1039 	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
       
  1040 	p->next = bracket;
       
  1041 	memset(&mbs, 0, sizeof(mbs));
       
  1042 	n = wcrtomb(bracket, ch, &mbs);
       
  1043 	assert(n != (size_t)-1);
       
  1044 	bracket[n] = ']';
       
  1045 	bracket[n + 1] = '\0';
       
  1046 	p->end = bracket+n+1;
       
  1047 	p_bracket(p);
       
  1048 	assert(p->next == p->end);
       
  1049 	p->next = oldnext;
       
  1050 	p->end = oldend;
       
  1051 }
       
  1052 
       
  1053 /*
       
  1054  - ordinary - emit an ordinary character
       
  1055  == static void ordinary(struct parse *p, int ch);
       
  1056  */
       
  1057 static void
       
  1058 ordinary(p, ch)
       
  1059 struct parse *p;
       
  1060 wint_t ch;
       
  1061 {
       
  1062 	cset *cs;
       
  1063 
       
  1064 	if ((p->g->cflags&REG_ICASE) && iswalpha(ch) && othercase(ch) != ch)
       
  1065 		bothcases(p, ch);
       
  1066 	else if ((ch & OPDMASK) == ch)
       
  1067 		EMIT(OCHAR, ch);
       
  1068 	else {
       
  1069 		/*
       
  1070 		 * Kludge: character is too big to fit into an OCHAR operand.
       
  1071 		 * Emit a singleton set.
       
  1072 		 */
       
  1073 		if ((cs = allocset(p)) == NULL)
       
  1074 			return;
       
  1075 		CHadd(p, cs, ch);
       
  1076 		EMIT(OANYOF, (int)(cs - p->g->sets));
       
  1077 	}
       
  1078 }
       
  1079 
       
  1080 /*
       
  1081  - nonnewline - emit REG_NEWLINE version of OANY
       
  1082  == static void nonnewline(struct parse *p);
       
  1083  *
       
  1084  * Boy, is this implementation ever a kludge...
       
  1085  */
       
  1086 static void
       
  1087 nonnewline(p)
       
  1088 struct parse *p;
       
  1089 {
       
  1090 	char *oldnext = p->next;
       
  1091 	char *oldend = p->end;
       
  1092 	char bracket[4];
       
  1093 
       
  1094 	p->next = bracket;
       
  1095 	p->end = bracket+3;
       
  1096 	bracket[0] = '^';
       
  1097 	bracket[1] = '\n';
       
  1098 	bracket[2] = ']';
       
  1099 	bracket[3] = '\0';
       
  1100 	p_bracket(p);
       
  1101 	assert(p->next == bracket+3);
       
  1102 	p->next = oldnext;
       
  1103 	p->end = oldend;
       
  1104 }
       
  1105 
       
  1106 /*
       
  1107  - repeat - generate code for a bounded repetition, recursively if needed
       
  1108  == static void repeat(struct parse *p, sopno start, int from, int to);
       
  1109  */
       
  1110 static void
       
  1111 repeat(p, start, from, to)
       
  1112 struct parse *p;
       
  1113 sopno start;			/* operand from here to end of strip */
       
  1114 int from;			/* repeated from this number */
       
  1115 int to;				/* to this number of times (maybe INFINITY) */
       
  1116 {
       
  1117 	sopno finish = HERE();
       
  1118 #	define	N	2
       
  1119 #	define	INF	3
       
  1120 #	define	REP(f, t)	((f)*8 + (t))
       
  1121 #	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
       
  1122 	sopno copy;
       
  1123 
       
  1124 	if (p->error != 0)	/* head off possible runaway recursion */
       
  1125 		return;
       
  1126 
       
  1127 	assert(from <= to);
       
  1128 
       
  1129 	switch (REP(MAP(from), MAP(to))) {
       
  1130 	case REP(0, 0):			/* must be user doing this */
       
  1131 		DROP(finish-start);	/* drop the operand */
       
  1132 		break;
       
  1133 	case REP(0, 1):			/* as x{1,1}? */
       
  1134 	case REP(0, N):			/* as x{1,n}? */
       
  1135 	case REP(0, INF):		/* as x{1,}? */
       
  1136 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
       
  1137 		INSERT(OCH_, start);		/* offset is wrong... */
       
  1138 		repeat(p, start+1, 1, to);
       
  1139 		ASTERN(OOR1, start);
       
  1140 		AHEAD(start);			/* ... fix it */
       
  1141 		EMIT(OOR2, 0);
       
  1142 		AHEAD(THERE());
       
  1143 		ASTERN(O_CH, THERETHERE());
       
  1144 		break;
       
  1145 	case REP(1, 1):			/* trivial case */
       
  1146 		/* done */
       
  1147 		break;
       
  1148 	case REP(1, N):			/* as x?x{1,n-1} */
       
  1149 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
       
  1150 		INSERT(OCH_, start);
       
  1151 		ASTERN(OOR1, start);
       
  1152 		AHEAD(start);
       
  1153 		EMIT(OOR2, 0);			/* offset very wrong... */
       
  1154 		AHEAD(THERE());			/* ...so fix it */
       
  1155 		ASTERN(O_CH, THERETHERE());
       
  1156 		copy = dupl(p, start+1, finish+1);
       
  1157 		assert(copy == finish+4);
       
  1158 		repeat(p, copy, 1, to-1);
       
  1159 		break;
       
  1160 	case REP(1, INF):		/* as x+ */
       
  1161 		INSERT(OPLUS_, start);
       
  1162 		ASTERN(O_PLUS, start);
       
  1163 		break;
       
  1164 	case REP(N, N):			/* as xx{m-1,n-1} */
       
  1165 		copy = dupl(p, start, finish);
       
  1166 		repeat(p, copy, from-1, to-1);
       
  1167 		break;
       
  1168 	case REP(N, INF):		/* as xx{n-1,INF} */
       
  1169 		copy = dupl(p, start, finish);
       
  1170 		repeat(p, copy, from-1, to);
       
  1171 		break;
       
  1172 	default:			/* "can't happen" */
       
  1173 		SETERROR(REG_ASSERT);	/* just in case */
       
  1174 		break;
       
  1175 	}
       
  1176 }
       
  1177 
       
  1178 /*
       
  1179  - wgetnext - helper function for WGETNEXT() macro. Gets the next wide
       
  1180  - character from the parse struct, signals a REG_ILLSEQ error if the
       
  1181  - character can't be converted. Returns the number of bytes consumed.
       
  1182  */
       
  1183 static wint_t
       
  1184 wgetnext(p)
       
  1185 struct parse *p;
       
  1186 {
       
  1187 	mbstate_t mbs;
       
  1188 	wchar_t wc;
       
  1189 	size_t n;
       
  1190 
       
  1191 	memset(&mbs, 0, sizeof(mbs));
       
  1192 	n = mbrtowc(&wc, p->next, p->end - p->next, &mbs);
       
  1193 	if (n == (size_t)-1 || n == (size_t)-2) {
       
  1194 		SETERROR(REG_ILLSEQ);
       
  1195 		return (0);
       
  1196 	}
       
  1197 	if (n == 0)
       
  1198 		n = 1;
       
  1199 	p->next += n;
       
  1200 	return (wc);
       
  1201 }
       
  1202 
       
  1203 /*
       
  1204  - seterr - set an error condition
       
  1205  == static int seterr(struct parse *p, int e);
       
  1206  */
       
  1207 static int			/* useless but makes type checking happy */
       
  1208 seterr(p, e)
       
  1209 struct parse *p;
       
  1210 int e;
       
  1211 {
       
  1212 	if (p->error == 0)	/* keep earliest error condition */
       
  1213 		p->error = e;
       
  1214 	p->next = nuls;		/* try to bring things to a halt */
       
  1215 	p->end = nuls;
       
  1216 	return(0);		/* make the return value well-defined */
       
  1217 }
       
  1218 
       
  1219 /*
       
  1220  - allocset - allocate a set of characters for []
       
  1221  == static cset *allocset(struct parse *p);
       
  1222  */
       
  1223 static cset *
       
  1224 allocset(p)
       
  1225 struct parse *p;
       
  1226 {
       
  1227 	cset *cs, *ncs;
       
  1228 
       
  1229 	ncs = realloc(p->g->sets, (p->g->ncsets + 1) * sizeof(*ncs));
       
  1230 	if (ncs == NULL) {
       
  1231 		SETERROR(REG_ESPACE);
       
  1232 		return (NULL);
       
  1233 	}
       
  1234 	p->g->sets = ncs;
       
  1235 	cs = &p->g->sets[p->g->ncsets++];
       
  1236 	memset(cs, 0, sizeof(*cs));
       
  1237 
       
  1238 	return(cs);
       
  1239 }
       
  1240 
       
  1241 /*
       
  1242  - freeset - free a now-unused set
       
  1243  == static void freeset(struct parse *p, cset *cs);
       
  1244  */
       
  1245 static void
       
  1246 freeset(p, cs)
       
  1247 struct parse *p;
       
  1248 cset *cs;
       
  1249 {
       
  1250 	cset *top = &p->g->sets[p->g->ncsets];
       
  1251 
       
  1252 	free(cs->wides);
       
  1253 	free(cs->ranges);
       
  1254 	free(cs->types);
       
  1255 	memset(cs, 0, sizeof(*cs));
       
  1256 	if (cs == top-1)	/* recover only the easy case */
       
  1257 		p->g->ncsets--;
       
  1258 }
       
  1259 
       
  1260 /*
       
  1261  - singleton - Determine whether a set contains only one character,
       
  1262  - returning it if so, otherwise returning OUT.
       
  1263  */
       
  1264 static wint_t
       
  1265 singleton(cs)
       
  1266 cset *cs;
       
  1267 {
       
  1268 	wint_t i, s, n;
       
  1269 
       
  1270 	for (i = n = 0; i < NC; i++)
       
  1271 		if (CHIN(cs, i)) {
       
  1272 			n++;
       
  1273 			s = i;
       
  1274 		}
       
  1275 	if (n == 1)
       
  1276 		return (s);
       
  1277 	if (cs->nwides == 1 && cs->nranges == 0 && cs->ntypes == 0 &&
       
  1278 	    cs->icase == 0)
       
  1279 		return (cs->wides[0]);
       
  1280 	/* Don't bother handling the other cases. */
       
  1281 	return (OUT);
       
  1282 }
       
  1283 
       
  1284 /*
       
  1285  - CHadd - add character to character set.
       
  1286  */
       
  1287 static void
       
  1288 CHadd(p, cs, ch)
       
  1289 struct parse *p;
       
  1290 cset *cs;
       
  1291 wint_t ch;
       
  1292 {
       
  1293 	wint_t nch, *newwides;
       
  1294 	assert(ch >= 0);
       
  1295 	if (ch < NC)
       
  1296 		cs->bmp[ch >> 3] |= 1 << (ch & 7);
       
  1297 	else {
       
  1298 		newwides = realloc(cs->wides, (cs->nwides + 1) *
       
  1299 		    sizeof(*cs->wides));
       
  1300 		if (newwides == NULL) {
       
  1301 			SETERROR(REG_ESPACE);
       
  1302 			return;
       
  1303 		}
       
  1304 		cs->wides = newwides;
       
  1305 		cs->wides[cs->nwides++] = ch;
       
  1306 	}
       
  1307 	if (cs->icase) {
       
  1308 		if ((nch = towlower(ch)) < NC)
       
  1309 			cs->bmp[nch >> 3] |= 1 << (nch & 7);
       
  1310 		if ((nch = towupper(ch)) < NC)
       
  1311 			cs->bmp[nch >> 3] |= 1 << (nch & 7);
       
  1312 	}
       
  1313 }
       
  1314 
       
  1315 /*
       
  1316  - CHaddrange - add all characters in the range [min,max] to a character set.
       
  1317  */
       
  1318 static void
       
  1319 CHaddrange(p, cs, min, max)
       
  1320 struct parse *p;
       
  1321 cset *cs;
       
  1322 wint_t min, max;
       
  1323 {
       
  1324 	crange *newranges;
       
  1325 
       
  1326 	for (; min < NC && min <= max; min++)
       
  1327 		CHadd(p, cs, min);
       
  1328 	if (min >= max)
       
  1329 		return;
       
  1330 	newranges = realloc(cs->ranges, (cs->nranges + 1) *
       
  1331 	    sizeof(*cs->ranges));
       
  1332 	if (newranges == NULL) {
       
  1333 		SETERROR(REG_ESPACE);
       
  1334 		return;
       
  1335 	}
       
  1336 	cs->ranges = newranges;
       
  1337 	cs->ranges[cs->nranges].min = min;
       
  1338 	cs->ranges[cs->nranges].min = max;
       
  1339 	cs->nranges++;
       
  1340 }
       
  1341 
       
  1342 /*
       
  1343  - CHaddtype - add all characters of a certain type to a character set.
       
  1344  */
       
  1345 static void
       
  1346 CHaddtype(p, cs, wct)
       
  1347 struct parse *p;
       
  1348 cset *cs;
       
  1349 wctype_t wct;
       
  1350 {
       
  1351 	wint_t i;
       
  1352 	wctype_t *newtypes;
       
  1353 
       
  1354 	for (i = 0; i < NC; i++)
       
  1355 		if (iswctype(i, wct))
       
  1356 			CHadd(p, cs, i);
       
  1357 	newtypes = realloc(cs->types, (cs->ntypes + 1) *
       
  1358 	    sizeof(*cs->types));
       
  1359 	if (newtypes == NULL) {
       
  1360 		SETERROR(REG_ESPACE);
       
  1361 		return;
       
  1362 	}
       
  1363 	cs->types = newtypes;
       
  1364 	cs->types[cs->ntypes++] = wct;
       
  1365 }
       
  1366 
       
  1367 /*
       
  1368  - dupl - emit a duplicate of a bunch of sops
       
  1369  == static sopno dupl(struct parse *p, sopno start, sopno finish);
       
  1370  */
       
  1371 static sopno			/* start of duplicate */
       
  1372 dupl(p, start, finish)
       
  1373 struct parse *p;
       
  1374 sopno start;			/* from here */
       
  1375 sopno finish;			/* to this less one */
       
  1376 {
       
  1377 	sopno ret = HERE();
       
  1378 	sopno len = finish - start;
       
  1379 
       
  1380 	assert(finish >= start);
       
  1381 	if (len == 0)
       
  1382 		return(ret);
       
  1383 	enlarge(p, p->ssize + len);	/* this many unexpected additions */
       
  1384 	assert(p->ssize >= p->slen + len);
       
  1385 	(void) memcpy((char *)(p->strip + p->slen),
       
  1386 		(char *)(p->strip + start), (size_t)len*sizeof(sop));
       
  1387 	p->slen += len;
       
  1388 	return(ret);
       
  1389 }
       
  1390 
       
  1391 /*
       
  1392  - doemit - emit a strip operator
       
  1393  == static void doemit(struct parse *p, sop op, size_t opnd);
       
  1394  *
       
  1395  * It might seem better to implement this as a macro with a function as
       
  1396  * hard-case backup, but it's just too big and messy unless there are
       
  1397  * some changes to the data structures.  Maybe later.
       
  1398  */
       
  1399 static void
       
  1400 doemit(p, op, opnd)
       
  1401 struct parse *p;
       
  1402 sop op;
       
  1403 size_t opnd;
       
  1404 {
       
  1405 	/* avoid making error situations worse */
       
  1406 	if (p->error != 0)
       
  1407 		return;
       
  1408 
       
  1409 	/* deal with oversize operands ("can't happen", more or less) */
       
  1410 	assert(opnd < 1<<OPSHIFT);
       
  1411 
       
  1412 	/* deal with undersized strip */
       
  1413 	if (p->slen >= p->ssize)
       
  1414 		enlarge(p, (p->ssize+1) / 2 * 3);	/* +50% */
       
  1415 	assert(p->slen < p->ssize);
       
  1416 
       
  1417 	/* finally, it's all reduced to the easy case */
       
  1418 	#ifdef __SYMBIAN32__
       
  1419 	if(p->error != REG_ESPACE)
       
  1420 		{
       
  1421 		p->strip[p->slen++] = SOP(op, opnd);
       
  1422 		}
       
  1423 	#else
       
  1424 	p->strip[p->slen++] = SOP(op, opnd);
       
  1425 	#endif
       
  1426 }
       
  1427 
       
  1428 /*
       
  1429  - doinsert - insert a sop into the strip
       
  1430  == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
       
  1431  */
       
  1432 static void
       
  1433 doinsert(p, op, opnd, pos)
       
  1434 struct parse *p;
       
  1435 sop op;
       
  1436 size_t opnd;
       
  1437 sopno pos;
       
  1438 {
       
  1439 	sopno sn;
       
  1440 	sop s;
       
  1441 	int i;
       
  1442 
       
  1443 	/* avoid making error situations worse */
       
  1444 	if (p->error != 0)
       
  1445 		return;
       
  1446 
       
  1447 	sn = HERE();
       
  1448 	EMIT(op, opnd);		/* do checks, ensure space */
       
  1449 	assert(HERE() == sn+1);
       
  1450 	s = p->strip[sn];
       
  1451 
       
  1452 	/* adjust paren pointers */
       
  1453 	assert(pos > 0);
       
  1454 	for (i = 1; i < NPAREN; i++) {
       
  1455 		if (p->pbegin[i] >= pos) {
       
  1456 			p->pbegin[i]++;
       
  1457 		}
       
  1458 		if (p->pend[i] >= pos) {
       
  1459 			p->pend[i]++;
       
  1460 		}
       
  1461 	}
       
  1462 
       
  1463 	memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
       
  1464 						(HERE()-pos-1)*sizeof(sop));
       
  1465 	p->strip[pos] = s;
       
  1466 }
       
  1467 
       
  1468 /*
       
  1469  - dofwd - complete a forward reference
       
  1470  == static void dofwd(struct parse *p, sopno pos, sop value);
       
  1471  */
       
  1472 static void
       
  1473 dofwd(p, pos, value)
       
  1474 struct parse *p;
       
  1475 sopno pos;
       
  1476 sop value;
       
  1477 {
       
  1478 	/* avoid making error situations worse */
       
  1479 	if (p->error != 0)
       
  1480 		return;
       
  1481 
       
  1482 	assert(value < 1<<OPSHIFT);
       
  1483 	p->strip[pos] = OP(p->strip[pos]) | value;
       
  1484 }
       
  1485 
       
  1486 /*
       
  1487  - enlarge - enlarge the strip
       
  1488  == static void enlarge(struct parse *p, sopno size);
       
  1489  */
       
  1490 static void
       
  1491 enlarge(p, size)
       
  1492 struct parse *p;
       
  1493 sopno size;
       
  1494 {
       
  1495 	sop *sp;
       
  1496 
       
  1497 	if (p->ssize >= size)
       
  1498 		return;
       
  1499 
       
  1500 	sp = (sop *)realloc(p->strip, size*sizeof(sop));
       
  1501 	if (sp == NULL) {
       
  1502 		SETERROR(REG_ESPACE);
       
  1503 		return;
       
  1504 	}
       
  1505 	p->strip = sp;
       
  1506 	p->ssize = size;
       
  1507 }
       
  1508 
       
  1509 /*
       
  1510  - stripsnug - compact the strip
       
  1511  == static void stripsnug(struct parse *p, struct re_guts *g);
       
  1512  */
       
  1513 static void
       
  1514 stripsnug(p, g)
       
  1515 struct parse *p;
       
  1516 struct re_guts *g;
       
  1517 {
       
  1518 	g->nstates = p->slen;
       
  1519 
       
  1520 	#ifdef __SYMBIAN32__
       
  1521 	if(p->error==REG_ESPACE)
       
  1522 		{
       
  1523 		g->strip=p->strip;
       
  1524 		return;
       
  1525 		}
       
  1526 	#endif
       
  1527 		
       
  1528 	g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
       
  1529 	if (g->strip == NULL) {
       
  1530 		SETERROR(REG_ESPACE);
       
  1531 		g->strip = p->strip;
       
  1532 	}
       
  1533 }
       
  1534 
       
  1535 /*
       
  1536  - findmust - fill in must and mlen with longest mandatory literal string
       
  1537  == static void findmust(struct parse *p, struct re_guts *g);
       
  1538  *
       
  1539  * This algorithm could do fancy things like analyzing the operands of |
       
  1540  * for common subsequences.  Someday.  This code is simple and finds most
       
  1541  * of the interesting cases.
       
  1542  *
       
  1543  * Note that must and mlen got initialized during setup.
       
  1544  */
       
  1545 static void
       
  1546 findmust(p, g)
       
  1547 struct parse *p;
       
  1548 struct re_guts *g;
       
  1549 {
       
  1550 	sop *scan;
       
  1551 	sop *start;
       
  1552 	sop *newstart;
       
  1553 	sopno newlen;
       
  1554 	sop s;
       
  1555 	char *cp;
       
  1556 	int offset;
       
  1557 	char buf[MB_LEN_MAX];
       
  1558 	size_t clen;
       
  1559 	mbstate_t mbs;
       
  1560 
       
  1561 	/* avoid making error situations worse */
       
  1562 	if (p->error != 0)
       
  1563 		return;
       
  1564 
       
  1565 	/*
       
  1566 	 * It's not generally safe to do a ``char'' substring search on
       
  1567 	 * multibyte character strings, but it's safe for at least
       
  1568 	 * UTF-8 (see RFC 3629).
       
  1569 	 */
       
  1570 	if (MB_CUR_MAX > 1 &&
       
  1571 			#ifdef __SYMBIAN32__
       
  1572 	    strcmp(nl_langinfo(CODESET), "UTF-8") != 0) 
       
  1573 	    #else
       
  1574 	    strcmp(_CurrentRuneLocale->__encoding, "UTF-8")!= 0)
       
  1575 	    #endif
       
  1576 		return;
       
  1577 
       
  1578 	/* find the longest OCHAR sequence in strip */
       
  1579 	newlen = 0;
       
  1580 	offset = 0;
       
  1581 	g->moffset = 0;
       
  1582 	scan = g->strip + 1;
       
  1583 	do {
       
  1584 		s = *scan++;
       
  1585 		switch (OP(s)) {
       
  1586 		case OCHAR:		/* sequence member */
       
  1587 			if (newlen == 0) {		/* new sequence */
       
  1588 				memset(&mbs, 0, sizeof(mbs));
       
  1589 				newstart = scan - 1;
       
  1590 			}
       
  1591 			clen = wcrtomb(buf, OPND(s), &mbs);
       
  1592 			if (clen == (size_t)-1)
       
  1593 				goto toohard;
       
  1594 			newlen += clen;
       
  1595 			break;
       
  1596 		case OPLUS_:		/* things that don't break one */
       
  1597 		case OLPAREN:
       
  1598 		case ORPAREN:
       
  1599 			break;
       
  1600 		case OQUEST_:		/* things that must be skipped */
       
  1601 		case OCH_:
       
  1602 			offset = altoffset(scan, offset);
       
  1603 			scan--;
       
  1604 			do {
       
  1605 				scan += OPND(s);
       
  1606 				s = *scan;
       
  1607 				/* assert() interferes w debug printouts */
       
  1608 				if (OP(s) != O_QUEST && OP(s) != O_CH &&
       
  1609 							OP(s) != OOR2) {
       
  1610 					g->iflags |= BAD;
       
  1611 					return;
       
  1612 				}
       
  1613 			} while (OP(s) != O_QUEST && OP(s) != O_CH);
       
  1614 			/* FALLTHROUGH */
       
  1615 		case OBOW:		/* things that break a sequence */
       
  1616 		case OEOW:
       
  1617 		case OBOL:
       
  1618 		case OEOL:
       
  1619 		case O_QUEST:
       
  1620 		case O_CH:
       
  1621 		case OEND:
       
  1622 			if (newlen > g->mlen) {		/* ends one */
       
  1623 				start = newstart;
       
  1624 				g->mlen = newlen;
       
  1625 				if (offset > -1) {
       
  1626 					g->moffset += offset;
       
  1627 					offset = newlen;
       
  1628 				} else
       
  1629 					g->moffset = offset;
       
  1630 			} else {
       
  1631 				if (offset > -1)
       
  1632 					offset += newlen;
       
  1633 			}
       
  1634 			newlen = 0;
       
  1635 			break;
       
  1636 		case OANY:
       
  1637 			if (newlen > g->mlen) {		/* ends one */
       
  1638 				start = newstart;
       
  1639 				g->mlen = newlen;
       
  1640 				if (offset > -1) {
       
  1641 					g->moffset += offset;
       
  1642 					offset = newlen;
       
  1643 				} else
       
  1644 					g->moffset = offset;
       
  1645 			} else {
       
  1646 				if (offset > -1)
       
  1647 					offset += newlen;
       
  1648 			}
       
  1649 			if (offset > -1)
       
  1650 				offset++;
       
  1651 			newlen = 0;
       
  1652 			break;
       
  1653 		case OANYOF:		/* may or may not invalidate offset */
       
  1654 			/* First, everything as OANY */
       
  1655 			if (newlen > g->mlen) {		/* ends one */
       
  1656 				start = newstart;
       
  1657 				g->mlen = newlen;
       
  1658 				if (offset > -1) {
       
  1659 					g->moffset += offset;
       
  1660 					offset = newlen;
       
  1661 				} else
       
  1662 					g->moffset = offset;
       
  1663 			} else {
       
  1664 				if (offset > -1)
       
  1665 					offset += newlen;
       
  1666 			}
       
  1667 			if (offset > -1)
       
  1668 				offset++;
       
  1669 			newlen = 0;
       
  1670 			break;
       
  1671 		toohard:
       
  1672 		default:
       
  1673 			/* Anything here makes it impossible or too hard
       
  1674 			 * to calculate the offset -- so we give up;
       
  1675 			 * save the last known good offset, in case the
       
  1676 			 * must sequence doesn't occur later.
       
  1677 			 */
       
  1678 			if (newlen > g->mlen) {		/* ends one */
       
  1679 				start = newstart;
       
  1680 				g->mlen = newlen;
       
  1681 				if (offset > -1)
       
  1682 					g->moffset += offset;
       
  1683 				else
       
  1684 					g->moffset = offset;
       
  1685 			}
       
  1686 			offset = -1;
       
  1687 			newlen = 0;
       
  1688 			break;
       
  1689 		}
       
  1690 	} while (OP(s) != OEND);
       
  1691 
       
  1692 	if (g->mlen == 0) {		/* there isn't one */
       
  1693 		g->moffset = -1;
       
  1694 		return;
       
  1695 	}
       
  1696 
       
  1697 	/* turn it into a character string */
       
  1698 	g->must = malloc((size_t)g->mlen + 1);
       
  1699 	if (g->must == NULL) {		/* argh; just forget it */
       
  1700 		g->mlen = 0;
       
  1701 		g->moffset = -1;
       
  1702 		return;
       
  1703 	}
       
  1704 	cp = g->must;
       
  1705 	scan = start;
       
  1706 	memset(&mbs, 0, sizeof(mbs));
       
  1707 	while (cp < g->must + g->mlen) {
       
  1708 		while (OP(s = *scan++) != OCHAR)
       
  1709 			continue;
       
  1710 		clen = wcrtomb(cp, OPND(s), &mbs);
       
  1711 		assert(clen != (size_t)-1);
       
  1712 		cp += clen;
       
  1713 	}
       
  1714 	assert(cp == g->must + g->mlen);
       
  1715 	*cp++ = '\0';		/* just on general principles */
       
  1716 }
       
  1717 
       
  1718 /*
       
  1719  - altoffset - choose biggest offset among multiple choices
       
  1720  == static int altoffset(sop *scan, int offset);
       
  1721  *
       
  1722  * Compute, recursively if necessary, the largest offset among multiple
       
  1723  * re paths.
       
  1724  */
       
  1725 static int
       
  1726 altoffset(scan, offset)
       
  1727 sop *scan;
       
  1728 int offset;
       
  1729 {
       
  1730 	int largest;
       
  1731 	int try;
       
  1732 	sop s;
       
  1733 
       
  1734 	/* If we gave up already on offsets, return */
       
  1735 	if (offset == -1)
       
  1736 		return -1;
       
  1737 
       
  1738 	largest = 0;
       
  1739 	try = 0;
       
  1740 	s = *scan++;
       
  1741 	while (OP(s) != O_QUEST && OP(s) != O_CH) {
       
  1742 		switch (OP(s)) {
       
  1743 		case OOR1:
       
  1744 			if (try > largest)
       
  1745 				largest = try;
       
  1746 			try = 0;
       
  1747 			break;
       
  1748 		case OQUEST_:
       
  1749 		case OCH_:
       
  1750 			try = altoffset(scan, try);
       
  1751 			if (try == -1)
       
  1752 				return -1;
       
  1753 			scan--;
       
  1754 			do {
       
  1755 				scan += OPND(s);
       
  1756 				s = *scan;
       
  1757 				if (OP(s) != O_QUEST && OP(s) != O_CH &&
       
  1758 							OP(s) != OOR2)
       
  1759 					return -1;
       
  1760 			} while (OP(s) != O_QUEST && OP(s) != O_CH);
       
  1761 			/* We must skip to the next position, or we'll
       
  1762 			 * leave altoffset() too early.
       
  1763 			 */
       
  1764 			scan++;
       
  1765 			break;
       
  1766 		case OANYOF:
       
  1767 		case OCHAR:
       
  1768 		case OANY:
       
  1769 			try++;
       
  1770 		case OBOW:
       
  1771 		case OEOW:
       
  1772 		case OLPAREN:
       
  1773 		case ORPAREN:
       
  1774 		case OOR2:
       
  1775 			break;
       
  1776 		default:
       
  1777 			try = -1;
       
  1778 			break;
       
  1779 		}
       
  1780 		if (try == -1)
       
  1781 			return -1;
       
  1782 		s = *scan++;
       
  1783 	}
       
  1784 
       
  1785 	if (try > largest)
       
  1786 		largest = try;
       
  1787 
       
  1788 	return largest+offset;
       
  1789 }
       
  1790 
       
  1791 /*
       
  1792  - computejumps - compute char jumps for BM scan
       
  1793  == static void computejumps(struct parse *p, struct re_guts *g);
       
  1794  *
       
  1795  * This algorithm assumes g->must exists and is has size greater than
       
  1796  * zero. It's based on the algorithm found on Computer Algorithms by
       
  1797  * Sara Baase.
       
  1798  *
       
  1799  * A char jump is the number of characters one needs to jump based on
       
  1800  * the value of the character from the text that was mismatched.
       
  1801  */
       
  1802 static void
       
  1803 computejumps(p, g)
       
  1804 struct parse *p;
       
  1805 struct re_guts *g;
       
  1806 {
       
  1807 	int ch;
       
  1808 	int mindex;
       
  1809 
       
  1810 	/* Avoid making errors worse */
       
  1811 	if (p->error != 0)
       
  1812 		return;
       
  1813 
       
  1814 	g->charjump = (int*) malloc((NC + 1) * sizeof(int));
       
  1815 	if (g->charjump == NULL)	/* Not a fatal error */
       
  1816 		return;
       
  1817 	/* Adjust for signed chars, if necessary */
       
  1818 	g->charjump = &g->charjump[-(CHAR_MIN)];
       
  1819 
       
  1820 	/* If the character does not exist in the pattern, the jump
       
  1821 	 * is equal to the number of characters in the pattern.
       
  1822 	 */
       
  1823 	for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++)
       
  1824 		g->charjump[ch] = g->mlen;
       
  1825 
       
  1826 	/* If the character does exist, compute the jump that would
       
  1827 	 * take us to the last character in the pattern equal to it
       
  1828 	 * (notice that we match right to left, so that last character
       
  1829 	 * is the first one that would be matched).
       
  1830 	 */
       
  1831 	for (mindex = 0; mindex < g->mlen; mindex++)
       
  1832 		g->charjump[(int)g->must[mindex]] = g->mlen - mindex - 1;
       
  1833 }
       
  1834 
       
  1835 /*
       
  1836  - computematchjumps - compute match jumps for BM scan
       
  1837  == static void computematchjumps(struct parse *p, struct re_guts *g);
       
  1838  *
       
  1839  * This algorithm assumes g->must exists and is has size greater than
       
  1840  * zero. It's based on the algorithm found on Computer Algorithms by
       
  1841  * Sara Baase.
       
  1842  *
       
  1843  * A match jump is the number of characters one needs to advance based
       
  1844  * on the already-matched suffix.
       
  1845  * Notice that all values here are minus (g->mlen-1), because of the way
       
  1846  * the search algorithm works.
       
  1847  */
       
  1848 static void
       
  1849 computematchjumps(p, g)
       
  1850 struct parse *p;
       
  1851 struct re_guts *g;
       
  1852 {
       
  1853 	int mindex;		/* General "must" iterator */
       
  1854 	int suffix;		/* Keeps track of matching suffix */
       
  1855 	int ssuffix;		/* Keeps track of suffixes' suffix */
       
  1856 	int* pmatches;		/* pmatches[k] points to the next i
       
  1857 				 * such that i+1...mlen is a substring
       
  1858 				 * of k+1...k+mlen-i-1
       
  1859 				 */
       
  1860 
       
  1861 	/* Avoid making errors worse */
       
  1862 	if (p->error != 0)
       
  1863 		return;
       
  1864 
       
  1865 	pmatches = (int*) malloc(g->mlen * sizeof(unsigned int));
       
  1866 	if (pmatches == NULL) {
       
  1867 		g->matchjump = NULL;
       
  1868 		return;
       
  1869 	}
       
  1870 
       
  1871 	g->matchjump = (int*) malloc(g->mlen * sizeof(unsigned int));
       
  1872 	if (g->matchjump == NULL)	/* Not a fatal error */
       
  1873 		return;
       
  1874 
       
  1875 	/* Set maximum possible jump for each character in the pattern */
       
  1876 	for (mindex = 0; mindex < g->mlen; mindex++)
       
  1877 		g->matchjump[mindex] = 2*g->mlen - mindex - 1;
       
  1878 
       
  1879 	/* Compute pmatches[] */
       
  1880 	for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0;
       
  1881 	    mindex--, suffix--) {
       
  1882 		pmatches[mindex] = suffix;
       
  1883 
       
  1884 		/* If a mismatch is found, interrupting the substring,
       
  1885 		 * compute the matchjump for that position. If no
       
  1886 		 * mismatch is found, then a text substring mismatched
       
  1887 		 * against the suffix will also mismatch against the
       
  1888 		 * substring.
       
  1889 		 */
       
  1890 		while (suffix < g->mlen
       
  1891 		    && g->must[mindex] != g->must[suffix]) {
       
  1892 			g->matchjump[suffix] = MIN(g->matchjump[suffix],
       
  1893 			    g->mlen - mindex - 1);
       
  1894 			suffix = pmatches[suffix];
       
  1895 		}
       
  1896 	}
       
  1897 
       
  1898 	/* Compute the matchjump up to the last substring found to jump
       
  1899 	 * to the beginning of the largest must pattern prefix matching
       
  1900 	 * it's own suffix.
       
  1901 	 */
       
  1902 	for (mindex = 0; mindex <= suffix; mindex++)
       
  1903 		g->matchjump[mindex] = MIN(g->matchjump[mindex],
       
  1904 		    g->mlen + suffix - mindex);
       
  1905 
       
  1906         ssuffix = pmatches[suffix];
       
  1907         while (suffix < g->mlen) {
       
  1908                 while (suffix <= ssuffix && suffix < g->mlen) {
       
  1909                         g->matchjump[suffix] = MIN(g->matchjump[suffix],
       
  1910 			    g->mlen + ssuffix - suffix);
       
  1911                         suffix++;
       
  1912                 }
       
  1913 		if (suffix < g->mlen)
       
  1914                 	ssuffix = pmatches[ssuffix];
       
  1915         }
       
  1916 
       
  1917 	free(pmatches);
       
  1918 }
       
  1919 
       
  1920 /*
       
  1921  - pluscount - count + nesting
       
  1922  == static sopno pluscount(struct parse *p, struct re_guts *g);
       
  1923  */
       
  1924 static sopno			/* nesting depth */
       
  1925 pluscount(p, g)
       
  1926 struct parse *p;
       
  1927 struct re_guts *g;
       
  1928 {
       
  1929 	sop *scan;
       
  1930 	sop s;
       
  1931 	sopno plusnest = 0;
       
  1932 	sopno maxnest = 0;
       
  1933 
       
  1934 	if (p->error != 0)
       
  1935 		return(0);	/* there may not be an OEND */
       
  1936 
       
  1937 	scan = g->strip + 1;
       
  1938 	do {
       
  1939 		s = *scan++;
       
  1940 		switch (OP(s)) {
       
  1941 		case OPLUS_:
       
  1942 			plusnest++;
       
  1943 			break;
       
  1944 		case O_PLUS:
       
  1945 			if (plusnest > maxnest)
       
  1946 				maxnest = plusnest;
       
  1947 			plusnest--;
       
  1948 			break;
       
  1949 		}
       
  1950 	} while (OP(s) != OEND);
       
  1951 	if (plusnest != 0)
       
  1952 		g->iflags |= BAD;
       
  1953 	return(maxnest);
       
  1954 }