|
1 // subst.c - various substitutions |
|
2 // |
|
3 // © Portions Copyright (c) Symbian Software Ltd 2007. All rights reserved. |
|
4 // |
|
5 /* |
|
6 * This file is part of zsh, the Z shell. |
|
7 * |
|
8 * Copyright (c) 1992-1997 Paul Falstad |
|
9 * All rights reserved. |
|
10 * |
|
11 * Permission is hereby granted, without written agreement and without |
|
12 * license or royalty fees, to use, copy, modify, and distribute this |
|
13 * software and to distribute modified versions of this software for any |
|
14 * purpose, provided that the above copyright notice and the following |
|
15 * two paragraphs appear in all copies of this software. |
|
16 * |
|
17 * In no event shall Paul Falstad or the Zsh Development Group be liable |
|
18 * to any party for direct, indirect, special, incidental, or consequential |
|
19 * damages arising out of the use of this software and its documentation, |
|
20 * even if Paul Falstad and the Zsh Development Group have been advised of |
|
21 * the possibility of such damage. |
|
22 * |
|
23 * Paul Falstad and the Zsh Development Group specifically disclaim any |
|
24 * warranties, including, but not limited to, the implied warranties of |
|
25 * merchantability and fitness for a particular purpose. The software |
|
26 * provided hereunder is on an "as is" basis, and Paul Falstad and the |
|
27 * Zsh Development Group have no obligation to provide maintenance, |
|
28 * support, updates, enhancements, or modifications. |
|
29 * |
|
30 */ |
|
31 |
|
32 #include "zsh.mdh" |
|
33 #include "subst.pro" |
|
34 |
|
35 #ifdef __SYMBIAN32__ |
|
36 #ifdef __WINSCW__ |
|
37 #pragma warn_possunwant off |
|
38 #endif//__WINSCW__ |
|
39 #endif//__SYMBIAN32__ |
|
40 |
|
41 /**/ |
|
42 char nulstring[] = {Nularg, '\0'}; |
|
43 |
|
44 /* Do substitutions before fork. These are: |
|
45 * - Process substitution: <(...), >(...), =(...) |
|
46 * - Parameter substitution |
|
47 * - Command substitution |
|
48 * Followed by |
|
49 * - Quote removal |
|
50 * - Brace expansion |
|
51 * - Tilde and equals substitution |
|
52 * |
|
53 * PF_* flags are defined in zsh.h |
|
54 */ |
|
55 |
|
56 /**/ |
|
57 mod_export void |
|
58 prefork(LinkList list, int flags) |
|
59 { |
|
60 LinkNode node, stop = 0; |
|
61 int keep = 0, asssub = (flags & PF_TYPESET) && isset(KSHTYPESET); |
|
62 |
|
63 queue_signals(); |
|
64 for (node = firstnode(list); node; incnode(node)) { |
|
65 char *str, c; |
|
66 |
|
67 str = (char *)getdata(node); |
|
68 if (((c = *str) == Inang || c == Outang || c == Equals) && |
|
69 str[1] == Inpar) { |
|
70 if (c == Inang || c == Outang) |
|
71 setdata(node, (void *) getproc(str)); /* <(...) or >(...) */ |
|
72 else |
|
73 setdata(node, (void *) getoutputfile(str)); /* =(...) */ |
|
74 if (!getdata(node)) { |
|
75 unqueue_signals(); |
|
76 return; |
|
77 } |
|
78 } else { |
|
79 if (isset(SHFILEEXPANSION)) |
|
80 filesub((char **)getaddrdata(node), |
|
81 flags & (PF_TYPESET|PF_ASSIGN)); |
|
82 if (!(node = stringsubst(list, node, flags & PF_SINGLE, asssub))) { |
|
83 unqueue_signals(); |
|
84 return; |
|
85 } |
|
86 } |
|
87 } |
|
88 for (node = firstnode(list); node; incnode(node)) { |
|
89 if (node == stop) |
|
90 keep = 0; |
|
91 if (*(char *)getdata(node)) { |
|
92 remnulargs(getdata(node)); |
|
93 if (unset(IGNOREBRACES) && !(flags & PF_SINGLE)) { |
|
94 if (!keep) |
|
95 stop = nextnode(node); |
|
96 while (hasbraces(getdata(node))) { |
|
97 keep = 1; |
|
98 xpandbraces(list, &node); |
|
99 } |
|
100 } |
|
101 if (unset(SHFILEEXPANSION)) |
|
102 filesub((char **)getaddrdata(node), |
|
103 flags & (PF_TYPESET|PF_ASSIGN)); |
|
104 } else if (!(flags & PF_SINGLE) && !keep) |
|
105 uremnode(list, node); |
|
106 if (errflag) { |
|
107 unqueue_signals(); |
|
108 return; |
|
109 } |
|
110 } |
|
111 unqueue_signals(); |
|
112 } |
|
113 |
|
114 /**/ |
|
115 static LinkNode |
|
116 stringsubst(LinkList list, LinkNode node, int ssub, int asssub) |
|
117 { |
|
118 int qt; |
|
119 char *str3 = (char *)getdata(node); |
|
120 char *str = str3, c; |
|
121 |
|
122 while (!errflag && (c = *str)) { |
|
123 if ((qt = c == Qstring) || c == String) { |
|
124 if ((c = str[1]) == Inpar) { |
|
125 if (!qt) |
|
126 mult_isarr = 1; |
|
127 str++; |
|
128 goto comsub; |
|
129 } else if (c == Inbrack) { |
|
130 /* $[...] */ |
|
131 char *str2 = str; |
|
132 str2++; |
|
133 if (skipparens(Inbrack, Outbrack, &str2)) { |
|
134 zerr("closing bracket missing", NULL, 0); |
|
135 return NULL; |
|
136 } |
|
137 str2[-1] = *str = '\0'; |
|
138 str = arithsubst(str + 2, &str3, str2); |
|
139 setdata(node, (void *) str3); |
|
140 continue; |
|
141 } else if (c == Snull) { |
|
142 str = getkeystring(str, NULL, 4, NULL); |
|
143 continue; |
|
144 } else { |
|
145 node = paramsubst(list, node, &str, qt, ssub); |
|
146 if (errflag || !node) |
|
147 return NULL; |
|
148 str3 = (char *)getdata(node); |
|
149 continue; |
|
150 } |
|
151 } else if ((qt = c == Qtick) || (c == Tick ? (mult_isarr = 1) : 0)) |
|
152 comsub: { |
|
153 LinkList pl; |
|
154 char *s, *str2 = str; |
|
155 char endchar; |
|
156 int l1, l2; |
|
157 |
|
158 if (c == Inpar) { |
|
159 endchar = Outpar; |
|
160 str[-1] = '\0'; |
|
161 #ifdef DEBUG |
|
162 if (skipparens(Inpar, Outpar, &str)) |
|
163 dputs("BUG: parse error in command substitution"); |
|
164 #else |
|
165 skipparens(Inpar, Outpar, &str); |
|
166 #endif |
|
167 str--; |
|
168 } else { |
|
169 endchar = c; |
|
170 *str = '\0'; |
|
171 |
|
172 while (*++str != endchar) |
|
173 DPUTS(!*str, "BUG: parse error in command substitution"); |
|
174 } |
|
175 *str++ = '\0'; |
|
176 if (endchar == Outpar && str2[1] == '(' && str[-2] == ')') { |
|
177 /* Math substitution of the form $((...)) */ |
|
178 str[-2] = '\0'; |
|
179 str = arithsubst(str2 + 2, &str3, str); |
|
180 setdata(node, (void *) str3); |
|
181 continue; |
|
182 } |
|
183 |
|
184 /* It is a command substitution, which will be parsed again * |
|
185 * by the lexer, so we untokenize it first, but we cannot use * |
|
186 * untokenize() since in the case of `...` some Bnulls should * |
|
187 * be left unchanged. Note that the lexer doesn't tokenize * |
|
188 * the body of a command substitution so if there are some * |
|
189 * tokens here they are from a ${(e)~...} substitution. */ |
|
190 for (str = str2; (c = *++str); ) |
|
191 if (itok(c) && c != Nularg && |
|
192 !(endchar != Outpar && c == Bnull && |
|
193 (str[1] == '$' || str[1] == '\\' || str[1] == '`' || |
|
194 (qt && str[1] == '"')))) |
|
195 *str = ztokens[c - Pound]; |
|
196 str++; |
|
197 if (!(pl = getoutput(str2 + 1, qt || ssub))) { |
|
198 zerr("parse error in command substitution", NULL, 0); |
|
199 return NULL; |
|
200 } |
|
201 if (endchar == Outpar) |
|
202 str2--; |
|
203 if (!(s = (char *) ugetnode(pl))) { |
|
204 str = strcpy(str2, str); |
|
205 continue; |
|
206 } |
|
207 if (!qt && ssub && isset(GLOBSUBST)) |
|
208 shtokenize(s); |
|
209 l1 = str2 - str3; |
|
210 l2 = strlen(s); |
|
211 if (nonempty(pl)) { |
|
212 LinkNode n = lastnode(pl); |
|
213 str2 = (char *) hcalloc(l1 + l2 + 1); |
|
214 strcpy(str2, str3); |
|
215 strcpy(str2 + l1, s); |
|
216 setdata(node, str2); |
|
217 insertlinklist(pl, node, list); |
|
218 s = (char *) getdata(node = n); |
|
219 l1 = 0; |
|
220 l2 = strlen(s); |
|
221 } |
|
222 str2 = (char *) hcalloc(l1 + l2 + strlen(str) + 1); |
|
223 if (l1) |
|
224 strcpy(str2, str3); |
|
225 strcpy(str2 + l1, s); |
|
226 str = strcpy(str2 + l1 + l2, str); |
|
227 str3 = str2; |
|
228 setdata(node, str3); |
|
229 continue; |
|
230 } else if (asssub && ((c == '=') || c == Equals) && str != str3) { |
|
231 /* |
|
232 * We are in a normal argument which looks like an assignment |
|
233 * and is to be treated like one, with no word splitting. |
|
234 */ |
|
235 ssub = 1; |
|
236 } |
|
237 str++; |
|
238 } |
|
239 return errflag ? NULL : node; |
|
240 } |
|
241 |
|
242 /* |
|
243 * Simplified version of the prefork/singsub processing where |
|
244 * we only do substitutions appropriate to quoting. Currently |
|
245 * this means only the expansions in $'....'. This is used |
|
246 * for the end tag for here documents. As we are not doing |
|
247 * `...` expansions, we just use those for quoting. However, |
|
248 * they stay in the text. This is weird, but that's not |
|
249 * my fault. |
|
250 * |
|
251 * The remnulargs() makes this consistent with the other forms |
|
252 * of substitution, indicating that quotes have been fully |
|
253 * processed. |
|
254 */ |
|
255 |
|
256 /**/ |
|
257 void |
|
258 quotesubst(char *str) |
|
259 { |
|
260 char *s = str; |
|
261 |
|
262 while (*s) { |
|
263 if (*s == String && s[1] == Snull) { |
|
264 s = getkeystring(s, NULL, 4, NULL); |
|
265 } else { |
|
266 s++; |
|
267 } |
|
268 } |
|
269 remnulargs(str); |
|
270 } |
|
271 |
|
272 /**/ |
|
273 mod_export void |
|
274 globlist(LinkList list, int nountok) |
|
275 { |
|
276 LinkNode node, next; |
|
277 |
|
278 badcshglob = 0; |
|
279 for (node = firstnode(list); !errflag && node; node = next) { |
|
280 next = nextnode(node); |
|
281 zglob(list, node, nountok); |
|
282 } |
|
283 if (badcshglob == 1) |
|
284 zerr("no match", NULL, 0); |
|
285 } |
|
286 |
|
287 /* perform substitution on a single word */ |
|
288 |
|
289 /**/ |
|
290 mod_export void |
|
291 singsub(char **s) |
|
292 { |
|
293 int omi = mult_isarr; |
|
294 local_list1(foo); |
|
295 |
|
296 init_list1(foo, *s); |
|
297 |
|
298 prefork(&foo, PF_SINGLE); |
|
299 mult_isarr = omi; |
|
300 if (errflag) |
|
301 return; |
|
302 *s = (char *) ugetnode(&foo); |
|
303 DPUTS(nonempty(&foo), "BUG: singsub() produced more than one word!"); |
|
304 } |
|
305 |
|
306 /* Perform substitution on a single word. Unlike with singsub, the * |
|
307 * result can have more than one word. A single word result is stored * |
|
308 * in *s and *isarr is set to zero; otherwise *isarr is set to 1 and * |
|
309 * the result is stored in *a. If `a' is zero a multiple word result is * |
|
310 * joined using sep or the IFS parameter if sep is zero and the result * |
|
311 * is returned in *s. The return value is true iff the expansion * |
|
312 * resulted in an empty list. * |
|
313 * The mult_isarr variable is used by paramsubst() to tell if it yields * |
|
314 * an array. */ |
|
315 |
|
316 /**/ |
|
317 static int mult_isarr; |
|
318 |
|
319 /**/ |
|
320 static int |
|
321 multsub(char **s, char ***a, int *isarr, UNUSED(char *sep)) |
|
322 { |
|
323 int l, omi = mult_isarr; |
|
324 char **r, **p; |
|
325 local_list1(foo); |
|
326 #ifdef __SYMBIAN32__ |
|
327 sep=sep; |
|
328 #endif |
|
329 mult_isarr = 0; |
|
330 init_list1(foo, *s); |
|
331 prefork(&foo, 0); |
|
332 if (errflag) { |
|
333 if (isarr) |
|
334 *isarr = 0; |
|
335 mult_isarr = omi; |
|
336 return 0; |
|
337 } |
|
338 if ((l = countlinknodes(&foo))) { |
|
339 p = r = hcalloc((l + 1) * sizeof(char*)); |
|
340 while (nonempty(&foo)) |
|
341 *p++ = (char *)ugetnode(&foo); |
|
342 *p = NULL; |
|
343 /* |
|
344 * This is the most obscure way of deciding whether a value is |
|
345 * an array it would be possible to imagine. It seems to result |
|
346 * partly because we don't pass down the qt and ssub flags from |
|
347 * paramsubst() through prefork() properly, partly because we |
|
348 * don't tidy up to get back the return type from multsub we |
|
349 * need properly. The crux of neatening this up is to get rid |
|
350 * of the following test. |
|
351 */ |
|
352 if (a && mult_isarr) { |
|
353 *a = r; |
|
354 *isarr = SCANPM_MATCHMANY; |
|
355 mult_isarr = omi; |
|
356 return 0; |
|
357 } |
|
358 *s = sepjoin(r, NULL, 1); |
|
359 mult_isarr = omi; |
|
360 if (isarr) |
|
361 *isarr = 0; |
|
362 return 0; |
|
363 } |
|
364 if (l) |
|
365 *s = (char *) ugetnode(&foo); |
|
366 else |
|
367 *s = dupstring(""); |
|
368 if (isarr) |
|
369 *isarr = 0; |
|
370 mult_isarr = omi; |
|
371 return !l; |
|
372 } |
|
373 |
|
374 /* |
|
375 * ~, = subs: assign & PF_TYPESET => typeset or magic equals |
|
376 * assign & PF_ASSIGN => normal assignment |
|
377 */ |
|
378 |
|
379 /**/ |
|
380 mod_export void |
|
381 filesub(char **namptr, int assign) |
|
382 { |
|
383 char *eql = NULL, *sub = NULL, *str, *ptr; |
|
384 int len; |
|
385 |
|
386 filesubstr(namptr, assign); |
|
387 |
|
388 if (!assign) |
|
389 return; |
|
390 |
|
391 if (assign & PF_TYPESET) { |
|
392 if ((*namptr)[1] && (eql = sub = strchr(*namptr + 1, Equals))) { |
|
393 str = sub + 1; |
|
394 if ((sub[1] == Tilde || sub[1] == Equals) && filesubstr(&str, assign)) { |
|
395 sub[1] = '\0'; |
|
396 *namptr = dyncat(*namptr, str); |
|
397 } |
|
398 } else |
|
399 return; |
|
400 } |
|
401 |
|
402 ptr = *namptr; |
|
403 while ((sub = strchr(ptr, ':'))) { |
|
404 str = sub + 1; |
|
405 len = sub - *namptr; |
|
406 if (sub > eql && |
|
407 (sub[1] == Tilde || sub[1] == Equals) && |
|
408 filesubstr(&str, assign)) { |
|
409 sub[1] = '\0'; |
|
410 *namptr = dyncat(*namptr, str); |
|
411 } |
|
412 ptr = *namptr + len + 1; |
|
413 } |
|
414 } |
|
415 |
|
416 /**/ |
|
417 mod_export int |
|
418 filesubstr(char **namptr, int assign) |
|
419 { |
|
420 #define isend(c) ( !(c) || (c)=='/' || (c)==Inpar || (assign && (c)==':') ) |
|
421 #define isend2(c) ( !(c) || (c)==Inpar || (assign && (c)==':') ) |
|
422 char *str = *namptr; |
|
423 |
|
424 if (*str == Tilde && str[1] != '=' && str[1] != Equals) { |
|
425 char *ptr; |
|
426 int val; |
|
427 |
|
428 val = zstrtol(str + 1, &ptr, 10); |
|
429 if (isend(str[1])) { /* ~ */ |
|
430 *namptr = dyncat(home, str + 1); |
|
431 return 1; |
|
432 } else if (str[1] == '+' && isend(str[2])) { /* ~+ */ |
|
433 *namptr = dyncat(pwd, str + 2); |
|
434 return 1; |
|
435 } else if (str[1] == '-' && isend(str[2])) { /* ~- */ |
|
436 char *tmp; |
|
437 *namptr = dyncat((tmp = oldpwd) ? tmp : pwd, str + 2); |
|
438 return 1; |
|
439 } else if (!inblank(str[1]) && isend(*ptr) && |
|
440 (!idigit(str[1]) || (ptr - str < 4))) { |
|
441 char *ds; |
|
442 |
|
443 if (val < 0) |
|
444 val = -val; |
|
445 ds = dstackent(str[1], val); |
|
446 if (!ds) |
|
447 return 0; |
|
448 *namptr = dyncat(ds, ptr); |
|
449 return 1; |
|
450 } else if (iuser(str[1])) { /* ~foo */ |
|
451 char *ptr, *hom, save; |
|
452 |
|
453 for (ptr = ++str; *ptr && iuser(*ptr); ptr++); |
|
454 save = *ptr; |
|
455 if (!isend(save)) |
|
456 return 0; |
|
457 *ptr = 0; |
|
458 if (!(hom = getnameddir(str))) { |
|
459 if (isset(NOMATCH)) |
|
460 zerr("no such user or named directory: %s", str, 0); |
|
461 *ptr = save; |
|
462 return 0; |
|
463 } |
|
464 *ptr = save; |
|
465 *namptr = dyncat(hom, ptr); |
|
466 return 1; |
|
467 } |
|
468 } else if (*str == Equals && isset(EQUALS) && str[1]) { /* =foo */ |
|
469 char sav, *pp, *cnam; |
|
470 |
|
471 for (pp = str + 1; !isend2(*pp); pp++); |
|
472 sav = *pp; |
|
473 *pp = 0; |
|
474 if (!(cnam = findcmd(str + 1, 1))) { |
|
475 if (isset(NOMATCH)) |
|
476 zerr("%s not found", str + 1, 0); |
|
477 return 0; |
|
478 } |
|
479 *namptr = dupstring(cnam); |
|
480 if (sav) { |
|
481 *pp = sav; |
|
482 *namptr = dyncat(*namptr, pp); |
|
483 } |
|
484 return 1; |
|
485 } |
|
486 return 0; |
|
487 #undef isend |
|
488 #undef isend2 |
|
489 } |
|
490 |
|
/*
 * Build the string <pb..pe> + src + s and store it in *d.
 *
 * d:      receives the start of the resulting string.
 * pb, pe: start and end of the prefix; pe - pb characters are copied.
 * src:    the middle part; l is added to the write position after it
 *         has been copied.
 * s:      optional suffix (may be NULL).
 * glbsub: if non-zero, shtokenize() is applied to the copy of src
 *         (before the suffix is appended).
 * copied: if non-zero and there is neither prefix nor suffix, src is
 *         used directly instead of being duplicated.
 *
 * Returns a pointer into the result: the start of the string when
 * there is neither prefix nor suffix, otherwise the position l
 * characters past the start of the copied src.
 */

/**/
static char *
strcatsub(char **d, char *pb, char *pe, char *src, int l, char *s, int glbsub,
          int copied)
{
    char *dest;
    int pl = pe - pb;

    if (!pl && (!s || !*s)) {
        /* Nothing to prepend or append: the value itself is the result. */
        dest = copied ? src : dupstring(src);
        *d = dest;
        if (glbsub)
            shtokenize(dest);
        return dest;
    }

    dest = hcalloc(pl + l + (s ? strlen(s) : 0) + 1);
    *d = dest;
    strncpy(dest, pb, pl);
    dest += pl;
    strcpy(dest, src);
    if (glbsub)
        shtokenize(dest);
    dest += l;
    if (s)
        strcpy(dest, s);
    return dest;
}
|
516 |
|
517 typedef int (*CompareFn) _((const void *, const void *)); |
|
518 |
|
/*
 * Compare the two strings that a and b point to (each argument is the
 * address of a char pointer).  Uses strcoll() if HAVE_STRCOLL is
 * defined, strcmp() otherwise.
 */

/**/
int
strpcmp(const void *a, const void *b)
{
    char *lhs = *(char **)a;
    char *rhs = *(char **)b;

#ifdef HAVE_STRCOLL
    return strcoll(lhs, rhs);
#else
    return strcmp(lhs, rhs);
#endif
}
|
529 |
|
/*
 * Reverse-order string comparison: the negated result of strcoll()
 * (if HAVE_STRCOLL is defined) or strcmp() on the strings that a and
 * b point to.
 */

/**/
int
invstrpcmp(const void *a, const void *b)
{
    char *lhs = *(char **)a;
    char *rhs = *(char **)b;

#ifdef HAVE_STRCOLL
    return -strcoll(lhs, rhs);
#else
    return -strcmp(lhs, rhs);
#endif
}
|
540 |
|
/*
 * Case-insensitive string comparison.  With HAVE_STRCOLL both strings
 * are copied with every character passed through tulower() and the
 * copies are compared with strcoll(); otherwise the strings are walked
 * in parallel and the difference of the first pair of characters that
 * differ after tulower() is returned.
 */

/**/
int
cstrpcmp(const void *a, const void *b)
{
#ifdef HAVE_STRCOLL
    VARARR(char, c, strlen(*(char **) a) + 1);
    VARARR(char, d, strlen(*(char **) b) + 1);
    char *s, *t;

    /* Each loop copies up to and including the terminating NUL. */
    s = *(char **) a;
    t = c;
    while ((*t++ = tulower(*s++)))
        ;
    s = *(char **) b;
    t = d;
    while ((*t++ = tulower(*s++)))
        ;

    return strcoll(c, d);
#else
    char *c = *(char **)a, *d = *(char **)b;

    while (*c && tulower(*c) == tulower(*d)) {
        c++;
        d++;
    }

    return (int)STOUC(tulower(*c)) - (int)STOUC(tulower(*d));
#endif
}
|
565 |
|
/*
 * Case-insensitive string comparison in reverse order: the same
 * comparison as the strcoll()/tulower() scheme used for forward
 * ordering, with the sign of the result inverted.
 */

/**/
int
invcstrpcmp(const void *a, const void *b)
{
#ifdef HAVE_STRCOLL
    VARARR(char, c, strlen(*(char **) a) + 1);
    VARARR(char, d, strlen(*(char **) b) + 1);
    char *s, *t;
    int cmp;

    /* Each loop copies up to and including the terminating NUL. */
    s = *(char **) a;
    t = c;
    while ((*t++ = tulower(*s++)))
        ;
    s = *(char **) b;
    t = d;
    while ((*t++ = tulower(*s++)))
        ;

    cmp = strcoll(c, d);
    return -cmp;
#else
    char *c = *(char **)a, *d = *(char **)b;

    while (*c && tulower(*c) == tulower(*d)) {
        c++;
        d++;
    }

    return (int)STOUC(tulower(*d)) - (int)STOUC(tulower(*c));
#endif
}
|
590 |
|
/*
 * String comparison in which runs of digits are compared as numbers.
 *
 * The base result is strcoll() of the two strings if HAVE_STRCOLL is
 * defined, otherwise the difference of the first pair of differing
 * characters.  If the strings first differ inside or next to a run of
 * digits, both positions are moved back to the start of that run,
 * leading zeroes are skipped, and the longer remaining run of digits
 * wins; for runs of equal length the first differing digit decides.
 */

/**/
int
nstrpcmp(const void *a, const void *b)
{
    char *start = *(char **)a;
    char *c = start, *d = *(char **)b;
    int cmp;

#ifdef HAVE_STRCOLL
    cmp = strcoll(c, d);
#endif
    /* Skip the common prefix. */
    while (*c == *d && *c) {
        c++;
        d++;
    }
#ifndef HAVE_STRCOLL
    cmp = (int)STOUC(*c) - (int)STOUC(*d);
#endif
    if (!idigit(*c) && !idigit(*d))
        return cmp;

    /* Step back, in both strings, to the start of the digit run. */
    while (c > start && idigit(c[-1])) {
        c--;
        d--;
    }
    if (!idigit(*c) || !idigit(*d))
        return cmp;

    while (*c == '0')
        c++;
    while (*d == '0')
        d++;
    while (idigit(*c) && *c == *d) {
        c++;
        d++;
    }
    if (idigit(*c) || idigit(*d)) {
        /* Provisional answer, valid if both runs have the same length. */
        cmp = (int)STOUC(*c) - (int)STOUC(*d);
        while (idigit(*c) && idigit(*d)) {
            c++;
            d++;
        }
        /* Whichever side still has digits left is the larger number. */
        if (idigit(*c) && !idigit(*d))
            return 1;
        if (idigit(*d) && !idigit(*c))
            return -1;
    }
    return cmp;
}
|
626 |
|
/* Reverse-order variant of nstrpcmp(): returns its result negated. */

/**/
int
invnstrpcmp(const void *a, const void *b)
{
    int forward = nstrpcmp(a, b);

    return -forward;
}
|
633 |
|
634 /**/ |
|
635 int |
|
636 instrpcmp(const void *a, const void *b) |
|
637 { |
|
638 VARARR(char, c, strlen(*(char **) a) + 1); |
|
639 VARARR(char, d, strlen(*(char **) b) + 1); |
|
640 char **e = (char **)&c; |
|
641 char **f = (char **)&d; |
|
642 char *s, *t; |
|
643 |
|
644 for (s = *(char **) a, t = c; (*t++ = tulower(*s++));); |
|
645 for (s = *(char **) b, t = d; (*t++ = tulower(*s++));); |
|
646 |
|
647 return nstrpcmp(&e, &f); |
|
648 } |
|
649 |
|
/* Reverse-order variant of instrpcmp(): returns its result negated. */

/**/
int
invinstrpcmp(const void *a, const void *b)
{
    int forward = instrpcmp(a, b);

    return -forward;
}
|
656 |
|
657 /**/ |
|
658 static char * |
|
659 dopadding(char *str, int prenum, int postnum, char *preone, char *postone, char *premul, char *postmul) |
|
660 { |
|
661 char def[3], *ret, *t, *r; |
|
662 int ls, ls2, lpreone, lpostone, lpremul, lpostmul, lr, f, m, c, cc; |
|
663 |
|
664 def[0] = *ifs ? *ifs : ' '; |
|
665 def[1] = *ifs == Meta ? ifs[1] ^ 32 : '\0'; |
|
666 def[2] = '\0'; |
|
667 if (preone && !*preone) |
|
668 preone = def; |
|
669 if (postone && !*postone) |
|
670 postone = def; |
|
671 if (!premul || !*premul) |
|
672 premul = def; |
|
673 if (!postmul || !*postmul) |
|
674 postmul = def; |
|
675 |
|
676 ls = strlen(str); |
|
677 lpreone = preone ? strlen(preone) : 0; |
|
678 lpostone = postone ? strlen(postone) : 0; |
|
679 lpremul = strlen(premul); |
|
680 lpostmul = strlen(postmul); |
|
681 |
|
682 lr = prenum + postnum; |
|
683 |
|
684 if (lr == ls) |
|
685 return str; |
|
686 |
|
687 r = ret = (char *)zhalloc(lr + 1); |
|
688 |
|
689 if (prenum) { |
|
690 if (postnum) { |
|
691 ls2 = ls / 2; |
|
692 |
|
693 f = prenum - ls2; |
|
694 if (f <= 0) |
|
695 for (str -= f, c = prenum; c--; *r++ = *str++); |
|
696 else { |
|
697 if (f <= lpreone) |
|
698 for (c = f, t = preone + lpreone - f; c--; *r++ = *t++); |
|
699 else { |
|
700 f -= lpreone; |
|
701 if ((m = f % lpremul)) |
|
702 for (c = m, t = premul + lpremul - m; c--; *r++ = *t++); |
|
703 for (cc = f / lpremul; cc--;) |
|
704 for (c = lpremul, t = premul; c--; *r++ = *t++); |
|
705 for (c = lpreone; c--; *r++ = *preone++); |
|
706 } |
|
707 for (c = ls2; c--; *r++ = *str++); |
|
708 } |
|
709 ls2 = ls - ls2; |
|
710 f = postnum - ls2; |
|
711 if (f <= 0) |
|
712 for (c = postnum; c--; *r++ = *str++); |
|
713 else { |
|
714 for (c = ls2; c--; *r++ = *str++); |
|
715 if (f <= lpostone) |
|
716 for (c = f; c--; *r++ = *postone++); |
|
717 else { |
|
718 f -= lpostone; |
|
719 for (c = lpostone; c--; *r++ = *postone++); |
|
720 for (cc = f / lpostmul; cc--;) |
|
721 for (c = lpostmul, t = postmul; c--; *r++ = *t++); |
|
722 if ((m = f % lpostmul)) |
|
723 for (; m--; *r++ = *postmul++); |
|
724 } |
|
725 } |
|
726 } else { |
|
727 f = prenum - ls; |
|
728 if (f <= 0) |
|
729 for (c = prenum, str -= f; c--; *r++ = *str++); |
|
730 else { |
|
731 if (f <= lpreone) |
|
732 for (c = f, t = preone + lpreone - f; c--; *r++ = *t++); |
|
733 else { |
|
734 f -= lpreone; |
|
735 if ((m = f % lpremul)) |
|
736 for (c = m, t = premul + lpremul - m; c--; *r++ = *t++); |
|
737 for (cc = f / lpremul; cc--;) |
|
738 for (c = lpremul, t = premul; c--; *r++ = *t++); |
|
739 for (c = lpreone; c--; *r++ = *preone++); |
|
740 } |
|
741 for (c = ls; c--; *r++ = *str++); |
|
742 } |
|
743 } |
|
744 } else if (postnum) { |
|
745 f = postnum - ls; |
|
746 if (f <= 0) |
|
747 for (c = postnum; c--; *r++ = *str++); |
|
748 else { |
|
749 for (c = ls; c--; *r++ = *str++); |
|
750 if (f <= lpostone) |
|
751 for (c = f; c--; *r++ = *postone++); |
|
752 else { |
|
753 f -= lpostone; |
|
754 for (c = lpostone; c--; *r++ = *postone++); |
|
755 for (cc = f / lpostmul; cc--;) |
|
756 for (c = lpostmul, t = postmul; c--; *r++ = *t++); |
|
757 if ((m = f % lpostmul)) |
|
758 for (; m--; *r++ = *postmul++); |
|
759 } |
|
760 } |
|
761 } |
|
762 *r = '\0'; |
|
763 |
|
764 return ret; |
|
765 } |
|
766 |
|
767 /**/ |
|
768 char * |
|
769 get_strarg(char *s) |
|
770 { |
|
771 char t = *s++; |
|
772 |
|
773 if (!t) |
|
774 return s - 1; |
|
775 |
|
776 switch (t) { |
|
777 case '(': |
|
778 t = ')'; |
|
779 break; |
|
780 case '[': |
|
781 t = ']'; |
|
782 break; |
|
783 case '{': |
|
784 t = '}'; |
|
785 break; |
|
786 case '<': |
|
787 t = '>'; |
|
788 break; |
|
789 case Inpar: |
|
790 t = Outpar; |
|
791 break; |
|
792 case Inang: |
|
793 t = Outang; |
|
794 break; |
|
795 case Inbrace: |
|
796 t = Outbrace; |
|
797 break; |
|
798 case Inbrack: |
|
799 t = Outbrack; |
|
800 break; |
|
801 } |
|
802 |
|
803 while (*s && *s != t) |
|
804 s++; |
|
805 |
|
806 return s; |
|
807 } |
|
808 |
|
809 /**/ |
|
810 static int |
|
811 get_intarg(char **s) |
|
812 { |
|
813 char *t = get_strarg(*s + 1); |
|
814 char *p, sav; |
|
815 zlong ret; |
|
816 |
|
817 if (!*t) |
|
818 return -1; |
|
819 sav = *t; |
|
820 *t = '\0'; |
|
821 p = dupstring(*s + 2); |
|
822 *s = t; |
|
823 *t = sav; |
|
824 if (parsestr(p)) |
|
825 return -1; |
|
826 singsub(&p); |
|
827 if (errflag) |
|
828 return -1; |
|
829 ret = mathevali(p); |
|
830 if (errflag) |
|
831 return -1; |
|
832 if (ret < 0) |
|
833 ret = -ret; |
|
834 return ret < 0 ? -ret : ret; |
|
835 } |
|
836 |
|
837 /* Parsing for the (e) flag. */ |
|
838 |
|
839 static int |
|
840 subst_parse_str(char **sp, int single, int err) |
|
841 { |
|
842 char *s; |
|
843 |
|
844 *sp = s = dupstring(*sp); |
|
845 |
|
846 if (!(err ? parsestr(s) : parsestrnoerr(s))) { |
|
847 if (!single) { |
|
848 int qt = 0; |
|
849 |
|
850 for (; *s; s++) |
|
851 if (!qt) { |
|
852 if (*s == Qstring) |
|
853 *s = String; |
|
854 else if (*s == Qtick) |
|
855 *s = Tick; |
|
856 } else if (*s == Dnull) |
|
857 qt = !qt; |
|
858 } |
|
859 return 0; |
|
860 } |
|
861 return 1; |
|
862 } |
|
863 |
|
864 /* parameter substitution */ |
|
865 |
|
/* True if c is a literal `$' or one of the tokens String and Qstring. */
#define isstring(c) \
    ((c) == '$' || (char)(c) == String || (char)(c) == Qstring)

/* True if c is a literal `[' or the token Inbrack. */
#define isbrack(c) \
    ((c) == '[' || (char)(c) == Inbrack)
|
868 |
|
869 /* |
|
870 * Given a linked list l with node n, perform parameter substitution |
|
871 * starting from *str. Return the node with the substitution performed |
|
872 * or NULL if it failed. |
|
873 * |
|
874 * If qt is true, the `$' was quoted. TODO: why can't we just look |
|
875 * to see if the first character was String or Qstring? |
|
876 * |
|
877 * If ssub is true, we are being called via singsub(), which means |
|
878 * the result will be a single word. TODO: can we generate the |
|
879 * single word at the end? TODO: if not, or maybe in any case, |
|
880 * can we pass down the ssub flag from prefork with the other flags |
|
881 * instead of pushing it into different arguments? (How exactly |
|
882 * do qt and ssub differ? Are both necessary, if so is there some |
|
883 * better way of separating the two?) |
|
884 */ |
|
885 |
|
886 /**/ |
|
887 LinkNode |
|
888 paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub) |
|
889 { |
|
890 char *aptr = *str, c, cc; |
|
891 char *s = aptr, *fstr, *idbeg, *idend, *ostr = (char *) getdata(n); |
|
892 int colf; /* != 0 means we found a colon after the name */ |
|
893 /* |
|
894 * There are far too many flags. They need to be grouped |
|
895 * together into some structure which ties them to where they |
|
896 * came from. |
|
897 * |
|
898 * Some flags have a an obscure relationship to their effect which |
|
899 * depends on incrementing them to particular values in particular |
|
900 * ways. |
|
901 */ |
|
902 /* |
|
903 * Whether the value is an array (in aval) or not (in val). There's |
|
904 * a movement from storing the value in the stuff read from the |
|
905 * parameter (the value v) to storing them in val and aval. |
|
906 * However, sometimes you find v reappearing temporarily. |
|
907 * |
|
908 * The values -1 and 2 are special to isarr. It looks like 2 is |
|
909 * some kind of an internal flag to do with whether the array's been |
|
910 * copied, in which case I don't know why we don't use the copied |
|
911 * flag, but they do both occur close together so they presumably |
|
912 * have different effects. The value -1 is isued to force us to |
|
913 * keep an empty array. It's tested in the YUK chunk (I mean the |
|
914 * one explicitly marked as such). |
|
915 */ |
|
916 int isarr = 0; |
|
917 /* |
|
918 * This is just the setting of the option except we need to |
|
919 * take account of ^ and ^^. |
|
920 */ |
|
921 int plan9 = isset(RCEXPANDPARAM); |
|
922 /* |
|
923 * Likwise, but with ~ and ~~. Also, we turn it off later |
|
924 * on if qt is passed down. |
|
925 */ |
|
926 int globsubst = isset(GLOBSUBST); |
|
927 /* |
|
928 * Indicates ${#pm}, massaged by whichlen which is set by |
|
929 * the (c), (w), and (W) flags to indicate how we take the length. |
|
930 */ |
|
931 int getlen = 0; |
|
932 int whichlen = 0; |
|
933 /* |
|
934 * Indicates ${+pm}: a simple boolean for once. |
|
935 */ |
|
936 int chkset = 0; |
|
937 /* |
|
938 * Indicates we have tried to get a value in v but that was |
|
939 * unset. I don't quite understand why (v == NULL) isn't |
|
940 * good enough, but there are places where we seem to need |
|
941 * to second guess whether a value is a real value or not. |
|
942 */ |
|
943 int vunset = 0; |
|
944 /* |
|
945 * Indicates (t) flag, i.e. print out types. The code for |
|
946 * this actually isn't too horrifically inbred compared with |
|
947 * that for (P). |
|
948 */ |
|
949 int wantt = 0; |
|
950 /* |
|
951 * Indicates spliting a string into an array. There aren't |
|
952 * actually that many special cases for this --- which may |
|
953 * be why it doesn't work properly; we split in some cases |
|
954 * where we shouldn't, in particular on the multsubs for |
|
955 * handling embedded values for ${...=...} and the like. |
|
956 */ |
|
957 int spbreak = isset(SHWORDSPLIT) && !ssub && !qt; |
|
958 /* Scalar and array value, see isarr above */ |
|
959 char *val = NULL, **aval = NULL; |
|
960 /* |
|
961 * Padding based on setting in parameter rather than substitution |
|
962 * flags. This is only used locally. |
|
963 */ |
|
964 unsigned int fwidth = 0; |
|
965 /* |
|
966 * vbuf and v are both used to retrieve parameter values; this |
|
967 * is a kludge, we pass down vbuf and it may or may not return v. |
|
968 */ |
|
969 struct value vbuf; |
|
970 Value v = NULL; |
|
971 /* |
|
972 * This expressive name refers to the set of flags which |
|
973 * is applied to matching for #, %, / and their doubled variants: |
|
974 * (M), (R), (B), (E), (N), (S). |
|
975 */ |
|
976 int flags = 0; |
|
977 /* Value from (I) flag, used for ditto. */ |
|
978 int flnum = 0; |
|
979 /* |
|
980 * sortit is an obscure combination of the settings for (o), (O), |
|
981 * (i) and (n). casind is (i) and numord is (n); these are |
|
982 * separate so we can have fun doing the obscure combinatorics later. |
|
983 * indord is the (a) flag, which for consistency doesn't get |
|
984 * combined into sortit. |
|
985 */ |
|
986 int sortit = 0, casind = 0, numord = 0, indord = 0; |
|
987 /* (u): straightforward. */ |
|
988 int unique = 0; |
|
989 /* combination of (L), (U) and (C) flags. */ |
|
990 int casmod = 0; |
|
991 /* |
|
992 * quotemod says we are doing either (q) (positive), (Q) (negative) |
|
993 * or not (0). quotetype counts the q's for the first case. |
|
994 * quoterr is simply (X) but gets passed around a lot because the |
|
995 * combination (eX) needs it. |
|
996 */ |
|
997 int quotemod = 0, quotetype = 0, quoteerr = 0; |
|
998 /* |
|
999 * (V) flag: fairly straightforward, except that as with so |
|
1000 * many flags it's not easy to decide where to put it in the order. |
|
1001 */ |
|
1002 int visiblemod = 0; |
|
1003 /* |
|
1004 * The (z) flag, nothing to do with SH_WORD_SPLIT which is tied |
|
1005 * spbreak, see above; fairly straighforward in use but c.f. |
|
1006 * the comment for visiblemod. |
|
1007 */ |
|
1008 int shsplit = 0; |
|
1009 /* |
|
1010 * The separator from (j) and (s) respectively, or (F) and (f) |
|
1011 * respectively (hardwired to "\n" in that case). Slightly |
|
1012 * confusingly also used for ${#pm}, thought that's at least |
|
1013 * documented in the manual |
|
1014 */ |
|
1015 char *sep = NULL, *spsep = NULL; |
|
1016 /* |
|
1017 * Padding strings. The left and right padding strings which |
|
1018 * are repeated, then the ones which only occur once, for |
|
1019 * the (l) and (r) flags. |
|
1020 */ |
|
1021 char *premul = NULL, *postmul = NULL, *preone = NULL, *postone = NULL; |
|
1022 /* Replacement string for /orig/repl and //orig/repl */ |
|
1023 char *replstr = NULL; |
|
1024 /* The numbers for (l) and (r) */ |
|
1025 zlong prenum = 0, postnum = 0; |
|
1026 /* |
|
1027 * Whether the value has been copied. Optimisation: if we |
|
1028 * are modifying an expression, we only need to copy it the |
|
1029 * first time, and if we don't modify it we can just use the |
|
1030 * value from the parameter or input. |
|
1031 */ |
|
1032 int copied = 0; |
|
1033 /* |
|
1034 * The (A) flag for array assignment, with consequences for |
|
1035 * splitting and joining; (AA) gives arrasg == 2 for associative |
|
1036 * arrays. |
|
1037 */ |
|
1038 int arrasg = 0; |
|
1039 /* |
|
1040 * The (e) flag. As we need to do extra work not quite |
|
1041 * at the end, the effect of this is kludged in in several places. |
|
1042 */ |
|
1043 int eval = 0; |
|
1044 /* |
|
1045 * The (P) flag. This interacts a bit obscurely with whether |
|
1046 * or not we are dealing with a sub expression (subexp). |
|
1047 */ |
|
1048 int aspar = 0; |
|
1049 /* |
|
1050 * The (%) flag, c.f. visiblemod again. |
|
1051 */ |
|
1052 int presc = 0; |
|
1053 /* |
|
1054 * The (@) flag; interacts obscurely with qt and isarr. |
|
1055 * This is one of the things that decides whether multsub |
|
1056 * will produce an array, but in an extremely indirect fashion. |
|
1057 */ |
|
1058 int nojoin = 0; |
|
1059 /* |
|
1060 * != 0 means ${...}, otherwise $... What works without braces |
|
1061 * is largely a historical artefact (everything works with braces, |
|
1062 * I sincerely hope). |
|
1063 */ |
|
1064 char inbrace = 0; |
|
1065 /* |
|
1066 * Use for the (k) flag. Goes down into the parameter code, |
|
1067 * sometimes. |
|
1068 */ |
|
1069 char hkeys = 0; |
|
1070 /* |
|
1071 * Used for the (v) flag, ditto. Not quite sure why they're |
|
1072 * separate, but the tradition seems to be that things only |
|
1073 * get combined when that makes the result more obscure rather |
|
1074 * than less. |
|
1075 */ |
|
1076 char hvals = 0; |
|
1077 /* |
|
1078 * Whether we had to evaluate a subexpression, i.e. an |
|
1079 * internal ${...} or $(...) or plain $pm. We almost don't |
|
1080 * need to remember this (which would be neater), but the (P) |
|
1081 * flag means the subexp and !subexp code is obscurely combined, |
|
1082 * and the argument passing to fetchvalue has another kludge. |
|
1083 */ |
|
1084 int subexp; |
|
1085 |
|
1086 *s++ = '\0'; |
|
1087 /* |
|
1088 * Nothing to do unless the character following the $ is |
|
1089 * something we recognise. |
|
1090 * |
|
1091 * Shouldn't this be a table or something? We test for all |
|
1092 * these later on, too. |
|
1093 */ |
|
1094 if (!ialnum(c = *s) && c != '#' && c != Pound && c != '-' && |
|
1095 c != '!' && c != '$' && c != String && c != Qstring && |
|
1096 c != '?' && c != Quest && c != '_' && |
|
1097 c != '*' && c != Star && c != '@' && c != '{' && |
|
1098 c != Inbrace && c != '=' && c != Equals && c != Hat && |
|
1099 c != '^' && c != '~' && c != Tilde && c != '+') { |
|
1100 s[-1] = '$'; |
|
1101 *str = s; |
|
1102 return n; |
|
1103 } |
|
1104 DPUTS(c == '{', "BUG: inbrace == '{' in paramsubst()"); |
|
1105 /* |
|
1106 * Extra processing if there is an opening brace: mostly |
|
1107 * flags in parentheses, but also one ksh hack. |
|
1108 */ |
|
1109 if (c == Inbrace) { |
|
1110 inbrace = 1; |
|
1111 s++; |
|
1112 /* |
|
1113 * In ksh emulation a leading `!' is a special flag working |
|
1114 * sort of like our (k). |
|
1115 * TODO: this is one of very few cases tied directly to |
|
1116 * the emulation mode rather than an option. Since ksh |
|
1117 * doesn't have parameter flags it might be neater to |
|
1118 * handle this with the ^, =, ~ stuff, below. |
|
1119 */ |
|
1120 if ((c = *s) == '!' && s[1] != Outbrace && emulation == EMULATE_KSH) { |
|
1121 hkeys = SCANPM_WANTKEYS; |
|
1122 s++; |
|
1123 } else if (c == '(' || c == Inpar) { |
|
1124 char *t, sav; |
|
1125 int tt = 0; |
|
1126 zlong num; |
|
1127 /* |
|
1128 * The (p) flag is (uniquely) only remembered within |
|
1129 * this block. It says we do print-style handling |
|
1130 * on the values for flags, but only on those. |
|
1131 * This explains the ghastly macro, but why can't it |
|
1132 * be a function? UNTOK_AND_ESCAPE is defined |
|
1133 * so that the argument must be an lvalue. |
|
1134 */ |
|
1135 int escapes = 0; |
|
1136 int klen; |
|
1137 #define UNTOK(C) (itok(C) ? ztokens[(C) - Pound] : (C)) |
|
1138 #define UNTOK_AND_ESCAPE(X) {\ |
|
1139 untokenize(X = dupstring(s + 1));\ |
|
1140 if (escapes) {\ |
|
1141 X = getkeystring(X, &klen, 3, NULL);\ |
|
1142 X = metafy(X, klen, META_HREALLOC);\ |
|
1143 }\ |
|
1144 } |
|
1145 |
|
1146 for (s++; (c = *s) != ')' && c != Outpar; s++, tt = 0) { |
|
1147 switch (c) { |
|
1148 case ')': |
|
1149 case Outpar: |
|
1150 break; |
|
1151 case 'A': |
|
1152 ++arrasg; |
|
1153 break; |
|
1154 case '@': |
|
1155 nojoin = 1; |
|
1156 break; |
|
1157 case 'M': |
|
1158 flags |= SUB_MATCH; |
|
1159 break; |
|
1160 case 'R': |
|
1161 flags |= SUB_REST; |
|
1162 break; |
|
1163 case 'B': |
|
1164 flags |= SUB_BIND; |
|
1165 break; |
|
1166 case 'E': |
|
1167 flags |= SUB_EIND; |
|
1168 break; |
|
1169 case 'N': |
|
1170 flags |= SUB_LEN; |
|
1171 break; |
|
1172 case 'S': |
|
1173 flags |= SUB_SUBSTR; |
|
1174 break; |
|
1175 case 'I': |
|
1176 flnum = get_intarg(&s); |
|
1177 if (flnum < 0) |
|
1178 goto flagerr; |
|
1179 break; |
|
1180 |
|
1181 case 'L': |
|
1182 casmod = 2; |
|
1183 break; |
|
1184 case 'U': |
|
1185 casmod = 1; |
|
1186 break; |
|
1187 case 'C': |
|
1188 casmod = 3; |
|
1189 break; |
|
1190 |
|
1191 case 'o': |
|
1192 sortit = 1; |
|
1193 break; |
|
1194 case 'O': |
|
1195 sortit = 2; |
|
1196 break; |
|
1197 case 'i': |
|
1198 casind = 1; |
|
1199 break; |
|
1200 case 'n': |
|
1201 numord = 1; |
|
1202 break; |
|
1203 case 'a': |
|
1204 indord = 1; |
|
1205 break; |
|
1206 |
|
1207 case 'V': |
|
1208 visiblemod++; |
|
1209 break; |
|
1210 |
|
1211 case 'q': |
|
1212 quotemod++, quotetype++; |
|
1213 break; |
|
1214 case 'Q': |
|
1215 quotemod--; |
|
1216 break; |
|
1217 case 'X': |
|
1218 quoteerr = 1; |
|
1219 break; |
|
1220 |
|
1221 case 'e': |
|
1222 eval = 1; |
|
1223 break; |
|
1224 case 'P': |
|
1225 aspar = 1; |
|
1226 break; |
|
1227 |
|
1228 case 'c': |
|
1229 whichlen = 1; |
|
1230 break; |
|
1231 case 'w': |
|
1232 whichlen = 2; |
|
1233 break; |
|
1234 case 'W': |
|
1235 whichlen = 3; |
|
1236 break; |
|
1237 |
|
1238 case 'f': |
|
1239 spsep = "\n"; |
|
1240 break; |
|
1241 case 'F': |
|
1242 sep = "\n"; |
|
1243 break; |
|
1244 |
|
1245 case 's': |
|
1246 tt = 1; |
|
1247 /* fall through */ |
|
1248 case 'j': |
|
1249 t = get_strarg(++s); |
|
1250 if (*t) { |
|
1251 sav = *t; |
|
1252 *t = '\0'; |
|
1253 if (tt) |
|
1254 UNTOK_AND_ESCAPE(spsep) |
|
1255 else |
|
1256 UNTOK_AND_ESCAPE(sep) |
|
1257 *t = sav; |
|
1258 s = t; |
|
1259 } else |
|
1260 goto flagerr; |
|
1261 break; |
|
1262 |
|
1263 case 'l': |
|
1264 tt = 1; |
|
1265 /* fall through */ |
|
1266 case 'r': |
|
1267 sav = s[1]; |
|
1268 num = get_intarg(&s); |
|
1269 if (num < 0) |
|
1270 goto flagerr; |
|
1271 if (tt) |
|
1272 prenum = num; |
|
1273 else |
|
1274 postnum = num; |
|
1275 if (UNTOK(s[1]) != UNTOK(sav)) |
|
1276 break; |
|
1277 t = get_strarg(++s); |
|
1278 if (!*t) |
|
1279 goto flagerr; |
|
1280 sav = *t; |
|
1281 *t = '\0'; |
|
1282 if (tt) |
|
1283 UNTOK_AND_ESCAPE(premul) |
|
1284 else |
|
1285 UNTOK_AND_ESCAPE(postmul) |
|
1286 *t = sav; |
|
1287 sav = *s; |
|
1288 s = t + 1; |
|
1289 if (UNTOK(*s) != UNTOK(sav)) { |
|
1290 s--; |
|
1291 break; |
|
1292 } |
|
1293 t = get_strarg(s); |
|
1294 if (!*t) |
|
1295 goto flagerr; |
|
1296 sav = *t; |
|
1297 *t = '\0'; |
|
1298 if (tt) |
|
1299 UNTOK_AND_ESCAPE(preone) |
|
1300 else |
|
1301 UNTOK_AND_ESCAPE(postone) |
|
1302 *t = sav; |
|
1303 s = t; |
|
1304 break; |
|
1305 |
|
1306 case 'p': |
|
1307 escapes = 1; |
|
1308 break; |
|
1309 |
|
1310 case 'k': |
|
1311 hkeys = SCANPM_WANTKEYS; |
|
1312 break; |
|
1313 case 'v': |
|
1314 hvals = SCANPM_WANTVALS; |
|
1315 break; |
|
1316 |
|
1317 case 't': |
|
1318 wantt = 1; |
|
1319 break; |
|
1320 |
|
1321 case '%': |
|
1322 presc++; |
|
1323 break; |
|
1324 |
|
1325 case 'z': |
|
1326 shsplit = 1; |
|
1327 break; |
|
1328 |
|
1329 case 'u': |
|
1330 unique = 1; |
|
1331 break; |
|
1332 |
|
1333 default: |
|
1334 flagerr: |
|
1335 zerr("error in flags", NULL, 0); |
|
1336 return NULL; |
|
1337 } |
|
1338 } |
|
1339 s++; |
|
1340 } |
|
1341 } |
|
1342 /* Sort is done by indexing on sortit-1: |
|
1343 * bit 1: ascending (o)/descending (O) |
|
1344 * bit 2: case sensitive/independent (i) |
|
1345 * bit 3: strict order/numeric (n) |
|
1346 * unless indord (a) is set, in which case only test for |
|
1347 * descending by assuming only (O) is possible (not verified). |
|
1348 */ |
|
1349 if (sortit) |
|
1350 sortit += (casind << 1) + (numord << 2); |
|
1351 |
|
1352 /* |
|
1353 * premul, postmul specify the padding character to be used |
|
1354 * multiple times with the (l) and (r) flags respectively. |
|
1355 */ |
|
1356 if (!premul) |
|
1357 premul = " "; |
|
1358 if (!postmul) |
|
1359 postmul = " "; |
|
1360 |
|
1361 /* |
|
1362 * Look for special unparenthesised flags. |
|
1363 * TODO: could make these able to appear inside parentheses, too, |
|
1364 * i.e. ${(^)...} etc. |
|
1365 */ |
|
1366 for (;;) { |
|
1367 if ((c = *s) == '^' || c == Hat) { |
|
1368 /* RC_EXPAND_PARAM on or off (doubled )*/ |
|
1369 if ((c = *++s) == '^' || c == Hat) { |
|
1370 plan9 = 0; |
|
1371 s++; |
|
1372 } else |
|
1373 plan9 = 1; |
|
1374 } else if ((c = *s) == '=' || c == Equals) { |
|
1375 /* SH_WORD_SPLIT on or off (doubled). spbreak = 2 means force */ |
|
1376 if ((c = *++s) == '=' || c == Equals) { |
|
1377 spbreak = 0; |
|
1378 s++; |
|
1379 } else |
|
1380 spbreak = 2; |
|
1381 } else if ((c == '#' || c == Pound) && |
|
1382 (iident(cc = s[1]) |
|
1383 || cc == '*' || cc == Star || cc == '@' |
|
1384 || cc == '-' || (cc == ':' && s[2] == '-') |
|
1385 || (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar)))) { |
|
1386 getlen = 1 + whichlen, s++; |
|
1387 /* |
|
1388 * Return the length of the parameter. |
|
1389 * getlen can be more than 1 to indicate characters (2), |
|
1390 * words ignoring multiple delimiters (3), words taking |
|
1391 * account of multiple delimiters. delimiter is in |
|
1392 * spsep, NULL means $IFS. |
|
1393 */ |
|
1394 } else if (c == '~' || c == Tilde) { |
|
1395 /* GLOB_SUBST on or off (doubled) */ |
|
1396 if ((c = *++s) == '~' || c == Tilde) { |
|
1397 globsubst = 0; |
|
1398 s++; |
|
1399 } else |
|
1400 globsubst = 1; |
|
1401 } else if (c == '+') { |
|
1402 /* |
|
1403 * Return whether indicated parameter is set. |
|
1404 * Try to handle this when parameter is named |
|
1405 * by (P) (second part of test). |
|
1406 */ |
|
1407 if (iident(s[1]) || (aspar && isstring(s[1]) && |
|
1408 (s[2] == Inbrace || s[2] == Inpar))) |
|
1409 chkset = 1, s++; |
|
1410 else if (!inbrace) { |
|
1411 /* Special case for `$+' on its own --- leave unmodified */ |
|
1412 *aptr = '$'; |
|
1413 *str = aptr + 1; |
|
1414 return n; |
|
1415 } else { |
|
1416 zerr("bad substitution", NULL, 0); |
|
1417 return NULL; |
|
1418 } |
|
1419 } else if (inbrace && INULL(*s)) { |
|
1420 /* |
|
1421 * Handles things like ${(f)"$(<file)"} by skipping |
|
1422 * the double quotes. We don't need to know what was |
|
1423 * actually there; the presence of a String or Qstring |
|
1424 * is good enough. |
|
1425 */ |
|
1426 s++; |
|
1427 } else |
|
1428 break; |
|
1429 } |
|
1430 /* Don't activate special pattern characters if inside quotes */ |
|
1431 globsubst = globsubst && !qt; |
|
1432 |
|
1433 /* |
|
1434 * At this point, we usually expect a parameter name. |
|
1435 * However, there may be a nested ${...} or $(...). |
|
1436 * These say that the parameter itself is somewhere inside, |
|
1437 * or that there isn't a parameter and we will get the values |
|
1438 * from a command substitution itself. In either case, |
|
1439 * the current instance of paramsubst() doesn't fetch a value, |
|
1440 * it just operates on what gets passed up. |
|
1441 * (The first ought to have been {...}, reserving ${...} |
|
1442 * for substituting a value at that point, but it's too late now.) |
|
1443 */ |
|
1444 idbeg = s; |
|
1445 if ((subexp = (inbrace && s[-1] && isstring(*s) && |
|
1446 (s[1] == Inbrace || s[1] == Inpar)))) { |
|
1447 int sav; |
|
1448 int quoted = *s == Qstring; |
|
1449 |
|
1450 val = s++; |
|
1451 skipparens(*s, *s == Inpar ? Outpar : Outbrace, &s); |
|
1452 sav = *s; |
|
1453 *s = 0; |
|
1454 /* |
|
1455 * This handles arrays. TODO: this is not the most obscure call to |
|
1456 * multsub() (see below) but even so it would be nicer to pass down |
|
1457 * and back the arrayness more rationally. In that case, we should |
|
1458 * remove the aspar test and extract a value from an array, if |
|
1459 * necessary, when we handle (P) lower down. |
|
1460 */ |
|
1461 if (multsub(&val, (aspar ? NULL : &aval), &isarr, NULL) && quoted) { |
|
1462 /* Empty quoted string --- treat as null string, not elided */ |
|
1463 isarr = -1; |
|
1464 aval = (char **) hcalloc(sizeof(char *)); |
|
1465 aspar = 0; |
|
1466 } else if (aspar) |
|
1467 idbeg = val; |
|
1468 *s = sav; |
|
1469 /* |
|
1470 * This tests for the second double quote in an expression |
|
1471 * like ${(f)"$(<file)"}, compare above. |
|
1472 */ |
|
1473 while (INULL(*s)) |
|
1474 s++; |
|
1475 v = (Value) NULL; |
|
1476 } else if (aspar) { |
|
1477 /* |
|
1478 * No subexpression, but in any case the value is going |
|
1479 * to give us the name of a parameter on which we do |
|
1480 * our remaining processing. In other words, this |
|
1481 * makes ${(P)param} work like ${(P)${param}}. (Probably |
|
1482 * better looked at, this is the basic code for ${(P)param} |
|
1483 * and it's been kludged into the subexp code because no |
|
1484 * opportunity for a kludge has been neglected.) |
|
1485 */ |
|
1486 if ((v = fetchvalue(&vbuf, &s, 1, (qt ? SCANPM_DQUOTED : 0)))) { |
|
1487 val = idbeg = getstrvalue(v); |
|
1488 subexp = 1; |
|
1489 } else |
|
1490 vunset = 1; |
|
1491 } |
|
1492 /* |
|
1493 * We need to retrieve a value either if we haven't already |
|
1494 * got it from a subexpression, or if the processing so |
|
1495 * far has just yielded us a parameter name to be processed |
|
1496 * with (P). |
|
1497 */ |
|
1498 if (!subexp || aspar) { |
|
1499 char *ov = val; |
|
1500 |
|
1501 /* |
|
1502 * Second argument: decide whether to use the subexpression or |
|
1503 * the string next on the line as the parameter name. |
|
1504 * Third argument: decide how processing for brackets |
|
1505 * 1 means full processing |
|
1506 * -1 appears to mean something along the lines of |
|
1507 * only handle single digits and don't handle brackets. |
|
1508 * I *think* (but it's really only a guess) that this |
|
1509 * is used by the test below the wantt handling, so |
|
1510 * that in certain cases we handle brackets there. |
|
1511 * 0 would apparently mean something like we know we |
|
1512 * should have the name of a scalar and we get cross |
|
1513 * if there's anything present which disagrees with that |
|
1514 * but you will search fetchvalue() in vain for comments on this. |
|
1515 * Fourth argument gives flags to do with keys, values, quoting, |
|
1516 * assigning depending on context and parameter flags. |
|
1517 * |
|
1518 * This is the last mention of subexp, so presumably this |
|
1519 * is what the code which makes sure subexp is set if aspar (the |
|
1520 * (P) flag) is set. I *think* what's going on here is the |
|
1521 * second argument is for both input and output: with |
|
1522 * subexp, we only want the input effect, whereas normally |
|
1523 * we let fetchvalue set the main string pointer s to |
|
1524 * the end of the bit it's fetched. |
|
1525 */ |
|
1526 if (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s), |
|
1527 (wantt ? -1 : |
|
1528 ((unset(KSHARRAYS) || inbrace) ? 1 : -1)), |
|
1529 hkeys|hvals| |
|
1530 (arrasg ? SCANPM_ASSIGNING : 0)| |
|
1531 (qt ? SCANPM_DQUOTED : 0))) || |
|
1532 (v->pm && (v->pm->flags & PM_UNSET))) |
|
1533 vunset = 1; |
|
1534 |
|
1535 if (wantt) { |
|
1536 /* |
|
1537 * Handle the (t) flag: value now becomes the type |
|
1538 * information for the parameter. |
|
1539 */ |
|
1540 if (v && v->pm && !(v->pm->flags & PM_UNSET)) { |
|
1541 int f = v->pm->flags; |
|
1542 |
|
1543 switch (PM_TYPE(f)) { |
|
1544 case PM_SCALAR: val = "scalar"; break; |
|
1545 case PM_ARRAY: val = "array"; break; |
|
1546 case PM_INTEGER: val = "integer"; break; |
|
1547 case PM_EFLOAT: |
|
1548 case PM_FFLOAT: val = "float"; break; |
|
1549 case PM_HASHED: val = "association"; break; |
|
1550 } |
|
1551 val = dupstring(val); |
|
1552 if (v->pm->level) |
|
1553 val = dyncat(val, "-local"); |
|
1554 if (f & PM_LEFT) |
|
1555 val = dyncat(val, "-left"); |
|
1556 if (f & PM_RIGHT_B) |
|
1557 val = dyncat(val, "-right_blanks"); |
|
1558 if (f & PM_RIGHT_Z) |
|
1559 val = dyncat(val, "-right_zeros"); |
|
1560 if (f & PM_LOWER) |
|
1561 val = dyncat(val, "-lower"); |
|
1562 if (f & PM_UPPER) |
|
1563 val = dyncat(val, "-upper"); |
|
1564 if (f & PM_READONLY) |
|
1565 val = dyncat(val, "-readonly"); |
|
1566 if (f & PM_TAGGED) |
|
1567 val = dyncat(val, "-tag"); |
|
1568 if (f & PM_EXPORTED) |
|
1569 val = dyncat(val, "-export"); |
|
1570 if (f & PM_UNIQUE) |
|
1571 val = dyncat(val, "-unique"); |
|
1572 if (f & PM_HIDE) |
|
1573 val = dyncat(val, "-hide"); |
|
1574 if (f & PM_HIDE) |
|
1575 val = dyncat(val, "-hideval"); |
|
1576 if (f & PM_SPECIAL) |
|
1577 val = dyncat(val, "-special"); |
|
1578 vunset = 0; |
|
1579 } else |
|
1580 val = dupstring(""); |
|
1581 |
|
1582 v = NULL; |
|
1583 isarr = 0; |
|
1584 } |
|
1585 } |
|
1586 /* |
|
1587 * We get in here two ways; either we need to convert v into |
|
1588 * the local value system, or we need to get rid of brackets |
|
1589 * even if there isn't a v. |
|
1590 */ |
|
1591 while (v || ((inbrace || (unset(KSHARRAYS) && vunset)) && isbrack(*s))) { |
|
1592 if (!v) { |
|
1593 /* |
|
1594 * Index applied to non-existent parameter; we may or may |
|
1595 * not have a value to index, however. Create a temporary |
|
1596 * empty parameter as a trick, and index on that. This |
|
1597 * usually happens the second time around the loop when |
|
1598 * we've used up the original parameter value and want to |
|
1599 * apply a subscript to what's left. However, it's also |
|
1600 * possible it's got something to do with some of that murky |
|
1601 * passing of -1's as the third argument to fetchvalue() to |
|
1602 * inhibit bracket parsing at that stage. |
|
1603 */ |
|
1604 Param pm; |
|
1605 char *os = s; |
|
1606 |
|
1607 if (!isbrack(*s)) |
|
1608 break; |
|
1609 if (vunset) { |
|
1610 val = dupstring(""); |
|
1611 isarr = 0; |
|
1612 } |
|
1613 pm = createparam(nulstring, isarr ? PM_ARRAY : PM_SCALAR); |
|
1614 DPUTS(!pm, "BUG: parameter not created"); |
|
1615 if (isarr) |
|
1616 pm->u.arr = aval; |
|
1617 else |
|
1618 pm->u.str = val; |
|
1619 v = (Value) hcalloc(sizeof *v); |
|
1620 v->isarr = isarr; |
|
1621 v->pm = pm; |
|
1622 v->end = -1; |
|
1623 if (getindex(&s, v, qt) || s == os) |
|
1624 break; |
|
1625 } |
|
1626 /* |
|
1627 * This is where we extract a value (we know now we have |
|
1628 * one) into the local parameters for a scalar (val) or |
|
1629 * array (aval) value. TODO: move val and aval into |
|
1630 * a structure with a discriminator. Hope we can make |
|
1631 * more things array values at this point and dearrayify later. |
|
1632 * v->isarr tells us whether the stuff from down below looks |
|
1633 * like an array. Unlike multsub() this is probably clean |
|
1634 * enough to keep, although possibly the parameter passing |
|
1635 * needs reorganising. |
|
1636 * |
|
1637 * I think we get to discard the existing value of isarr |
|
1638 * here because it's already been taken account of, either |
|
1639 * in the subexp stuff or immediately above. |
|
1640 */ |
|
1641 if ((isarr = v->isarr)) { |
|
1642 /* No way to get here with v->inv != 0, so getvaluearr() * |
|
1643 * is called by getarrvalue(); needn't test PM_HASHED. */ |
|
1644 if (v->isarr == SCANPM_WANTINDEX) { |
|
1645 isarr = v->isarr = 0; |
|
1646 val = dupstring(v->pm->nam); |
|
1647 } else |
|
1648 aval = getarrvalue(v); |
|
1649 } else { |
|
1650 /* Value retrieved from parameter/subexpression is scalar */ |
|
1651 if (v->pm->flags & PM_ARRAY) { |
|
1652 /* |
|
1653 * Although the value is a scalar, the parameter |
|
1654 * itself is an array. Presumably this is due to |
|
1655 * being quoted, or doing single substitution or something, |
|
1656 * TODO: we're about to do some definitely stringy |
|
1657 * stuff, so something like this bit is probably |
|
1658 * necessary. However, I'd like to leave any |
|
1659 * necessary joining of arrays until this point |
|
1660 * to avoid the multsub() horror. |
|
1661 */ |
|
1662 int tmplen = arrlen(v->pm->gsu.a->getfn(v->pm)); |
|
1663 |
|
1664 if (v->start < 0) |
|
1665 v->start += tmplen + v->inv; |
|
1666 if (!v->inv && (v->start >= tmplen || v->start < 0)) |
|
1667 vunset = 1; |
|
1668 } |
|
1669 if (!vunset) { |
|
1670 /* |
|
1671 * There really is a value. Apply any necessary |
|
1672 * padding or case transformation. Note these |
|
1673 * are the per-parameter transformations specified |
|
1674 * with typeset, not the per-substitution ones set |
|
1675 * by flags. TODO: maybe therefore this would |
|
1676 * be more consistent if moved into getstrvalue()? |
|
1677 * Bet that's easier said than done. |
|
1678 */ |
|
1679 val = getstrvalue(v); |
|
1680 fwidth = v->pm->width ? v->pm->width : (int)strlen(val); |
|
1681 switch (v->pm->flags & (PM_LEFT | PM_RIGHT_B | PM_RIGHT_Z)) { |
|
1682 char *t; |
|
1683 unsigned int t0; |
|
1684 |
|
1685 case PM_LEFT: |
|
1686 case PM_LEFT | PM_RIGHT_Z: |
|
1687 t = val; |
|
1688 if (v->pm->flags & PM_RIGHT_Z) |
|
1689 while (*t == '0') |
|
1690 t++; |
|
1691 else |
|
1692 while (iblank(*t)) |
|
1693 t++; |
|
1694 val = (char *) hcalloc(fwidth + 1); |
|
1695 val[fwidth] = '\0'; |
|
1696 if ((t0 = strlen(t)) > fwidth) |
|
1697 t0 = fwidth; |
|
1698 memset(val, ' ', fwidth); |
|
1699 strncpy(val, t, t0); |
|
1700 break; |
|
1701 case PM_RIGHT_B: |
|
1702 case PM_RIGHT_Z: |
|
1703 case PM_RIGHT_Z | PM_RIGHT_B: |
|
1704 { |
|
1705 int zero = 1; |
|
1706 |
|
1707 if (strlen(val) < fwidth) { |
|
1708 char *valprefend = val; |
|
1709 if (v->pm->flags & PM_RIGHT_Z) { |
|
1710 /* |
|
1711 * This is a documented feature: when deciding |
|
1712 * whether to pad with zeroes, ignore |
|
1713 * leading blanks already in the value; |
|
1714 * only look for numbers after that. |
|
1715 * Not sure how useful this really is. |
|
1716 * It's certainly confusing to code around. |
|
1717 */ |
|
1718 for (t = val; iblank(*t); t++) |
|
1719 ; |
|
1720 /* |
|
1721 * Allow padding after initial minus |
|
1722 * for numeric variables. |
|
1723 */ |
|
1724 if ((v->pm->flags & |
|
1725 (PM_INTEGER|PM_EFLOAT|PM_FFLOAT)) && |
|
1726 *t == '-') |
|
1727 t++; |
|
1728 /* |
|
1729 * Allow padding after initial 0x or |
|
1730 * base# for integer variables. |
|
1731 */ |
|
1732 if (v->pm->flags & PM_INTEGER) { |
|
1733 if (isset(CBASES) && |
|
1734 t[0] == '0' && t[1] == 'x') |
|
1735 t += 2; |
|
1736 else if ((valprefend = strchr(t, '#'))) |
|
1737 t = valprefend + 1; |
|
1738 } |
|
1739 valprefend = t; |
|
1740 if (!*t) |
|
1741 zero = 0; |
|
1742 else if (v->pm->flags & |
|
1743 (PM_INTEGER|PM_EFLOAT|PM_FFLOAT)) { |
|
1744 /* zero always OK */ |
|
1745 } else if (!idigit(*t)) |
|
1746 zero = 0; |
|
1747 } |
|
1748 t = (char *) hcalloc(fwidth + 1); |
|
1749 memset(t, (((v->pm->flags & PM_RIGHT_B) || !zero) ? |
|
1750 ' ' : '0'), fwidth); |
|
1751 /* |
|
1752 * How can the following trigger? We |
|
1753 * haven't altered val or fwidth since |
|
1754 * the last time we tested this. |
|
1755 */ |
|
1756 if ((t0 = strlen(val)) > fwidth) |
|
1757 t0 = fwidth; |
|
1758 /* |
|
1759 * Copy - or 0x or base# before any padding |
|
1760 * zeroes. |
|
1761 */ |
|
1762 if (zero && val != valprefend) { |
|
1763 int preflen = valprefend - val; |
|
1764 memcpy(t, val, preflen); |
|
1765 strcpy(t + (fwidth - t0) + preflen, |
|
1766 valprefend); |
|
1767 } else |
|
1768 strcpy(t + (fwidth - t0), val); |
|
1769 val = t; |
|
1770 } else { |
|
1771 t = (char *) hcalloc(fwidth + 1); |
|
1772 t[fwidth] = '\0'; |
|
1773 strncpy(t, val + strlen(val) - fwidth, fwidth); |
|
1774 val = t; |
|
1775 } |
|
1776 } |
|
1777 break; |
|
1778 } |
|
1779 switch (v->pm->flags & (PM_LOWER | PM_UPPER)) { |
|
1780 char *t; |
|
1781 |
|
1782 case PM_LOWER: |
|
1783 t = val; |
|
1784 for (; (c = *t); t++) |
|
1785 *t = tulower(c); |
|
1786 break; |
|
1787 case PM_UPPER: |
|
1788 t = val; |
|
1789 for (; (c = *t); t++) |
|
1790 *t = tuupper(c); |
|
1791 break; |
|
1792 } |
|
1793 } |
|
1794 } |
|
1795 /* |
|
1796 * Finished with the original parameter and its indices; |
|
1797 * carry on looping to see if we need to do more indexing. |
|
1798 * This means we finally get rid of v in favour of val and |
|
1799 * aval. We could do with somehow encapsulating the bit |
|
1800 * where we need v. |
|
1801 */ |
|
1802 v = NULL; |
|
1803 if (!inbrace) |
|
1804 break; |
|
1805 } |
|
1806 /* |
|
1807 * We're now past the name or subexpression; the only things |
|
1808 * which can happen now are a closing brace, one of the standard |
|
1809 * parameter postmodifiers, or a history-style colon-modifier. |
|
1810 * |
|
1811 * Again, this duplicates tests for characters we're about to |
|
1812 * examine properly later on. |
|
1813 */ |
|
1814 if (inbrace && |
|
1815 (c = *s) != '-' && c != '+' && c != ':' && c != '%' && c != '/' && |
|
1816 c != '=' && c != Equals && |
|
1817 c != '#' && c != Pound && |
|
1818 c != '?' && c != Quest && |
|
1819 c != '}' && c != Outbrace) { |
|
1820 zerr("bad substitution", NULL, 0); |
|
1821 return NULL; |
|
1822 } |
|
1823 /* |
|
1824 * Join arrays up if we're in quotes and there isn't some |
|
1825 * override such as (@). |
|
1826 * TODO: hmm, if we're called as part of some recursive |
|
1827 * substitution do we want to delay this until we get back to |
|
1828 * the top level? Or is if there's a qt (i.e. this parameter |
|
1829 * substitution is in quotes) always good enough? Potentially |
|
1830 * we may be OK by now --- all potential `@'s and subexpressions |
|
1831 * have been handled, including any [@] index which comes up |
|
1832 * by virtue of v->isarr being set to SCANPM_ISVAR_AT which |
|
1833 * is now in isarr. |
|
1834 * |
|
1835 * However, if we are replacing multsub() with something that |
|
1836 * doesn't mangle arrays, we may need to delay this step until after |
|
1837 * the foo:- or foo:= or whatever that causes that. Note the value |
|
1838 * (string or array) at this point is irrelevant if we are going to |
|
1839 * be doing that. This would mean // and stuff get applied |
|
1840 * arraywise even if quoted. That's probably wrong, so maybe |
|
1841 * this just stays. |
|
1842 * |
|
1843 * We do a separate stage of dearrayification in the YUK chunk, |
|
1844 * I think mostly because of the way we make array or scalar |
|
1845 * values appear to the caller. |
|
1846 */ |
|
1847 if (isarr) { |
|
1848 if (nojoin) |
|
1849 isarr = -1; |
|
1850 if (qt && !getlen && isarr > 0) { |
|
1851 val = sepjoin(aval, sep, 1); |
|
1852 isarr = 0; |
|
1853 } |
|
1854 } |
|
1855 |
|
1856 idend = s; |
|
1857 if (inbrace) { |
|
1858 /* |
|
1859 * This is to match a closing double quote in case |
|
1860 * we didn't have a subexpression, e.g. ${"foo"}. |
|
1861 * This form is pointless, but logically it ought to work. |
|
1862 */ |
|
1863 while (INULL(*s)) |
|
1864 s++; |
|
1865 } |
|
1866 /* |
|
1867 * We don't yet know whether a `:' introduces a history-style |
|
1868 * colon modifier or qualifies something like ${...:=...}. |
|
1869 * But if we remember the colon here it's easy to check later. |
|
1870 */ |
|
1871 if ((colf = *s == ':')) |
|
1872 s++; |
|
1873 |
|
1874 |
|
1875 /* fstr is to be the text following the substitution. If we have * |
|
1876 * braces, we look for it here, else we infer it later on. */ |
|
1877 fstr = s; |
|
1878 if (inbrace) { |
|
1879 int bct; |
|
1880 for (bct = 1; (c = *fstr); fstr++) { |
|
1881 if (c == Inbrace) |
|
1882 bct++; |
|
1883 else if (c == Outbrace && !--bct) |
|
1884 break; |
|
1885 } |
|
1886 |
|
1887 if (bct) { |
|
1888 noclosebrace: |
|
1889 zerr("closing brace expected", NULL, 0); |
|
1890 return NULL; |
|
1891 } |
|
1892 if (c) |
|
1893 *fstr++ = '\0'; |
|
1894 } |
|
1895 |
|
1896 /* Check for ${..?..} or ${..=..} or one of those. * |
|
1897 * Only works if the name is in braces. */ |
|
1898 |
|
1899 if (inbrace && ((c = *s) == '-' || |
|
1900 c == '+' || |
|
1901 c == ':' || /* i.e. a doubled colon */ |
|
1902 c == '=' || c == Equals || |
|
1903 c == '%' || |
|
1904 c == '#' || c == Pound || |
|
1905 c == '?' || c == Quest || |
|
1906 c == '/')) { |
|
1907 |
|
1908 /* |
|
1909 * Default index is 1 if no (I) or (I) gave zero. But |
|
1910 * why don't we set the default explicitly at the start |
|
1911 * and massage any passed index where we set flnum anyway? |
|
1912 */ |
|
1913 if (!flnum) |
|
1914 flnum++; |
|
1915 if (c == '%') |
|
1916 flags |= SUB_END; |
|
1917 |
|
1918 /* Check for ${..%%..} or ${..##..} */ |
|
1919 if ((c == '%' || c == '#' || c == Pound) && c == s[1]) { |
|
1920 s++; |
|
1921 /* we have %%, not %, or ##, not # */ |
|
1922 flags |= SUB_LONG; |
|
1923 } |
|
1924 s++; |
|
1925 if (s[-1] == '/') { |
|
1926 char *ptr; |
|
1927 /* |
|
1928 * previous flags are irrelevant, except for (S) which |
|
1929 * indicates shortest substring; else look for longest. |
|
1930 */ |
|
1931 flags = (flags & SUB_SUBSTR) ? 0 : SUB_LONG; |
|
1932 if ((c = *s) == '/') { |
|
1933 /* doubled, so replace all occurrences */ |
|
1934 flags |= SUB_GLOBAL; |
|
1935 c = *++s; |
|
1936 } |
|
1937 /* Check for anchored substitution */ |
|
1938 if (c == '%') { |
|
1939 /* anchor at tail */ |
|
1940 flags |= SUB_END; |
|
1941 s++; |
|
1942 } else if (c == '#' || c == Pound) { |
|
1943 /* anchor at head: this is the `normal' case in getmatch */ |
|
1944 s++; |
|
1945 } else |
|
1946 flags |= SUB_SUBSTR; |
|
1947 /* |
|
1948 * Find the / marking the end of the search pattern. |
|
1949 * If there isn't one, we're just going to delete that, |
|
1950 * i.e. replace it with an empty string. |
|
1951 * |
|
1952 * We used to use double backslashes to quote slashes, |
|
1953 * but actually that was buggy and using a single backslash |
|
1954 * is easier and more obvious. |
|
1955 */ |
|
1956 for (ptr = s; (c = *ptr) && c != '/'; ptr++) |
|
1957 { |
|
1958 if ((c == Bnull || c == '\\') && ptr[1]) |
|
1959 { |
|
1960 if (ptr[1] == '/') |
|
1961 chuck(ptr); |
|
1962 else |
|
1963 ptr++; |
|
1964 } |
|
1965 } |
|
1966 replstr = (*ptr && ptr[1]) ? ptr+1 : ""; |
|
1967 *ptr = '\0'; |
|
1968 } |
|
1969 |
|
1970 /* See if this was ${...:-...}, ${...:=...}, etc. */ |
|
1971 if (colf) |
|
1972 flags |= SUB_ALL; |
|
1973 /* |
|
1974 * With no special flags, i.e. just a # or % or whatever, |
|
1975 * the matched portion is removed and we keep the rest. |
|
1976 * We also want the rest when we're doing a substitution. |
|
1977 */ |
|
1978 if (!(flags & (SUB_MATCH|SUB_REST|SUB_BIND|SUB_EIND|SUB_LEN))) |
|
1979 flags |= SUB_REST; |
|
1980 |
|
1981 if (colf && !vunset) |
|
1982 vunset = (isarr) ? !*aval : !*val || (*val == Nularg && !val[1]); |
|
1983 |
|
1984 switch (s[-1]) { |
|
1985 case '+': |
|
1986 if (vunset) { |
|
1987 val = dupstring(""); |
|
1988 copied = 1; |
|
1989 isarr = 0; |
|
1990 break; |
|
1991 } |
|
1992 vunset = 1; |
|
1993 /* Fall Through! */ |
|
1994 case '-': |
|
1995 if (vunset) { |
|
1996 val = dupstring(s); |
|
1997 /* |
|
1998 * This is not good enough for sh emulation! Sh would |
|
1999 * split unquoted substrings, yet not split quoted ones |
|
2000 * (except according to $@ rules); but this leaves the |
|
2001 * unquoted substrings unsplit, and other code below |
|
2002 * for spbreak splits even within the quoted substrings. |
|
2003 * |
|
2004 * TODO: I think multsub needs to be told enough to |
|
2005 * decide about splitting with spbreak at this point |
|
2006 * (and equally in the `=' handler below). Then |
|
2007 * we can turn off spbreak to avoid the join & split |
|
2008 * nastiness later. |
|
2009 * |
|
2010 * What we really want to do is make this look as |
|
2011 * if it were the result of an assignment from |
|
2012 * the same value, taking account of quoting. |
|
2013 */ |
|
2014 multsub(&val, (aspar ? NULL : &aval), &isarr, NULL); |
|
2015 copied = 1; |
|
2016 } |
|
2017 break; |
|
2018 case ':': |
|
2019 /* this must be `::=', unconditional assignment */ |
|
2020 if (*s != '=' && *s != Equals) |
|
2021 goto noclosebrace; |
|
2022 vunset = 1; |
|
2023 s++; |
|
2024 /* Fall through */ |
|
2025 case '=': |
|
2026 case Equals: |
|
2027 if (vunset) { |
|
2028 char sav = *idend; |
|
2029 int l; |
|
2030 |
|
2031 *idend = '\0'; |
|
2032 val = dupstring(s); |
|
2033 isarr = 0; |
|
2034 /* |
|
2035 * TODO: this is one of those places where I don't |
|
2036 * think we want to do the joining until later on. |
|
2037 * We also need to handle spbreak and spsep at this |
|
2038 * point and unset them. |
|
2039 */ |
|
2040 if (spsep || spbreak || !arrasg) |
|
2041 multsub(&val, NULL, NULL, sep); |
|
2042 else |
|
2043 multsub(&val, &aval, &isarr, NULL); |
|
2044 if (arrasg) { |
|
2045 /* |
|
2046 * This is an array assignment in a context |
|
2047 * where we have no syntactic way of finding |
|
2048 * out what an array element is. So we just guess. |
|
2049 */ |
|
2050 char *arr[2], **t, **a, **p; |
|
2051 if (spsep || spbreak) { |
|
2052 aval = sepsplit(val, spsep, 0, 1); |
|
2053 isarr = 2; |
|
2054 l = arrlen(aval); |
|
2055 if (l && !*(aval[l-1])) |
|
2056 l--; |
|
2057 if (l && !**aval) |
|
2058 l--, t = aval + 1; |
|
2059 else |
|
2060 t = aval; |
|
2061 } else if (!isarr) { |
|
2062 if (!*val && arrasg > 1) { |
|
2063 arr[0] = NULL; |
|
2064 l = 0; |
|
2065 } else { |
|
2066 arr[0] = val; |
|
2067 arr[1] = NULL; |
|
2068 l = 1; |
|
2069 } |
|
2070 t = aval = arr; |
|
2071 } else |
|
2072 l = arrlen(aval), t = aval; |
|
2073 p = a = zalloc(sizeof(char *) * (l + 1)); |
|
2074 while (l--) { |
|
2075 untokenize(*t); |
|
2076 *p++ = ztrdup(*t++); |
|
2077 } |
|
2078 *p++ = NULL; |
|
2079 if (arrasg > 1) { |
|
2080 Param pm = sethparam(idbeg, a); |
|
2081 if (pm) |
|
2082 aval = paramvalarr(pm->gsu.h->getfn(pm), hkeys|hvals); |
|
2083 } else |
|
2084 setaparam(idbeg, a); |
|
2085 } else { |
|
2086 untokenize(val); |
|
2087 setsparam(idbeg, ztrdup(val)); |
|
2088 } |
|
2089 *idend = sav; |
|
2090 copied = 1; |
|
2091 if (isarr) { |
|
2092 if (nojoin) |
|
2093 isarr = -1; |
|
2094 if (qt && !getlen && isarr > 0 && !spsep && spbreak < 2) { |
|
2095 val = sepjoin(aval, sep, 1); |
|
2096 isarr = 0; |
|
2097 } |
|
2098 sep = spsep = NULL; |
|
2099 spbreak = 0; |
|
2100 } |
|
2101 } |
|
2102 break; |
|
2103 case '?': |
|
2104 case Quest: |
|
2105 if (vunset) { |
|
2106 char *msg; |
|
2107 |
|
2108 *idend = '\0'; |
|
2109 msg = tricat(idbeg, ": ", *s ? s : "parameter not set"); |
|
2110 zerr("%s", msg, 0); |
|
2111 zsfree(msg); |
|
2112 if (!interact) |
|
2113 exit(1); |
|
2114 return NULL; |
|
2115 } |
|
2116 break; |
|
2117 case '%': |
|
2118 case '#': |
|
2119 case Pound: |
|
2120 case '/': |
|
2121 /* This once was executed only `if (qt) ...'. But with that |
|
2122 * patterns in a expansion resulting from a ${(e)...} aren't |
|
2123 * tokenized even though this function thinks they are (it thinks |
|
2124 * they are because subst_parse_str() turns Qstring tokens |
|
2125 * into String tokens and for unquoted parameter expansions the |
|
2126 * lexer normally does tokenize patterns inside parameter |
|
2127 * expansions). */ |
|
2128 { |
|
2129 int one = noerrs, oef = errflag, haserr; |
|
2130 |
|
2131 if (!quoteerr) |
|
2132 noerrs = 1; |
|
2133 haserr = parse_subst_string(s); |
|
2134 noerrs = one; |
|
2135 if (!quoteerr) { |
|
2136 errflag = oef; |
|
2137 if (haserr) |
|
2138 shtokenize(s); |
|
2139 } else if (haserr || errflag) { |
|
2140 zerr("parse error in ${...%c...} substitution", |
|
2141 NULL, s[-1]); |
|
2142 return NULL; |
|
2143 } |
|
2144 } |
|
2145 { |
|
2146 #if 0 |
|
2147 /* |
|
2148 * This allows # and % to be at the start of |
|
2149 * a parameter in the substitution, which is |
|
2150 * a bit nasty, and can be done (although |
|
2151 * less efficiently) with anchors. |
|
2152 */ |
|
2153 |
|
2154 char t = s[-1]; |
|
2155 |
|
2156 singsub(&s); |
|
2157 |
|
2158 if (t == '/' && (flags & SUB_SUBSTR)) { |
|
2159 if ((c = *s) == '#' || c == '%') { |
|
2160 flags &= ~SUB_SUBSTR; |
|
2161 if (c == '%') |
|
2162 flags |= SUB_END; |
|
2163 s++; |
|
2164 } else if (c == '\\') { |
|
2165 s++; |
|
2166 } |
|
2167 } |
|
2168 #else |
|
2169 singsub(&s); |
|
2170 #endif |
|
2171 } |
|
2172 |
|
2173 /* |
|
2174 * Either loop over an array doing replacements or |
|
2175 * do the replacement on a string. |
|
2176 */ |
|
2177 if (!vunset && isarr) { |
|
2178 getmatcharr(&aval, s, flags, flnum, replstr); |
|
2179 copied = 1; |
|
2180 } else { |
|
2181 if (vunset) |
|
2182 val = dupstring(""); |
|
2183 getmatch(&val, s, flags, flnum, replstr); |
|
2184 copied = 1; |
|
2185 } |
|
2186 break; |
|
2187 } |
|
2188 } else { /* no ${...=...} or anything, but possible modifiers. */ |
|
2189 /* |
|
2190 * Handle ${+...}. TODO: strange, why do we handle this only |
|
2191 * if there isn't a trailing modifier? Why don't we do this |
|
2192 * e.g. when we handle the ${(t)...} flag? |
|
2193 */ |
|
2194 if (chkset) { |
|
2195 val = dupstring(vunset ? "0" : "1"); |
|
2196 isarr = 0; |
|
2197 } else if (vunset) { |
|
2198 if (unset(UNSET)) { |
|
2199 *idend = '\0'; |
|
2200 zerr("%s: parameter not set", idbeg, 0); |
|
2201 return NULL; |
|
2202 } |
|
2203 val = dupstring(""); |
|
2204 } |
|
2205 if (colf) { |
|
2206 /* |
|
2207 * History style colon modifiers. May need to apply |
|
2208 * on multiple elements of an array. |
|
2209 */ |
|
2210 s--; |
|
2211 if (unset(KSHARRAYS) || inbrace) { |
|
2212 if (!isarr) |
|
2213 modify(&val, &s); |
|
2214 else { |
|
2215 char *ss; |
|
2216 char **ap = aval; |
|
2217 char **pp = aval = (char **) hcalloc(sizeof(char *) * |
|
2218 (arrlen(aval) + 1)); |
|
2219 |
|
2220 while ((*pp = *ap++)) { |
|
2221 ss = s; |
|
2222 modify(pp++, &ss); |
|
2223 } |
|
2224 if (pp == aval) { |
|
2225 char *t = ""; |
|
2226 ss = s; |
|
2227 modify(&t, &ss); |
|
2228 } |
|
2229 s = ss; |
|
2230 } |
|
2231 copied = 1; |
|
2232 if (inbrace && *s) { |
|
2233 if (*s == ':' && !imeta(s[1])) |
|
2234 zerr("unrecognized modifier `%c'", NULL, s[1]); |
|
2235 else |
|
2236 zerr("unrecognized modifier", NULL, 0); |
|
2237 return NULL; |
|
2238 } |
|
2239 } |
|
2240 } |
|
2241 if (!inbrace) |
|
2242 fstr = s; |
|
2243 } |
|
2244 if (errflag) |
|
2245 return NULL; |
|
2246 /* |
|
2247 * This handles taking a length with ${#foo} and variations. |
|
2248 * TODO: again. one might naively have thought this had the |
|
2249 * same sort of effect as the ${(t)...} flag and the ${+...} |
|
2250 * test, although in this case we do need the value rather |
|
2251 * than the parameter, so maybe it's a bit different. |
|
2252 */ |
|
2253 if (getlen) { |
|
2254 long len = 0; |
|
2255 char buf[14]; |
|
2256 |
|
2257 if (isarr) { |
|
2258 char **ctr; |
|
2259 int sl = sep ? ztrlen(sep) : 1; |
|
2260 |
|
2261 if (getlen == 1) |
|
2262 for (ctr = aval; *ctr; ctr++, len++); |
|
2263 else if (getlen == 2) { |
|
2264 if (*aval) |
|
2265 for (len = -sl, ctr = aval; |
|
2266 len += sl + ztrlen(*ctr), *++ctr;); |
|
2267 } |
|
2268 else |
|
2269 for (ctr = aval; |
|
2270 *ctr; |
|
2271 len += wordcount(*ctr, spsep, getlen > 3), ctr++); |
|
2272 } else { |
|
2273 if (getlen < 3) |
|
2274 len = ztrlen(val); |
|
2275 else |
|
2276 len = wordcount(val, spsep, getlen > 3); |
|
2277 } |
|
2278 |
|
2279 sprintf(buf, "%ld", len); |
|
2280 val = dupstring(buf); |
|
2281 isarr = 0; |
|
2282 } |
|
2283 /* |
|
2284 * I think this mult_isarr stuff here is used to pass back |
|
2285 * the setting of whether we are an array to multsub, and |
|
2286 * thence to the top-level paramsubst(). The way the |
|
2287 * setting is passed back is completely obscure, however. |
|
2288 * It's presumably at this point because we try to remember |
|
2289 * whether the value was `really' an array before massaging |
|
2290 * some special cases. |
|
2291 * |
|
2292 * TODO: YUK. This is not the right place to turn arrays into |
|
2293 * scalars; we should pass back as an array, and let the calling |
|
2294 * code decide how to deal with it. This is almost certainly |
|
2295 * a lot harder than it sounds. Do we really need to handle |
|
2296 * one-element arrays as scalars at this point? Couldn't |
|
2297 * we just test for it later rather than having a multiple-valued |
|
2298 * wave-function for isarr? |
|
2299 */ |
|
2300 mult_isarr = isarr; |
|
2301 if (isarr > 0 && !plan9 && (!aval || !aval[0])) { |
|
2302 val = dupstring(""); |
|
2303 isarr = 0; |
|
2304 } else if (isarr && aval && aval[0] && !aval[1]) { |
|
2305 /* treat a one-element array as a scalar for purposes of * |
|
2306 * concatenation with surrounding text (some${param}thing) * |
|
2307 * and rc_expand_param handling. Note: mult_isarr (above) * |
|
2308 * propagates the true array type from nested expansions. */ |
|
2309 val = aval[0]; |
|
2310 isarr = 0; |
|
2311 } |
|
2312 /* ssub is true when we are called from singsub (via prefork). |
|
2313 * It means that we must join arrays and should not split words. */ |
|
2314 /* |
|
2315 * TODO: this is what is screwing up the use of SH_WORD_SPLIT |
|
2316 * after `:-' etc. If we fix multsub(), we might get away |
|
2317 * with simply unsetting the appropriate flags when they |
|
2318 * get handled. |
|
2319 */ |
|
2320 if (ssub || spbreak || spsep || sep) { |
|
2321 if (isarr) |
|
2322 val = sepjoin(aval, sep, 1), isarr = 0; |
|
2323 if (!ssub && (spbreak || spsep)) { |
|
2324 aval = sepsplit(val, spsep, 0, 1); |
|
2325 if (!aval || !aval[0]) |
|
2326 val = dupstring(""); |
|
2327 else if (!aval[1]) |
|
2328 val = aval[0]; |
|
2329 else |
|
2330 isarr = 2; |
|
2331 } |
|
2332 mult_isarr = isarr; |
|
2333 } |
|
2334 /* |
|
2335 * Perform case modifications. |
|
2336 */ |
|
2337 if (casmod) { |
|
2338 if (isarr) { |
|
2339 char **ap; |
|
2340 |
|
2341 if (!copied) |
|
2342 aval = arrdup(aval), copied = 1; |
|
2343 ap = aval; |
|
2344 |
|
2345 if (casmod == 1) |
|
2346 for (; *ap; ap++) |
|
2347 makeuppercase(ap); |
|
2348 else if (casmod == 2) |
|
2349 for (; *ap; ap++) |
|
2350 makelowercase(ap); |
|
2351 else |
|
2352 for (; *ap; ap++) |
|
2353 makecapitals(ap); |
|
2354 |
|
2355 } else { |
|
2356 if (!copied) |
|
2357 val = dupstring(val), copied = 1; |
|
2358 if (casmod == 1) |
|
2359 makeuppercase(&val); |
|
2360 else if (casmod == 2) |
|
2361 makelowercase(&val); |
|
2362 else |
|
2363 makecapitals(&val); |
|
2364 } |
|
2365 } |
|
2366 /* |
|
2367 * Perform prompt-style modifications. |
|
2368 */ |
|
2369 if (presc) { |
|
2370 int ops = opts[PROMPTSUBST], opb = opts[PROMPTBANG]; |
|
2371 int opp = opts[PROMPTPERCENT], len; |
|
2372 |
|
2373 if (presc < 2) { |
|
2374 opts[PROMPTPERCENT] = 1; |
|
2375 opts[PROMPTSUBST] = opts[PROMPTBANG] = 0; |
|
2376 } |
|
2377 /* |
|
2378 * TODO: It would be really quite nice to abstract the |
|
2379 * isarr and !isarr code into a function which gets |
|
2380 * passed a pointer to a function with the effect of |
|
2381 * the promptexpand bit. Then we could use this for |
|
2382 * a lot of stuff and bury val/aval/isarr inside a structure |
|
2383 * which gets passed to it. |
|
2384 */ |
|
2385 if (isarr) { |
|
2386 char **ap; |
|
2387 |
|
2388 if (!copied) |
|
2389 aval = arrdup(aval), copied = 1; |
|
2390 ap = aval; |
|
2391 for (; *ap; ap++) { |
|
2392 char *tmps; |
|
2393 unmetafy(*ap, &len); |
|
2394 untokenize(*ap); |
|
2395 tmps = unmetafy(promptexpand(metafy(*ap, len, META_NOALLOC), |
|
2396 0, NULL, NULL), &len); |
|
2397 *ap = dupstring(tmps); |
|
2398 free(tmps); |
|
2399 } |
|
2400 } else { |
|
2401 char *tmps; |
|
2402 if (!copied) |
|
2403 val = dupstring(val), copied = 1; |
|
2404 unmetafy(val, &len); |
|
2405 untokenize(val); |
|
2406 tmps = unmetafy(promptexpand(metafy(val, len, META_NOALLOC), |
|
2407 0, NULL, NULL), &len); |
|
2408 val = dupstring(tmps); |
|
2409 free(tmps); |
|
2410 } |
|
2411 opts[PROMPTSUBST] = ops; |
|
2412 opts[PROMPTBANG] = opb; |
|
2413 opts[PROMPTPERCENT] = opp; |
|
2414 } |
|
2415 /* |
|
2416 * One of the possible set of quotes to apply, depending on |
|
2417 * the repetitions of the (q) flag. |
|
2418 */ |
|
2419 if (quotemod) { |
|
2420 if (--quotetype > 3) |
|
2421 quotetype = 3; |
|
2422 if (isarr) { |
|
2423 char **ap; |
|
2424 |
|
2425 if (!copied) |
|
2426 aval = arrdup(aval), copied = 1; |
|
2427 ap = aval; |
|
2428 |
|
2429 if (quotemod > 0) { |
|
2430 if (quotetype) { |
|
2431 int sl; |
|
2432 char *tmp; |
|
2433 |
|
2434 for (; *ap; ap++) { |
|
2435 int pre = quotetype != 3 ? 1 : 2; |
|
2436 tmp = bslashquote(*ap, NULL, quotetype); |
|
2437 sl = strlen(tmp); |
|
2438 *ap = (char *) zhalloc(pre + sl + 2); |
|
2439 strcpy((*ap) + pre, tmp); |
|
2440 ap[0][pre - 1] = ap[0][pre + sl] = (quotetype != 2 ? '\'' : '"'); |
|
2441 ap[0][pre + sl + 1] = '\0'; |
|
2442 if (quotetype == 3) |
|
2443 ap[0][0] = '$'; |
|
2444 } |
|
2445 } else |
|
2446 for (; *ap; ap++) |
|
2447 *ap = bslashquote(*ap, NULL, 0); |
|
2448 } else { |
|
2449 int one = noerrs, oef = errflag, haserr = 0; |
|
2450 |
|
2451 if (!quoteerr) |
|
2452 noerrs = 1; |
|
2453 for (; *ap; ap++) { |
|
2454 haserr |= parse_subst_string(*ap); |
|
2455 remnulargs(*ap); |
|
2456 untokenize(*ap); |
|
2457 } |
|
2458 noerrs = one; |
|
2459 if (!quoteerr) |
|
2460 errflag = oef; |
|
2461 else if (haserr || errflag) { |
|
2462 zerr("parse error in parameter value", NULL, 0); |
|
2463 return NULL; |
|
2464 } |
|
2465 } |
|
2466 } else { |
|
2467 if (!copied) |
|
2468 val = dupstring(val), copied = 1; |
|
2469 if (quotemod > 0) { |
|
2470 if (quotetype) { |
|
2471 int pre = quotetype != 3 ? 1 : 2; |
|
2472 int sl; |
|
2473 char *tmp; |
|
2474 tmp = bslashquote(val, NULL, quotetype); |
|
2475 sl = strlen(tmp); |
|
2476 val = (char *) zhalloc(pre + sl + 2); |
|
2477 strcpy(val + pre, tmp); |
|
2478 val[pre - 1] = val[pre + sl] = (quotetype != 2 ? '\'' : '"'); |
|
2479 val[pre + sl + 1] = '\0'; |
|
2480 if (quotetype == 3) |
|
2481 val[0] = '$'; |
|
2482 } else |
|
2483 val = bslashquote(val, NULL, 0); |
|
2484 } else { |
|
2485 int one = noerrs, oef = errflag, haserr; |
|
2486 |
|
2487 if (!quoteerr) |
|
2488 noerrs = 1; |
|
2489 haserr = parse_subst_string(val); |
|
2490 noerrs = one; |
|
2491 if (!quoteerr) |
|
2492 errflag = oef; |
|
2493 else if (haserr || errflag) { |
|
2494 zerr("parse error in parameter value", NULL, 0); |
|
2495 return NULL; |
|
2496 } |
|
2497 remnulargs(val); |
|
2498 untokenize(val); |
|
2499 } |
|
2500 } |
|
2501 } |
|
2502 /* |
|
2503 * Transform special characters in the string to make them |
|
2504 * printable. |
|
2505 */ |
|
2506 if (visiblemod) { |
|
2507 if (isarr) { |
|
2508 char **ap; |
|
2509 if (!copied) |
|
2510 aval = arrdup(aval), copied = 1; |
|
2511 for (ap = aval; *ap; ap++) |
|
2512 *ap = nicedupstring(*ap); |
|
2513 } else { |
|
2514 if (!copied) |
|
2515 val = dupstring(val), copied = 1; |
|
2516 val = nicedupstring(val); |
|
2517 } |
|
2518 } |
|
2519 /* |
|
2520 * Nothing particularly to do with SH_WORD_SPLIT --- this |
|
2521 * performs lexical splitting on a string as specified by |
|
2522 * the (z) flag. |
|
2523 */ |
|
2524 if (shsplit) { |
|
2525 LinkList list = NULL; |
|
2526 |
|
2527 if (isarr) { |
|
2528 char **ap; |
|
2529 for (ap = aval; *ap; ap++) |
|
2530 list = bufferwords(list, *ap, NULL); |
|
2531 isarr = 0; |
|
2532 } else |
|
2533 list = bufferwords(NULL, val, NULL); |
|
2534 |
|
2535 if (!list || !firstnode(list)) |
|
2536 val = dupstring(""); |
|
2537 else if (!nextnode(firstnode(list))) |
|
2538 val = getdata(firstnode(list)); |
|
2539 else { |
|
2540 char **ap; |
|
2541 LinkNode node; |
|
2542 |
|
2543 aval = ap = (char **) zhalloc((countlinknodes(list) + 1) * |
|
2544 sizeof(char *)); |
|
2545 for (node = firstnode(list); node; incnode(node)) |
|
2546 *ap++ = (char *) getdata(node); |
|
2547 *ap = NULL; |
|
2548 mult_isarr = isarr = 2; |
|
2549 } |
|
2550 copied = 1; |
|
2551 } |
|
2552 /* |
|
2553 * TODO: hmm. At this point we have to be on our toes about |
|
2554 * whether we're putting stuff into a line or not, i.e. |
|
2555 * we don't want to do this from a recursive call; this is |
|
2556 * probably part of the point of the mult_isarr monkey business. |
|
2557 * Rather than passing back flags in a non-trivial way, maybe |
|
2558 * we could decide on the basis of flags passed down to us. |
|
2559 * |
|
2560 * This is the ideal place to do any last-minute conversion from |
|
2561 * array to strings. However, given all the transformations we've |
|
2562 * already done, probably if it's going to be done it will already |
|
2563 * have been. (I'd really like to keep everything in aval or |
|
2564 * equivalent and only locally decide if we need to treat it |
|
2565 * as a scalar.) |
|
2566 */ |
|
2567 if (isarr) { |
|
2568 char *x; |
|
2569 char *y; |
|
2570 int xlen; |
|
2571 int i; |
|
2572 LinkNode on = n; |
|
2573 |
|
2574 /* Handle the (u) flag; we need this before the next test */ |
|
2575 if (unique) { |
|
2576 if(!copied) |
|
2577 aval = arrdup(aval); |
|
2578 |
|
2579 i = arrlen(aval); |
|
2580 if (i > 1) |
|
2581 zhuniqarray(aval); |
|
2582 } |
|
2583 if ((!aval[0] || !aval[1]) && !plan9) { |
|
2584 /* |
|
2585 * Empty array or single element. Currently you only |
|
2586 * get a single element array at this point from the |
|
2587 * unique expansion above. but we can potentially |
|
2588 * have other reasons. |
|
2589 * |
|
2590 * The following test removes the markers |
|
2591 * from surrounding double quotes, but I don't know why |
|
2592 * that's necessary. |
|
2593 */ |
|
2594 int vallen; |
|
2595 if (aptr > (char *) getdata(n) && |
|
2596 aptr[-1] == Dnull && *fstr == Dnull) |
|
2597 *--aptr = '\0', fstr++; |
|
2598 vallen = aval[0] ? strlen(aval[0]) : 0; |
|
2599 y = (char *) hcalloc((aptr - ostr) + vallen + strlen(fstr) + 1); |
|
2600 strcpy(y, ostr); |
|
2601 *str = y + (aptr - ostr); |
|
2602 if (vallen) |
|
2603 { |
|
2604 strcpy(*str, aval[0]); |
|
2605 *str += vallen; |
|
2606 } |
|
2607 strcpy(*str, fstr); |
|
2608 setdata(n, y); |
|
2609 return n; |
|
2610 } |
|
2611 /* Handle (o) and (O) and their variants */ |
|
2612 if (sortit) { |
|
2613 if (!copied) |
|
2614 aval = arrdup(aval); |
|
2615 if (indord) { |
|
2616 if (sortit & 2) { |
|
2617 char *copy; |
|
2618 char **end = aval + arrlen(aval) - 1, **start = aval; |
|
2619 |
|
2620 /* reverse the array */ |
|
2621 while (start < end) { |
|
2622 copy = *end; |
|
2623 *end-- = *start; |
|
2624 *start++ = copy; |
|
2625 } |
|
2626 } |
|
2627 } else { |
|
2628 static CompareFn sortfn[] = { |
|
2629 strpcmp, invstrpcmp, cstrpcmp, invcstrpcmp, |
|
2630 nstrpcmp, invnstrpcmp, instrpcmp, invinstrpcmp |
|
2631 }; |
|
2632 |
|
2633 i = arrlen(aval); |
|
2634 if (i && (*aval[i-1] || --i)) |
|
2635 qsort(aval, i, sizeof(char *), sortfn[sortit-1]); |
|
2636 } |
|
2637 } |
|
2638 if (plan9) { |
|
2639 /* Handle RC_EXPAND_PARAM */ |
|
2640 LinkNode tn; |
|
2641 local_list1(tl); |
|
2642 |
|
2643 *--fstr = Marker; |
|
2644 init_list1(tl, fstr); |
|
2645 if (!eval && !stringsubst(&tl, firstnode(&tl), ssub, 0)) |
|
2646 return NULL; |
|
2647 *str = aptr; |
|
2648 tn = firstnode(&tl); |
|
2649 while ((x = *aval++)) { |
|
2650 if (prenum || postnum) |
|
2651 x = dopadding(x, prenum, postnum, preone, postone, |
|
2652 premul, postmul); |
|
2653 if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr)) |
|
2654 return NULL; |
|
2655 xlen = strlen(x); |
|
2656 for (tn = firstnode(&tl); |
|
2657 tn && *(y = (char *) getdata(tn)) == Marker; |
|
2658 incnode(tn)) { |
|
2659 strcatsub(&y, ostr, aptr, x, xlen, y + 1, globsubst, |
|
2660 copied); |
|
2661 if (qt && !*y && isarr != 2) |
|
2662 y = dupstring(nulstring); |
|
2663 if (plan9) |
|
2664 setdata(n, (void *) y), plan9 = 0; |
|
2665 else |
|
2666 insertlinknode(l, n, (void *) y), incnode(n); |
|
2667 } |
|
2668 } |
|
2669 for (; tn; incnode(tn)) { |
|
2670 y = (char *) getdata(tn); |
|
2671 if (*y == Marker) |
|
2672 continue; |
|
2673 if (qt && !*y && isarr != 2) |
|
2674 y = dupstring(nulstring); |
|
2675 if (plan9) |
|
2676 setdata(n, (void *) y), plan9 = 0; |
|
2677 else |
|
2678 insertlinknode(l, n, (void *) y), incnode(n); |
|
2679 } |
|
2680 if (plan9) { |
|
2681 uremnode(l, n); |
|
2682 return n; |
|
2683 } |
|
2684 } else { |
|
2685 /* |
|
2686 * Not RC_EXPAND_PARAM: simply join the first and |
|
2687 * last values. |
|
2688 * TODO: how about removing the restriction that |
|
2689 * aval[1] is non-NULL to promote consistency?, or |
|
2690 * simply changing the test so that we drop into |
|
2691 * the scalar branch, instead of tricking isarr? |
|
2692 */ |
|
2693 x = aval[0]; |
|
2694 if (prenum || postnum) |
|
2695 x = dopadding(x, prenum, postnum, preone, postone, |
|
2696 premul, postmul); |
|
2697 if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr)) |
|
2698 return NULL; |
|
2699 xlen = strlen(x); |
|
2700 strcatsub(&y, ostr, aptr, x, xlen, NULL, globsubst, copied); |
|
2701 if (qt && !*y && isarr != 2) |
|
2702 y = dupstring(nulstring); |
|
2703 setdata(n, (void *) y); |
|
2704 |
|
2705 i = 1; |
|
2706 /* aval[1] is non-null here */ |
|
2707 while (aval[i + 1]) { |
|
2708 x = aval[i++]; |
|
2709 if (prenum || postnum) |
|
2710 x = dopadding(x, prenum, postnum, preone, postone, |
|
2711 premul, postmul); |
|
2712 if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr)) |
|
2713 return NULL; |
|
2714 if (qt && !*x && isarr != 2) |
|
2715 y = dupstring(nulstring); |
|
2716 else { |
|
2717 y = dupstring(x); |
|
2718 if (globsubst) |
|
2719 shtokenize(y); |
|
2720 } |
|
2721 insertlinknode(l, n, (void *) y), incnode(n); |
|
2722 } |
|
2723 |
|
2724 x = aval[i]; |
|
2725 if (prenum || postnum) |
|
2726 x = dopadding(x, prenum, postnum, preone, postone, |
|
2727 premul, postmul); |
|
2728 if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr)) |
|
2729 return NULL; |
|
2730 xlen = strlen(x); |
|
2731 *str = strcatsub(&y, aptr, aptr, x, xlen, fstr, globsubst, copied); |
|
2732 if (qt && !*y && isarr != 2) |
|
2733 y = dupstring(nulstring); |
|
2734 insertlinknode(l, n, (void *) y), incnode(n); |
|
2735 } |
|
2736 if (eval) |
|
2737 n = on; |
|
2738 } else { |
|
2739 /* |
|
2740 * Scalar value. Handle last minute transformations |
|
2741 * such as left- or right-padding and the (e) flag to |
|
2742 * re-evaluate the result. |
|
2743 */ |
|
2744 int xlen; |
|
2745 char *x; |
|
2746 char *y; |
|
2747 |
|
2748 x = val; |
|
2749 if (prenum || postnum) |
|
2750 x = dopadding(x, prenum, postnum, preone, postone, |
|
2751 premul, postmul); |
|
2752 if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr)) |
|
2753 return NULL; |
|
2754 xlen = strlen(x); |
|
2755 *str = strcatsub(&y, ostr, aptr, x, xlen, fstr, globsubst, copied); |
|
2756 if (qt && !*y) |
|
2757 y = dupstring(nulstring); |
|
2758 setdata(n, (void *) y); |
|
2759 } |
|
2760 if (eval) |
|
2761 *str = (char *) getdata(n); |
|
2762 |
|
2763 return n; |
|
2764 } |
|
2765 |
|
2766 /* |
|
2767 * Arithmetic substitution: `a' is the string to be evaluated, `bptr' |
|
2768 * points to the beginning of the string containing it. The tail of |
|
2769 * the string is given by `rest'. *bptr is modified with the substituted |
|
2770 * string. The function returns a pointer to the tail in the substituted |
|
2771 * string. |
|
2772 */ |
|
2773 |
|
2774 /**/ |
|
2775 static char * |
|
2776 arithsubst(char *a, char **bptr, char *rest) |
|
2777 { |
|
2778 char *s = *bptr, *t; |
|
2779 char buf[BDIGBUFSIZE], *b = buf; |
|
2780 mnumber v; |
|
2781 |
|
2782 singsub(&a); |
|
2783 v = matheval(a); |
|
2784 if ((v.type & MN_FLOAT) && !outputradix) |
|
2785 b = convfloat(v.u.d, 0, 0, NULL); |
|
2786 else { |
|
2787 if (v.type & MN_FLOAT) |
|
2788 v.u.l = (zlong) v.u.d; |
|
2789 convbase(buf, v.u.l, outputradix); |
|
2790 } |
|
2791 t = *bptr = (char *) hcalloc(strlen(*bptr) + strlen(b) + |
|
2792 strlen(rest) + 1); |
|
2793 t--; |
|
2794 while ((*++t = *s++)); |
|
2795 t--; |
|
2796 while ((*++t = *b++)); |
|
2797 strcat(t, rest); |
|
2798 return t; |
|
2799 } |
|
2800 |
|
2801 /**/ |
|
2802 void |
|
2803 modify(char **str, char **ptr) |
|
2804 { |
|
2805 char *ptr1, *ptr2, *ptr3, del, *lptr, c, *test, *sep, *t, *tt, tc, *e; |
|
2806 char *copy, *all, *tmp, sav; |
|
2807 int gbal, wall, rec, al, nl; |
|
2808 |
|
2809 test = NULL; |
|
2810 |
|
2811 if (**ptr == ':') |
|
2812 *str = dupstring(*str); |
|
2813 |
|
2814 while (**ptr == ':') { |
|
2815 lptr = *ptr; |
|
2816 (*ptr)++; |
|
2817 wall = gbal = 0; |
|
2818 rec = 1; |
|
2819 c = '\0'; |
|
2820 sep = NULL; |
|
2821 |
|
2822 for (; !c && **ptr;) { |
|
2823 switch (**ptr) { |
|
2824 case 'h': |
|
2825 case 'r': |
|
2826 case 'e': |
|
2827 case 't': |
|
2828 case 'l': |
|
2829 case 'u': |
|
2830 case 'q': |
|
2831 case 'Q': |
|
2832 c = **ptr; |
|
2833 break; |
|
2834 |
|
2835 case 's': |
|
2836 c = **ptr; |
|
2837 (*ptr)++; |
|
2838 ptr1 = *ptr; |
|
2839 del = *ptr1++; |
|
2840 for (ptr2 = ptr1; *ptr2 != del && *ptr2; ptr2++); |
|
2841 if (!*ptr2) { |
|
2842 zerr("bad substitution", NULL, 0); |
|
2843 return; |
|
2844 } |
|
2845 *ptr2++ = '\0'; |
|
2846 for (ptr3 = ptr2; *ptr3 != del && *ptr3; ptr3++); |
|
2847 if ((sav = *ptr3)) |
|
2848 *ptr3++ = '\0'; |
|
2849 if (*ptr1) { |
|
2850 zsfree(hsubl); |
|
2851 hsubl = ztrdup(ptr1); |
|
2852 } |
|
2853 if (!hsubl) { |
|
2854 zerr("no previous substitution", NULL, 0); |
|
2855 return; |
|
2856 } |
|
2857 zsfree(hsubr); |
|
2858 for (tt = hsubl; *tt; tt++) |
|
2859 if (INULL(*tt)) |
|
2860 chuck(tt--); |
|
2861 untokenize(hsubl); |
|
2862 for (tt = hsubr = ztrdup(ptr2); *tt; tt++) |
|
2863 if (INULL(*tt)) |
|
2864 chuck(tt--); |
|
2865 ptr2[-1] = del; |
|
2866 if (sav) |
|
2867 ptr3[-1] = sav; |
|
2868 *ptr = ptr3 - 1; |
|
2869 break; |
|
2870 |
|
2871 case '&': |
|
2872 c = 's'; |
|
2873 break; |
|
2874 |
|
2875 case 'g': |
|
2876 (*ptr)++; |
|
2877 gbal = 1; |
|
2878 break; |
|
2879 |
|
2880 case 'w': |
|
2881 wall = 1; |
|
2882 (*ptr)++; |
|
2883 break; |
|
2884 case 'W': |
|
2885 wall = 1; |
|
2886 (*ptr)++; |
|
2887 ptr1 = get_strarg(ptr2 = *ptr); |
|
2888 if ((sav = *ptr1)) |
|
2889 *ptr1 = '\0'; |
|
2890 sep = dupstring(ptr2 + 1); |
|
2891 if (sav) |
|
2892 *ptr1 = sav; |
|
2893 *ptr = ptr1 + 1; |
|
2894 c = '\0'; |
|
2895 break; |
|
2896 |
|
2897 case 'f': |
|
2898 rec = -1; |
|
2899 (*ptr)++; |
|
2900 break; |
|
2901 case 'F': |
|
2902 rec = get_intarg(ptr); |
|
2903 (*ptr)++; |
|
2904 break; |
|
2905 default: |
|
2906 *ptr = lptr; |
|
2907 return; |
|
2908 } |
|
2909 } |
|
2910 (*ptr)++; |
|
2911 if (!c) { |
|
2912 *ptr = lptr; |
|
2913 return; |
|
2914 } |
|
2915 if (rec < 0) |
|
2916 test = dupstring(*str); |
|
2917 |
|
2918 while (rec--) { |
|
2919 if (wall) { |
|
2920 al = 0; |
|
2921 all = NULL; |
|
2922 for (t = e = *str; (tt = findword(&e, sep));) { |
|
2923 tc = *e; |
|
2924 *e = '\0'; |
|
2925 copy = dupstring(tt); |
|
2926 *e = tc; |
|
2927 switch (c) { |
|
2928 case 'h': |
|
2929 remtpath(©); |
|
2930 break; |
|
2931 case 'r': |
|
2932 remtext(©); |
|
2933 break; |
|
2934 case 'e': |
|
2935 rembutext(©); |
|
2936 break; |
|
2937 case 't': |
|
2938 remlpaths(©); |
|
2939 break; |
|
2940 case 'l': |
|
2941 downcase(©); |
|
2942 break; |
|
2943 case 'u': |
|
2944 upcase(©); |
|
2945 break; |
|
2946 case 's': |
|
2947 if (hsubl && hsubr) |
|
2948 subst(©, hsubl, hsubr, gbal); |
|
2949 break; |
|
2950 case 'q': |
|
2951 copy = bslashquote(copy, NULL, 0); |
|
2952 break; |
|
2953 case 'Q': |
|
2954 { |
|
2955 int one = noerrs, oef = errflag; |
|
2956 |
|
2957 noerrs = 1; |
|
2958 parse_subst_string(copy); |
|
2959 noerrs = one; |
|
2960 errflag = oef; |
|
2961 remnulargs(copy); |
|
2962 untokenize(copy); |
|
2963 } |
|
2964 break; |
|
2965 } |
|
2966 tc = *tt; |
|
2967 *tt = '\0'; |
|
2968 nl = al + strlen(t) + strlen(copy); |
|
2969 ptr1 = tmp = (char *)zhalloc(nl + 1); |
|
2970 if (all) |
|
2971 for (ptr2 = all; *ptr2;) |
|
2972 *ptr1++ = *ptr2++; |
|
2973 for (ptr2 = t; *ptr2;) |
|
2974 *ptr1++ = *ptr2++; |
|
2975 *tt = tc; |
|
2976 for (ptr2 = copy; *ptr2;) |
|
2977 *ptr1++ = *ptr2++; |
|
2978 *ptr1 = '\0'; |
|
2979 al = nl; |
|
2980 all = tmp; |
|
2981 t = e; |
|
2982 } |
|
2983 *str = all; |
|
2984 |
|
2985 } else { |
|
2986 switch (c) { |
|
2987 case 'h': |
|
2988 remtpath(str); |
|
2989 break; |
|
2990 case 'r': |
|
2991 remtext(str); |
|
2992 break; |
|
2993 case 'e': |
|
2994 rembutext(str); |
|
2995 break; |
|
2996 case 't': |
|
2997 remlpaths(str); |
|
2998 break; |
|
2999 case 'l': |
|
3000 downcase(str); |
|
3001 break; |
|
3002 case 'u': |
|
3003 upcase(str); |
|
3004 break; |
|
3005 case 's': |
|
3006 if (hsubl && hsubr) { |
|
3007 char *oldstr = *str; |
|
3008 |
|
3009 subst(str, hsubl, hsubr, gbal); |
|
3010 if (*str != oldstr) { |
|
3011 *str = dupstring(oldstr = *str); |
|
3012 zsfree(oldstr); |
|
3013 } |
|
3014 } |
|
3015 break; |
|
3016 case 'q': |
|
3017 *str = bslashquote(*str, NULL, 0); |
|
3018 break; |
|
3019 case 'Q': |
|
3020 { |
|
3021 int one = noerrs, oef = errflag; |
|
3022 |
|
3023 noerrs = 1; |
|
3024 parse_subst_string(*str); |
|
3025 noerrs = one; |
|
3026 errflag = oef; |
|
3027 remnulargs(*str); |
|
3028 untokenize(*str); |
|
3029 } |
|
3030 break; |
|
3031 } |
|
3032 } |
|
3033 if (rec < 0) { |
|
3034 if (!strcmp(test, *str)) |
|
3035 rec = 0; |
|
3036 else |
|
3037 test = dupstring(*str); |
|
3038 } |
|
3039 } |
|
3040 } |
|
3041 } |
|
3042 |
|
3043 /* get a directory stack entry */ |
|
3044 |
|
3045 /**/ |
|
3046 static char * |
|
3047 dstackent(char ch, int val) |
|
3048 { |
|
3049 int backwards; |
|
3050 LinkNode end=(LinkNode)dirstack, n; |
|
3051 |
|
3052 backwards = ch == (isset(PUSHDMINUS) ? '+' : '-'); |
|
3053 if(!backwards && !val--) |
|
3054 return pwd; |
|
3055 if (backwards) |
|
3056 for (n=lastnode(dirstack); n != end && val; val--, n=prevnode(n)); |
|
3057 else |
|
3058 for (end=NULL, n=firstnode(dirstack); n && val; val--, n=nextnode(n)); |
|
3059 if (n == end) { |
|
3060 if (backwards && !val) |
|
3061 return pwd; |
|
3062 if (isset(NOMATCH)) |
|
3063 zerr("not enough directory stack entries.", NULL, 0); |
|
3064 return NULL; |
|
3065 } |
|
3066 return (char *)getdata(n); |
|
3067 } |