| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | /* $NetBSD: regcomp.c,v 1.2 2013/11/22 15:52:06 christos Exp $ */ | | 1 | /* $NetBSD: regcomp.c,v 1.3 2014/01/07 21:48:12 christos Exp $ */ |
2 | /*- | | 2 | /*- |
3 | * Copyright (c) 1992, 1993, 1994 Henry Spencer. | | 3 | * Copyright (c) 1992, 1993, 1994 Henry Spencer. |
4 | * Copyright (c) 1992, 1993, 1994 | | 4 | * Copyright (c) 1992, 1993, 1994 |
5 | * The Regents of the University of California. All rights reserved. | | 5 | * The Regents of the University of California. All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to Berkeley by | | 7 | * This code is derived from software contributed to Berkeley by |
8 | * Henry Spencer of the University of Toronto. | | 8 | * Henry Spencer of the University of Toronto. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
| @@ -182,30 +182,30 @@ static int never = 0; /* for use in ass | | | @@ -182,30 +182,30 @@ static int never = 0; /* for use in ass |
182 | = #define REG_BASIC 0000 | | 182 | = #define REG_BASIC 0000 |
183 | = #define REG_EXTENDED 0001 | | 183 | = #define REG_EXTENDED 0001 |
184 | = #define REG_ICASE 0002 | | 184 | = #define REG_ICASE 0002 |
185 | = #define REG_NOSUB 0004 | | 185 | = #define REG_NOSUB 0004 |
186 | = #define REG_NEWLINE 0010 | | 186 | = #define REG_NEWLINE 0010 |
187 | = #define REG_NOSPEC 0020 | | 187 | = #define REG_NOSPEC 0020 |
188 | = #define REG_PEND 0040 | | 188 | = #define REG_PEND 0040 |
189 | = #define REG_DUMP 0200 | | 189 | = #define REG_DUMP 0200 |
190 | */ | | 190 | */ |
191 | int /* 0 success, otherwise REG_something */ | | 191 | int /* 0 success, otherwise REG_something */ |
192 | regcomp(regex_t *preg, const RCHAR_T *pattern, int cflags) | | 192 | regcomp(regex_t *preg, const RCHAR_T *pattern, int cflags) |
193 | { | | 193 | { |
194 | struct parse pa; | | 194 | struct parse pa; |
195 | register struct re_guts *g; | | 195 | struct re_guts *g; |
196 | register struct parse *p = &pa; | | 196 | struct parse *p = &pa; |
197 | register int i; | | 197 | int i; |
198 | register size_t len; | | 198 | size_t len; |
199 | #ifdef REDEBUG | | 199 | #ifdef REDEBUG |
200 | # define GOODFLAGS(f) (f) | | 200 | # define GOODFLAGS(f) (f) |
201 | #else | | 201 | #else |
202 | # define GOODFLAGS(f) ((f)&~REG_DUMP) | | 202 | # define GOODFLAGS(f) ((f)&~REG_DUMP) |
203 | #endif | | 203 | #endif |
204 | | | 204 | |
205 | cflags = GOODFLAGS(cflags); | | 205 | cflags = GOODFLAGS(cflags); |
206 | if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC)) | | 206 | if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC)) |
207 | return(REG_INVARG); | | 207 | return(REG_INVARG); |
208 | | | 208 | |
209 | if (cflags&REG_PEND) { | | 209 | if (cflags&REG_PEND) { |
210 | if (preg->re_endp < pattern) | | 210 | if (preg->re_endp < pattern) |
211 | return(REG_INVARG); | | 211 | return(REG_INVARG); |
| @@ -285,38 +285,38 @@ regcomp(regex_t *preg, const RCHAR_T *pa | | | @@ -285,38 +285,38 @@ regcomp(regex_t *preg, const RCHAR_T *pa |
285 | /* not debugging, so can't rely on the assert() in regexec() */ | | 285 | /* not debugging, so can't rely on the assert() in regexec() */ |
286 | if (g->iflags&BAD) | | 286 | if (g->iflags&BAD) |
287 | SETERROR(REG_ASSERT); | | 287 | SETERROR(REG_ASSERT); |
288 | #endif | | 288 | #endif |
289 | | | 289 | |
290 | /* win or lose, we're done */ | | 290 | /* win or lose, we're done */ |
291 | if (p->error != 0) /* lose */ | | 291 | if (p->error != 0) /* lose */ |
292 | regfree(preg); | | 292 | regfree(preg); |
293 | return(p->error); | | 293 | return(p->error); |
294 | } | | 294 | } |
295 | | | 295 | |
296 | /* | | 296 | /* |
297 | - p_ere - ERE parser top level, concatenation and alternation | | 297 | - p_ere - ERE parser top level, concatenation and alternation |
298 | == static void p_ere(register struct parse *p, int stop, size_t reclimit); | | 298 | == static void p_ere(struct parse *p, int stop, size_t reclimit); |
299 | */ | | 299 | */ |
300 | static void | | 300 | static void |
301 | p_ere(register struct parse *p, int stop, size_t reclimit) | | 301 | p_ere(struct parse *p, int stop, size_t reclimit) |
302 | | | 302 | |
303 | /* character this ERE should end at */ | | 303 | /* character this ERE should end at */ |
304 | { | | 304 | { |
305 | register char c; | | 305 | char c; |
306 | register sopno prevback = 0; | | 306 | sopno prevback = 0; |
307 | register sopno prevfwd = 0; | | 307 | sopno prevfwd = 0; |
308 | register sopno conc; | | 308 | sopno conc; |
309 | register int first = 1; /* is this the first alternative? */ | | 309 | int first = 1; /* is this the first alternative? */ |
310 | | | 310 | |
311 | if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { | | 311 | if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { |
312 | p->error = REG_ESPACE; | | 312 | p->error = REG_ESPACE; |
313 | return; | | 313 | return; |
314 | } | | 314 | } |
315 | | | 315 | |
316 | for (;;) { | | 316 | for (;;) { |
317 | /* do a bunch of concatenated expressions */ | | 317 | /* do a bunch of concatenated expressions */ |
318 | conc = HERE(); | | 318 | conc = HERE(); |
319 | while (MORE() && (c = PEEK()) != '|' && c != stop) | | 319 | while (MORE() && (c = PEEK()) != '|' && c != stop) |
320 | p_ere_exp(p, reclimit); | | 320 | p_ere_exp(p, reclimit); |
321 | (void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ | | 321 | (void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ |
322 | | | 322 | |
| @@ -336,36 +336,36 @@ p_ere(register struct parse *p, int stop | | | @@ -336,36 +336,36 @@ p_ere(register struct parse *p, int stop |
336 | EMIT(OOR2, 0); /* offset is very wrong */ | | 336 | EMIT(OOR2, 0); /* offset is very wrong */ |
337 | } | | 337 | } |
338 | | | 338 | |
339 | if (!first) { /* tail-end fixups */ | | 339 | if (!first) { /* tail-end fixups */ |
340 | AHEAD(prevfwd); | | 340 | AHEAD(prevfwd); |
341 | ASTERN(O_CH, prevback); | | 341 | ASTERN(O_CH, prevback); |
342 | } | | 342 | } |
343 | | | 343 | |
344 | assert(!MORE() || SEE(stop)); | | 344 | assert(!MORE() || SEE(stop)); |
345 | } | | 345 | } |
346 | | | 346 | |
347 | /* | | 347 | /* |
348 | - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op | | 348 | - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op |
349 | == static void p_ere_exp(register struct parse *p); | | 349 | == static void p_ere_exp(struct parse *p); |
350 | */ | | 350 | */ |
351 | static void | | 351 | static void |
352 | p_ere_exp(register struct parse *p, size_t reclimit) | | 352 | p_ere_exp(struct parse *p, size_t reclimit) |
353 | { | | 353 | { |
354 | register char c; | | 354 | char c; |
355 | register sopno pos; | | 355 | sopno pos; |
356 | register int count; | | 356 | int count; |
357 | register int count2; | | 357 | int count2; |
358 | register sopno subno; | | 358 | sopno subno; |
359 | int wascaret = 0; | | 359 | int wascaret = 0; |
360 | | | 360 | |
361 | assert(MORE()); /* caller should have ensured this */ | | 361 | assert(MORE()); /* caller should have ensured this */ |
362 | c = GETNEXT(); | | 362 | c = GETNEXT(); |
363 | | | 363 | |
364 | pos = HERE(); | | 364 | pos = HERE(); |
365 | switch (c) { | | 365 | switch (c) { |
366 | case '(': | | 366 | case '(': |
367 | (void)REQUIRE(MORE(), REG_EPAREN); | | 367 | (void)REQUIRE(MORE(), REG_EPAREN); |
368 | p->g->nsub++; | | 368 | p->g->nsub++; |
369 | subno = p->g->nsub; | | 369 | subno = p->g->nsub; |
370 | if (subno < NPAREN) | | 370 | if (subno < NPAREN) |
371 | p->pbegin[subno] = HERE(); | | 371 | p->pbegin[subno] = HERE(); |
| @@ -484,57 +484,57 @@ p_ere_exp(register struct parse *p, size | | | @@ -484,57 +484,57 @@ p_ere_exp(register struct parse *p, size |
484 | } | | 484 | } |
485 | | | 485 | |
486 | if (!MORE()) | | 486 | if (!MORE()) |
487 | return; | | 487 | return; |
488 | c = PEEK(); | | 488 | c = PEEK(); |
489 | if (!( c == '*' || c == '+' || c == '?' || | | 489 | if (!( c == '*' || c == '+' || c == '?' || |
490 | (c == '{' && MORE2() && ISDIGIT((UCHAR_T)PEEK2())) ) ) | | 490 | (c == '{' && MORE2() && ISDIGIT((UCHAR_T)PEEK2())) ) ) |
491 | return; | | 491 | return; |
492 | SETERROR(REG_BADRPT); | | 492 | SETERROR(REG_BADRPT); |
493 | } | | 493 | } |
494 | | | 494 | |
495 | /* | | 495 | /* |
496 | - p_str - string (no metacharacters) "parser" | | 496 | - p_str - string (no metacharacters) "parser" |
497 | == static void p_str(register struct parse *p); | | 497 | == static void p_str(struct parse *p); |
498 | */ | | 498 | */ |
499 | static void | | 499 | static void |
500 | p_str(register struct parse *p) | | 500 | p_str(struct parse *p) |
501 | { | | 501 | { |
502 | (void)REQUIRE(MORE(), REG_EMPTY); | | 502 | (void)REQUIRE(MORE(), REG_EMPTY); |
503 | while (MORE()) | | 503 | while (MORE()) |
504 | ordinary(p, GETNEXT()); | | 504 | ordinary(p, GETNEXT()); |
505 | } | | 505 | } |
506 | | | 506 | |
507 | /* | | 507 | /* |
508 | - p_bre - BRE parser top level, anchoring and concatenation | | 508 | - p_bre - BRE parser top level, anchoring and concatenation |
509 | == static void p_bre(register struct parse *p, register int end1, \ | | 509 | == static void p_bre(struct parse *p, int end1, \ |
510 | == register int end2, size_t reclimit); | | 510 | == int end2, size_t reclimit); |
511 | * Giving end1 as OUT essentially eliminates the end1/end2 check. | | 511 | * Giving end1 as OUT essentially eliminates the end1/end2 check. |
512 | * | | 512 | * |
513 | * This implementation is a bit of a kludge, in that a trailing $ is first | | 513 | * This implementation is a bit of a kludge, in that a trailing $ is first |
514 | * taken as an ordinary character and then revised to be an anchor. The | | 514 | * taken as an ordinary character and then revised to be an anchor. The |
515 | * only undesirable side effect is that '$' gets included as a character | | 515 | * only undesirable side effect is that '$' gets included as a character |
516 | * category in such cases. This is fairly harmless; not worth fixing. | | 516 | * category in such cases. This is fairly harmless; not worth fixing. |
517 | * The amount of lookahead needed to avoid this kludge is excessive. | | 517 | * The amount of lookahead needed to avoid this kludge is excessive. |
518 | */ | | 518 | */ |
519 | static void | | 519 | static void |
520 | p_bre(register struct parse *p, register int end1, register int end2, size_t reclimit) | | 520 | p_bre(struct parse *p, int end1, int end2, size_t reclimit) |
521 | | | 521 | |
522 | /* first terminating character */ | | 522 | /* first terminating character */ |
523 | /* second terminating character */ | | 523 | /* second terminating character */ |
524 | { | | 524 | { |
525 | register sopno start; | | 525 | sopno start; |
526 | register int first = 1; /* first subexpression? */ | | 526 | int first = 1; /* first subexpression? */ |
527 | register int wasdollar = 0; | | 527 | int wasdollar = 0; |
528 | | | 528 | |
529 | if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { | | 529 | if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { |
530 | p->error = REG_ESPACE; | | 530 | p->error = REG_ESPACE; |
531 | return; | | 531 | return; |
532 | } | | 532 | } |
533 | | | 533 | |
534 | start = HERE(); | | 534 | start = HERE(); |
535 | | | 535 | |
536 | if (EAT('^')) { | | 536 | if (EAT('^')) { |
537 | EMIT(OBOL, 0); | | 537 | EMIT(OBOL, 0); |
538 | p->g->iflags |= USEBOL; | | 538 | p->g->iflags |= USEBOL; |
539 | p->g->nbol++; | | 539 | p->g->nbol++; |
540 | } | | 540 | } |
| @@ -544,39 +544,39 @@ p_bre(register struct parse *p, register | | | @@ -544,39 +544,39 @@ p_bre(register struct parse *p, register |
544 | } | | 544 | } |
545 | if (wasdollar) { /* oops, that was a trailing anchor */ | | 545 | if (wasdollar) { /* oops, that was a trailing anchor */ |
546 | DROP(1); | | 546 | DROP(1); |
547 | EMIT(OEOL, 0); | | 547 | EMIT(OEOL, 0); |
548 | p->g->iflags |= USEEOL; | | 548 | p->g->iflags |= USEEOL; |
549 | p->g->neol++; | | 549 | p->g->neol++; |
550 | } | | 550 | } |
551 | | | 551 | |
552 | (void)REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ | | 552 | (void)REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ |
553 | } | | 553 | } |
554 | | | 554 | |
555 | /* | | 555 | /* |
556 | - p_simp_re - parse a simple RE, an atom possibly followed by a repetition | | 556 | - p_simp_re - parse a simple RE, an atom possibly followed by a repetition |
557 | == static int p_simp_re(register struct parse *p, int starordinary, size_t reclimit); | | 557 | == static int p_simp_re(struct parse *p, int starordinary, size_t reclimit); |
558 | */ | | 558 | */ |
559 | static int /* was the simple RE an unbackslashed $? */ | | 559 | static int /* was the simple RE an unbackslashed $? */ |
560 | p_simp_re(register struct parse *p, int starordinary, size_t reclimit) | | 560 | p_simp_re(struct parse *p, int starordinary, size_t reclimit) |
561 | | | 561 | |
562 | /* is a leading * an ordinary character? */ | | 562 | /* is a leading * an ordinary character? */ |
563 | { | | 563 | { |
564 | register int c; | | 564 | int c; |
565 | register int count; | | 565 | int count; |
566 | register int count2; | | 566 | int count2; |
567 | register sopno pos; | | 567 | sopno pos; |
568 | register int i; | | 568 | int i; |
569 | register sopno subno; | | 569 | sopno subno; |
570 | int backsl; | | 570 | int backsl; |
571 | | | 571 | |
572 | pos = HERE(); /* repetion op, if any, covers from here */ | | 572 | pos = HERE(); /* repetion op, if any, covers from here */ |
573 | | | 573 | |
574 | assert(MORE()); /* caller should have ensured this */ | | 574 | assert(MORE()); /* caller should have ensured this */ |
575 | c = GETNEXT(); | | 575 | c = GETNEXT(); |
576 | backsl = c == '\\'; | | 576 | backsl = c == '\\'; |
577 | if (backsl) { | | 577 | if (backsl) { |
578 | (void)REQUIRE(MORE(), REG_EESCAPE); | | 578 | (void)REQUIRE(MORE(), REG_EESCAPE); |
579 | c = (unsigned char)GETNEXT(); | | 579 | c = (unsigned char)GETNEXT(); |
580 | switch (c) { | | 580 | switch (c) { |
581 | case '{': | | 581 | case '{': |
582 | SETERROR(REG_BADRPT); | | 582 | SETERROR(REG_BADRPT); |
| @@ -669,55 +669,55 @@ p_simp_re(register struct parse *p, int | | | @@ -669,55 +669,55 @@ p_simp_re(register struct parse *p, int |
669 | while (MORE() && !SEETWO('\\', '}')) | | 669 | while (MORE() && !SEETWO('\\', '}')) |
670 | NEXT(); | | 670 | NEXT(); |
671 | (void)REQUIRE(MORE(), REG_EBRACE); | | 671 | (void)REQUIRE(MORE(), REG_EBRACE); |
672 | SETERROR(REG_BADBR); | | 672 | SETERROR(REG_BADBR); |
673 | } | | 673 | } |
674 | } else if (!backsl && c == (unsigned char)'$') /* $ (but not \$) ends it */ | | 674 | } else if (!backsl && c == (unsigned char)'$') /* $ (but not \$) ends it */ |
675 | return(1); | | 675 | return(1); |
676 | | | 676 | |
677 | return(0); | | 677 | return(0); |
678 | } | | 678 | } |
679 | | | 679 | |
680 | /* | | 680 | /* |
681 | - p_count - parse a repetition count | | 681 | - p_count - parse a repetition count |
682 | == static int p_count(register struct parse *p); | | 682 | == static int p_count(struct parse *p); |
683 | */ | | 683 | */ |
684 | static int /* the value */ | | 684 | static int /* the value */ |
685 | p_count(register struct parse *p) | | 685 | p_count(struct parse *p) |
686 | { | | 686 | { |
687 | register int count = 0; | | 687 | int count = 0; |
688 | register int ndigits = 0; | | 688 | int ndigits = 0; |
689 | | | 689 | |
690 | while (MORE() && ISDIGIT((UCHAR_T)PEEK()) && count <= DUPMAX) { | | 690 | while (MORE() && ISDIGIT((UCHAR_T)PEEK()) && count <= DUPMAX) { |
691 | count = count*10 + (GETNEXT() - '0'); | | 691 | count = count*10 + (GETNEXT() - '0'); |
692 | ndigits++; | | 692 | ndigits++; |
693 | } | | 693 | } |
694 | | | 694 | |
695 | (void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); | | 695 | (void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); |
696 | return(count); | | 696 | return(count); |
697 | } | | 697 | } |
698 | | | 698 | |
699 | /* | | 699 | /* |
700 | - p_bracket - parse a bracketed character list | | 700 | - p_bracket - parse a bracketed character list |
701 | == static void p_bracket(register struct parse *p); | | 701 | == static void p_bracket(struct parse *p); |
702 | * | | 702 | * |
703 | * Note a significant property of this code: if the allocset() did SETERROR, | | 703 | * Note a significant property of this code: if the allocset() did SETERROR, |
704 | * no set operations are done. | | 704 | * no set operations are done. |
705 | */ | | 705 | */ |
706 | static void | | 706 | static void |
707 | p_bracket(register struct parse *p) | | 707 | p_bracket(struct parse *p) |
708 | { | | 708 | { |
709 | register cset *cs; | | 709 | cset *cs; |
710 | register int invert = 0; | | 710 | int invert = 0; |
711 | static RCHAR_T bow[] = { '[', ':', '<', ':', ']', ']' }; | | 711 | static RCHAR_T bow[] = { '[', ':', '<', ':', ']', ']' }; |
712 | static RCHAR_T eow[] = { '[', ':', '>', ':', ']', ']' }; | | 712 | static RCHAR_T eow[] = { '[', ':', '>', ':', ']', ']' }; |
713 | | | 713 | |
714 | cs = allocset(p); | | 714 | cs = allocset(p); |
715 | if (cs == NULL) | | 715 | if (cs == NULL) |
716 | return; | | 716 | return; |
717 | | | 717 | |
718 | /* Dept of Truly Sickening Special-Case Kludges */ | | 718 | /* Dept of Truly Sickening Special-Case Kludges */ |
719 | if (p->next + 5 < p->end && MEMCMP(p->next, bow, 6) == 0) { | | 719 | if (p->next + 5 < p->end && MEMCMP(p->next, bow, 6) == 0) { |
720 | EMIT(OBOW, 0); | | 720 | EMIT(OBOW, 0); |
721 | NEXTn(6); | | 721 | NEXTn(6); |
722 | return; | | 722 | return; |
723 | } | | 723 | } |
| @@ -733,71 +733,71 @@ p_bracket(register struct parse *p) | | | @@ -733,71 +733,71 @@ p_bracket(register struct parse *p) |
733 | CHadd(cs, ']'); | | 733 | CHadd(cs, ']'); |
734 | else if (EAT('-')) | | 734 | else if (EAT('-')) |
735 | CHadd(cs, '-'); | | 735 | CHadd(cs, '-'); |
736 | while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) | | 736 | while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) |
737 | p_b_term(p, cs); | | 737 | p_b_term(p, cs); |
738 | if (EAT('-')) | | 738 | if (EAT('-')) |
739 | CHadd(cs, '-'); | | 739 | CHadd(cs, '-'); |
740 | (void)MUSTEAT(']', REG_EBRACK); | | 740 | (void)MUSTEAT(']', REG_EBRACK); |
741 | | | 741 | |
742 | if (p->error != 0) /* don't mess things up further */ | | 742 | if (p->error != 0) /* don't mess things up further */ |
743 | return; | | 743 | return; |
744 | | | 744 | |
745 | if (p->g->cflags&REG_ICASE) { | | 745 | if (p->g->cflags&REG_ICASE) { |
746 | register int i; | | 746 | int i; |
747 | register int ci; | | 747 | int ci; |
748 | | | 748 | |
749 | for (i = p->g->csetsize - 1; i >= 0; i--) | | 749 | for (i = p->g->csetsize - 1; i >= 0; i--) |
750 | if (CHIN(cs, i) && isalpha(i)) { | | 750 | if (CHIN(cs, i) && isalpha(i)) { |
751 | ci = othercase(i); | | 751 | ci = othercase(i); |
752 | if (ci != i) | | 752 | if (ci != i) |
753 | CHadd(cs, ci); | | 753 | CHadd(cs, ci); |
754 | } | | 754 | } |
755 | if (cs->multis != NULL) | | 755 | if (cs->multis != NULL) |
756 | mccase(p, cs); | | 756 | mccase(p, cs); |
757 | } | | 757 | } |
758 | if (invert) { | | 758 | if (invert) { |
759 | register int i; | | 759 | int i; |
760 | | | 760 | |
761 | for (i = p->g->csetsize - 1; i >= 0; i--) | | 761 | for (i = p->g->csetsize - 1; i >= 0; i--) |
762 | if (CHIN(cs, i)) | | 762 | if (CHIN(cs, i)) |
763 | CHsub(cs, i); | | 763 | CHsub(cs, i); |
764 | else | | 764 | else |
765 | CHadd(cs, i); | | 765 | CHadd(cs, i); |
766 | if (p->g->cflags&REG_NEWLINE) | | 766 | if (p->g->cflags&REG_NEWLINE) |
767 | CHsub(cs, '\n'); | | 767 | CHsub(cs, '\n'); |
768 | if (cs->multis != NULL) | | 768 | if (cs->multis != NULL) |
769 | mcinvert(p, cs); | | 769 | mcinvert(p, cs); |
770 | } | | 770 | } |
771 | | | 771 | |
772 | assert(cs->multis == NULL); /* xxx */ | | 772 | assert(cs->multis == NULL); /* xxx */ |
773 | | | 773 | |
774 | if (nch(p, cs) == 1) { /* optimize singleton sets */ | | 774 | if (nch(p, cs) == 1) { /* optimize singleton sets */ |
775 | ordinary(p, firstch(p, cs)); | | 775 | ordinary(p, firstch(p, cs)); |
776 | freeset(p, cs); | | 776 | freeset(p, cs); |
777 | } else | | 777 | } else |
778 | EMIT(OANYOF, freezeset(p, cs)); | | 778 | EMIT(OANYOF, freezeset(p, cs)); |
779 | } | | 779 | } |
780 | | | 780 | |
781 | /* | | 781 | /* |
782 | - p_b_term - parse one term of a bracketed character list | | 782 | - p_b_term - parse one term of a bracketed character list |
783 | == static void p_b_term(register struct parse *p, register cset *cs); | | 783 | == static void p_b_term(struct parse *p, cset *cs); |
784 | */ | | 784 | */ |
785 | static void | | 785 | static void |
786 | p_b_term(register struct parse *p, register cset *cs) | | 786 | p_b_term(struct parse *p, cset *cs) |
787 | { | | 787 | { |
788 | register char c; | | 788 | char c; |
789 | register char start, finish; | | 789 | char start, finish; |
790 | register int i; | | 790 | int i; |
791 | | | 791 | |
792 | /* classify what we've got */ | | 792 | /* classify what we've got */ |
793 | switch ((MORE()) ? PEEK() : '\0') { | | 793 | switch ((MORE()) ? PEEK() : '\0') { |
794 | case '[': | | 794 | case '[': |
795 | c = (MORE2()) ? PEEK2() : '\0'; | | 795 | c = (MORE2()) ? PEEK2() : '\0'; |
796 | break; | | 796 | break; |
797 | case '-': | | 797 | case '-': |
798 | SETERROR(REG_ERANGE); | | 798 | SETERROR(REG_ERANGE); |
799 | return; /* NOTE RETURN */ | | 799 | return; /* NOTE RETURN */ |
800 | break; | | 800 | break; |
801 | default: | | 801 | default: |
802 | c = '\0'; | | 802 | c = '\0'; |
803 | break; | | 803 | break; |
| @@ -834,102 +834,102 @@ p_b_term(register struct parse *p, regis | | | @@ -834,102 +834,102 @@ p_b_term(register struct parse *p, regis |
834 | finish = p_b_symbol(p); | | 834 | finish = p_b_symbol(p); |
835 | } else | | 835 | } else |
836 | finish = start; | | 836 | finish = start; |
837 | /* xxx what about signed chars here... */ | | 837 | /* xxx what about signed chars here... */ |
838 | (void)REQUIRE(start <= finish, REG_ERANGE); | | 838 | (void)REQUIRE(start <= finish, REG_ERANGE); |
839 | for (i = start; i <= finish; i++) | | 839 | for (i = start; i <= finish; i++) |
840 | CHadd(cs, i); | | 840 | CHadd(cs, i); |
841 | break; | | 841 | break; |
842 | } | | 842 | } |
843 | } | | 843 | } |
844 | | | 844 | |
845 | /* | | 845 | /* |
846 | - p_b_cclass - parse a character-class name and deal with it | | 846 | - p_b_cclass - parse a character-class name and deal with it |
847 | == static void p_b_cclass(register struct parse *p, register cset *cs); | | 847 | == static void p_b_cclass(struct parse *p, cset *cs); |
848 | */ | | 848 | */ |
849 | static void | | 849 | static void |
850 | p_b_cclass(register struct parse *p, register cset *cs) | | 850 | p_b_cclass(struct parse *p, cset *cs) |
851 | { | | 851 | { |
852 | register RCHAR_T *sp = p->next; | | 852 | RCHAR_T *sp = p->next; |
853 | register struct cclass *cp; | | 853 | struct cclass *cp; |
854 | register size_t len; | | 854 | size_t len; |
855 | register const char *u; | | 855 | const char *u; |
856 | register char c; | | 856 | char c; |
857 | | | 857 | |
858 | while (MORE() && isalpha(PEEK())) | | 858 | while (MORE() && isalpha(PEEK())) |
859 | NEXT(); | | 859 | NEXT(); |
860 | len = p->next - sp; | | 860 | len = p->next - sp; |
861 | for (cp = cclasses; cp->name != NULL; cp++) | | 861 | for (cp = cclasses; cp->name != NULL; cp++) |
862 | if (STRLEN(cp->name) == len && !MEMCMP(cp->name, sp, len)) | | 862 | if (STRLEN(cp->name) == len && !MEMCMP(cp->name, sp, len)) |
863 | break; | | 863 | break; |
864 | if (cp->name == NULL) { | | 864 | if (cp->name == NULL) { |
865 | /* oops, didn't find it */ | | 865 | /* oops, didn't find it */ |
866 | SETERROR(REG_ECTYPE); | | 866 | SETERROR(REG_ECTYPE); |
867 | return; | | 867 | return; |
868 | } | | 868 | } |
869 | | | 869 | |
870 | u = cp->chars; | | 870 | u = cp->chars; |
871 | while ((c = *u++) != '\0') | | 871 | while ((c = *u++) != '\0') |
872 | CHadd(cs, c); | | 872 | CHadd(cs, c); |
873 | for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) | | 873 | for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) |
874 | MCadd(p, cs, u); | | 874 | MCadd(p, cs, u); |
875 | } | | 875 | } |
876 | | | 876 | |
877 | /* | | 877 | /* |
878 | - p_b_eclass - parse an equivalence-class name and deal with it | | 878 | - p_b_eclass - parse an equivalence-class name and deal with it |
879 | == static void p_b_eclass(register struct parse *p, register cset *cs); | | 879 | == static void p_b_eclass(struct parse *p, cset *cs); |
880 | * | | 880 | * |
881 | * This implementation is incomplete. xxx | | 881 | * This implementation is incomplete. xxx |
882 | */ | | 882 | */ |
883 | static void | | 883 | static void |
884 | p_b_eclass(register struct parse *p, register cset *cs) | | 884 | p_b_eclass(struct parse *p, cset *cs) |
885 | { | | 885 | { |
886 | register char c; | | 886 | char c; |
887 | | | 887 | |
888 | c = p_b_coll_elem(p, '='); | | 888 | c = p_b_coll_elem(p, '='); |
889 | CHadd(cs, c); | | 889 | CHadd(cs, c); |
890 | } | | 890 | } |
891 | | | 891 | |
892 | /* | | 892 | /* |
893 | - p_b_symbol - parse a character or [..]ed multicharacter collating symbol | | 893 | - p_b_symbol - parse a character or [..]ed multicharacter collating symbol |
894 | == static char p_b_symbol(register struct parse *p); | | 894 | == static char p_b_symbol(struct parse *p); |
895 | */ | | 895 | */ |
896 | static char /* value of symbol */ | | 896 | static char /* value of symbol */ |
897 | p_b_symbol(register struct parse *p) | | 897 | p_b_symbol(struct parse *p) |
898 | { | | 898 | { |
899 | register char value; | | 899 | char value; |
900 | | | 900 | |
901 | (void)REQUIRE(MORE(), REG_EBRACK); | | 901 | (void)REQUIRE(MORE(), REG_EBRACK); |
902 | if (!EATTWO('[', '.')) | | 902 | if (!EATTWO('[', '.')) |
903 | return(GETNEXT()); | | 903 | return(GETNEXT()); |
904 | | | 904 | |
905 | /* collating symbol */ | | 905 | /* collating symbol */ |
906 | value = p_b_coll_elem(p, '.'); | | 906 | value = p_b_coll_elem(p, '.'); |
907 | (void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); | | 907 | (void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); |
908 | return(value); | | 908 | return(value); |
909 | } | | 909 | } |
910 | | | 910 | |
911 | /* | | 911 | /* |
912 | - p_b_coll_elem - parse a collating-element name and look it up | | 912 | - p_b_coll_elem - parse a collating-element name and look it up |
913 | == static char p_b_coll_elem(register struct parse *p, int endc); | | 913 | == static char p_b_coll_elem(struct parse *p, int endc); |
914 | */ | | 914 | */ |
915 | static char /* value of collating element */ | | 915 | static char /* value of collating element */ |
916 | p_b_coll_elem(register struct parse *p, int endc) | | 916 | p_b_coll_elem(struct parse *p, int endc) |
917 | | | 917 | |
918 | /* name ended by endc,']' */ | | 918 | /* name ended by endc,']' */ |
919 | { | | 919 | { |
920 | register RCHAR_T *sp = p->next; | | 920 | RCHAR_T *sp = p->next; |
921 | register struct cname *cp; | | 921 | struct cname *cp; |
922 | register size_t len; | | 922 | size_t len; |
923 | | | 923 | |
924 | while (MORE() && !SEETWO(endc, ']')) | | 924 | while (MORE() && !SEETWO(endc, ']')) |
925 | NEXT(); | | 925 | NEXT(); |
926 | if (!MORE()) { | | 926 | if (!MORE()) { |
927 | SETERROR(REG_EBRACK); | | 927 | SETERROR(REG_EBRACK); |
928 | return(0); | | 928 | return(0); |
929 | } | | 929 | } |
930 | len = p->next - sp; | | 930 | len = p->next - sp; |
931 | for (cp = cnames; cp->name != NULL; cp++) | | 931 | for (cp = cnames; cp->name != NULL; cp++) |
932 | if (STRLEN(cp->name) == len && MEMCMP(cp->name, sp, len)) | | 932 | if (STRLEN(cp->name) == len && MEMCMP(cp->name, sp, len)) |
933 | return(cp->code); /* known name */ | | 933 | return(cp->code); /* known name */ |
934 | if (len == 1) | | 934 | if (len == 1) |
935 | return(*sp); /* single character */ | | 935 | return(*sp); /* single character */ |
| @@ -945,113 +945,113 @@ static char /* if no counterpart, retu | | | @@ -945,113 +945,113 @@ static char /* if no counterpart, retu |
945 | othercase(int ch) | | 945 | othercase(int ch) |
946 | { | | 946 | { |
947 | assert(isalpha(ch)); | | 947 | assert(isalpha(ch)); |
948 | if (isupper(ch)) | | 948 | if (isupper(ch)) |
949 | return(tolower(ch)); | | 949 | return(tolower(ch)); |
950 | else if (islower(ch)) | | 950 | else if (islower(ch)) |
951 | return(toupper(ch)); | | 951 | return(toupper(ch)); |
952 | else /* peculiar, but could happen */ | | 952 | else /* peculiar, but could happen */ |
953 | return(ch); | | 953 | return(ch); |
954 | } | | 954 | } |
955 | | | 955 | |
956 | /* | | 956 | /* |
957 | - bothcases - emit a dualcase version of a two-case character | | 957 | - bothcases - emit a dualcase version of a two-case character |
958 | == static void bothcases(register struct parse *p, int ch); | | 958 | == static void bothcases(struct parse *p, int ch); |
959 | * | | 959 | * |
960 | * Boy, is this implementation ever a kludge... | | 960 | * Boy, is this implementation ever a kludge... |
961 | */ | | 961 | */ |
962 | static void | | 962 | static void |
963 | bothcases(register struct parse *p, int ch) | | 963 | bothcases(struct parse *p, int ch) |
964 | { | | 964 | { |
965 | register RCHAR_T *oldnext = p->next; | | 965 | RCHAR_T *oldnext = p->next; |
966 | register RCHAR_T *oldend = p->end; | | 966 | RCHAR_T *oldend = p->end; |
967 | RCHAR_T bracket[3]; | | 967 | RCHAR_T bracket[3]; |
968 | | | 968 | |
969 | assert(othercase(ch) != ch); /* p_bracket() would recurse */ | | 969 | assert(othercase(ch) != ch); /* p_bracket() would recurse */ |
970 | p->next = bracket; | | 970 | p->next = bracket; |
971 | p->end = bracket+2; | | 971 | p->end = bracket+2; |
972 | bracket[0] = ch; | | 972 | bracket[0] = ch; |
973 | bracket[1] = ']'; | | 973 | bracket[1] = ']'; |
974 | bracket[2] = '\0'; | | 974 | bracket[2] = '\0'; |
975 | p_bracket(p); | | 975 | p_bracket(p); |
976 | assert(p->next == bracket+2); | | 976 | assert(p->next == bracket+2); |
977 | p->next = oldnext; | | 977 | p->next = oldnext; |
978 | p->end = oldend; | | 978 | p->end = oldend; |
979 | } | | 979 | } |
980 | | | 980 | |
981 | /* | | 981 | /* |
982 | - ordinary - emit an ordinary character | | 982 | - ordinary - emit an ordinary character |
983 | == static void ordinary(register struct parse *p, register int ch); | | 983 | == static void ordinary(struct parse *p, int ch); |
984 | */ | | 984 | */ |
985 | static void | | 985 | static void |
986 | ordinary(register struct parse *p, register int ch) | | 986 | ordinary(struct parse *p, int ch) |
987 | { | | 987 | { |
988 | /* | | 988 | /* |
989 | register cat_t *cap = p->g->categories; | | 989 | cat_t *cap = p->g->categories; |
990 | */ | | 990 | */ |
991 | | | 991 | |
992 | if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch) | | 992 | if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch) |
993 | bothcases(p, ch); | | 993 | bothcases(p, ch); |
994 | else { | | 994 | else { |
995 | EMIT(OCHAR, (UCHAR_T)ch); | | 995 | EMIT(OCHAR, (UCHAR_T)ch); |
996 | /* | | 996 | /* |
997 | if (cap[ch] == 0) | | 997 | if (cap[ch] == 0) |
998 | cap[ch] = p->g->ncategories++; | | 998 | cap[ch] = p->g->ncategories++; |
999 | */ | | 999 | */ |
1000 | } | | 1000 | } |
1001 | } | | 1001 | } |
1002 | | | 1002 | |
1003 | /* | | 1003 | /* |
1004 | - nonnewline - emit REG_NEWLINE version of OANY | | 1004 | - nonnewline - emit REG_NEWLINE version of OANY |
1005 | == static void nonnewline(register struct parse *p); | | 1005 | == static void nonnewline(struct parse *p); |
1006 | * | | 1006 | * |
1007 | * Boy, is this implementation ever a kludge... | | 1007 | * Boy, is this implementation ever a kludge... |
1008 | */ | | 1008 | */ |
1009 | static void | | 1009 | static void |
1010 | nonnewline(register struct parse *p) | | 1010 | nonnewline(struct parse *p) |
1011 | { | | 1011 | { |
1012 | register RCHAR_T *oldnext = p->next; | | 1012 | RCHAR_T *oldnext = p->next; |
1013 | register RCHAR_T *oldend = p->end; | | 1013 | RCHAR_T *oldend = p->end; |
1014 | RCHAR_T bracket[4]; | | 1014 | RCHAR_T bracket[4]; |
1015 | | | 1015 | |
1016 | p->next = bracket; | | 1016 | p->next = bracket; |
1017 | p->end = bracket+3; | | 1017 | p->end = bracket+3; |
1018 | bracket[0] = '^'; | | 1018 | bracket[0] = '^'; |
1019 | bracket[1] = '\n'; | | 1019 | bracket[1] = '\n'; |
1020 | bracket[2] = ']'; | | 1020 | bracket[2] = ']'; |
1021 | bracket[3] = '\0'; | | 1021 | bracket[3] = '\0'; |
1022 | p_bracket(p); | | 1022 | p_bracket(p); |
1023 | assert(p->next == bracket+3); | | 1023 | assert(p->next == bracket+3); |
1024 | p->next = oldnext; | | 1024 | p->next = oldnext; |
1025 | p->end = oldend; | | 1025 | p->end = oldend; |
1026 | } | | 1026 | } |
1027 | | | 1027 | |
1028 | /* | | 1028 | /* |
1029 | - repeat - generate code for a bounded repetition, recursively if needed | | 1029 | - repeat - generate code for a bounded repetition, recursively if needed |
1030 | == static void repeat(register struct parse *p, sopno start, int from, int to, size_t reclimit); | | 1030 | == static void repeat(struct parse *p, sopno start, int from, int to, size_t reclimit); |
1031 | */ | | 1031 | */ |
1032 | static void | | 1032 | static void |
1033 | repeat(register struct parse *p, sopno start, int from, int to, size_t reclimit) | | 1033 | repeat(struct parse *p, sopno start, int from, int to, size_t reclimit) |
1034 | | | 1034 | |
1035 | /* operand from here to end of strip */ | | 1035 | /* operand from here to end of strip */ |
1036 | /* repeated from this number */ | | 1036 | /* repeated from this number */ |
1037 | /* to this number of times (maybe INFINITY) */ | | 1037 | /* to this number of times (maybe INFINITY) */ |
1038 | { | | 1038 | { |
1039 | register sopno finish; | | 1039 | sopno finish; |
1040 | # define N 2 | | 1040 | # define N 2 |
1041 | # define INF 3 | | 1041 | # define INF 3 |
1042 | # define REP(f, t) ((f)*8 + (t)) | | 1042 | # define REP(f, t) ((f)*8 + (t)) |
1043 | # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) | | 1043 | # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) |
1044 | register sopno copy; | | 1044 | sopno copy; |
1045 | | | 1045 | |
1046 | if (reclimit++ > RECLIMIT) | | 1046 | if (reclimit++ > RECLIMIT) |
1047 | p->error = REG_ESPACE; | | 1047 | p->error = REG_ESPACE; |
1048 | if (p->error) | | 1048 | if (p->error) |
1049 | return; | | 1049 | return; |
1050 | | | 1050 | |
1051 | finish = HERE(); | | 1051 | finish = HERE(); |
1052 | | | 1052 | |
1053 | assert(from <= to); | | 1053 | assert(from <= to); |
1054 | | | 1054 | |
1055 | switch (REP(MAP(from), MAP(to))) { | | 1055 | switch (REP(MAP(from), MAP(to))) { |
1056 | case REP(0, 0): /* must be user doing this */ | | 1056 | case REP(0, 0): /* must be user doing this */ |
1057 | DROP(finish-start); /* drop the operand */ | | 1057 | DROP(finish-start); /* drop the operand */ |
| @@ -1093,51 +1093,51 @@ repeat(register struct parse *p, sopno s | | | @@ -1093,51 +1093,51 @@ repeat(register struct parse *p, sopno s |
1093 | break; | | 1093 | break; |
1094 | case REP(N, INF): /* as xx{n-1,INF} */ | | 1094 | case REP(N, INF): /* as xx{n-1,INF} */ |
1095 | copy = dupl(p, start, finish); | | 1095 | copy = dupl(p, start, finish); |
1096 | repeat(p, copy, from-1, to, reclimit); | | 1096 | repeat(p, copy, from-1, to, reclimit); |
1097 | break; | | 1097 | break; |
1098 | default: /* "can't happen" */ | | 1098 | default: /* "can't happen" */ |
1099 | SETERROR(REG_ASSERT); /* just in case */ | | 1099 | SETERROR(REG_ASSERT); /* just in case */ |
1100 | break; | | 1100 | break; |
1101 | } | | 1101 | } |
1102 | } | | 1102 | } |
1103 | | | 1103 | |
1104 | /* | | 1104 | /* |
1105 | - seterr - set an error condition | | 1105 | - seterr - set an error condition |
1106 | == static int seterr(register struct parse *p, int e); | | 1106 | == static int seterr(struct parse *p, int e); |
1107 | */ | | 1107 | */ |
1108 | static int /* useless but makes type checking happy */ | | 1108 | static int /* useless but makes type checking happy */ |
1109 | seterr(register struct parse *p, int e) | | 1109 | seterr(struct parse *p, int e) |
1110 | { | | 1110 | { |
1111 | if (p->error == 0) /* keep earliest error condition */ | | 1111 | if (p->error == 0) /* keep earliest error condition */ |
1112 | p->error = e; | | 1112 | p->error = e; |
1113 | p->next = nuls; /* try to bring things to a halt */ | | 1113 | p->next = nuls; /* try to bring things to a halt */ |
1114 | p->end = nuls; | | 1114 | p->end = nuls; |
1115 | return(0); /* make the return value well-defined */ | | 1115 | return(0); /* make the return value well-defined */ |
1116 | } | | 1116 | } |
1117 | | | 1117 | |
1118 | /* | | 1118 | /* |
1119 | - allocset - allocate a set of characters for [] | | 1119 | - allocset - allocate a set of characters for [] |
1120 | == static cset *allocset(register struct parse *p); | | 1120 | == static cset *allocset(struct parse *p); |
1121 | */ | | 1121 | */ |
1122 | static cset * | | 1122 | static cset * |
1123 | allocset(register struct parse *p) | | 1123 | allocset(struct parse *p) |
1124 | { | | 1124 | { |
1125 | register int no = p->g->ncsets++; | | 1125 | int no = p->g->ncsets++; |
1126 | register size_t nc; | | 1126 | size_t nc; |
1127 | register size_t nbytes; | | 1127 | size_t nbytes; |
1128 | register cset *cs; | | 1128 | cset *cs; |
1129 | register size_t css = (size_t)p->g->csetsize; | | 1129 | size_t css = (size_t)p->g->csetsize; |
1130 | register int i; | | 1130 | int i; |
1131 | | | 1131 | |
1132 | if (no >= p->ncsalloc) { /* need another column of space */ | | 1132 | if (no >= p->ncsalloc) { /* need another column of space */ |
1133 | p->ncsalloc += CHAR_BIT; | | 1133 | p->ncsalloc += CHAR_BIT; |
1134 | nc = p->ncsalloc; | | 1134 | nc = p->ncsalloc; |
1135 | assert(nc % CHAR_BIT == 0); | | 1135 | assert(nc % CHAR_BIT == 0); |
1136 | nbytes = nc / CHAR_BIT * css; | | 1136 | nbytes = nc / CHAR_BIT * css; |
1137 | if (MEMSIZE(p) > MEMLIMIT) | | 1137 | if (MEMSIZE(p) > MEMLIMIT) |
1138 | goto oomem; | | 1138 | goto oomem; |
1139 | if (p->g->sets == NULL) | | 1139 | if (p->g->sets == NULL) |
1140 | p->g->sets = (cset *)malloc(nc * sizeof(cset)); | | 1140 | p->g->sets = (cset *)malloc(nc * sizeof(cset)); |
1141 | else | | 1141 | else |
1142 | p->g->sets = (cset *)realloc((char *)p->g->sets, | | 1142 | p->g->sets = (cset *)realloc((char *)p->g->sets, |
1143 | nc * sizeof(cset)); | | 1143 | nc * sizeof(cset)); |
| @@ -1164,351 +1164,351 @@ oomem: | | | @@ -1164,351 +1164,351 @@ oomem: |
1164 | | | 1164 | |
1165 | cs = &p->g->sets[no]; | | 1165 | cs = &p->g->sets[no]; |
1166 | cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); | | 1166 | cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); |
1167 | cs->mask = 1 << ((no) % CHAR_BIT); | | 1167 | cs->mask = 1 << ((no) % CHAR_BIT); |
1168 | cs->hash = 0; | | 1168 | cs->hash = 0; |
1169 | cs->smultis = 0; | | 1169 | cs->smultis = 0; |
1170 | cs->multis = NULL; | | 1170 | cs->multis = NULL; |
1171 | | | 1171 | |
1172 | return(cs); | | 1172 | return(cs); |
1173 | } | | 1173 | } |
1174 | | | 1174 | |
1175 | /* | | 1175 | /* |
1176 | - freeset - free a now-unused set | | 1176 | - freeset - free a now-unused set |
1177 | == static void freeset(register struct parse *p, register cset *cs); | | 1177 | == static void freeset(struct parse *p, cset *cs); |
1178 | */ | | 1178 | */ |
1179 | static void | | 1179 | static void |
1180 | freeset(register struct parse *p, register cset *cs) | | 1180 | freeset(struct parse *p, cset *cs) |
1181 | { | | 1181 | { |
1182 | register size_t i; | | 1182 | size_t i; |
1183 | register cset *top = &p->g->sets[p->g->ncsets]; | | 1183 | cset *top = &p->g->sets[p->g->ncsets]; |
1184 | register size_t css = (size_t)p->g->csetsize; | | 1184 | size_t css = (size_t)p->g->csetsize; |
1185 | | | 1185 | |
1186 | for (i = 0; i < css; i++) | | 1186 | for (i = 0; i < css; i++) |
1187 | CHsub(cs, i); | | 1187 | CHsub(cs, i); |
1188 | if (cs == top-1) /* recover only the easy case */ | | 1188 | if (cs == top-1) /* recover only the easy case */ |
1189 | p->g->ncsets--; | | 1189 | p->g->ncsets--; |
1190 | } | | 1190 | } |
1191 | | | 1191 | |
1192 | /* | | 1192 | /* |
1193 | - freezeset - final processing on a set of characters | | 1193 | - freezeset - final processing on a set of characters |
1194 | == static int freezeset(register struct parse *p, register cset *cs); | | 1194 | == static int freezeset(struct parse *p, cset *cs); |
1195 | * | | 1195 | * |
1196 | * The main task here is merging identical sets. This is usually a waste | | 1196 | * The main task here is merging identical sets. This is usually a waste |
1197 | * of time (although the hash code minimizes the overhead), but can win | | 1197 | * of time (although the hash code minimizes the overhead), but can win |
1198 | * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash | | 1198 | * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash |
1199 | * is done using addition rather than xor -- all ASCII [aA] sets xor to | | 1199 | * is done using addition rather than xor -- all ASCII [aA] sets xor to |
1200 | * the same value! | | 1200 | * the same value! |
1201 | */ | | 1201 | */ |
1202 | static int /* set number */ | | 1202 | static int /* set number */ |
1203 | freezeset(register struct parse *p, register cset *cs) | | 1203 | freezeset(struct parse *p, cset *cs) |
1204 | { | | 1204 | { |
1205 | register uch h = cs->hash; | | 1205 | uch h = cs->hash; |
1206 | register size_t i; | | 1206 | size_t i; |
1207 | register cset *top = &p->g->sets[p->g->ncsets]; | | 1207 | cset *top = &p->g->sets[p->g->ncsets]; |
1208 | register cset *cs2; | | 1208 | cset *cs2; |
1209 | register size_t css = (size_t)p->g->csetsize; | | 1209 | size_t css = (size_t)p->g->csetsize; |
1210 | | | 1210 | |
1211 | /* look for an earlier one which is the same */ | | 1211 | /* look for an earlier one which is the same */ |
1212 | for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) | | 1212 | for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) |
1213 | if (cs2->hash == h && cs2 != cs) { | | 1213 | if (cs2->hash == h && cs2 != cs) { |
1214 | /* maybe */ | | 1214 | /* maybe */ |
1215 | for (i = 0; i < css; i++) | | 1215 | for (i = 0; i < css; i++) |
1216 | if (!!CHIN(cs2, i) != !!CHIN(cs, i)) | | 1216 | if (!!CHIN(cs2, i) != !!CHIN(cs, i)) |
1217 | break; /* no */ | | 1217 | break; /* no */ |
1218 | if (i == css) | | 1218 | if (i == css) |
1219 | break; /* yes */ | | 1219 | break; /* yes */ |
1220 | } | | 1220 | } |
1221 | | | 1221 | |
1222 | if (cs2 < top) { /* found one */ | | 1222 | if (cs2 < top) { /* found one */ |
1223 | freeset(p, cs); | | 1223 | freeset(p, cs); |
1224 | cs = cs2; | | 1224 | cs = cs2; |
1225 | } | | 1225 | } |
1226 | | | 1226 | |
1227 | return((int)(cs - p->g->sets)); | | 1227 | return((int)(cs - p->g->sets)); |
1228 | } | | 1228 | } |
1229 | | | 1229 | |
1230 | /* | | 1230 | /* |
1231 | - firstch - return first character in a set (which must have at least one) | | 1231 | - firstch - return first character in a set (which must have at least one) |
1232 | == static int firstch(register struct parse *p, register cset *cs); | | 1232 | == static int firstch(struct parse *p, cset *cs); |
1233 | */ | | 1233 | */ |
1234 | static int /* character; there is no "none" value */ | | 1234 | static int /* character; there is no "none" value */ |
1235 | firstch(register struct parse *p, register cset *cs) | | 1235 | firstch(struct parse *p, cset *cs) |
1236 | { | | 1236 | { |
1237 | register size_t i; | | 1237 | size_t i; |
1238 | register size_t css = (size_t)p->g->csetsize; | | 1238 | size_t css = (size_t)p->g->csetsize; |
1239 | | | 1239 | |
1240 | for (i = 0; i < css; i++) | | 1240 | for (i = 0; i < css; i++) |
1241 | if (CHIN(cs, i)) | | 1241 | if (CHIN(cs, i)) |
1242 | return((char)i); | | 1242 | return((char)i); |
1243 | assert(never); | | 1243 | assert(never); |
1244 | return(0); /* arbitrary */ | | 1244 | return(0); /* arbitrary */ |
1245 | } | | 1245 | } |
1246 | | | 1246 | |
1247 | /* | | 1247 | /* |
1248 | - nch - number of characters in a set | | 1248 | - nch - number of characters in a set |
1249 | == static int nch(register struct parse *p, register cset *cs); | | 1249 | == static int nch(struct parse *p, cset *cs); |
1250 | */ | | 1250 | */ |
1251 | static int | | 1251 | static int |
1252 | nch(register struct parse *p, register cset *cs) | | 1252 | nch(struct parse *p, cset *cs) |
1253 | { | | 1253 | { |
1254 | register size_t i; | | 1254 | size_t i; |
1255 | register size_t css = (size_t)p->g->csetsize; | | 1255 | size_t css = (size_t)p->g->csetsize; |
1256 | register int n = 0; | | 1256 | int n = 0; |
1257 | | | 1257 | |
1258 | for (i = 0; i < css; i++) | | 1258 | for (i = 0; i < css; i++) |
1259 | if (CHIN(cs, i)) | | 1259 | if (CHIN(cs, i)) |
1260 | n++; | | 1260 | n++; |
1261 | return(n); | | 1261 | return(n); |
1262 | } | | 1262 | } |
1263 | | | 1263 | |
1264 | /* | | 1264 | /* |
1265 | - mcadd - add a collating element to a cset | | 1265 | - mcadd - add a collating element to a cset |
1266 | == static void mcadd(register struct parse *p, register cset *cs, \ | | 1266 | == static void mcadd(struct parse *p, cset *cs, \ |
1267 | == register char *cp); | | 1267 | == char *cp); |
1268 | */ | | 1268 | */ |
1269 | static void | | 1269 | static void |
1270 | mcadd(register struct parse *p, register cset *cs, register const char *cp) | | 1270 | mcadd(struct parse *p, cset *cs, const char *cp) |
1271 | { | | 1271 | { |
1272 | register size_t oldend = cs->smultis; | | 1272 | size_t oldend = cs->smultis; |
1273 | | | 1273 | |
1274 | cs->smultis += strlen(cp) + 1; | | 1274 | cs->smultis += strlen(cp) + 1; |
1275 | if (cs->multis == NULL) | | 1275 | if (cs->multis == NULL) |
1276 | cs->multis = malloc(cs->smultis); | | 1276 | cs->multis = malloc(cs->smultis); |
1277 | else | | 1277 | else |
1278 | cs->multis = realloc(cs->multis, cs->smultis); | | 1278 | cs->multis = realloc(cs->multis, cs->smultis); |
1279 | if (cs->multis == NULL) { | | 1279 | if (cs->multis == NULL) { |
1280 | SETERROR(REG_ESPACE); | | 1280 | SETERROR(REG_ESPACE); |
1281 | return; | | 1281 | return; |
1282 | } | | 1282 | } |
1283 | | | 1283 | |
1284 | (void) strcpy(cs->multis + oldend - 1, cp); | | 1284 | (void) strcpy(cs->multis + oldend - 1, cp); |
1285 | cs->multis[cs->smultis - 1] = '\0'; | | 1285 | cs->multis[cs->smultis - 1] = '\0'; |
1286 | } | | 1286 | } |
1287 | | | 1287 | |
1288 | #ifdef notdef | | 1288 | #ifdef notdef |
1289 | /* | | 1289 | /* |
1290 | - mcsub - subtract a collating element from a cset | | 1290 | - mcsub - subtract a collating element from a cset |
1291 | == static void mcsub(register cset *cs, register char *cp); | | 1291 | == static void mcsub(cset *cs, char *cp); |
1292 | */ | | 1292 | */ |
1293 | static void | | 1293 | static void |
1294 | mcsub(register cset *cs, register char *cp) | | 1294 | mcsub(cset *cs, char *cp) |
1295 | { | | 1295 | { |
1296 | register char *fp = mcfind(cs, cp); | | 1296 | char *fp = mcfind(cs, cp); |
1297 | register size_t len = strlen(fp); | | 1297 | size_t len = strlen(fp); |
1298 | | | 1298 | |
1299 | assert(fp != NULL); | | 1299 | assert(fp != NULL); |
1300 | (void) memmove(fp, fp + len + 1, | | 1300 | (void) memmove(fp, fp + len + 1, |
1301 | cs->smultis - (fp + len + 1 - cs->multis)); | | 1301 | cs->smultis - (fp + len + 1 - cs->multis)); |
1302 | cs->smultis -= len; | | 1302 | cs->smultis -= len; |
1303 | | | 1303 | |
1304 | if (cs->smultis == 0) { | | 1304 | if (cs->smultis == 0) { |
1305 | free(cs->multis); | | 1305 | free(cs->multis); |
1306 | cs->multis = NULL; | | 1306 | cs->multis = NULL; |
1307 | return; | | 1307 | return; |
1308 | } | | 1308 | } |
1309 | | | 1309 | |
1310 | cs->multis = realloc(cs->multis, cs->smultis); | | 1310 | cs->multis = realloc(cs->multis, cs->smultis); |
1311 | assert(cs->multis != NULL); | | 1311 | assert(cs->multis != NULL); |
1312 | } | | 1312 | } |
1313 | | | 1313 | |
1314 | /* | | 1314 | /* |
1315 | - mcin - is a collating element in a cset? | | 1315 | - mcin - is a collating element in a cset? |
1316 | == static int mcin(register cset *cs, register char *cp); | | 1316 | == static int mcin(cset *cs, char *cp); |
1317 | */ | | 1317 | */ |
1318 | static int | | 1318 | static int |
1319 | mcin(register cset *cs, register char *cp) | | 1319 | mcin(cset *cs, char *cp) |
1320 | { | | 1320 | { |
1321 | return(mcfind(cs, cp) != NULL); | | 1321 | return(mcfind(cs, cp) != NULL); |
1322 | } | | 1322 | } |
1323 | | | 1323 | |
1324 | /* | | 1324 | /* |
1325 | - mcfind - find a collating element in a cset | | 1325 | - mcfind - find a collating element in a cset |
1326 | == static char *mcfind(register cset *cs, register char *cp); | | 1326 | == static char *mcfind(cset *cs, char *cp); |
1327 | */ | | 1327 | */ |
1328 | static char * | | 1328 | static char * |
1329 | mcfind(register cset *cs, register char *cp) | | 1329 | mcfind(cset *cs, char *cp) |
1330 | { | | 1330 | { |
1331 | register char *p; | | 1331 | char *p; |
1332 | | | 1332 | |
1333 | if (cs->multis == NULL) | | 1333 | if (cs->multis == NULL) |
1334 | return(NULL); | | 1334 | return(NULL); |
1335 | for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) | | 1335 | for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) |
1336 | if (strcmp(cp, p) == 0) | | 1336 | if (strcmp(cp, p) == 0) |
1337 | return(p); | | 1337 | return(p); |
1338 | return(NULL); | | 1338 | return(NULL); |
1339 | } | | 1339 | } |
1340 | #endif | | 1340 | #endif |
1341 | | | 1341 | |
1342 | /* | | 1342 | /* |
1343 | - mcinvert - invert the list of collating elements in a cset | | 1343 | - mcinvert - invert the list of collating elements in a cset |
1344 | == static void mcinvert(register struct parse *p, register cset *cs); | | 1344 | == static void mcinvert(struct parse *p, cset *cs); |
1345 | * | | 1345 | * |
1346 | * This would have to know the set of possibilities. Implementation | | 1346 | * This would have to know the set of possibilities. Implementation |
1347 | * is deferred. | | 1347 | * is deferred. |
1348 | */ | | 1348 | */ |
1349 | static void | | 1349 | static void |
1350 | mcinvert(register struct parse *p, register cset *cs) | | 1350 | mcinvert(struct parse *p, cset *cs) |
1351 | { | | 1351 | { |
1352 | assert(cs->multis == NULL); /* xxx */ | | 1352 | assert(cs->multis == NULL); /* xxx */ |
1353 | } | | 1353 | } |
1354 | | | 1354 | |
1355 | /* | | 1355 | /* |
1356 | - mccase - add case counterparts of the list of collating elements in a cset | | 1356 | - mccase - add case counterparts of the list of collating elements in a cset |
1357 | == static void mccase(register struct parse *p, register cset *cs); | | 1357 | == static void mccase(struct parse *p, cset *cs); |
1358 | * | | 1358 | * |
1359 | * This would have to know the set of possibilities. Implementation | | 1359 | * This would have to know the set of possibilities. Implementation |
1360 | * is deferred. | | 1360 | * is deferred. |
1361 | */ | | 1361 | */ |
1362 | static void | | 1362 | static void |
1363 | mccase(register struct parse *p, register cset *cs) | | 1363 | mccase(struct parse *p, cset *cs) |
1364 | { | | 1364 | { |
1365 | assert(cs->multis == NULL); /* xxx */ | | 1365 | assert(cs->multis == NULL); /* xxx */ |
1366 | } | | 1366 | } |
1367 | | | 1367 | |
1368 | #ifdef notdef | | 1368 | #ifdef notdef |
1369 | /* | | 1369 | /* |
1370 | - isinsets - is this character in any sets? | | 1370 | - isinsets - is this character in any sets? |
1371 | == static int isinsets(register struct re_guts *g, int c); | | 1371 | == static int isinsets(struct re_guts *g, int c); |
1372 | */ | | 1372 | */ |
1373 | static int /* predicate */ | | 1373 | static int /* predicate */ |
1374 | isinsets(register struct re_guts *g, int c) | | 1374 | isinsets(struct re_guts *g, int c) |
1375 | { | | 1375 | { |
1376 | register uch *col; | | 1376 | uch *col; |
1377 | register int i; | | 1377 | int i; |
1378 | register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; | | 1378 | int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; |
1379 | register unsigned uc = (unsigned char)c; | | 1379 | unsigned uc = (unsigned char)c; |
1380 | | | 1380 | |
1381 | for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) | | 1381 | for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) |
1382 | if (col[uc] != 0) | | 1382 | if (col[uc] != 0) |
1383 | return(1); | | 1383 | return(1); |
1384 | return(0); | | 1384 | return(0); |
1385 | } | | 1385 | } |
1386 | | | 1386 | |
1387 | /* | | 1387 | /* |
1388 | - samesets - are these two characters in exactly the same sets? | | 1388 | - samesets - are these two characters in exactly the same sets? |
1389 | == static int samesets(register struct re_guts *g, int c1, int c2); | | 1389 | == static int samesets(struct re_guts *g, int c1, int c2); |
1390 | */ | | 1390 | */ |
1391 | static int /* predicate */ | | 1391 | static int /* predicate */ |
1392 | samesets(register struct re_guts *g, int c1, int c2) | | 1392 | samesets(struct re_guts *g, int c1, int c2) |
1393 | { | | 1393 | { |
1394 | register uch *col; | | 1394 | uch *col; |
1395 | register int i; | | 1395 | int i; |
1396 | register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; | | 1396 | int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; |
1397 | register unsigned uc1 = (unsigned char)c1; | | 1397 | unsigned uc1 = (unsigned char)c1; |
1398 | register unsigned uc2 = (unsigned char)c2; | | 1398 | unsigned uc2 = (unsigned char)c2; |
1399 | | | 1399 | |
1400 | for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) | | 1400 | for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) |
1401 | if (col[uc1] != col[uc2]) | | 1401 | if (col[uc1] != col[uc2]) |
1402 | return(0); | | 1402 | return(0); |
1403 | return(1); | | 1403 | return(1); |
1404 | } | | 1404 | } |
1405 | #endif | | 1405 | #endif |
1406 | | | 1406 | |
1407 | /* | | 1407 | /* |
1408 | - categorize - sort out character categories | | 1408 | - categorize - sort out character categories |
1409 | == static void categorize(struct parse *p, register struct re_guts *g); | | 1409 | == static void categorize(struct parse *p, struct re_guts *g); |
1410 | */ | | 1410 | */ |
1411 | static void | | 1411 | static void |
1412 | categorize(struct parse *p, register struct re_guts *g) | | 1412 | categorize(struct parse *p, struct re_guts *g) |
1413 | { | | 1413 | { |
1414 | #ifdef notdef | | 1414 | #ifdef notdef |
1415 | register cat_t *cats = g->categories; | | 1415 | cat_t *cats = g->categories; |
1416 | register int c; | | 1416 | int c; |
1417 | register int c2; | | 1417 | int c2; |
1418 | register cat_t cat; | | 1418 | cat_t cat; |
1419 | | | 1419 | |
1420 | /* avoid making error situations worse */ | | 1420 | /* avoid making error situations worse */ |
1421 | if (p->error != 0) | | 1421 | if (p->error != 0) |
1422 | return; | | 1422 | return; |
1423 | | | 1423 | |
1424 | for (c = CHAR_MIN; c <= CHAR_MAX; c++) | | 1424 | for (c = CHAR_MIN; c <= CHAR_MAX; c++) |
1425 | if (cats[c] == 0 && isinsets(g, c)) { | | 1425 | if (cats[c] == 0 && isinsets(g, c)) { |
1426 | cat = g->ncategories++; | | 1426 | cat = g->ncategories++; |
1427 | cats[c] = cat; | | 1427 | cats[c] = cat; |
1428 | for (c2 = c+1; c2 <= CHAR_MAX; c2++) | | 1428 | for (c2 = c+1; c2 <= CHAR_MAX; c2++) |
1429 | if (cats[c2] == 0 && samesets(g, c, c2)) | | 1429 | if (cats[c2] == 0 && samesets(g, c, c2)) |
1430 | cats[c2] = cat; | | 1430 | cats[c2] = cat; |
1431 | } | | 1431 | } |
1432 | #endif | | 1432 | #endif |
1433 | } | | 1433 | } |
1434 | | | 1434 | |
1435 | /* | | 1435 | /* |
1436 | - dupl - emit a duplicate of a bunch of sops | | 1436 | - dupl - emit a duplicate of a bunch of sops |
1437 | == static sopno dupl(register struct parse *p, sopno start, sopno finish); | | 1437 | == static sopno dupl(struct parse *p, sopno start, sopno finish); |
1438 | */ | | 1438 | */ |
1439 | static sopno /* start of duplicate */ | | 1439 | static sopno /* start of duplicate */ |
1440 | dupl(register struct parse *p, sopno start, sopno finish) | | 1440 | dupl(struct parse *p, sopno start, sopno finish) |
1441 | | | 1441 | |
1442 | /* from here */ | | 1442 | /* from here */ |
1443 | /* to this less one */ | | 1443 | /* to this less one */ |
1444 | { | | 1444 | { |
1445 | register sopno ret = HERE(); | | 1445 | sopno ret = HERE(); |
1446 | register sopno len = finish - start; | | 1446 | sopno len = finish - start; |
1447 | | | 1447 | |
1448 | assert(finish >= start); | | 1448 | assert(finish >= start); |
1449 | if (len == 0) | | 1449 | if (len == 0) |
1450 | return(ret); | | 1450 | return(ret); |
1451 | if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */ | | 1451 | if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */ |
1452 | return ret; | | 1452 | return ret; |
1453 | assert(p->ssize >= p->slen + len); | | 1453 | assert(p->ssize >= p->slen + len); |
1454 | (void) memcpy((char *)(p->strip + p->slen), | | 1454 | (void) memcpy((char *)(p->strip + p->slen), |
1455 | (char *)(p->strip + start), (size_t)len*sizeof(sop)); | | 1455 | (char *)(p->strip + start), (size_t)len*sizeof(sop)); |
1456 | (void) memcpy((char *)(p->stripdata + p->slen), | | 1456 | (void) memcpy((char *)(p->stripdata + p->slen), |
1457 | (char *)(p->stripdata + start), (size_t)len*sizeof(RCHAR_T)); | | 1457 | (char *)(p->stripdata + start), (size_t)len*sizeof(RCHAR_T)); |
1458 | p->slen += len; | | 1458 | p->slen += len; |
1459 | return(ret); | | 1459 | return(ret); |
1460 | } | | 1460 | } |
1461 | | | 1461 | |
1462 | /* | | 1462 | /* |
1463 | - doemit - emit a strip operator | | 1463 | - doemit - emit a strip operator |
1464 | == static void doemit(register struct parse *p, sop op, size_t opnd); | | 1464 | == static void doemit(struct parse *p, sop op, size_t opnd); |
1465 | * | | 1465 | * |
1466 | * It might seem better to implement this as a macro with a function as | | 1466 | * It might seem better to implement this as a macro with a function as |
1467 | * hard-case backup, but it's just too big and messy unless there are | | 1467 | * hard-case backup, but it's just too big and messy unless there are |
1468 | * some changes to the data structures. Maybe later. | | 1468 | * some changes to the data structures. Maybe later. |
1469 | */ | | 1469 | */ |
1470 | static void | | 1470 | static void |
1471 | doemit(register struct parse *p, sop op, size_t opnd) | | 1471 | doemit(struct parse *p, sop op, size_t opnd) |
1472 | { | | 1472 | { |
1473 | /* avoid making error situations worse */ | | 1473 | /* avoid making error situations worse */ |
1474 | if (p->error != 0) | | 1474 | if (p->error != 0) |
1475 | return; | | 1475 | return; |
1476 | | | 1476 | |
1477 | /* deal with oversize operands ("can't happen", more or less) */ | | 1477 | /* deal with oversize operands ("can't happen", more or less) */ |
1478 | assert(opnd < 1); | | 1478 | assert(opnd < 1); |
1479 | | | 1479 | |
1480 | /* deal with undersized strip */ | | 1480 | /* deal with undersized strip */ |
1481 | if (p->slen >= p->ssize) | | 1481 | if (p->slen >= p->ssize) |
1482 | if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */ | | 1482 | if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */ |
1483 | return; | | 1483 | return; |
1484 | | | 1484 | |
1485 | /* finally, it's all reduced to the easy case */ | | 1485 | /* finally, it's all reduced to the easy case */ |
1486 | p->strip[p->slen] = op; | | 1486 | p->strip[p->slen] = op; |
1487 | p->stripdata[p->slen] = opnd; | | 1487 | p->stripdata[p->slen] = opnd; |
1488 | p->slen++; | | 1488 | p->slen++; |
1489 | } | | 1489 | } |
1490 | | | 1490 | |
1491 | /* | | 1491 | /* |
1492 | - doinsert - insert a sop into the strip | | 1492 | - doinsert - insert a sop into the strip |
1493 | == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); | | 1493 | == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos); |
1494 | */ | | 1494 | */ |
1495 | static void | | 1495 | static void |
1496 | doinsert(register struct parse *p, sop op, size_t opnd, sopno pos) | | 1496 | doinsert(struct parse *p, sop op, size_t opnd, sopno pos) |
1497 | { | | 1497 | { |
1498 | register sopno sn; | | 1498 | sopno sn; |
1499 | register sop s; | | 1499 | sop s; |
1500 | register RCHAR_T d; | | 1500 | RCHAR_T d; |
1501 | register int i; | | 1501 | int i; |
1502 | | | 1502 | |
1503 | /* avoid making error situations worse */ | | 1503 | /* avoid making error situations worse */ |
1504 | if (p->error != 0) | | 1504 | if (p->error != 0) |
1505 | return; | | 1505 | return; |
1506 | | | 1506 | |
1507 | sn = HERE(); | | 1507 | sn = HERE(); |
1508 | EMIT(op, opnd); /* do checks, ensure space */ | | 1508 | EMIT(op, opnd); /* do checks, ensure space */ |
1509 | assert(HERE() == sn+1); | | 1509 | assert(HERE() == sn+1); |
1510 | s = p->strip[sn]; | | 1510 | s = p->strip[sn]; |
1511 | d = p->stripdata[sn]; | | 1511 | d = p->stripdata[sn]; |
1512 | | | 1512 | |
1513 | /* adjust paren pointers */ | | 1513 | /* adjust paren pointers */ |
1514 | assert(pos > 0); | | 1514 | assert(pos > 0); |
| @@ -1521,118 +1521,118 @@ doinsert(register struct parse *p, sop o | | | @@ -1521,118 +1521,118 @@ doinsert(register struct parse *p, sop o |
1521 | } | | 1521 | } |
1522 | } | | 1522 | } |
1523 | | | 1523 | |
1524 | memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], | | 1524 | memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], |
1525 | (HERE()-pos-1)*sizeof(sop)); | | 1525 | (HERE()-pos-1)*sizeof(sop)); |
1526 | memmove((char *)&p->stripdata[pos+1], (char *)&p->stripdata[pos], | | 1526 | memmove((char *)&p->stripdata[pos+1], (char *)&p->stripdata[pos], |
1527 | (HERE()-pos-1)*sizeof(RCHAR_T)); | | 1527 | (HERE()-pos-1)*sizeof(RCHAR_T)); |
1528 | p->strip[pos] = s; | | 1528 | p->strip[pos] = s; |
1529 | p->stripdata[pos] = d; | | 1529 | p->stripdata[pos] = d; |
1530 | } | | 1530 | } |
1531 | | | 1531 | |
1532 | /* | | 1532 | /* |
1533 | - dofwd - complete a forward reference | | 1533 | - dofwd - complete a forward reference |
1534 | == static void dofwd(register struct parse *p, sopno pos, sop value); | | 1534 | == static void dofwd(struct parse *p, sopno pos, sop value); |
1535 | */ | | 1535 | */ |
1536 | static void | | 1536 | static void |
1537 | dofwd(register struct parse *p, register sopno pos, sop value) | | 1537 | dofwd(struct parse *p, sopno pos, sop value) |
1538 | { | | 1538 | { |
1539 | /* avoid making error situations worse */ | | 1539 | /* avoid making error situations worse */ |
1540 | if (p->error != 0) | | 1540 | if (p->error != 0) |
1541 | return; | | 1541 | return; |
1542 | | | 1542 | |
1543 | assert(value < 1); | | 1543 | assert(value < 1); |
1544 | p->stripdata[pos] = value; | | 1544 | p->stripdata[pos] = value; |
1545 | } | | 1545 | } |
1546 | | | 1546 | |
1547 | /* | | 1547 | /* |
1548 | - enlarge - enlarge the strip | | 1548 | - enlarge - enlarge the strip |
1549 | == static int enlarge(register struct parse *p, sopno size); | | 1549 | == static int enlarge(struct parse *p, sopno size); |
1550 | */ | | 1550 | */ |
1551 | static int | | 1551 | static int |
1552 | enlarge(register struct parse *p, register sopno size) | | 1552 | enlarge(struct parse *p, sopno size) |
1553 | { | | 1553 | { |
1554 | register sop *sp; | | 1554 | sop *sp; |
1555 | register RCHAR_T *dp; | | 1555 | RCHAR_T *dp; |
1556 | sopno osize; | | 1556 | sopno osize; |
1557 | | | 1557 | |
1558 | if (p->ssize >= size) | | 1558 | if (p->ssize >= size) |
1559 | return 1; | | 1559 | return 1; |
1560 | | | 1560 | |
1561 | osize = p->ssize; | | 1561 | osize = p->ssize; |
1562 | p->ssize = size; | | 1562 | p->ssize = size; |
1563 | if (MEMSIZE(p) > MEMLIMIT) | | 1563 | if (MEMSIZE(p) > MEMLIMIT) |
1564 | goto oomem; | | 1564 | goto oomem; |
1565 | sp = realloc(p->strip, p->ssize * sizeof(sop)); | | 1565 | sp = realloc(p->strip, p->ssize * sizeof(sop)); |
1566 | if (sp == NULL) | | 1566 | if (sp == NULL) |
1567 | goto oomem; | | 1567 | goto oomem; |
1568 | p->strip = sp; | | 1568 | p->strip = sp; |
1569 | dp = realloc(p->stripdata, p->ssize * sizeof(RCHAR_T)); | | 1569 | dp = realloc(p->stripdata, p->ssize * sizeof(RCHAR_T)); |
1570 | if (dp == NULL) { | | 1570 | if (dp == NULL) { |
1571 | oomem: | | 1571 | oomem: |
1572 | p->ssize = osize; | | 1572 | p->ssize = osize; |
1573 | SETERROR(REG_ESPACE); | | 1573 | SETERROR(REG_ESPACE); |
1574 | return 0; | | 1574 | return 0; |
1575 | } | | 1575 | } |
1576 | p->stripdata = dp; | | 1576 | p->stripdata = dp; |
1577 | return 1; | | 1577 | return 1; |
1578 | } | | 1578 | } |
1579 | | | 1579 | |
1580 | /* | | 1580 | /* |
1581 | - stripsnug - compact the strip | | 1581 | - stripsnug - compact the strip |
1582 | == static void stripsnug(register struct parse *p, register struct re_guts *g); | | 1582 | == static void stripsnug(struct parse *p, struct re_guts *g); |
1583 | */ | | 1583 | */ |
1584 | static void | | 1584 | static void |
1585 | stripsnug(register struct parse *p, register struct re_guts *g) | | 1585 | stripsnug(struct parse *p, struct re_guts *g) |
1586 | { | | 1586 | { |
1587 | g->nstates = p->slen; | | 1587 | g->nstates = p->slen; |
1588 | g->strip = (sop *)realloc((char *)p->strip, | | 1588 | g->strip = (sop *)realloc((char *)p->strip, |
1589 | p->slen * sizeof(sop)); | | 1589 | p->slen * sizeof(sop)); |
1590 | if (g->strip == NULL) { | | 1590 | if (g->strip == NULL) { |
1591 | SETERROR(REG_ESPACE); | | 1591 | SETERROR(REG_ESPACE); |
1592 | g->strip = p->strip; | | 1592 | g->strip = p->strip; |
1593 | } | | 1593 | } |
1594 | g->stripdata = (RCHAR_T *)realloc((char *)p->stripdata, | | 1594 | g->stripdata = (RCHAR_T *)realloc((char *)p->stripdata, |
1595 | p->slen * sizeof(RCHAR_T)); | | 1595 | p->slen * sizeof(RCHAR_T)); |
1596 | if (g->stripdata == NULL) { | | 1596 | if (g->stripdata == NULL) { |
1597 | SETERROR(REG_ESPACE); | | 1597 | SETERROR(REG_ESPACE); |
1598 | g->stripdata = p->stripdata; | | 1598 | g->stripdata = p->stripdata; |
1599 | } | | 1599 | } |
1600 | } | | 1600 | } |
1601 | | | 1601 | |
1602 | /* | | 1602 | /* |
1603 | - findmust - fill in must and mlen with longest mandatory literal string | | 1603 | - findmust - fill in must and mlen with longest mandatory literal string |
1604 | == static void findmust(register struct parse *p, register struct re_guts *g); | | 1604 | == static void findmust(struct parse *p, struct re_guts *g); |
1605 | * | | 1605 | * |
1606 | * This algorithm could do fancy things like analyzing the operands of | | | 1606 | * This algorithm could do fancy things like analyzing the operands of | |
1607 | * for common subsequences. Someday. This code is simple and finds most | | 1607 | * for common subsequences. Someday. This code is simple and finds most |
1608 | * of the interesting cases. | | 1608 | * of the interesting cases. |
1609 | * | | 1609 | * |
1610 | * Note that must and mlen got initialized during setup. | | 1610 | * Note that must and mlen got initialized during setup. |
1611 | */ | | 1611 | */ |
1612 | static void | | 1612 | static void |
1613 | findmust(struct parse *p, register struct re_guts *g) | | 1613 | findmust(struct parse *p, struct re_guts *g) |
1614 | { | | 1614 | { |
1615 | register sop *scans; | | 1615 | sop *scans; |
1616 | register RCHAR_T *scand; | | 1616 | RCHAR_T *scand; |
1617 | sop *starts = 0; | | 1617 | sop *starts = 0; |
1618 | RCHAR_T *startd = NULL; | | 1618 | RCHAR_T *startd = NULL; |
1619 | register sop *newstarts = 0; | | 1619 | sop *newstarts = 0; |
1620 | register RCHAR_T *newstartd = NULL; | | 1620 | RCHAR_T *newstartd = NULL; |
1621 | register sopno newlen; | | 1621 | sopno newlen; |
1622 | register sop s; | | 1622 | sop s; |
1623 | register RCHAR_T d; | | 1623 | RCHAR_T d; |
1624 | register RCHAR_T *cp; | | 1624 | RCHAR_T *cp; |
1625 | register sopno i; | | 1625 | sopno i; |
1626 | | | 1626 | |
1627 | /* avoid making error situations worse */ | | 1627 | /* avoid making error situations worse */ |
1628 | if (p->error != 0) | | 1628 | if (p->error != 0) |
1629 | return; | | 1629 | return; |
1630 | | | 1630 | |
1631 | /* find the longest OCHAR sequence in strip */ | | 1631 | /* find the longest OCHAR sequence in strip */ |
1632 | newlen = 0; | | 1632 | newlen = 0; |
1633 | scans = g->strip + 1; | | 1633 | scans = g->strip + 1; |
1634 | scand = g->stripdata + 1; | | 1634 | scand = g->stripdata + 1; |
1635 | do { | | 1635 | do { |
1636 | s = *scans++; | | 1636 | s = *scans++; |
1637 | d = *scand++; | | 1637 | d = *scand++; |
1638 | switch (s) { | | 1638 | switch (s) { |
| @@ -1692,35 +1692,35 @@ findmust(struct parse *p, register struc | | | @@ -1692,35 +1692,35 @@ findmust(struct parse *p, register struc |
1692 | d = *scand++; | | 1692 | d = *scand++; |
1693 | if (s == OCHAR) | | 1693 | if (s == OCHAR) |
1694 | break; | | 1694 | break; |
1695 | } | | 1695 | } |
1696 | assert(cp < g->must + g->mlen); | | 1696 | assert(cp < g->must + g->mlen); |
1697 | *cp++ = d; | | 1697 | *cp++ = d; |
1698 | } | | 1698 | } |
1699 | assert(cp == g->must + g->mlen); | | 1699 | assert(cp == g->must + g->mlen); |
1700 | *cp++ = '\0'; /* just on general principles */ | | 1700 | *cp++ = '\0'; /* just on general principles */ |
1701 | } | | 1701 | } |
1702 | | | 1702 | |
1703 | /* | | 1703 | /* |
1704 | - pluscount - count + nesting | | 1704 | - pluscount - count + nesting |
1705 | == static sopno pluscount(register struct parse *p, register struct re_guts *g); | | 1705 | == static sopno pluscount(struct parse *p, struct re_guts *g); |
1706 | */ | | 1706 | */ |
1707 | static sopno /* nesting depth */ | | 1707 | static sopno /* nesting depth */ |
1708 | pluscount(struct parse *p, register struct re_guts *g) | | 1708 | pluscount(struct parse *p, struct re_guts *g) |
1709 | { | | 1709 | { |
1710 | register sop *scan; | | 1710 | sop *scan; |
1711 | register sop s; | | 1711 | sop s; |
1712 | register sopno plusnest = 0; | | 1712 | sopno plusnest = 0; |
1713 | register sopno maxnest = 0; | | 1713 | sopno maxnest = 0; |
1714 | | | 1714 | |
1715 | if (p->error != 0) | | 1715 | if (p->error != 0) |
1716 | return(0); /* there may not be an OEND */ | | 1716 | return(0); /* there may not be an OEND */ |
1717 | | | 1717 | |
1718 | scan = g->strip + 1; | | 1718 | scan = g->strip + 1; |
1719 | do { | | 1719 | do { |
1720 | s = *scan++; | | 1720 | s = *scan++; |
1721 | switch (s) { | | 1721 | switch (s) { |
1722 | case OPLUS_: | | 1722 | case OPLUS_: |
1723 | plusnest++; | | 1723 | plusnest++; |
1724 | break; | | 1724 | break; |
1725 | case O_PLUS: | | 1725 | case O_PLUS: |
1726 | if (plusnest > maxnest) | | 1726 | if (plusnest > maxnest) |