Make CHAR_T/RCHAR_T 32-bit clean. nvi-1.81 restricts wchar_t to 24 bits, so under some locales (e.g. zh_CN.GB18030) search/regex doesn't work and sometimes dumps core (because of a negative wchar_t value).
diff -r1.3 -r1.4 src/dist/nvi/common/key.c
(tnozaki)
--- src/dist/nvi/common/Attic/key.c 2008/12/05 22:51:42 1.3
+++ src/dist/nvi/common/Attic/key.c 2009/01/02 00:32:11 1.4
@@ -1,14 +1,14 @@ | @@ -1,14 +1,14 @@ | |||
1 | /* $NetBSD: key.c,v 1.3 2008/12/05 22:51:42 christos Exp $ */ | 1 | /* $NetBSD: key.c,v 1.4 2009/01/02 00:32:11 tnozaki Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 1991, 1993, 1994 | 4 | * Copyright (c) 1991, 1993, 1994 | |
5 | * The Regents of the University of California. All rights reserved. | 5 | * The Regents of the University of California. All rights reserved. | |
6 | * Copyright (c) 1991, 1993, 1994, 1995, 1996 | 6 | * Copyright (c) 1991, 1993, 1994, 1995, 1996 | |
7 | * Keith Bostic. All rights reserved. | 7 | * Keith Bostic. All rights reserved. | |
8 | * | 8 | * | |
9 | * See the LICENSE file for redistribution information. | 9 | * See the LICENSE file for redistribution information. | |
10 | */ | 10 | */ | |
11 | 11 | |||
12 | #include "config.h" | 12 | #include "config.h" | |
13 | 13 | |||
14 | #ifndef lint | 14 | #ifndef lint | |
@@ -618,27 +618,28 @@ newmap: evp = &wp->i_event[wp->i_next]; | @@ -618,27 +618,28 @@ newmap: evp = &wp->i_event[wp->i_next]; | |||
618 | 618 | |||
619 | /* | 619 | /* | |
620 | * If the key isn't mappable because: | 620 | * If the key isn't mappable because: | |
621 | * | 621 | * | |
622 | * + ... the timeout has expired | 622 | * + ... the timeout has expired | |
623 | * + ... it's not a mappable key | 623 | * + ... it's not a mappable key | |
624 | * + ... neither the command or input map flags are set | 624 | * + ... neither the command or input map flags are set | |
625 | * + ... there are no maps that can apply to it | 625 | * + ... there are no maps that can apply to it | |
626 | * | 626 | * | |
627 | * return it forthwith. | 627 | * return it forthwith. | |
628 | */ | 628 | */ | |
629 | if (istimeout || FL_ISSET(evp->e_flags, CH_NOMAP) || | 629 | if (istimeout || FL_ISSET(evp->e_flags, CH_NOMAP) || | |
630 | !LF_ISSET(EC_MAPCOMMAND | EC_MAPINPUT) || | 630 | !LF_ISSET(EC_MAPCOMMAND | EC_MAPINPUT) || | |
631 | (evp->e_c < MAX_BIT_SEQ && !bit_test(gp->seqb, evp->e_c))) | 631 | ((UCHAR_T)evp->e_c < MAX_BIT_SEQ && | |
632 | !bit_test(gp->seqb, (UCHAR_T)evp->e_c))) | |||
632 | goto nomap; | 633 | goto nomap; | |
633 | 634 | |||
634 | /* Search the map. */ | 635 | /* Search the map. */ | |
635 | qp = seq_find(sp, NULL, evp, NULL, wp->i_cnt, | 636 | qp = seq_find(sp, NULL, evp, NULL, wp->i_cnt, | |
636 | LF_ISSET(EC_MAPCOMMAND) ? SEQ_COMMAND : SEQ_INPUT, &ispartial); | 637 | LF_ISSET(EC_MAPCOMMAND) ? SEQ_COMMAND : SEQ_INPUT, &ispartial); | |
637 | 638 | |||
638 | /* | 639 | /* | |
639 | * If get a partial match, get more characters and retry the map. | 640 | * If get a partial match, get more characters and retry the map. | |
640 | * If time out without further characters, return the characters | 641 | * If time out without further characters, return the characters | |
641 | * unmapped. | 642 | * unmapped. | |
642 | * | 643 | * | |
643 | * !!! | 644 | * !!! | |
644 | * <escape> characters are a problem. Cursor keys start with <escape> | 645 | * <escape> characters are a problem. Cursor keys start with <escape> |
--- src/dist/nvi/common/Attic/multibyte.h 2008/05/18 14:29:48 1.1.1.2
+++ src/dist/nvi/common/Attic/multibyte.h 2009/01/02 00:32:11 1.2
@@ -1,47 +1,50 @@ | @@ -1,47 +1,50 @@ | |||
1 | /* $NetBSD: multibyte.h,v 1.1.1.2 2008/05/18 14:29:48 aymeric Exp $ */ | 1 | /* $NetBSD: multibyte.h,v 1.2 2009/01/02 00:32:11 tnozaki Exp $ */ | |
2 | 2 | |||
3 | #ifndef MULTIBYTE_H | 3 | #ifndef MULTIBYTE_H | |
4 | #define MULTIBYTE_H | 4 | #define MULTIBYTE_H | |
5 | 5 | |||
6 | #ifdef USE_WIDECHAR | 6 | #ifdef USE_WIDECHAR | |
7 | #include <wchar.h> | 7 | #include <wchar.h> | |
8 | #include <wctype.h> | 8 | #include <wctype.h> | |
9 | 9 | |||
10 | typedef wchar_t RCHAR_T; | 10 | typedef wchar_t RCHAR_T; | |
11 | #define RCHAR_T_MAX ((1 << 24)-1) | 11 | typedef wchar_t CHAR_T; | |
12 | typedef wchar_t CHAR_T; | 12 | #if defined(__NetBSD__) | |
13 | #define MAX_CHAR_T 0xffffff /* XXXX */ | 13 | #define RCHAR_T_MAX 0xffffffff | |
14 | typedef u_int UCHAR_T; | 14 | #define MAX_CHAR_T 0xffffffff | |
15 | #define RCHAR_BIT 24 | 15 | #else | |
16 | #define RCHAR_T_MAX WCHAR_MAX | |||
17 | #define MAX_CHAR_T WCHAR_MAX | |||
18 | #endif | |||
19 | typedef u_int UCHAR_T; | |||
16 | 20 | |||
17 | #define STRLEN wcslen | 21 | #define STRLEN wcslen | |
18 | #define STRTOL wcstol | 22 | #define STRTOL wcstol | |
19 | #define STRTOUL wcstoul | 23 | #define STRTOUL wcstoul | |
20 | #define SPRINTF swprintf | 24 | #define SPRINTF swprintf | |
21 | #define STRCMP wcscmp | 25 | #define STRCMP wcscmp | |
22 | #define STRPBRK wcspbrk | 26 | #define STRPBRK wcspbrk | |
23 | #define TOUPPER towupper | 27 | #define TOUPPER towupper | |
24 | #define STRSET wmemset | 28 | #define STRSET wmemset | |
25 | 29 | |||
26 | #define L(ch) L ## ch | 30 | #define L(ch) L ## ch | |
27 | 31 | |||
28 | #else | 32 | #else | |
29 | typedef char RCHAR_T; | 33 | typedef char RCHAR_T; | |
30 | #define RCHAR_T_MAX CHAR_MAX | 34 | #define RCHAR_T_MAX CHAR_MAX | |
31 | typedef u_char CHAR_T; | 35 | typedef u_char CHAR_T; | |
32 | #define MAX_CHAR_T 0xff | 36 | #define MAX_CHAR_T 0xff | |
33 | typedef u_char UCHAR_T; | 37 | typedef u_char UCHAR_T; | |
34 | #define RCHAR_BIT CHAR_BIT | |||
35 | 38 | |||
36 | #define STRLEN strlen | 39 | #define STRLEN strlen | |
37 | #define STRTOL strtol | 40 | #define STRTOL strtol | |
38 | #define STRTOUL strtoul | 41 | #define STRTOUL strtoul | |
39 | #define SPRINTF snprintf | 42 | #define SPRINTF snprintf | |
40 | #define STRCMP strcmp | 43 | #define STRCMP strcmp | |
41 | #define STRPBRK strpbrk | 44 | #define STRPBRK strpbrk | |
42 | #define TOUPPER toupper | 45 | #define TOUPPER toupper | |
43 | #define STRSET memset | 46 | #define STRSET memset | |
44 | 47 | |||
45 | #define L(ch) ch | 48 | #define L(ch) ch | |
46 | 49 | |||
47 | #endif | 50 | #endif |
--- src/dist/nvi/regex/Attic/engine.c 2008/12/05 22:51:43 1.2
+++ src/dist/nvi/regex/Attic/engine.c 2009/01/02 00:32:11 1.3
@@ -1,14 +1,14 @@ | @@ -1,14 +1,14 @@ | |||
1 | /* $NetBSD: engine.c,v 1.2 2008/12/05 22:51:43 christos Exp $ */ | 1 | /* $NetBSD: engine.c,v 1.3 2009/01/02 00:32:11 tnozaki Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 1992, 1993, 1994 Henry Spencer. | 4 | * Copyright (c) 1992, 1993, 1994 Henry Spencer. | |
5 | * Copyright (c) 1992, 1993, 1994 | 5 | * Copyright (c) 1992, 1993, 1994 | |
6 | * The Regents of the University of California. All rights reserved. | 6 | * The Regents of the University of California. All rights reserved. | |
7 | * | 7 | * | |
8 | * This code is derived from software contributed to Berkeley by | 8 | * This code is derived from software contributed to Berkeley by | |
9 | * Henry Spencer of the University of Toronto. | 9 | * Henry Spencer of the University of Toronto. | |
10 | * | 10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | 11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | 12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | 13 | * are met: | |
14 | * 1. Redistributions of source code must retain the above copyright | 14 | * 1. Redistributions of source code must retain the above copyright | |
@@ -320,40 +320,40 @@ sopno stopst; | @@ -320,40 +320,40 @@ sopno stopst; | |||
320 | register RCHAR_T *tail; /* string unmatched by rest of RE */ | 320 | register RCHAR_T *tail; /* string unmatched by rest of RE */ | |
321 | register sopno ssub; /* start sop of subsubRE */ | 321 | register sopno ssub; /* start sop of subsubRE */ | |
322 | register sopno esub; /* end sop of subsubRE */ | 322 | register sopno esub; /* end sop of subsubRE */ | |
323 | register RCHAR_T *ssp; /* start of string matched by subsubRE */ | 323 | register RCHAR_T *ssp; /* start of string matched by subsubRE */ | |
324 | register RCHAR_T *sep; /* end of string matched by subsubRE */ | 324 | register RCHAR_T *sep; /* end of string matched by subsubRE */ | |
325 | register RCHAR_T *oldssp; /* previous ssp */ | 325 | register RCHAR_T *oldssp; /* previous ssp */ | |
326 | register RCHAR_T *dp; | 326 | register RCHAR_T *dp; | |
327 | 327 | |||
328 | AT("diss", start, stop, startst, stopst); | 328 | AT("diss", start, stop, startst, stopst); | |
329 | sp = start; | 329 | sp = start; | |
330 | for (ss = startst; ss < stopst; ss = es) { | 330 | for (ss = startst; ss < stopst; ss = es) { | |
331 | /* identify end of subRE */ | 331 | /* identify end of subRE */ | |
332 | es = ss; | 332 | es = ss; | |
333 | switch (OP(m->g->strip[es])) { | 333 | switch (m->g->strip[es]) { | |
334 | case OPLUS_: | 334 | case OPLUS_: | |
335 | case OQUEST_: | 335 | case OQUEST_: | |
336 | es += OPND(m->g->strip[es]); | 336 | es += m->g->stripdata[es]; | |
337 | break; | 337 | break; | |
338 | case OCH_: | 338 | case OCH_: | |
339 | while (OP(m->g->strip[es]) != O_CH) | 339 | while (m->g->strip[es] != O_CH) | |
340 | es += OPND(m->g->strip[es]); | 340 | es += m->g->stripdata[es]; | |
341 | break; | 341 | break; | |
342 | } | 342 | } | |
343 | es++; | 343 | es++; | |
344 | 344 | |||
345 | /* figure out what it matched */ | 345 | /* figure out what it matched */ | |
346 | switch (OP(m->g->strip[ss])) { | 346 | switch (m->g->strip[ss]) { | |
347 | case OEND: | 347 | case OEND: | |
348 | assert(nope); | 348 | assert(nope); | |
349 | break; | 349 | break; | |
350 | case OCHAR: | 350 | case OCHAR: | |
351 | sp++; | 351 | sp++; | |
352 | break; | 352 | break; | |
353 | case OBOL: | 353 | case OBOL: | |
354 | case OEOL: | 354 | case OEOL: | |
355 | case OBOW: | 355 | case OBOW: | |
356 | case OEOW: | 356 | case OEOW: | |
357 | break; | 357 | break; | |
358 | case OANY: | 358 | case OANY: | |
359 | case OANYOF: | 359 | case OANYOF: | |
@@ -429,60 +429,60 @@ sopno stopst; | @@ -429,60 +429,60 @@ sopno stopst; | |||
429 | for (;;) { | 429 | for (;;) { | |
430 | /* how long could this one be? */ | 430 | /* how long could this one be? */ | |
431 | rest = slow(m, sp, stp, ss, es); | 431 | rest = slow(m, sp, stp, ss, es); | |
432 | assert(rest != NULL); /* it did match */ | 432 | assert(rest != NULL); /* it did match */ | |
433 | /* could the rest match the rest? */ | 433 | /* could the rest match the rest? */ | |
434 | tail = slow(m, rest, stop, es, stopst); | 434 | tail = slow(m, rest, stop, es, stopst); | |
435 | if (tail == stop) | 435 | if (tail == stop) | |
436 | break; /* yes! */ | 436 | break; /* yes! */ | |
437 | /* no -- try a shorter match for this one */ | 437 | /* no -- try a shorter match for this one */ | |
438 | stp = rest - 1; | 438 | stp = rest - 1; | |
439 | assert(stp >= sp); /* it did work */ | 439 | assert(stp >= sp); /* it did work */ | |
440 | } | 440 | } | |
441 | ssub = ss + 1; | 441 | ssub = ss + 1; | |
442 | esub = ss + OPND(m->g->strip[ss]) - 1; | 442 | esub = ss + m->g->stripdata[ss] - 1; | |
443 | assert(OP(m->g->strip[esub]) == OOR1); | 443 | assert(m->g->strip[esub] == OOR1); | |
444 | for (;;) { /* find first matching branch */ | 444 | for (;;) { /* find first matching branch */ | |
445 | if (slow(m, sp, rest, ssub, esub) == rest) | 445 | if (slow(m, sp, rest, ssub, esub) == rest) | |
446 | break; /* it matched all of it */ | 446 | break; /* it matched all of it */ | |
447 | /* that one missed, try next one */ | 447 | /* that one missed, try next one */ | |
448 | assert(OP(m->g->strip[esub]) == OOR1); | 448 | assert(m->g->strip[esub] == OOR1); | |
449 | esub++; | 449 | esub++; | |
450 | assert(OP(m->g->strip[esub]) == OOR2); | 450 | assert(m->g->strip[esub] == OOR2); | |
451 | ssub = esub + 1; | 451 | ssub = esub + 1; | |
452 | esub += OPND(m->g->strip[esub]); | 452 | esub += m->g->stripdata[esub]; | |
453 | if (OP(m->g->strip[esub]) == OOR2) | 453 | if (m->g->strip[esub] == OOR2) | |
454 | esub--; | 454 | esub--; | |
455 | else | 455 | else | |
456 | assert(OP(m->g->strip[esub]) == O_CH); | 456 | assert(m->g->strip[esub] == O_CH); | |
457 | } | 457 | } | |
458 | dp = dissect(m, sp, rest, ssub, esub); | 458 | dp = dissect(m, sp, rest, ssub, esub); | |
459 | assert(dp == rest); | 459 | assert(dp == rest); | |
460 | sp = rest; | 460 | sp = rest; | |
461 | break; | 461 | break; | |
462 | case O_PLUS: | 462 | case O_PLUS: | |
463 | case O_QUEST: | 463 | case O_QUEST: | |
464 | case OOR1: | 464 | case OOR1: | |
465 | case OOR2: | 465 | case OOR2: | |
466 | case O_CH: | 466 | case O_CH: | |
467 | assert(nope); | 467 | assert(nope); | |
468 | break; | 468 | break; | |
469 | case OLPAREN: | 469 | case OLPAREN: | |
470 | i = OPND(m->g->strip[ss]); | 470 | i = m->g->stripdata[ss]; | |
471 | assert(0 < i && i <= m->g->nsub); | 471 | assert(0 < i && i <= m->g->nsub); | |
472 | m->pmatch[i].rm_so = sp - m->offp; | 472 | m->pmatch[i].rm_so = sp - m->offp; | |
473 | break; | 473 | break; | |
474 | case ORPAREN: | 474 | case ORPAREN: | |
475 | i = OPND(m->g->strip[ss]); | 475 | i = m->g->stripdata[ss]; | |
476 | assert(0 < i && i <= m->g->nsub); | 476 | assert(0 < i && i <= m->g->nsub); | |
477 | m->pmatch[i].rm_eo = sp - m->offp; | 477 | m->pmatch[i].rm_eo = sp - m->offp; | |
478 | break; | 478 | break; | |
479 | default: /* uh oh */ | 479 | default: /* uh oh */ | |
480 | assert(nope); | 480 | assert(nope); | |
481 | break; | 481 | break; | |
482 | } | 482 | } | |
483 | } | 483 | } | |
484 | 484 | |||
485 | assert(sp == stop); | 485 | assert(sp == stop); | |
486 | return(sp); | 486 | return(sp); | |
487 | } | 487 | } | |
488 | 488 | |||
@@ -500,47 +500,50 @@ sopno startst; | @@ -500,47 +500,50 @@ sopno startst; | |||
500 | sopno stopst; | 500 | sopno stopst; | |
501 | sopno lev; /* PLUS nesting level */ | 501 | sopno lev; /* PLUS nesting level */ | |
502 | { | 502 | { | |
503 | register int i; | 503 | register int i; | |
504 | register sopno ss; /* start sop of current subRE */ | 504 | register sopno ss; /* start sop of current subRE */ | |
505 | register RCHAR_T *sp; /* start of string matched by it */ | 505 | register RCHAR_T *sp; /* start of string matched by it */ | |
506 | register sopno ssub; /* start sop of subsubRE */ | 506 | register sopno ssub; /* start sop of subsubRE */ | |
507 | register sopno esub; /* end sop of subsubRE */ | 507 | register sopno esub; /* end sop of subsubRE */ | |
508 | register RCHAR_T *ssp; /* start of string matched by subsubRE */ | 508 | register RCHAR_T *ssp; /* start of string matched by subsubRE */ | |
509 | register RCHAR_T *dp; | 509 | register RCHAR_T *dp; | |
510 | register size_t len; | 510 | register size_t len; | |
511 | register int hard; | 511 | register int hard; | |
512 | register sop s; | 512 | register sop s; | |
513 | register RCHAR_T d; | |||
513 | register regoff_t offsave; | 514 | register regoff_t offsave; | |
514 | register cset *cs; | 515 | register cset *cs; | |
515 | 516 | |||
516 | AT("back", start, stop, startst, stopst); | 517 | AT("back", start, stop, startst, stopst); | |
517 | sp = start; | 518 | sp = start; | |
518 | 519 | |||
519 | /* get as far as we can with easy stuff */ | 520 | /* get as far as we can with easy stuff */ | |
520 | hard = 0; | 521 | hard = 0; | |
521 | for (ss = startst; !hard && ss < stopst; ss++) | 522 | for (ss = startst; !hard && ss < stopst; ss++) { | |
522 | switch (OP(s = m->g->strip[ss])) { | 523 | s = m->g->strip[ss]; | |
524 | d = m->g->stripdata[ss]; | |||
525 | switch (s) { | |||
523 | case OCHAR: | 526 | case OCHAR: | |
524 | if (sp == stop || *sp++ != (RCHAR_T)OPND(s)) | 527 | if (sp == stop || *sp++ != d) | |
525 | return(NULL); | 528 | return(NULL); | |
526 | break; | 529 | break; | |
527 | case OANY: | 530 | case OANY: | |
528 | if (sp == stop) | 531 | if (sp == stop) | |
529 | return(NULL); | 532 | return(NULL); | |
530 | sp++; | 533 | sp++; | |
531 | break; | 534 | break; | |
532 | case OANYOF: | 535 | case OANYOF: | |
533 | cs = &m->g->sets[OPND(s)]; | 536 | cs = &m->g->sets[d]; | |
534 | if (sp == stop || !CHIN(cs, *sp++)) | 537 | if (sp == stop || !CHIN(cs, *sp++)) | |
535 | return(NULL); | 538 | return(NULL); | |
536 | break; | 539 | break; | |
537 | case OBOL: | 540 | case OBOL: | |
538 | if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) || | 541 | if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) || | |
539 | (sp < m->endp && *(sp-1) == '\n' && | 542 | (sp < m->endp && *(sp-1) == '\n' && | |
540 | (m->g->cflags&REG_NEWLINE)) ) | 543 | (m->g->cflags&REG_NEWLINE)) ) | |
541 | { /* yes */ } | 544 | { /* yes */ } | |
542 | else | 545 | else | |
543 | return(NULL); | 546 | return(NULL); | |
544 | break; | 547 | break; | |
545 | case OEOL: | 548 | case OEOL: | |
546 | if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) || | 549 | if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) || |
@@ -566,121 +569,126 @@ sopno lev; /* PLUS nesting level */ | @@ -566,121 +569,126 @@ sopno lev; /* PLUS nesting level */ | |||
566 | (sp < m->endp && *sp == '\n' && | 569 | (sp < m->endp && *sp == '\n' && | |
567 | (m->g->cflags&REG_NEWLINE)) || | 570 | (m->g->cflags&REG_NEWLINE)) || | |
568 | (sp < m->endp && !ISWORD(*sp)) ) && | 571 | (sp < m->endp && !ISWORD(*sp)) ) && | |
569 | (sp > m->beginp && ISWORD(*(sp-1))) ) | 572 | (sp > m->beginp && ISWORD(*(sp-1))) ) | |
570 | { /* yes */ } | 573 | { /* yes */ } | |
571 | else | 574 | else | |
572 | return(NULL); | 575 | return(NULL); | |
573 | break; | 576 | break; | |
574 | case O_QUEST: | 577 | case O_QUEST: | |
575 | break; | 578 | break; | |
576 | case OOR1: /* matches null but needs to skip */ | 579 | case OOR1: /* matches null but needs to skip */ | |
577 | ss++; | 580 | ss++; | |
578 | s = m->g->strip[ss]; | 581 | s = m->g->strip[ss]; | |
582 | d = m->g->stripdata[ss]; | |||
579 | do { | 583 | do { | |
580 | assert(OP(s) == OOR2); | 584 | assert(s == OOR2); | |
581 | ss += OPND(s); | 585 | ss += d; | |
582 | } while (OP(s = m->g->strip[ss]) != O_CH); | 586 | s = m->g->strip[ss]; | |
587 | d = m->g->stripdata[ss]; | |||
588 | } while (s != O_CH); | |||
583 | /* note that the ss++ gets us past the O_CH */ | 589 | /* note that the ss++ gets us past the O_CH */ | |
584 | break; | 590 | break; | |
585 | default: /* have to make a choice */ | 591 | default: /* have to make a choice */ | |
586 | hard = 1; | 592 | hard = 1; | |
587 | break; | 593 | break; | |
588 | } | 594 | } | |
595 | } | |||
589 | if (!hard) { /* that was it! */ | 596 | if (!hard) { /* that was it! */ | |
590 | if (sp != stop) | 597 | if (sp != stop) | |
591 | return(NULL); | 598 | return(NULL); | |
592 | return(sp); | 599 | return(sp); | |
593 | } | 600 | } | |
594 | ss--; /* adjust for the for's final increment */ | 601 | ss--; /* adjust for the for's final increment */ | |
595 | 602 | |||
596 | /* the hard stuff */ | 603 | /* the hard stuff */ | |
597 | AT("hard", sp, stop, ss, stopst); | 604 | AT("hard", sp, stop, ss, stopst); | |
598 | s = m->g->strip[ss]; | 605 | s = m->g->strip[ss]; | |
599 | switch (OP(s)) { | 606 | d = m->g->stripdata[ss]; | |
607 | switch (s) { | |||
600 | case OBACK_: /* the vilest depths */ | 608 | case OBACK_: /* the vilest depths */ | |
601 | i = OPND(s); | 609 | i = d; | |
602 | assert(0 < i && i <= m->g->nsub); | 610 | assert(0 < i && i <= m->g->nsub); | |
603 | if (m->pmatch[i].rm_eo == -1) | 611 | if (m->pmatch[i].rm_eo == -1) | |
604 | return(NULL); | 612 | return(NULL); | |
605 | assert(m->pmatch[i].rm_so != -1); | 613 | assert(m->pmatch[i].rm_so != -1); | |
606 | len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; | 614 | len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; | |
607 | assert(stop - m->beginp >= len); | 615 | assert(stop - m->beginp >= len); | |
608 | if (sp > stop - len) | 616 | if (sp > stop - len) | |
609 | return(NULL); /* not enough left to match */ | 617 | return(NULL); /* not enough left to match */ | |
610 | ssp = m->offp + m->pmatch[i].rm_so; | 618 | ssp = m->offp + m->pmatch[i].rm_so; | |
611 | if (memcmp(sp, ssp, len) != 0) | 619 | if (memcmp(sp, ssp, len) != 0) | |
612 | return(NULL); | 620 | return(NULL); | |
613 | while (m->g->strip[ss] != SOP(O_BACK, i)) | 621 | while (m->g->strip[ss] != O_BACK || m->g->stripdata[ss] != i) | |
614 | ss++; | 622 | ss++; | |
615 | return(backref(m, sp+len, stop, ss+1, stopst, lev)); | 623 | return(backref(m, sp+len, stop, ss+1, stopst, lev)); | |
616 | break; | 624 | break; | |
617 | case OQUEST_: /* to null or not */ | 625 | case OQUEST_: /* to null or not */ | |
618 | dp = backref(m, sp, stop, ss+1, stopst, lev); | 626 | dp = backref(m, sp, stop, ss+1, stopst, lev); | |
619 | if (dp != NULL) | 627 | if (dp != NULL) | |
620 | return(dp); /* not */ | 628 | return(dp); /* not */ | |
621 | return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); | 629 | return(backref(m, sp, stop, ss+d+1, stopst, lev)); | |
622 | break; | 630 | break; | |
623 | case OPLUS_: | 631 | case OPLUS_: | |
624 | assert(m->lastpos != NULL); | 632 | assert(m->lastpos != NULL); | |
625 | assert(lev+1 <= m->g->nplus); | 633 | assert(lev+1 <= m->g->nplus); | |
626 | m->lastpos[lev+1] = sp; | 634 | m->lastpos[lev+1] = sp; | |
627 | return(backref(m, sp, stop, ss+1, stopst, lev+1)); | 635 | return(backref(m, sp, stop, ss+1, stopst, lev+1)); | |
628 | break; | 636 | break; | |
629 | case O_PLUS: | 637 | case O_PLUS: | |
630 | if (sp == m->lastpos[lev]) /* last pass matched null */ | 638 | if (sp == m->lastpos[lev]) /* last pass matched null */ | |
631 | return(backref(m, sp, stop, ss+1, stopst, lev-1)); | 639 | return(backref(m, sp, stop, ss+1, stopst, lev-1)); | |
632 | /* try another pass */ | 640 | /* try another pass */ | |
633 | m->lastpos[lev] = sp; | 641 | m->lastpos[lev] = sp; | |
634 | dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); | 642 | dp = backref(m, sp, stop, ss-d+1, stopst, lev); | |
635 | if (dp == NULL) | 643 | if (dp == NULL) | |
636 | return(backref(m, sp, stop, ss+1, stopst, lev-1)); | 644 | return(backref(m, sp, stop, ss+1, stopst, lev-1)); | |
637 | else | 645 | else | |
638 | return(dp); | 646 | return(dp); | |
639 | break; | 647 | break; | |
640 | case OCH_: /* find the right one, if any */ | 648 | case OCH_: /* find the right one, if any */ | |
641 | ssub = ss + 1; | 649 | ssub = ss + 1; | |
642 | esub = ss + OPND(s) - 1; | 650 | esub = ss + d - 1; | |
643 | assert(OP(m->g->strip[esub]) == OOR1); | 651 | assert(m->g->strip[esub] == OOR1); | |
644 | for (;;) { /* find first matching branch */ | 652 | for (;;) { /* find first matching branch */ | |
645 | dp = backref(m, sp, stop, ssub, esub, lev); | 653 | dp = backref(m, sp, stop, ssub, esub, lev); | |
646 | if (dp != NULL) | 654 | if (dp != NULL) | |
647 | return(dp); | 655 | return(dp); | |
648 | /* that one missed, try next one */ | 656 | /* that one missed, try next one */ | |
649 | if (OP(m->g->strip[esub]) == O_CH) | 657 | if (m->g->strip[esub] == O_CH) | |
650 | return(NULL); /* there is none */ | 658 | return(NULL); /* there is none */ | |
651 | esub++; | 659 | esub++; | |
652 | assert(OP(m->g->strip[esub]) == OOR2); | 660 | assert(m->g->strip[esub] == OOR2); | |
653 | ssub = esub + 1; | 661 | ssub = esub + 1; | |
654 | esub += OPND(m->g->strip[esub]); | 662 | esub += m->g->stripdata[esub]; | |
655 | if (OP(m->g->strip[esub]) == OOR2) | 663 | if (m->g->strip[esub] == OOR2) | |
656 | esub--; | 664 | esub--; | |
657 | else | 665 | else | |
658 | assert(OP(m->g->strip[esub]) == O_CH); | 666 | assert(m->g->strip[esub] == O_CH); | |
659 | } | 667 | } | |
660 | break; | 668 | break; | |
661 | case OLPAREN: /* must undo assignment if rest fails */ | 669 | case OLPAREN: /* must undo assignment if rest fails */ | |
662 | i = OPND(s); | 670 | i = d; | |
663 | assert(0 < i && i <= m->g->nsub); | 671 | assert(0 < i && i <= m->g->nsub); | |
664 | offsave = m->pmatch[i].rm_so; | 672 | offsave = m->pmatch[i].rm_so; | |
665 | m->pmatch[i].rm_so = sp - m->offp; | 673 | m->pmatch[i].rm_so = sp - m->offp; | |
666 | dp = backref(m, sp, stop, ss+1, stopst, lev); | 674 | dp = backref(m, sp, stop, ss+1, stopst, lev); | |
667 | if (dp != NULL) | 675 | if (dp != NULL) | |
668 | return(dp); | 676 | return(dp); | |
669 | m->pmatch[i].rm_so = offsave; | 677 | m->pmatch[i].rm_so = offsave; | |
670 | return(NULL); | 678 | return(NULL); | |
671 | break; | 679 | break; | |
672 | case ORPAREN: /* must undo assignment if rest fails */ | 680 | case ORPAREN: /* must undo assignment if rest fails */ | |
673 | i = OPND(s); | 681 | i = d; | |
674 | assert(0 < i && i <= m->g->nsub); | 682 | assert(0 < i && i <= m->g->nsub); | |
675 | offsave = m->pmatch[i].rm_eo; | 683 | offsave = m->pmatch[i].rm_eo; | |
676 | m->pmatch[i].rm_eo = sp - m->offp; | 684 | m->pmatch[i].rm_eo = sp - m->offp; | |
677 | dp = backref(m, sp, stop, ss+1, stopst, lev); | 685 | dp = backref(m, sp, stop, ss+1, stopst, lev); | |
678 | if (dp != NULL) | 686 | if (dp != NULL) | |
679 | return(dp); | 687 | return(dp); | |
680 | m->pmatch[i].rm_eo = offsave; | 688 | m->pmatch[i].rm_eo = offsave; | |
681 | return(NULL); | 689 | return(NULL); | |
682 | break; | 690 | break; | |
683 | default: /* uh oh */ | 691 | default: /* uh oh */ | |
684 | assert(nope); | 692 | assert(nope); | |
685 | break; | 693 | break; | |
686 | } | 694 | } | |
@@ -884,115 +892,120 @@ sopno stopst; | @@ -884,115 +892,120 @@ sopno stopst; | |||
884 | == #define NNONCHAR (CODEMAX-CHAR_MAX) | 892 | == #define NNONCHAR (CODEMAX-CHAR_MAX) | |
885 | */ | 893 | */ | |
886 | static states | 894 | static states | |
887 | step(g, start, stop, bef, ch, aft) | 895 | step(g, start, stop, bef, ch, aft) | |
888 | register struct re_guts *g; | 896 | register struct re_guts *g; | |
889 | sopno start; /* start state within strip */ | 897 | sopno start; /* start state within strip */ | |
890 | sopno stop; /* state after stop state within strip */ | 898 | sopno stop; /* state after stop state within strip */ | |
891 | register states bef; /* states reachable before */ | 899 | register states bef; /* states reachable before */ | |
892 | int ch; /* character or NONCHAR code */ | 900 | int ch; /* character or NONCHAR code */ | |
893 | register states aft; /* states already known reachable after */ | 901 | register states aft; /* states already known reachable after */ | |
894 | { | 902 | { | |
895 | register cset *cs; | 903 | register cset *cs; | |
896 | register sop s; | 904 | register sop s; | |
905 | register RCHAR_T d; | |||
897 | register sopno pc; | 906 | register sopno pc; | |
898 | register onestate here; /* note, macros know this name */ | 907 | register onestate here; /* note, macros know this name */ | |
899 | register sopno look; | 908 | register sopno look; | |
900 | register int i; | 909 | register int i; | |
901 | 910 | |||
902 | for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { | 911 | for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { | |
903 | s = g->strip[pc]; | 912 | s = g->strip[pc]; | |
904 | switch (OP(s)) { | 913 | d = g->stripdata[pc]; | |
914 | switch (s) { | |||
905 | case OEND: | 915 | case OEND: | |
906 | assert(pc == stop-1); | 916 | assert(pc == stop-1); | |
907 | break; | 917 | break; | |
908 | case OCHAR: | 918 | case OCHAR: | |
909 | /* only characters can match */ | 919 | /* only characters can match */ | |
910 | assert(!NONCHAR(ch) || ch != (RCHAR_T)OPND(s)); | 920 | assert(!NONCHAR(ch) || ch != d); | |
911 | if (ch == (RCHAR_T)OPND(s)) | 921 | if (ch == d) | |
912 | FWD(aft, bef, 1); | 922 | FWD(aft, bef, 1); | |
913 | break; | 923 | break; | |
914 | case OBOL: | 924 | case OBOL: | |
915 | if (ch == BOL || ch == BOLEOL) | 925 | if (ch == BOL || ch == BOLEOL) | |
916 | FWD(aft, bef, 1); | 926 | FWD(aft, bef, 1); | |
917 | break; | 927 | break; | |
918 | case OEOL: | 928 | case OEOL: | |
919 | if (ch == EOL || ch == BOLEOL) | 929 | if (ch == EOL || ch == BOLEOL) | |
920 | FWD(aft, bef, 1); | 930 | FWD(aft, bef, 1); | |
921 | break; | 931 | break; | |
922 | case OBOW: | 932 | case OBOW: | |
923 | if (ch == BOW) | 933 | if (ch == BOW) | |
924 | FWD(aft, bef, 1); | 934 | FWD(aft, bef, 1); | |
925 | break; | 935 | break; | |
926 | case OEOW: | 936 | case OEOW: | |
927 | if (ch == EOW) | 937 | if (ch == EOW) | |
928 | FWD(aft, bef, 1); | 938 | FWD(aft, bef, 1); | |
929 | break; | 939 | break; | |
930 | case OANY: | 940 | case OANY: | |
931 | if (!NONCHAR(ch)) | 941 | if (!NONCHAR(ch)) | |
932 | FWD(aft, bef, 1); | 942 | FWD(aft, bef, 1); | |
933 | break; | 943 | break; | |
934 | case OANYOF: | 944 | case OANYOF: | |
935 | cs = &g->sets[OPND(s)]; | 945 | cs = &g->sets[d]; | |
936 | if (!NONCHAR(ch) && CHIN(cs, ch)) | 946 | if (!NONCHAR(ch) && CHIN(cs, ch)) | |
937 | FWD(aft, bef, 1); | 947 | FWD(aft, bef, 1); | |
938 | break; | 948 | break; | |
939 | case OBACK_: /* ignored here */ | 949 | case OBACK_: /* ignored here */ | |
940 | case O_BACK: | 950 | case O_BACK: | |
941 | FWD(aft, aft, 1); | 951 | FWD(aft, aft, 1); | |
942 | break; | 952 | break; | |
943 | case OPLUS_: /* forward, this is just an empty */ | 953 | case OPLUS_: /* forward, this is just an empty */ | |
944 | FWD(aft, aft, 1); | 954 | FWD(aft, aft, 1); | |
945 | break; | 955 | break; | |
946 | case O_PLUS: /* both forward and back */ | 956 | case O_PLUS: /* both forward and back */ | |
947 | FWD(aft, aft, 1); | 957 | FWD(aft, aft, 1); | |
948 | i = ISSETBACK(aft, OPND(s)); | 958 | i = ISSETBACK(aft, d); | |
949 | BACK(aft, aft, OPND(s)); | 959 | BACK(aft, aft, d); | |
950 | if (!i && ISSETBACK(aft, OPND(s))) { | 960 | if (!i && ISSETBACK(aft, d)) { | |
951 | /* oho, must reconsider loop body */ | 961 | /* oho, must reconsider loop body */ | |
952 | pc -= OPND(s) + 1; | 962 | pc -= d + 1; | |
953 | INIT(here, pc); | 963 | INIT(here, pc); | |
954 | } | 964 | } | |
955 | break; | 965 | break; | |
956 | case OQUEST_: /* two branches, both forward */ | 966 | case OQUEST_: /* two branches, both forward */ | |
957 | FWD(aft, aft, 1); | 967 | FWD(aft, aft, 1); | |
958 | FWD(aft, aft, OPND(s)); | 968 | FWD(aft, aft, d); | |
959 | break; | 969 | break; | |
960 | case O_QUEST: /* just an empty */ | 970 | case O_QUEST: /* just an empty */ | |
961 | FWD(aft, aft, 1); | 971 | FWD(aft, aft, 1); | |
962 | break; | 972 | break; | |
963 | case OLPAREN: /* not significant here */ | 973 | case OLPAREN: /* not significant here */ | |
964 | case ORPAREN: | 974 | case ORPAREN: | |
965 | FWD(aft, aft, 1); | 975 | FWD(aft, aft, 1); | |
966 | break; | 976 | break; | |
967 | case OCH_: /* mark the first two branches */ | 977 | case OCH_: /* mark the first two branches */ | |
968 | FWD(aft, aft, 1); | 978 | FWD(aft, aft, 1); | |
969 | assert(OP(g->strip[pc+OPND(s)]) == OOR2); | 979 | assert(OP(g->strip[pc+d]) == OOR2); | |
970 | FWD(aft, aft, OPND(s)); | 980 | FWD(aft, aft, d); | |
971 | break; | 981 | break; | |
972 | case OOR1: /* done a branch, find the O_CH */ | 982 | case OOR1: /* done a branch, find the O_CH */ | |
973 | if (ISSTATEIN(aft, here)) { | 983 | if (ISSTATEIN(aft, here)) { | |
974 | for (look = 1; | 984 | for (look = 1; /**/; look += d) { | |
975 | OP(s = g->strip[pc+look]) != O_CH; | 985 | s = g->strip[pc+look]; | |
976 | look += OPND(s)) | 986 | d = g->stripdata[pc+look]; | |
977 | assert(OP(s) == OOR2); | 987 | if (s == O_CH) | |
988 | break; | |||
989 | assert(s == OOR2); | |||
990 | } | |||
978 | FWD(aft, aft, look); | 991 | FWD(aft, aft, look); | |
979 | } | 992 | } | |
980 | break; | 993 | break; | |
981 | case OOR2: /* propagate OCH_'s marking */ | 994 | case OOR2: /* propagate OCH_'s marking */ | |
982 | FWD(aft, aft, 1); | 995 | FWD(aft, aft, 1); | |
983 | if (OP(g->strip[pc+OPND(s)]) != O_CH) { | 996 | if (g->strip[pc+d] != O_CH) { | |
984 | assert(OP(g->strip[pc+OPND(s)]) == OOR2); | 997 | assert(g->strip[pc+d] == OOR2); | |
985 | FWD(aft, aft, OPND(s)); | 998 | FWD(aft, aft, d); | |
986 | } | 999 | } | |
987 | break; | 1000 | break; | |
988 | case O_CH: /* just empty */ | 1001 | case O_CH: /* just empty */ | |
989 | FWD(aft, aft, 1); | 1002 | FWD(aft, aft, 1); | |
990 | break; | 1003 | break; | |
991 | default: /* ooooops... */ | 1004 | default: /* ooooops... */ | |
992 | assert(nope); | 1005 | assert(nope); | |
993 | break; | 1006 | break; | |
994 | } | 1007 | } | |
995 | } | 1008 | } | |
996 | 1009 | |||
997 | return(aft); | 1010 | return(aft); | |
998 | } | 1011 | } |
--- src/dist/nvi/regex/Attic/regcomp.c 2008/12/05 22:51:43 1.2
+++ src/dist/nvi/regex/Attic/regcomp.c 2009/01/02 00:32:11 1.3
@@ -1,14 +1,14 @@ | @@ -1,14 +1,14 @@ | |||
1 | /* $NetBSD: regcomp.c,v 1.2 2008/12/05 22:51:43 christos Exp $ */ | 1 | /* $NetBSD: regcomp.c,v 1.3 2009/01/02 00:32:11 tnozaki Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 1992, 1993, 1994 Henry Spencer. | 4 | * Copyright (c) 1992, 1993, 1994 Henry Spencer. | |
5 | * Copyright (c) 1992, 1993, 1994 | 5 | * Copyright (c) 1992, 1993, 1994 | |
6 | * The Regents of the University of California. All rights reserved. | 6 | * The Regents of the University of California. All rights reserved. | |
7 | * | 7 | * | |
8 | * This code is derived from software contributed to Berkeley by | 8 | * This code is derived from software contributed to Berkeley by | |
9 | * Henry Spencer of the University of Toronto. | 9 | * Henry Spencer of the University of Toronto. | |
10 | * | 10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | 11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | 12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | 13 | * are met: | |
14 | * 1. Redistributions of source code must retain the above copyright | 14 | * 1. Redistributions of source code must retain the above copyright | |
@@ -56,26 +56,27 @@ static char sccsid[] = "@(#)regcomp.c 8. | @@ -56,26 +56,27 @@ static char sccsid[] = "@(#)regcomp.c 8. | |||
56 | 56 | |||
57 | #include "cclass.h" | 57 | #include "cclass.h" | |
58 | #include "cname.h" | 58 | #include "cname.h" | |
59 | 59 | |||
60 | /* | 60 | /* | |
61 | * parse structure, passed up and down to avoid global variables and | 61 | * parse structure, passed up and down to avoid global variables and | |
62 | * other clumsinesses | 62 | * other clumsinesses | |
63 | */ | 63 | */ | |
64 | struct parse { | 64 | struct parse { | |
65 | RCHAR_T *next; /* next character in RE */ | 65 | RCHAR_T *next; /* next character in RE */ | |
66 | RCHAR_T *end; /* end of string (-> NUL normally) */ | 66 | RCHAR_T *end; /* end of string (-> NUL normally) */ | |
67 | int error; /* has an error been seen? */ | 67 | int error; /* has an error been seen? */ | |
68 | sop *strip; /* malloced strip */ | 68 | sop *strip; /* malloced strip */ | |
69 | RCHAR_T *stripdata; /* malloced stripdata */ | |||
69 | sopno ssize; /* malloced strip size (allocated) */ | 70 | sopno ssize; /* malloced strip size (allocated) */ | |
70 | sopno slen; /* malloced strip length (used) */ | 71 | sopno slen; /* malloced strip length (used) */ | |
71 | int ncsalloc; /* number of csets allocated */ | 72 | int ncsalloc; /* number of csets allocated */ | |
72 | struct re_guts *g; | 73 | struct re_guts *g; | |
73 | # define NPAREN 10 /* we need to remember () 1-9 for back refs */ | 74 | # define NPAREN 10 /* we need to remember () 1-9 for back refs */ | |
74 | sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ | 75 | sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ | |
75 | sopno pend[NPAREN]; /* -> ) ([0] unused) */ | 76 | sopno pend[NPAREN]; /* -> ) ([0] unused) */ | |
76 | }; | 77 | }; | |
77 | 78 | |||
78 | /* ========= begin header generated by ./mkh ========= */ | 79 | /* ========= begin header generated by ./mkh ========= */ | |
79 | #ifdef __cplusplus | 80 | #ifdef __cplusplus | |
80 | extern "C" { | 81 | extern "C" { | |
81 | #endif | 82 | #endif | |
@@ -203,31 +204,37 @@ regcomp(regex_t *preg, const RCHAR_T *pa | @@ -203,31 +204,37 @@ regcomp(regex_t *preg, const RCHAR_T *pa | |||
203 | if (preg->re_endp < pattern) | 204 | if (preg->re_endp < pattern) | |
204 | return(REG_INVARG); | 205 | return(REG_INVARG); | |
205 | len = preg->re_endp - pattern; | 206 | len = preg->re_endp - pattern; | |
206 | } else | 207 | } else | |
207 | len = STRLEN(pattern); | 208 | len = STRLEN(pattern); | |
208 | 209 | |||
209 | /* do the mallocs early so failure handling is easy */ | 210 | /* do the mallocs early so failure handling is easy */ | |
210 | g = (struct re_guts *)malloc(sizeof(struct re_guts) + | 211 | g = (struct re_guts *)malloc(sizeof(struct re_guts) + | |
211 | (NC-1)*sizeof(cat_t)); | 212 | (NC-1)*sizeof(cat_t)); | |
212 | if (g == NULL) | 213 | if (g == NULL) | |
213 | return(REG_ESPACE); | 214 | return(REG_ESPACE); | |
214 | p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ | 215 | p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ | |
215 | p->strip = (sop *)malloc(p->ssize * sizeof(sop)); | 216 | p->strip = (sop *)malloc(p->ssize * sizeof(sop)); | |
216 | p->slen = 0; | |||
217 | if (p->strip == NULL) { | 217 | if (p->strip == NULL) { | |
218 | free((char *)g); | 218 | free((char *)g); | |
219 | return(REG_ESPACE); | 219 | return(REG_ESPACE); | |
220 | } | 220 | } | |
221 | p->stripdata = (RCHAR_T *)malloc(p->ssize * sizeof(RCHAR_T)); | |||
222 | if (p->stripdata == NULL) { | |||
223 | free((char *)p->strip); | |||
224 | free((char *)g); | |||
225 | return(REG_ESPACE); | |||
226 | } | |||
227 | p->slen = 0; | |||
221 | 228 | |||
222 | /* set things up */ | 229 | /* set things up */ | |
223 | p->g = g; | 230 | p->g = g; | |
224 | p->next = (RCHAR_T *)__UNCONST(pattern); /* convenience; we do not modify it */ | 231 | p->next = (RCHAR_T *)__UNCONST(pattern); /* convenience; we do not modify it */ | |
225 | p->end = p->next + len; | 232 | p->end = p->next + len; | |
226 | p->error = 0; | 233 | p->error = 0; | |
227 | p->ncsalloc = 0; | 234 | p->ncsalloc = 0; | |
228 | for (i = 0; i < NPAREN; i++) { | 235 | for (i = 0; i < NPAREN; i++) { | |
229 | p->pbegin[i] = 0; | 236 | p->pbegin[i] = 0; | |
230 | p->pend[i] = 0; | 237 | p->pend[i] = 0; | |
231 | } | 238 | } | |
232 | g->csetsize = NC; | 239 | g->csetsize = NC; | |
233 | g->sets = NULL; | 240 | g->sets = NULL; | |
@@ -532,98 +539,103 @@ p_bre(register struct parse *p, register | @@ -532,98 +539,103 @@ p_bre(register struct parse *p, register | |||
532 | == static int p_simp_re(register struct parse *p, int starordinary); | 539 | == static int p_simp_re(register struct parse *p, int starordinary); | |
533 | */ | 540 | */ | |
534 | static int /* was the simple RE an unbackslashed $? */ | 541 | static int /* was the simple RE an unbackslashed $? */ | |
535 | p_simp_re(register struct parse *p, int starordinary) | 542 | p_simp_re(register struct parse *p, int starordinary) | |
536 | 543 | |||
537 | /* is a leading * an ordinary character? */ | 544 | /* is a leading * an ordinary character? */ | |
538 | { | 545 | { | |
539 | register int c; | 546 | register int c; | |
540 | register int count; | 547 | register int count; | |
541 | register int count2; | 548 | register int count2; | |
542 | register sopno pos; | 549 | register sopno pos; | |
543 | register int i; | 550 | register int i; | |
544 | register sopno subno; | 551 | register sopno subno; | |
545 | # define BACKSL (1<<RCHAR_BIT) | |||
546 | 552 | |||
547 | pos = HERE(); /* repetion op, if any, covers from here */ | 553 | pos = HERE(); /* repetion op, if any, covers from here */ | |
548 | 554 | |||
549 | assert(MORE()); /* caller should have ensured this */ | 555 | assert(MORE()); /* caller should have ensured this */ | |
550 | c = GETNEXT(); | 556 | c = GETNEXT(); | |
551 | if (c == '\\') { | 557 | if (c == '\\') { | |
552 | (void)REQUIRE(MORE(), REG_EESCAPE); | 558 | (void)REQUIRE(MORE(), REG_EESCAPE); | |
553 | c = BACKSL | (unsigned char)GETNEXT(); | 559 | c = (unsigned char)GETNEXT(); | |
554 | } | 560 | switch (c) { | |
555 | switch (c) { | 561 | case '{': | |
556 | case '.': | 562 | SETERROR(REG_BADRPT); | |
557 | if (p->g->cflags&REG_NEWLINE) | 563 | break; | |
558 | nonnewline(p); | 564 | case '(': | |
559 | else | 565 | p->g->nsub++; | |
560 | EMIT(OANY, 0); | 566 | subno = p->g->nsub; | |
561 | break; | 567 | if (subno < NPAREN) | |
562 | case '[': | 568 | p->pbegin[subno] = HERE(); | |
563 | p_bracket(p); | 569 | EMIT(OLPAREN, subno); | |
564 | break; | 570 | /* the MORE here is an error heuristic */ | |
565 | case BACKSL|'{': | 571 | if (MORE() && !SEETWO('\\', ')')) | |
566 | SETERROR(REG_BADRPT); | 572 | p_bre(p, '\\', ')'); | |
567 | break; | 573 | if (subno < NPAREN) { | |
568 | case BACKSL|'(': | 574 | p->pend[subno] = HERE(); | |
569 | p->g->nsub++; | 575 | assert(p->pend[subno] != 0); | |
570 | subno = p->g->nsub; | 576 | } | |
571 | if (subno < NPAREN) | 577 | EMIT(ORPAREN, subno); | |
572 | p->pbegin[subno] = HERE(); | 578 | (void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN); | |
573 | EMIT(OLPAREN, subno); | 579 | break; | |
574 | /* the MORE here is an error heuristic */ | 580 | case ')': /* should not get here -- must be user */ | |
575 | if (MORE() && !SEETWO('\\', ')')) | 581 | case '}': | |
576 | p_bre(p, '\\', ')'); | 582 | SETERROR(REG_EPAREN); | |
577 | if (subno < NPAREN) { | 583 | break; | |
578 | p->pend[subno] = HERE(); | 584 | case '1': | |
579 | assert(p->pend[subno] != 0); | 585 | case '2': | |
586 | case '3': | |||
587 | case '4': | |||
588 | case '5': | |||
589 | case '6': | |||
590 | case '7': | |||
591 | case '8': | |||
592 | case '9': | |||
593 | i = c - '0'; | |||
594 | assert(i < NPAREN); | |||
595 | if (p->pend[i] != 0) { | |||
596 | assert(i <= p->g->nsub); | |||
597 | EMIT(OBACK_, i); | |||
598 | assert(p->pbegin[i] != 0); | |||
599 | assert(p->strip[p->pbegin[i]] == OLPAREN); | |||
600 | assert(p->strip[p->pend[i]] == ORPAREN); | |||
601 | (void) dupl(p, p->pbegin[i]+1, p->pend[i]); | |||
602 | EMIT(O_BACK, i); | |||
603 | } else | |||
604 | SETERROR(REG_ESUBREG); | |||
605 | p->g->backrefs = 1; | |||
606 | break; | |||
607 | default: | |||
608 | ordinary(p, c); | |||
609 | break; | |||
610 | } | |||
611 | } else { | |||
612 | switch (c) { | |||
613 | case '.': | |||
614 | if (p->g->cflags&REG_NEWLINE) | |
615 | nonnewline(p); | |||
616 | else | |||
617 | EMIT(OANY, 0); | |||
618 | break; | |||
619 | case '[': | |||
620 | p_bracket(p); | |||
621 | break; | |||
622 | case '*': | |||
623 | (void)REQUIRE(starordinary, REG_BADRPT); | |||
624 | /* FALLTHROUGH */ | |||
625 | default: | |||
626 | ordinary(p, c); | |||
627 | break; | |||
580 | } | 628 | } | |
581 | EMIT(ORPAREN, subno); | |||
582 | (void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN); | |||
583 | break; | |||
584 | case BACKSL|')': /* should not get here -- must be user */ | |||
585 | case BACKSL|'}': | |||
586 | SETERROR(REG_EPAREN); | |||
587 | break; | |||
588 | case BACKSL|'1': | |||
589 | case BACKSL|'2': | |||
590 | case BACKSL|'3': | |||
591 | case BACKSL|'4': | |||
592 | case BACKSL|'5': | |||
593 | case BACKSL|'6': | |||
594 | case BACKSL|'7': | |||
595 | case BACKSL|'8': | |||
596 | case BACKSL|'9': | |||
597 | i = (c&~BACKSL) - '0'; | |||
598 | assert(i < NPAREN); | |||
599 | if (p->pend[i] != 0) { | |||
600 | assert(i <= p->g->nsub); | |||
601 | EMIT(OBACK_, i); | |||
602 | assert(p->pbegin[i] != 0); | |||
603 | assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); | |||
604 | assert(OP(p->strip[p->pend[i]]) == ORPAREN); | |||
605 | (void) dupl(p, p->pbegin[i]+1, p->pend[i]); | |||
606 | EMIT(O_BACK, i); | |||
607 | } else | |||
608 | SETERROR(REG_ESUBREG); | |||
609 | p->g->backrefs = 1; | |||
610 | break; | |||
611 | case '*': | |||
612 | (void)REQUIRE(starordinary, REG_BADRPT); | |||
613 | /* FALLTHROUGH */ | |||
614 | default: | |||
615 | ordinary(p, c &~ BACKSL); | |||
616 | break; | |||
617 | } | 629 | } | |
618 | 630 | |||
619 | if (EAT('*')) { /* implemented as +? */ | 631 | if (EAT('*')) { /* implemented as +? */ | |
620 | /* this case does not require the (y|) trick, noKLUDGE */ | 632 | /* this case does not require the (y|) trick, noKLUDGE */ | |
621 | INSERT(OPLUS_, pos); | 633 | INSERT(OPLUS_, pos); | |
622 | ASTERN(O_PLUS, pos); | 634 | ASTERN(O_PLUS, pos); | |
623 | INSERT(OQUEST_, pos); | 635 | INSERT(OQUEST_, pos); | |
624 | ASTERN(O_QUEST, pos); | 636 | ASTERN(O_QUEST, pos); | |
625 | } else if (EATTWO('\\', '{')) { | 637 | } else if (EATTWO('\\', '{')) { | |
626 | count = p_count(p); | 638 | count = p_count(p); | |
627 | if (EAT(',')) { | 639 | if (EAT(',')) { | |
628 | if (MORE() && isdigit(PEEK())) { | 640 | if (MORE() && isdigit(PEEK())) { | |
629 | count2 = p_count(p); | 641 | count2 = p_count(p); | |
@@ -1399,249 +1411,288 @@ dupl(register struct parse *p, sopno sta | @@ -1399,249 +1411,288 @@ dupl(register struct parse *p, sopno sta | |||
1399 | /* from here */ | 1411 | /* from here */ | |
1400 | /* to this less one */ | 1412 | /* to this less one */ | |
1401 | { | 1413 | { | |
1402 | register sopno ret = HERE(); | 1414 | register sopno ret = HERE(); | |
1403 | register sopno len = finish - start; | 1415 | register sopno len = finish - start; | |
1404 | 1416 | |||
1405 | assert(finish >= start); | 1417 | assert(finish >= start); | |
1406 | if (len == 0) | 1418 | if (len == 0) | |
1407 | return(ret); | 1419 | return(ret); | |
1408 | enlarge(p, p->ssize + len); /* this many unexpected additions */ | 1420 | enlarge(p, p->ssize + len); /* this many unexpected additions */ | |
1409 | assert(p->ssize >= p->slen + len); | 1421 | assert(p->ssize >= p->slen + len); | |
1410 | (void) memcpy((char *)(p->strip + p->slen), | 1422 | (void) memcpy((char *)(p->strip + p->slen), | |
1411 | (char *)(p->strip + start), (size_t)len*sizeof(sop)); | 1423 | (char *)(p->strip + start), (size_t)len*sizeof(sop)); | |
1424 | (void) memcpy((char *)(p->stripdata + p->slen), | |||
1425 | (char *)(p->stripdata + start), (size_t)len*sizeof(RCHAR_T)); | |||
1412 | p->slen += len; | 1426 | p->slen += len; | |
1413 | return(ret); | 1427 | return(ret); | |
1414 | } | 1428 | } | |
1415 | 1429 | |||
1416 | /* | 1430 | /* | |
1417 | - doemit - emit a strip operator | 1431 | - doemit - emit a strip operator | |
1418 | == static void doemit(register struct parse *p, sop op, size_t opnd); | 1432 | == static void doemit(register struct parse *p, sop op, size_t opnd); | |
1419 | * | 1433 | * | |
1420 | * It might seem better to implement this as a macro with a function as | 1434 | * It might seem better to implement this as a macro with a function as | |
1421 | * hard-case backup, but it's just too big and messy unless there are | 1435 | * hard-case backup, but it's just too big and messy unless there are | |
1422 | * some changes to the data structures. Maybe later. | 1436 | * some changes to the data structures. Maybe later. | |
1423 | */ | 1437 | */ | |
1424 | static void | 1438 | static void | |
1425 | doemit(register struct parse *p, sop op, size_t opnd) | 1439 | doemit(register struct parse *p, sop op, size_t opnd) | |
1426 | { | 1440 | { | |
1427 | /* avoid making error situations worse */ | 1441 | /* avoid making error situations worse */ | |
1428 | if (p->error != 0) | 1442 | if (p->error != 0) | |
1429 | return; | 1443 | return; | |
1430 | 1444 | |||
1431 | /* deal with oversize operands ("can't happen", more or less) */ | 1445 | /* deal with oversize operands ("can't happen", more or less) */ | |
1432 | assert(opnd < 1<<OPSHIFT); | 1446 | assert(opnd < 1); | |
1433 | 1447 | |||
1434 | /* deal with undersized strip */ | 1448 | /* deal with undersized strip */ | |
1435 | if (p->slen >= p->ssize) | 1449 | if (p->slen >= p->ssize) | |
1436 | enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ | 1450 | enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ | |
1437 | assert(p->slen < p->ssize); | 1451 | assert(p->slen < p->ssize); | |
1438 | 1452 | |||
1439 | /* finally, it's all reduced to the easy case */ | 1453 | /* finally, it's all reduced to the easy case */ | |
1440 | p->strip[p->slen++] = SOP(op, opnd); | 1454 | p->strip[p->slen] = op; | |
1455 | p->stripdata[p->slen] = opnd; | |||
1456 | p->slen++; | |||
1441 | } | 1457 | } | |
1442 | 1458 | |||
1443 | /* | 1459 | /* | |
1444 | - doinsert - insert a sop into the strip | 1460 | - doinsert - insert a sop into the strip | |
1445 | == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); | 1461 | == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); | |
1446 | */ | 1462 | */ | |
1447 | static void | 1463 | static void | |
1448 | doinsert(register struct parse *p, sop op, size_t opnd, sopno pos) | 1464 | doinsert(register struct parse *p, sop op, size_t opnd, sopno pos) | |
1449 | { | 1465 | { | |
1450 | register sopno sn; | 1466 | register sopno sn; | |
1451 | register sop s; | 1467 | register sop s; | |
1468 | register RCHAR_T d; | |||
1452 | register int i; | 1469 | register int i; | |
1453 | 1470 | |||
1454 | /* avoid making error situations worse */ | 1471 | /* avoid making error situations worse */ | |
1455 | if (p->error != 0) | 1472 | if (p->error != 0) | |
1456 | return; | 1473 | return; | |
1457 | 1474 | |||
1458 | sn = HERE(); | 1475 | sn = HERE(); | |
1459 | EMIT(op, opnd); /* do checks, ensure space */ | 1476 | EMIT(op, opnd); /* do checks, ensure space */ | |
1460 | assert(HERE() == sn+1); | 1477 | assert(HERE() == sn+1); | |
1461 | s = p->strip[sn]; | 1478 | s = p->strip[sn]; | |
1479 | d = p->stripdata[sn]; | |||
1462 | 1480 | |||
1463 | /* adjust paren pointers */ | 1481 | /* adjust paren pointers */ | |
1464 | assert(pos > 0); | 1482 | assert(pos > 0); | |
1465 | for (i = 1; i < NPAREN; i++) { | 1483 | for (i = 1; i < NPAREN; i++) { | |
1466 | if (p->pbegin[i] >= pos) { | 1484 | if (p->pbegin[i] >= pos) { | |
1467 | p->pbegin[i]++; | 1485 | p->pbegin[i]++; | |
1468 | } | 1486 | } | |
1469 | if (p->pend[i] >= pos) { | 1487 | if (p->pend[i] >= pos) { | |
1470 | p->pend[i]++; | 1488 | p->pend[i]++; | |
1471 | } | 1489 | } | |
1472 | } | 1490 | } | |
1473 | 1491 | |||
1474 | memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], | 1492 | memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], | |
1475 | (HERE()-pos-1)*sizeof(sop)); | 1493 | (HERE()-pos-1)*sizeof(sop)); | |
1494 | memmove((char *)&p->stripdata[pos+1], (char *)&p->stripdata[pos], | |||
1495 | (HERE()-pos-1)*sizeof(RCHAR_T)); | |||
1476 | p->strip[pos] = s; | 1496 | p->strip[pos] = s; | |
1497 | p->stripdata[pos] = d; | |||
1477 | } | 1498 | } | |
1478 | 1499 | |||
1479 | /* | 1500 | /* | |
1480 | - dofwd - complete a forward reference | 1501 | - dofwd - complete a forward reference | |
1481 | == static void dofwd(register struct parse *p, sopno pos, sop value); | 1502 | == static void dofwd(register struct parse *p, sopno pos, sop value); | |
1482 | */ | 1503 | */ | |
1483 | static void | 1504 | static void | |
1484 | dofwd(register struct parse *p, register sopno pos, sop value) | 1505 | dofwd(register struct parse *p, register sopno pos, sop value) | |
1485 | { | 1506 | { | |
1486 | /* avoid making error situations worse */ | 1507 | /* avoid making error situations worse */ | |
1487 | if (p->error != 0) | 1508 | if (p->error != 0) | |
1488 | return; | 1509 | return; | |
1489 | 1510 | |||
1490 | assert(value < 1<<OPSHIFT); | 1511 | assert(value < 1); | |
1491 | p->strip[pos] = OP(p->strip[pos]) | value; | 1512 | p->stripdata[pos] = value; | |
1492 | } | 1513 | } | |
1493 | 1514 | |||
1494 | /* | 1515 | /* | |
1495 | - enlarge - enlarge the strip | 1516 | - enlarge - enlarge the strip | |
1496 | == static void enlarge(register struct parse *p, sopno size); | 1517 | == static void enlarge(register struct parse *p, sopno size); | |
1497 | */ | 1518 | */ | |
1498 | static void | 1519 | static void | |
1499 | enlarge(register struct parse *p, register sopno size) | 1520 | enlarge(register struct parse *p, register sopno size) | |
1500 | { | 1521 | { | |
1501 | register sop *sp; | 1522 | register sop *sp; | |
1523 | register RCHAR_T *dp; | |||
1502 | 1524 | |||
1503 | if (p->ssize >= size) | 1525 | if (p->ssize >= size) | |
1504 | return; | 1526 | return; | |
1505 | 1527 | |||
1506 | sp = (sop *)realloc(p->strip, size*sizeof(sop)); | 1528 | sp = (sop *)realloc(p->strip, size*sizeof(sop)); | |
1507 | if (sp == NULL) { | 1529 | if (sp == NULL) { | |
1508 | SETERROR(REG_ESPACE); | 1530 | SETERROR(REG_ESPACE); | |
1509 | return; | 1531 | return; | |
1510 | } | 1532 | } | |
1511 | p->strip = sp; | 1533 | p->strip = sp; | |
1534 | dp = (RCHAR_T *)realloc(p->stripdata, size*sizeof(RCHAR_T)); | |||
1535 | if (dp == NULL) { | |||
1536 | SETERROR(REG_ESPACE); | |||
1537 | return; | |||
1538 | } | |||
1539 | p->stripdata = dp; | |||
1512 | p->ssize = size; | 1540 | p->ssize = size; | |
1513 | } | 1541 | } | |
1514 | 1542 | |||
1515 | /* | 1543 | /* | |
1516 | - stripsnug - compact the strip | 1544 | - stripsnug - compact the strip | |
1517 | == static void stripsnug(register struct parse *p, register struct re_guts *g); | 1545 | == static void stripsnug(register struct parse *p, register struct re_guts *g); | |
1518 | */ | 1546 | */ | |
1519 | static void | 1547 | static void | |
1520 | stripsnug(register struct parse *p, register struct re_guts *g) | 1548 | stripsnug(register struct parse *p, register struct re_guts *g) | |
1521 | { | 1549 | { | |
1522 | g->nstates = p->slen; | 1550 | g->nstates = p->slen; | |
1523 | g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); | 1551 | g->strip = (sop *)realloc((char *)p->strip, | |
1552 | p->slen * sizeof(sop)); | |||
1524 | if (g->strip == NULL) { | 1553 | if (g->strip == NULL) { | |
1525 | SETERROR(REG_ESPACE); | 1554 | SETERROR(REG_ESPACE); | |
1526 | g->strip = p->strip; | 1555 | g->strip = p->strip; | |
1527 | } | 1556 | } | |
1557 | g->stripdata = (RCHAR_T *)realloc((char *)p->stripdata, | |||
1558 | p->slen * sizeof(RCHAR_T)); | |||
1559 | if (g->stripdata == NULL) { | |||
1560 | SETERROR(REG_ESPACE); | |||
1561 | g->stripdata = p->stripdata; | |||
1562 | } | |||
1528 | } | 1563 | } | |
1529 | 1564 | |||
1530 | /* | 1565 | /* | |
1531 | - findmust - fill in must and mlen with longest mandatory literal string | 1566 | - findmust - fill in must and mlen with longest mandatory literal string | |
1532 | == static void findmust(register struct parse *p, register struct re_guts *g); | 1567 | == static void findmust(register struct parse *p, register struct re_guts *g); | |
1533 | * | 1568 | * | |
1534 | * This algorithm could do fancy things like analyzing the operands of | | 1569 | * This algorithm could do fancy things like analyzing the operands of | | |
1535 | * for common subsequences. Someday. This code is simple and finds most | 1570 | * for common subsequences. Someday. This code is simple and finds most | |
1536 | * of the interesting cases. | 1571 | * of the interesting cases. | |
1537 | * | 1572 | * | |
1538 | * Note that must and mlen got initialized during setup. | 1573 | * Note that must and mlen got initialized during setup. | |
1539 | */ | 1574 | */ | |
1540 | static void | 1575 | static void | |
1541 | findmust(struct parse *p, register struct re_guts *g) | 1576 | findmust(struct parse *p, register struct re_guts *g) | |
1542 | { | 1577 | { | |
1543 | register sop *scan; | 1578 | register sop *scans; | |
1544 | sop *start = 0; | 1579 | register RCHAR_T *scand; | |
1545 | register sop *newstart = 0; | 1580 | sop *starts = 0; | |
1581 | RCHAR_T *startd = NULL; | |||
1582 | register sop *newstarts = 0; | |||
1583 | register RCHAR_T *newstartd = NULL; | |||
1546 | register sopno newlen; | 1584 | register sopno newlen; | |
1547 | register sop s; | 1585 | register sop s; | |
1586 | register RCHAR_T d; | |||
1548 | register RCHAR_T *cp; | 1587 | register RCHAR_T *cp; | |
1549 | register sopno i; | 1588 | register sopno i; | |
1550 | 1589 | |||
1551 | /* avoid making error situations worse */ | 1590 | /* avoid making error situations worse */ | |
1552 | if (p->error != 0) | 1591 | if (p->error != 0) | |
1553 | return; | 1592 | return; | |
1554 | 1593 | |||
1555 | /* find the longest OCHAR sequence in strip */ | 1594 | /* find the longest OCHAR sequence in strip */ | |
1556 | newlen = 0; | 1595 | newlen = 0; | |
1557 | scan = g->strip + 1; | 1596 | scans = g->strip + 1; | |
1597 | scand = g->stripdata + 1; | |||
1558 | do { | 1598 | do { | |
1559 | s = *scan++; | 1599 | s = *scans++; | |
1560 | switch (OP(s)) { | 1600 | d = *scand++; | |
1601 | switch (s) { | |||
1561 | case OCHAR: /* sequence member */ | 1602 | case OCHAR: /* sequence member */ | |
1562 | if (newlen == 0) /* new sequence */ | 1603 | if (newlen == 0) { /* new sequence */ | |
1563 | newstart = scan - 1; | 1604 | newstarts = scans - 1; | |
1605 | newstartd = scand - 1; | |||
1606 | } | |||
1564 | newlen++; | 1607 | newlen++; | |
1565 | break; | 1608 | break; | |
1566 | case OPLUS_: /* things that don't break one */ | 1609 | case OPLUS_: /* things that don't break one */ | |
1567 | case OLPAREN: | 1610 | case OLPAREN: | |
1568 | case ORPAREN: | 1611 | case ORPAREN: | |
1569 | break; | 1612 | break; | |
1570 | case OQUEST_: /* things that must be skipped */ | 1613 | case OQUEST_: /* things that must be skipped */ | |
1571 | case OCH_: | 1614 | case OCH_: | |
1572 | scan--; | 1615 | scans--; | |
1616 | scand--; | |||
1573 | do { | 1617 | do { | |
1574 | scan += OPND(s); | 1618 | scans += d; | |
1575 | s = *scan; | 1619 | scand += d; | |
1620 | s = *scans; | |||
1621 | d = *scand; | |||
1576 | /* assert() interferes w debug printouts */ | 1622 | /* assert() interferes w debug printouts */ | |
1577 | if (OP(s) != O_QUEST && OP(s) != O_CH && | 1623 | if (s != O_QUEST && s != O_CH && s != OOR2) { | |
1578 | OP(s) != OOR2) { | |||
1579 | g->iflags |= BAD; | 1624 | g->iflags |= BAD; | |
1580 | return; | 1625 | return; | |
1581 | } | 1626 | } | |
1582 | } while (OP(s) != O_QUEST && OP(s) != O_CH); | 1627 | } while (s != O_QUEST && s != O_CH); | |
1583 | /* fallthrough */ | 1628 | /* fallthrough */ | |
1584 | default: /* things that break a sequence */ | 1629 | default: /* things that break a sequence */ | |
1585 | if (newlen > g->mlen) { /* ends one */ | 1630 | if (newlen > g->mlen) { /* ends one */ | |
1586 | start = newstart; | 1631 | starts = newstarts; | |
1632 | startd = newstartd; | |||
1587 | g->mlen = newlen; | 1633 | g->mlen = newlen; | |
1588 | } | 1634 | } | |
1589 | newlen = 0; | 1635 | newlen = 0; | |
1590 | break; | 1636 | break; | |
1591 | } | 1637 | } | |
1592 | } while (OP(s) != OEND); | 1638 | } while (s != OEND); | |
1593 | 1639 | |||
1594 | if (g->mlen == 0) /* there isn't one */ | 1640 | if (g->mlen == 0) /* there isn't one */ | |
1595 | return; | 1641 | return; | |
1596 | 1642 | |||
1597 | /* turn it into a character string */ | 1643 | /* turn it into a character string */ | |
1598 | g->must = malloc(((size_t)g->mlen + 1) * sizeof(RCHAR_T)); | 1644 | g->must = malloc(((size_t)g->mlen + 1) * sizeof(RCHAR_T)); | |
1599 | if (g->must == NULL) { /* argh; just forget it */ | 1645 | if (g->must == NULL) { /* argh; just forget it */ | |
1600 | g->mlen = 0; | 1646 | g->mlen = 0; | |
1601 | return; | 1647 | return; | |
1602 | } | 1648 | } | |
1603 | cp = g->must; | 1649 | cp = g->must; | |
1604 | scan = start; | 1650 | scans = starts; | |
1651 | scand = startd; | |||
1605 | for (i = g->mlen; i > 0; i--) { | 1652 | for (i = g->mlen; i > 0; i--) { | |
1606 | while (OP(s = *scan++) != OCHAR) | 1653 | for (;;) { | |
1607 | continue; | 1654 | s = *scans++; | |
1655 | d = *scand++; | |||
1656 | if (s == OCHAR) | |||
1657 | break; | |||
1658 | } | |||
1608 | assert(cp < g->must + g->mlen); | 1659 | assert(cp < g->must + g->mlen); | |
1609 | *cp++ = (RCHAR_T)OPND(s); | 1660 | *cp++ = d; | |
1610 | } | 1661 | } | |
1611 | assert(cp == g->must + g->mlen); | 1662 | assert(cp == g->must + g->mlen); | |
1612 | *cp++ = '\0'; /* just on general principles */ | 1663 | *cp++ = '\0'; /* just on general principles */ | |
1613 | } | 1664 | } | |
1614 | 1665 | |||
1615 | /* | 1666 | /* | |
1616 | - pluscount - count + nesting | 1667 | - pluscount - count + nesting | |
1617 | == static sopno pluscount(register struct parse *p, register struct re_guts *g); | 1668 | == static sopno pluscount(register struct parse *p, register struct re_guts *g); | |
1618 | */ | 1669 | */ | |
1619 | static sopno /* nesting depth */ | 1670 | static sopno /* nesting depth */ | |
1620 | pluscount(struct parse *p, register struct re_guts *g) | 1671 | pluscount(struct parse *p, register struct re_guts *g) | |
1621 | { | 1672 | { | |
1622 | register sop *scan; | 1673 | register sop *scan; | |
1623 | register sop s; | 1674 | register sop s; | |
1624 | register sopno plusnest = 0; | 1675 | register sopno plusnest = 0; | |
1625 | register sopno maxnest = 0; | 1676 | register sopno maxnest = 0; | |
1626 | 1677 | |||
1627 | if (p->error != 0) | 1678 | if (p->error != 0) | |
1628 | return(0); /* there may not be an OEND */ | 1679 | return(0); /* there may not be an OEND */ | |
1629 | 1680 | |||
1630 | scan = g->strip + 1; | 1681 | scan = g->strip + 1; | |
1631 | do { | 1682 | do { | |
1632 | s = *scan++; | 1683 | s = *scan++; | |
1633 | switch (OP(s)) { | 1684 | switch (s) { | |
1634 | case OPLUS_: | 1685 | case OPLUS_: | |
1635 | plusnest++; | 1686 | plusnest++; | |
1636 | break; | 1687 | break; | |
1637 | case O_PLUS: | 1688 | case O_PLUS: | |
1638 | if (plusnest > maxnest) | 1689 | if (plusnest > maxnest) | |
1639 | maxnest = plusnest; | 1690 | maxnest = plusnest; | |
1640 | plusnest--; | 1691 | plusnest--; | |
1641 | break; | 1692 | break; | |
1642 | } | 1693 | } | |
1643 | } while (OP(s) != OEND); | 1694 | } while (s != OEND); | |
1644 | if (plusnest != 0) | 1695 | if (plusnest != 0) | |
1645 | g->iflags |= BAD; | 1696 | g->iflags |= BAD; | |
1646 | return(maxnest); | 1697 | return(maxnest); | |
1647 | } | 1698 | } |
--- src/dist/nvi/regex/Attic/regex2.h 2008/05/18 14:31:38 1.1.1.2
+++ src/dist/nvi/regex/Attic/regex2.h 2009/01/02 00:32:11 1.2
@@ -1,14 +1,14 @@ | @@ -1,14 +1,14 @@ | |||
1 | /* $NetBSD: regex2.h,v 1.1.1.2 2008/05/18 14:31:38 aymeric Exp $ */ | 1 | /* $NetBSD: regex2.h,v 1.2 2009/01/02 00:32:11 tnozaki Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 1992, 1993, 1994 Henry Spencer. | 4 | * Copyright (c) 1992, 1993, 1994 Henry Spencer. | |
5 | * Copyright (c) 1992, 1993, 1994 | 5 | * Copyright (c) 1992, 1993, 1994 | |
6 | * The Regents of the University of California. All rights reserved. | 6 | * The Regents of the University of California. All rights reserved. | |
7 | * | 7 | * | |
8 | * This code is derived from software contributed to Berkeley by | 8 | * This code is derived from software contributed to Berkeley by | |
9 | * Henry Spencer of the University of Toronto. | 9 | * Henry Spencer of the University of Toronto. | |
10 | * | 10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | 11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | 12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | 13 | * are met: | |
14 | * 1. Redistributions of source code must retain the above copyright | 14 | * 1. Redistributions of source code must retain the above copyright | |
@@ -67,56 +67,50 @@ | @@ -67,56 +67,50 @@ | |||
67 | * flow must be marked at both its source and its destination. Some | 67 | * flow must be marked at both its source and its destination. Some | |
68 | * fine points: | 68 | * fine points: | |
69 | * | 69 | * | |
70 | * - OPLUS_ and O_PLUS are *inside* the loop they create. | 70 | * - OPLUS_ and O_PLUS are *inside* the loop they create. | |
71 | * - OQUEST_ and O_QUEST are *outside* the bypass they create. | 71 | * - OQUEST_ and O_QUEST are *outside* the bypass they create. | |
72 | * - OCH_ and O_CH are *outside* the multi-way branch they create, while | 72 | * - OCH_ and O_CH are *outside* the multi-way branch they create, while | |
73 | * OOR1 and OOR2 are respectively the end and the beginning of one of | 73 | * OOR1 and OOR2 are respectively the end and the beginning of one of | |
74 | * the branches. Note that there is an implicit OOR2 following OCH_ | 74 | * the branches. Note that there is an implicit OOR2 following OCH_ | |
75 | * and an implicit OOR1 preceding O_CH. | 75 | * and an implicit OOR1 preceding O_CH. | |
76 | * | 76 | * | |
77 | * In state representations, an operator's bit is on to signify a state | 77 | * In state representations, an operator's bit is on to signify a state | |
78 | * immediately *preceding* "execution" of that operator. | 78 | * immediately *preceding* "execution" of that operator. | |
79 | */ | 79 | */ | |
80 | typedef unsigned long sop; /* strip operator */ | 80 | typedef char sop; /* strip operator */ | |
81 | typedef int sopno; | 81 | typedef int sopno; | |
82 | #define OPRMASK 0xf8000000 | |||
83 | #define OPDMASK 0x07ffffff | |||
84 | #define OPSHIFT ((unsigned)27) | |||
85 | #define OP(n) ((n)&OPRMASK) | |||
86 | #define OPND(n) ((n)&OPDMASK) | |||
87 | #define SOP(op, opnd) ((op)|(opnd)) | |||
88 | /* operators meaning operand */ | 82 | /* operators meaning operand */ | |
89 | /* (back, fwd are offsets) */ | 83 | /* (back, fwd are offsets) */ | |
90 | #define OEND (1UL<<OPSHIFT) /* endmarker - */ | 84 | #define OEND (1) /* endmarker - */ | |
91 | #define OCHAR (2UL<<OPSHIFT) /* character unsigned char */ | 85 | #define OCHAR (2) /* character unsigned char */ | |
92 | #define OBOL (3UL<<OPSHIFT) /* left anchor - */ | 86 | #define OBOL (3) /* left anchor - */ | |
93 | #define OEOL (4UL<<OPSHIFT) /* right anchor - */ | 87 | #define OEOL (4) /* right anchor - */ | |
94 | #define OANY (5UL<<OPSHIFT) /* . - */ | 88 | #define OANY (5) /* . - */ | |
95 | #define OANYOF (6UL<<OPSHIFT) /* [...] set number */ | 89 | #define OANYOF (6) /* [...] set number */ | |
96 | #define OBACK_ (7UL<<OPSHIFT) /* begin \d paren number */ | 90 | #define OBACK_ (7) /* begin \d paren number */ | |
97 | #define O_BACK (8UL<<OPSHIFT) /* end \d paren number */ | 91 | #define O_BACK (8) /* end \d paren number */ | |
98 | #define OPLUS_ (9UL<<OPSHIFT) /* + prefix fwd to suffix */ | 92 | #define OPLUS_ (9) /* + prefix fwd to suffix */ | |
99 | #define O_PLUS (10UL<<OPSHIFT) /* + suffix back to prefix */ | 93 | #define O_PLUS (10) /* + suffix back to prefix */ | |
100 | #define OQUEST_ (11UL<<OPSHIFT) /* ? prefix fwd to suffix */ | 94 | #define OQUEST_ (11) /* ? prefix fwd to suffix */ | |
101 | #define O_QUEST (12UL<<OPSHIFT) /* ? suffix back to prefix */ | 95 | #define O_QUEST (12) /* ? suffix back to prefix */ | |
102 | #define OLPAREN (13UL<<OPSHIFT) /* ( fwd to ) */ | 96 | #define OLPAREN (13) /* ( fwd to ) */ | |
103 | #define ORPAREN (14UL<<OPSHIFT) /* ) back to ( */ | 97 | #define ORPAREN (14) /* ) back to ( */ | |
104 | #define OCH_ (15UL<<OPSHIFT) /* begin choice fwd to OOR2 */ | 98 | #define OCH_ (15) /* begin choice fwd to OOR2 */ | |
105 | #define OOR1 (16UL<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */ | 99 | #define OOR1 (16) /* | pt. 1 back to OOR1 or OCH_ */ | |
106 | #define OOR2 (17UL<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */ | 100 | #define OOR2 (17) /* | pt. 2 fwd to OOR2 or O_CH */ | |
107 | #define O_CH (18UL<<OPSHIFT) /* end choice back to OOR1 */ | 101 | #define O_CH (18) /* end choice back to OOR1 */ | |
108 | #define OBOW (19UL<<OPSHIFT) /* begin word - */ | 102 | #define OBOW (19) /* begin word - */ | |
109 | #define OEOW (20UL<<OPSHIFT) /* end word - */ | 103 | #define OEOW (20) /* end word - */ | |
110 | 104 | |||
111 | /* | 105 | /* | |
112 | * Structure for [] character-set representation. Character sets are | 106 | * Structure for [] character-set representation. Character sets are | |
113 | * done as bit vectors, grouped 8 to a byte vector for compactness. | 107 | * done as bit vectors, grouped 8 to a byte vector for compactness. | |
114 | * The individual set therefore has both a pointer to the byte vector | 108 | * The individual set therefore has both a pointer to the byte vector | |
115 | * and a mask to pick out the relevant bit of each byte. A hash code | 109 | * and a mask to pick out the relevant bit of each byte. A hash code | |
116 | * simplifies testing whether two sets could be identical. | 110 | * simplifies testing whether two sets could be identical. | |
117 | * | 111 | * | |
118 | * This will get trickier for multicharacter collating elements. As | 112 | * This will get trickier for multicharacter collating elements. As | |
119 | * preliminary hooks for dealing with such things, we also carry along | 113 | * preliminary hooks for dealing with such things, we also carry along | |
120 | * a string of multi-character elements, and decide the size of the | 114 | * a string of multi-character elements, and decide the size of the | |
121 | * vectors at run time. | 115 | * vectors at run time. | |
122 | */ | 116 | */ | |
@@ -135,26 +129,27 @@ typedef struct { | @@ -135,26 +129,27 @@ typedef struct { | |||
135 | #define MCsub(p, cs, cp) mcsub(p, cs, cp) | 129 | #define MCsub(p, cs, cp) mcsub(p, cs, cp) | |
136 | #define MCin(p, cs, cp) mcin(p, cs, cp) | 130 | #define MCin(p, cs, cp) mcin(p, cs, cp) | |
137 | 131 | |||
138 | /* stuff for character categories */ | 132 | /* stuff for character categories */ | |
139 | typedef RCHAR_T cat_t; | 133 | typedef RCHAR_T cat_t; | |
140 | 134 | |||
141 | /* | 135 | /* | |
142 | * main compiled-expression structure | 136 | * main compiled-expression structure | |
143 | */ | 137 | */ | |
144 | struct re_guts { | 138 | struct re_guts { | |
145 | int magic; | 139 | int magic; | |
146 | # define MAGIC2 ((('R'^0200)<<8)|'E') | 140 | # define MAGIC2 ((('R'^0200)<<8)|'E') | |
147 | sop *strip; /* malloced area for strip */ | 141 | sop *strip; /* malloced area for strip */ | |
142 | RCHAR_T *stripdata; /* malloced area for stripdata */ | |||
148 | int csetsize; /* number of bits in a cset vector */ | 143 | int csetsize; /* number of bits in a cset vector */ | |
149 | int ncsets; /* number of csets in use */ | 144 | int ncsets; /* number of csets in use */ | |
150 | cset *sets; /* -> cset [ncsets] */ | 145 | cset *sets; /* -> cset [ncsets] */ | |
151 | uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ | 146 | uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ | |
152 | int cflags; /* copy of regcomp() cflags argument */ | 147 | int cflags; /* copy of regcomp() cflags argument */ | |
153 | sopno nstates; /* = number of sops */ | 148 | sopno nstates; /* = number of sops */ | |
154 | sopno firststate; /* the initial OEND (normally 0) */ | 149 | sopno firststate; /* the initial OEND (normally 0) */ | |
155 | sopno laststate; /* the final OEND */ | 150 | sopno laststate; /* the final OEND */ | |
156 | int iflags; /* internal flags */ | 151 | int iflags; /* internal flags */ | |
157 | # define USEBOL 01 /* used ^ */ | 152 | # define USEBOL 01 /* used ^ */ | |
158 | # define USEEOL 02 /* used $ */ | 153 | # define USEEOL 02 /* used $ */ | |
159 | # define BAD 04 /* something wrong */ | 154 | # define BAD 04 /* something wrong */ | |
160 | int nbol; /* number of ^ used */ | 155 | int nbol; /* number of ^ used */ | |
@@ -165,15 +160,15 @@ struct re_guts { | @@ -165,15 +160,15 @@ struct re_guts { | |||
165 | #endif | 160 | #endif | |
166 | RCHAR_T *must; /* match must contain this string */ | 161 | RCHAR_T *must; /* match must contain this string */ | |
167 | int mlen; /* length of must */ | 162 | int mlen; /* length of must */ | |
168 | size_t nsub; /* copy of re_nsub */ | 163 | size_t nsub; /* copy of re_nsub */ | |
169 | int backrefs; /* does it use back references? */ | 164 | int backrefs; /* does it use back references? */ | |
170 | sopno nplus; /* how deep does it nest +s? */ | 165 | sopno nplus; /* how deep does it nest +s? */ | |
171 | /* catspace must be last */ | 166 | /* catspace must be last */ | |
172 | #if 0 | 167 | #if 0 | |
173 | cat_t catspace[1]; /* actually [NC] */ | 168 | cat_t catspace[1]; /* actually [NC] */ | |
174 | #endif | 169 | #endif | |
175 | }; | 170 | }; | |
176 | 171 | |||
177 | /* misc utilities */ | 172 | /* misc utilities */ | |
178 | #define OUT (RCHAR_T_MAX+1) /* a non-character value */ | 173 | #define OUT WEOF /* a non-character value */ | |
179 | #define ISWORD(c) ((c <= 0xFF && isalnum(c)) || (c) == '_') | 174 | #define ISWORD(c) (((UCHAR_T)c <= 0xFF && isalnum((unsigned char)c)) || (c) == '_') |
--- src/dist/nvi/regex/Attic/regfree.c 2008/05/18 14:31:39 1.1.1.2
+++ src/dist/nvi/regex/Attic/regfree.c 2009/01/02 00:32:11 1.2
@@ -1,14 +1,14 @@ | @@ -1,14 +1,14 @@ | |||
1 | /* $NetBSD: regfree.c,v 1.1.1.2 2008/05/18 14:31:39 aymeric Exp $ */ | 1 | /* $NetBSD: regfree.c,v 1.2 2009/01/02 00:32:11 tnozaki Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 1992, 1993, 1994 Henry Spencer. | 4 | * Copyright (c) 1992, 1993, 1994 Henry Spencer. | |
5 | * Copyright (c) 1992, 1993, 1994 | 5 | * Copyright (c) 1992, 1993, 1994 | |
6 | * The Regents of the University of California. All rights reserved. | 6 | * The Regents of the University of California. All rights reserved. | |
7 | * | 7 | * | |
8 | * This code is derived from software contributed to Berkeley by | 8 | * This code is derived from software contributed to Berkeley by | |
9 | * Henry Spencer of the University of Toronto. | 9 | * Henry Spencer of the University of Toronto. | |
10 | * | 10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | 11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | 12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | 13 | * are met: | |
14 | * 1. Redistributions of source code must retain the above copyright | 14 | * 1. Redistributions of source code must retain the above copyright | |
@@ -61,21 +61,23 @@ regfree(regex_t *preg) | @@ -61,21 +61,23 @@ regfree(regex_t *preg) | |||
61 | register struct re_guts *g; | 61 | register struct re_guts *g; | |
62 | 62 | |||
63 | if (preg->re_magic != MAGIC1) /* oops */ | 63 | if (preg->re_magic != MAGIC1) /* oops */ | |
64 | return; /* nice to complain, but hard */ | 64 | return; /* nice to complain, but hard */ | |
65 | 65 | |||
66 | g = preg->re_g; | 66 | g = preg->re_g; | |
67 | if (g == NULL || g->magic != MAGIC2) /* oops again */ | 67 | if (g == NULL || g->magic != MAGIC2) /* oops again */ | |
68 | return; | 68 | return; | |
69 | preg->re_magic = 0; /* mark it invalid */ | 69 | preg->re_magic = 0; /* mark it invalid */ | |
70 | g->magic = 0; /* mark it invalid */ | 70 | g->magic = 0; /* mark it invalid */ | |
71 | 71 | |||
72 | if (g->strip != NULL) | 72 | if (g->strip != NULL) | |
73 | free((char *)g->strip); | 73 | free((char *)g->strip); | |
74 | if (g->stripdata != NULL) | |||
75 | free((char *)g->stripdata); | |||
74 | if (g->sets != NULL) | 76 | if (g->sets != NULL) | |
75 | free((char *)g->sets); | 77 | free((char *)g->sets); | |
76 | if (g->setbits != NULL) | 78 | if (g->setbits != NULL) | |
77 | free((char *)g->setbits); | 79 | free((char *)g->setbits); | |
78 | if (g->must != NULL) | 80 | if (g->must != NULL) | |
79 | free(g->must); | 81 | free(g->must); | |
80 | free((char *)g); | 82 | free((char *)g); | |
81 | } | 83 | } |