Sun Mar 27 14:39:33 2016 UTC ()
PR bin/50993 - this is a significant rewrite of the way that here
documents are processed.  Now, when first detected, they are
simply read.  The only change made to the text is to join lines
ending with a \ to the subsequent line (for here docs with an
unquoted end marker only, of course); otherwise end marker detection
does not work correctly.  This patch also moves the "internal
subroutine" for looking for the end marker out of readtoken1() (which
had to happen, as readtoken1() is no longer reading the here doc when
it is needed).  The replacement uses code mostly taken from FreeBSD's
sh (thanks!) and, along the way, removes some restrictions on what
the end marker can be.  We still do not allow all we should.
(from kre@)


(christos)
diff -r1.98 -r1.99 src/bin/sh/expand.c
diff -r1.110 -r1.111 src/bin/sh/parser.c
diff -r1.19 -r1.20 src/bin/sh/parser.h

cvs diff -r1.98 -r1.99 src/bin/sh/expand.c (expand / switch to unified diff)

--- src/bin/sh/expand.c 2016/03/27 14:34:46 1.98
+++ src/bin/sh/expand.c 2016/03/27 14:39:33 1.99
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: expand.c,v 1.98 2016/03/27 14:34:46 christos Exp $ */ 1/* $NetBSD: expand.c,v 1.99 2016/03/27 14:39:33 christos Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1991, 1993 4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved. 5 * The Regents of the University of California. All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to Berkeley by 7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist. 8 * Kenneth Almquist.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
@@ -27,27 +27,27 @@ @@ -27,27 +27,27 @@
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE. 32 * SUCH DAMAGE.
33 */ 33 */
34 34
35#include <sys/cdefs.h> 35#include <sys/cdefs.h>
36#ifndef lint 36#ifndef lint
37#if 0 37#if 0
38static char sccsid[] = "@(#)expand.c 8.5 (Berkeley) 5/15/95"; 38static char sccsid[] = "@(#)expand.c 8.5 (Berkeley) 5/15/95";
39#else 39#else
40__RCSID("$NetBSD: expand.c,v 1.98 2016/03/27 14:34:46 christos Exp $"); 40__RCSID("$NetBSD: expand.c,v 1.99 2016/03/27 14:39:33 christos Exp $");
41#endif 41#endif
42#endif /* not lint */ 42#endif /* not lint */
43 43
44#include <sys/types.h> 44#include <sys/types.h>
45#include <sys/time.h> 45#include <sys/time.h>
46#include <sys/stat.h> 46#include <sys/stat.h>
47#include <errno.h> 47#include <errno.h>
48#include <dirent.h> 48#include <dirent.h>
49#include <unistd.h> 49#include <unistd.h>
50#include <pwd.h> 50#include <pwd.h>
51#include <limits.h> 51#include <limits.h>
52#include <stdlib.h> 52#include <stdlib.h>
53#include <stdio.h> 53#include <stdio.h>
@@ -111,26 +111,32 @@ STATIC void expmeta(char *, char *); @@ -111,26 +111,32 @@ STATIC void expmeta(char *, char *);
111STATIC void addfname(char *); 111STATIC void addfname(char *);
112STATIC struct strlist *expsort(struct strlist *); 112STATIC struct strlist *expsort(struct strlist *);
113STATIC struct strlist *msort(struct strlist *, int); 113STATIC struct strlist *msort(struct strlist *, int);
114STATIC int pmatch(char *, char *, int); 114STATIC int pmatch(char *, char *, int);
115STATIC char *cvtnum(int, char *); 115STATIC char *cvtnum(int, char *);
116 116
117/* 117/*
118 * Expand shell variables and backquotes inside a here document. 118 * Expand shell variables and backquotes inside a here document.
119 */ 119 */
120 120
121void 121void
122expandhere(union node *arg, int fd) 122expandhere(union node *arg, int fd)
123{ 123{
 124 /*
 125 * First, parse the content of the here doc (to internal form)
 126 * It was initially saved as (almost) unmodified text.
 127 */
 128 parse_heredoc(arg);
 129
124 herefd = fd; 130 herefd = fd;
125 expandarg(arg, NULL, 0); 131 expandarg(arg, NULL, 0);
126 xwrite(fd, stackblock(), expdest - stackblock()); 132 xwrite(fd, stackblock(), expdest - stackblock());
127} 133}
128 134
129 135
130/* 136/*
131 * Perform variable substitution and command substitution on an argument, 137 * Perform variable substitution and command substitution on an argument,
132 * placing the resulting list of arguments in arglist. If EXP_FULL is true, 138 * placing the resulting list of arguments in arglist. If EXP_FULL is true,
133 * perform splitting and file name expansion. When arglist is NULL, perform 139 * perform splitting and file name expansion. When arglist is NULL, perform
134 * here document expansion. 140 * here document expansion.
135 */ 141 */
136 142

cvs diff -r1.110 -r1.111 src/bin/sh/parser.c (expand / switch to unified diff)

--- src/bin/sh/parser.c 2016/03/27 14:36:29 1.110
+++ src/bin/sh/parser.c 2016/03/27 14:39:33 1.111
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: parser.c,v 1.110 2016/03/27 14:36:29 christos Exp $ */ 1/* $NetBSD: parser.c,v 1.111 2016/03/27 14:39:33 christos Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1991, 1993 4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved. 5 * The Regents of the University of California. All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to Berkeley by 7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist. 8 * Kenneth Almquist.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
@@ -27,27 +27,27 @@ @@ -27,27 +27,27 @@
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE. 32 * SUCH DAMAGE.
33 */ 33 */
34 34
35#include <sys/cdefs.h> 35#include <sys/cdefs.h>
36#ifndef lint 36#ifndef lint
37#if 0 37#if 0
38static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95"; 38static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
39#else 39#else
40__RCSID("$NetBSD: parser.c,v 1.110 2016/03/27 14:36:29 christos Exp $"); 40__RCSID("$NetBSD: parser.c,v 1.111 2016/03/27 14:39:33 christos Exp $");
41#endif 41#endif
42#endif /* not lint */ 42#endif /* not lint */
43 43
44#include <stdio.h> 44#include <stdio.h>
45#include <stdlib.h> 45#include <stdlib.h>
46#include <limits.h> 46#include <limits.h>
47 47
48#include "shell.h" 48#include "shell.h"
49#include "parser.h" 49#include "parser.h"
50#include "nodes.h" 50#include "nodes.h"
51#include "expand.h" /* defines rmescapes() */ 51#include "expand.h" /* defines rmescapes() */
52#include "eval.h" /* defines commandname */ 52#include "eval.h" /* defines commandname */
53#include "redir.h" /* defines copyfd() */ 53#include "redir.h" /* defines copyfd() */
@@ -59,28 +59,26 @@ __RCSID("$NetBSD: parser.c,v 1.110 2016/ @@ -59,28 +59,26 @@ __RCSID("$NetBSD: parser.c,v 1.110 2016/
59#include "error.h" 59#include "error.h"
60#include "memalloc.h" 60#include "memalloc.h"
61#include "mystring.h" 61#include "mystring.h"
62#include "alias.h" 62#include "alias.h"
63#include "show.h" 63#include "show.h"
64#ifndef SMALL 64#ifndef SMALL
65#include "myhistedit.h" 65#include "myhistedit.h"
66#endif 66#endif
67 67
68/* 68/*
69 * Shell command parser. 69 * Shell command parser.
70 */ 70 */
71 71
72#define EOFMARKLEN 79 
73 
74/* values returned by readtoken */ 72/* values returned by readtoken */
75#include "token.h" 73#include "token.h"
76 74
77#define OPENBRACE '{' 75#define OPENBRACE '{'
78#define CLOSEBRACE '}' 76#define CLOSEBRACE '}'
79 77
80 78
81struct heredoc { 79struct heredoc {
82 struct heredoc *next; /* next here document in list */ 80 struct heredoc *next; /* next here document in list */
83 union node *here; /* redirection node */ 81 union node *here; /* redirection node */
84 char *eofmark; /* string indicating end of input */ 82 char *eofmark; /* string indicating end of input */
85 int striptabs; /* if set, strip leading tabs */ 83 int striptabs; /* if set, strip leading tabs */
86}; 84};
@@ -101,31 +99,32 @@ union node *redirnode; @@ -101,31 +99,32 @@ union node *redirnode;
101struct heredoc *heredoc; 99struct heredoc *heredoc;
102int quoteflag; /* set if (part of) last token was quoted */ 100int quoteflag; /* set if (part of) last token was quoted */
103int startlinno; /* line # where last token started */ 101int startlinno; /* line # where last token started */
104int funclinno; /* line # where the current function started */ 102int funclinno; /* line # where the current function started */
105 103
106 104
107STATIC union node *list(int, int); 105STATIC union node *list(int, int);
108STATIC union node *andor(void); 106STATIC union node *andor(void);
109STATIC union node *pipeline(void); 107STATIC union node *pipeline(void);
110STATIC union node *command(void); 108STATIC union node *command(void);
111STATIC union node *simplecmd(union node **, union node *); 109STATIC union node *simplecmd(union node **, union node *);
112STATIC union node *makename(void); 110STATIC union node *makename(void);
113STATIC void parsefname(void); 111STATIC void parsefname(void);
114STATIC void parseheredoc(void); 112STATIC void slurp_heredoc(char *const, int, int);
 113STATIC void readheredocs(void);
115STATIC int peektoken(void); 114STATIC int peektoken(void);
116STATIC int readtoken(void); 115STATIC int readtoken(void);
117STATIC int xxreadtoken(void); 116STATIC int xxreadtoken(void);
118STATIC int readtoken1(int, char const *, char *, int); 117STATIC int readtoken1(int, char const *, int);
119STATIC int noexpand(char *); 118STATIC int noexpand(char *);
120STATIC void synexpect(int, const char *) __dead; 119STATIC void synexpect(int, const char *) __dead;
121STATIC void synerror(const char *) __dead; 120STATIC void synerror(const char *) __dead;
122STATIC void setprompt(int); 121STATIC void setprompt(int);
123 122
124 123
125static const char EOFhere[] = "EOF reading here (<<) document"; 124static const char EOFhere[] = "EOF reading here (<<) document";
126 125
127 126
128/* 127/*
129 * Read and parse a command. Returns NEOF on end of file. (NULL is a 128 * Read and parse a command. Returns NEOF on end of file. (NULL is a
130 * valid parse tree indicating a blank line.) 129 * valid parse tree indicating a blank line.)
131 */ 130 */
@@ -186,39 +185,39 @@ list(int nlflag, int erflag) @@ -186,39 +185,39 @@ list(int nlflag, int erflag)
186 n3 = stalloc(sizeof(struct nbinary)); 185 n3 = stalloc(sizeof(struct nbinary));
187 n3->type = NSEMI; 186 n3->type = NSEMI;
188 n3->nbinary.ch1 = n1; 187 n3->nbinary.ch1 = n1;
189 n3->nbinary.ch2 = n2; 188 n3->nbinary.ch2 = n2;
190 n1 = n3; 189 n1 = n3;
191 } 190 }
192 switch (tok) { 191 switch (tok) {
193 case TBACKGND: 192 case TBACKGND:
194 case TSEMI: 193 case TSEMI:
195 tok = readtoken(); 194 tok = readtoken();
196 /* FALLTHROUGH */ 195 /* FALLTHROUGH */
197 case TNL: 196 case TNL:
198 if (tok == TNL) { 197 if (tok == TNL) {
199 parseheredoc(); 198 readheredocs();
200 if (nlflag) 199 if (nlflag)
201 return n1; 200 return n1;
202 } else { 201 } else {
203 tokpushback++; 202 tokpushback++;
204 } 203 }
205 checkkwd = 2; 204 checkkwd = 2;
206 if (tokendlist[peektoken()]) 205 if (tokendlist[peektoken()])
207 return n1; 206 return n1;
208 break; 207 break;
209 case TEOF: 208 case TEOF:
210 if (heredoclist) 209 if (heredoclist)
211 parseheredoc(); 210 readheredocs();
212 else 211 else
213 pungetc(); /* push back EOF on input */ 212 pungetc(); /* push back EOF on input */
214 return n1; 213 return n1;
215 default: 214 default:
216 if (nlflag || erflag) 215 if (nlflag || erflag)
217 synexpect(-1, 0); 216 synexpect(-1, 0);
218 tokpushback++; 217 tokpushback++;
219 return n1; 218 return n1;
220 } 219 }
221 } 220 }
222} 221}
223 222
224STATIC union node * 223STATIC union node *
@@ -661,89 +660,230 @@ fixredir(union node *n, const char *text @@ -661,89 +660,230 @@ fixredir(union node *n, const char *text
661} 660}
662 661
663 662
664STATIC void 663STATIC void
665parsefname(void) 664parsefname(void)
666{ 665{
667 union node *n = redirnode; 666 union node *n = redirnode;
668 667
669 if (readtoken() != TWORD) 668 if (readtoken() != TWORD)
670 synexpect(-1, 0); 669 synexpect(-1, 0);
671 if (n->type == NHERE) { 670 if (n->type == NHERE) {
672 struct heredoc *here = heredoc; 671 struct heredoc *here = heredoc;
673 struct heredoc *p; 672 struct heredoc *p;
674 int i; 
675 673
676 if (quoteflag == 0) 674 if (quoteflag == 0)
677 n->type = NXHERE; 675 n->type = NXHERE;
678 TRACE(("Here document %d\n", n->type)); 676 TRACE(("Here document %d\n", n->type));
679 if (here->striptabs) { 677 if (here->striptabs) {
680 while (*wordtext == '\t') 678 while (*wordtext == '\t')
681 wordtext++; 679 wordtext++;
682 } 680 }
683 if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN) 681
 682 /*
 683 * this test is not really necessary, we are not
 684 * required to expand wordtext, but there's no reason
 685 * it cannot be $$ or something like that - that would
 686 * not mean the pid, but literally two '$' characters.
 687 * There is no need for limits on what the word can be.
 688 * However, it needs to stay literal as entered, not
 689 * have $ converted to CTLVAR or something, which as
 690 * the parser is, at the minute, is impossible to prevent.
 691 * So, leave it like this until the rest of the parser is fixed.
 692 */
 693 if (! noexpand(wordtext))
684 synerror("Illegal eof marker for << redirection"); 694 synerror("Illegal eof marker for << redirection");
 695
685 rmescapes(wordtext); 696 rmescapes(wordtext);
686 here->eofmark = wordtext; 697 here->eofmark = wordtext;
687 here->next = NULL; 698 here->next = NULL;
688 if (heredoclist == NULL) 699 if (heredoclist == NULL)
689 heredoclist = here; 700 heredoclist = here;
690 else { 701 else {
691 for (p = heredoclist ; p->next ; p = p->next) 702 for (p = heredoclist ; p->next ; p = p->next)
692 continue; 703 continue;
693 p->next = here; 704 p->next = here;
694 } 705 }
695 } else if (n->type == NTOFD || n->type == NFROMFD) { 706 } else if (n->type == NTOFD || n->type == NFROMFD) {
696 fixredir(n, wordtext, 0); 707 fixredir(n, wordtext, 0);
697 } else { 708 } else {
698 n->nfile.fname = makename(); 709 n->nfile.fname = makename();
699 } 710 }
700} 711}
701 712
 713/*
 714 * Check to see whether we are at the end of the here document. When this
 715 * is called, c is set to the first character of the next input line. If
 716 * we are at the end of the here document, this routine sets c to PEOF.
 717 * The new value of c is returned.
 718 */
 719
 720static int
 721checkend(int c, char * const eofmark, const int striptabs)
 722{
 723 if (striptabs) {
 724 while (c == '\t')
 725 c = pgetc();
 726 }
 727 if (c == PEOF) {
 728 if (*eofmark == '\0')
 729 return (c);
 730 synerror(EOFhere);
 731 }
 732 if (c == *eofmark) {
 733 int c2;
 734 char *q;
 735
 736 for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++)
 737 ;
 738 if ((c2 == PEOF || c2 == '\n') && *q == '\0') {
 739 c = PEOF;
 740 if (c2 == '\n') {
 741 plinno++;
 742 needprompt = doprompt;
 743 }
 744 } else {
 745 pungetc();
 746 pushstring(eofmark + 1, q - (eofmark + 1), NULL);
 747 }
 748 } else if (c == '\n' && *eofmark == '\0') {
 749 c = PEOF;
 750 plinno++;
 751 needprompt = doprompt;
 752 }
 753 return (c);
 754}
 755
702 756
703/* 757/*
704 * Input any here documents. 758 * Input any here documents.
705 */ 759 */
706 760
707STATIC void 761STATIC void
708parseheredoc(void) 762slurp_heredoc(char *const eofmark, int striptabs, int sq)
 763{
 764 int c;
 765 char *out;
 766
 767 c = pgetc();
 768
 769 /*
 770 * If we hit EOF on the input, and the eofmark is a null string ('')
 771 * we consider this empty line to be the eofmark, and exit without err.
 772 */
 773 if (c == PEOF && *eofmark != '\0')
 774 synerror(EOFhere);
 775
 776 STARTSTACKSTR(out);
 777
 778 while ((c = checkend(c, eofmark, striptabs)) != PEOF) {
 779 do {
 780 if (sq) {
 781 /*
 782 * in single quoted mode (eofmark quoted)
 783 * all we look for is \n so we can check
 784 * for the eofmark - everything saved literally.
 785 */
 786 STPUTC(c, out);
 787 if (c == '\n')
 788 break;
 789 continue;
 790 }
 791 /*
 792 * In double quoted (non-quoted eofmark)
 793 * we must handle \ followed by \n here
 794 * otherwise we can mismatch the end mark.
 795 * All other uses of \ will be handled later
 796 * when the here doc is expanded.
 797 *
 798 * This also makes sure \\ followed by \n does
 799 * not suppress the newline (the \ quotes itself)
 800 */
 801 if (c == '\\') { /* A backslash */
 802 c = pgetc(); /* followed by */
 803 if (c == '\n') /* a newline? */
 804 continue; /* y:drop both */
 805 STPUTC('\\', out); /* else keep \ */
 806 }
 807 STPUTC(c, out); /* keep the char */
 808 if (c == '\n') /* at end of line */
 809 break; /* look for eofmark */
 810
 811 } while ((c = pgetc()) != PEOF);
 812
 813 /*
 814 * If we have read a line, and reached EOF, without
 815 * finding the eofmark, whether the EOF comes before
 816 * or immediately after the \n, that is an error.
 817 */
 818 if (c == PEOF || (c = pgetc()) == PEOF)
 819 synerror(EOFhere);
 820 }
 821 STPUTC('\0', out);
 822
 823 c = out - stackblock();
 824 out = stackblock();
 825 grabstackblock(c);
 826 wordtext = out;
 827
 828 TRACE(("Slurped a heredoc (to '%s')%s: len %d, \"%.16s\"...\n",
 829 eofmark, striptabs ? " tab stripped" : "", c, wordtext));
 830}
 831
 832STATIC void
 833readheredocs(void)
709{ 834{
710 struct heredoc *here; 835 struct heredoc *here;
711 union node *n; 836 union node *n;
712 837
713 while (heredoclist) { 838 while (heredoclist) {
714 int c; 
715 
716 here = heredoclist; 839 here = heredoclist;
717 heredoclist = here->next; 840 heredoclist = here->next;
718 if (needprompt) { 841 if (needprompt) {
719 setprompt(2); 842 setprompt(2);
720 needprompt = 0; 843 needprompt = 0;
721 } 844 }
722 if ((c = pgetc()) == PEOF) { 845
723 synerror(EOFhere); 846 slurp_heredoc(here->eofmark, here->striptabs,
724 /* NOTREACHED */ 847 here->here->nhere.type == NHERE);
725 } 848
726 readtoken1(c, here->here->type == NHERE? SQSYNTAX : DQSYNTAX, 
727 here->eofmark, here->striptabs); 
728 n = stalloc(sizeof(struct narg)); 849 n = stalloc(sizeof(struct narg));
729 n->narg.type = NARG; 850 n->narg.type = NARG;
730 n->narg.next = NULL; 851 n->narg.next = NULL;
731 n->narg.text = wordtext; 852 n->narg.text = wordtext;
732 n->narg.backquote = backquotelist; 853 n->narg.backquote = backquotelist;
733 here->here->nhere.doc = n; 854 here->here->nhere.doc = n;
734 } 855 }
735} 856}
736 857
 858void
 859parse_heredoc(union node *n)
 860{
 861 if (n->narg.type != NARG)
 862 abort();
 863
 864 if (n->narg.text[0] == '\0') /* nothing to do */
 865 return;
 866
 867 setinputstring(n->narg.text, 1);
 868
 869 readtoken1(pgetc(), DQSYNTAX, 1);
 870
 871 n->narg.text = wordtext;
 872 n->narg.backquote = backquotelist;
 873
 874 popfile();
 875}
 876
737STATIC int 877STATIC int
738peektoken(void) 878peektoken(void)
739{ 879{
740 int t; 880 int t;
741 881
742 t = readtoken(); 882 t = readtoken();
743 tokpushback++; 883 tokpushback++;
744 return (t); 884 return (t);
745} 885}
746 886
747STATIC int 887STATIC int
748readtoken(void) 888readtoken(void)
749{ 889{
@@ -754,27 +894,27 @@ readtoken(void) @@ -754,27 +894,27 @@ readtoken(void)
754#endif 894#endif
755 struct alias *ap; 895 struct alias *ap;
756 896
757 top: 897 top:
758 t = xxreadtoken(); 898 t = xxreadtoken();
759 899
760 if (checkkwd) { 900 if (checkkwd) {
761 /* 901 /*
762 * eat newlines 902 * eat newlines
763 */ 903 */
764 if (checkkwd == 2) { 904 if (checkkwd == 2) {
765 checkkwd = 0; 905 checkkwd = 0;
766 while (t == TNL) { 906 while (t == TNL) {
767 parseheredoc(); 907 readheredocs();
768 t = xxreadtoken(); 908 t = xxreadtoken();
769 } 909 }
770 } else 910 } else
771 checkkwd = 0; 911 checkkwd = 0;
772 /* 912 /*
773 * check for keywords and aliases 913 * check for keywords and aliases
774 */ 914 */
775 if (t == TWORD && !quoteflag) { 915 if (t == TWORD && !quoteflag) {
776 const char *const *pp; 916 const char *const *pp;
777 917
778 for (pp = parsekwd; *pp; pp++) { 918 for (pp = parsekwd; *pp; pp++) {
779 if (**pp == *wordtext && equal(*pp, wordtext)) { 919 if (**pp == *wordtext && equal(*pp, wordtext)) {
780 lasttoken = t = pp - 920 lasttoken = t = pp -
@@ -877,27 +1017,27 @@ xxreadtoken(void) @@ -877,27 +1017,27 @@ xxreadtoken(void)
877 if (doprompt) 1017 if (doprompt)
878 setprompt(2); 1018 setprompt(2);
879 else 1019 else
880 setprompt(0); 1020 setprompt(0);
881 continue; 1021 continue;
882 case PEOF: 1022 case PEOF:
883 RETURN(TEOF); 1023 RETURN(TEOF);
884 default: 1024 default:
885 pungetc(); 1025 pungetc();
886 break; 1026 break;
887 } 1027 }
888 /* FALLTHROUGH */ 1028 /* FALLTHROUGH */
889 default: 1029 default:
890 return readtoken1(c, BASESYNTAX, NULL, 0); 1030 return readtoken1(c, BASESYNTAX, 0);
891 } 1031 }
892 } 1032 }
893#undef RETURN 1033#undef RETURN
894} 1034}
895 1035
896 1036
897 1037
898/* 1038/*
899 * If eofmark is NULL, read a word or a redirection symbol. If eofmark 1039 * If eofmark is NULL, read a word or a redirection symbol. If eofmark
900 * is not NULL, read a here document. In the latter case, eofmark is the 1040 * is not NULL, read a here document. In the latter case, eofmark is the
901 * word which marks the end of the document and striptabs is true if 1041 * word which marks the end of the document and striptabs is true if
902 * leading tabs should be stripped from the document. The argument firstc 1042 * leading tabs should be stripped from the document. The argument firstc
903 * is the first character of the input token or document. 1043 * is the first character of the input token or document.
@@ -1029,27 +1169,26 @@ drop_state_level(VSS *stack) @@ -1029,27 +1169,26 @@ drop_state_level(VSS *stack)
1029 --stack->cur; 1169 --stack->cur;
1030 return stack; 1170 return stack;
1031} 1171}
1032 1172
1033static inline void 1173static inline void
1034cleanup_state_stack(VSS *stack) 1174cleanup_state_stack(VSS *stack)
1035{ 1175{
1036 while (stack->prev != NULL) { 1176 while (stack->prev != NULL) {
1037 stack->cur = 0; 1177 stack->cur = 0;
1038 stack = drop_state_level(stack); 1178 stack = drop_state_level(stack);
1039 } 1179 }
1040} 1180}
1041 1181
1042#define CHECKEND() {goto checkend; checkend_return:;} 
1043#define PARSEREDIR() {goto parseredir; parseredir_return:;} 1182#define PARSEREDIR() {goto parseredir; parseredir_return:;}
1044#define PARSESUB() {goto parsesub; parsesub_return:;} 1183#define PARSESUB() {goto parsesub; parsesub_return:;}
1045#define PARSEARITH() {goto parsearith; parsearith_return:;} 1184#define PARSEARITH() {goto parsearith; parsearith_return:;}
1046 1185
1047/* 1186/*
1048 * The following macros all assume the existence of a local var "stack" 1188 * The following macros all assume the existence of a local var "stack"
1049 * which contains a pointer to the current struct stackstate 1188 * which contains a pointer to the current struct stackstate
1050 */ 1189 */
1051 1190
1052/* 1191/*
1053 * These are macros rather than inline funcs to avoid code churn as much 1192 * These are macros rather than inline funcs to avoid code churn as much
1054 * as possible - they replace macros of the same name used previously. 1193 * as possible - they replace macros of the same name used previously.
1055 */ 1194 */
@@ -1222,149 +1361,145 @@ done: @@ -1222,149 +1361,145 @@ done:
1222 INTON; 1361 INTON;
1223 } 1362 }
1224 parsebackquote = savepbq; 1363 parsebackquote = savepbq;
1225 handler = savehandler; 1364 handler = savehandler;
1226 if (arinest || ISDBLQUOTE()) 1365 if (arinest || ISDBLQUOTE())
1227 USTPUTC(CTLBACKQ | CTLQUOTE, out); 1366 USTPUTC(CTLBACKQ | CTLQUOTE, out);
1228 else 1367 else
1229 USTPUTC(CTLBACKQ, out); 1368 USTPUTC(CTLBACKQ, out);
1230 1369
1231 return out; 1370 return out;
1232} 1371}
1233 1372
1234STATIC int 1373STATIC int
1235readtoken1(int firstc, char const *syn, char *eofmark, int striptabs) 1374readtoken1(int firstc, char const *syn, int magicq)
1236{ 1375{
1237 int c = firstc; 1376 int c = firstc;
1238 char * out; 1377 char * out;
1239 int len; 1378 int len;
1240 char line[EOFMARKLEN + 1]; 
1241 struct nodelist *bqlist; 1379 struct nodelist *bqlist;
1242 int quotef; 1380 int quotef;
1243 VSS static_stack; 1381 VSS static_stack;
1244 VSS *stack = &static_stack; 1382 VSS *stack = &static_stack;
1245 1383
1246 stack->prev = NULL; 1384 stack->prev = NULL;
1247 stack->cur = 0; 1385 stack->cur = 0;
1248 1386
1249 syntax = syn; 1387 syntax = syn;
1250 1388
1251 startlinno = plinno; 1389 startlinno = plinno;
1252 varnest = 0; 1390 varnest = 0;
1253 quoted = 0; 1391 quoted = 0;
1254 if (syntax == DQSYNTAX) 1392 if (syntax == DQSYNTAX)
1255 SETDBLQUOTE(); 1393 SETDBLQUOTE();
1256 quotef = 0; 1394 quotef = 0;
1257 bqlist = NULL; 1395 bqlist = NULL;
1258 arinest = 0; 1396 arinest = 0;
1259 parenlevel = 0; 1397 parenlevel = 0;
1260 1398
1261 STARTSTACKSTR(out); 1399 STARTSTACKSTR(out);
1262 loop: { /* for each line, until end of word */ 1400 loop: { /* for each line, until end of word */
1263 CHECKEND(); /* set c to PEOF if at end of here document */ 
1264 for (;;) { /* until end of line or end of word */ 1401 for (;;) { /* until end of line or end of word */
1265 CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */ 1402 CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */
1266 switch(syntax[c]) { 1403 switch(syntax[c]) {
1267 case CNL: /* '\n' */ 1404 case CNL: /* '\n' */
1268 if (syntax == BASESYNTAX) 1405 if (syntax == BASESYNTAX)
1269 goto endword; /* exit outer loop */ 1406 goto endword; /* exit outer loop */
1270 USTPUTC(c, out); 1407 USTPUTC(c, out);
1271 plinno++; 1408 plinno++;
1272 if (doprompt) 1409 if (doprompt)
1273 setprompt(2); 1410 setprompt(2);
1274 else 1411 else
1275 setprompt(0); 1412 setprompt(0);
1276 c = pgetc(); 1413 c = pgetc();
1277 goto loop; /* continue outer loop */ 1414 goto loop; /* continue outer loop */
1278 case CWORD: 1415 case CWORD:
1279 USTPUTC(c, out); 1416 USTPUTC(c, out);
1280 break; 1417 break;
1281 case CCTL: 1418 case CCTL:
1282 if (eofmark == NULL || ISDBLQUOTE()) 1419 if (!magicq || ISDBLQUOTE())
1283 USTPUTC(CTLESC, out); 1420 USTPUTC(CTLESC, out);
1284 USTPUTC(c, out); 1421 USTPUTC(c, out);
1285 break; 1422 break;
1286 case CBACK: /* backslash */ 1423 case CBACK: /* backslash */
1287 c = pgetc(); 1424 c = pgetc();
1288 if (c == PEOF) { 1425 if (c == PEOF) {
1289 USTPUTC('\\', out); 1426 USTPUTC('\\', out);
1290 pungetc(); 1427 pungetc();
1291 break; 1428 break;
1292 } 1429 }
1293 if (c == '\n') { 1430 if (c == '\n') {
1294 plinno++; 1431 plinno++;
1295 if (doprompt) 1432 if (doprompt)
1296 setprompt(2); 1433 setprompt(2);
1297 else 1434 else
1298 setprompt(0); 1435 setprompt(0);
1299 break; 1436 break;
1300 } 1437 }
1301 quotef = 1; 1438 quotef = 1;
1302 if (ISDBLQUOTE() && c != '\\' && 1439 if (ISDBLQUOTE() && c != '\\' &&
1303 c != '`' && c != '$' && 1440 c != '`' && c != '$' &&
1304 (c != '"' || eofmark != NULL)) 1441 (c != '"' || magicq))
1305 USTPUTC('\\', out); 1442 USTPUTC('\\', out);
1306 if (SQSYNTAX[c] == CCTL) 1443 if (SQSYNTAX[c] == CCTL)
1307 USTPUTC(CTLESC, out); 1444 USTPUTC(CTLESC, out);
1308 else if (eofmark == NULL) { 1445 else if (!magicq) {
1309 USTPUTC(CTLQUOTEMARK, out); 1446 USTPUTC(CTLQUOTEMARK, out);
1310 USTPUTC(c, out); 1447 USTPUTC(c, out);
1311 if (varnest != 0) 1448 if (varnest != 0)
1312 USTPUTC(CTLQUOTEEND, out); 1449 USTPUTC(CTLQUOTEEND, out);
1313 break; 1450 break;
1314 } 1451 }
1315 USTPUTC(c, out); 1452 USTPUTC(c, out);
1316 break; 1453 break;
1317 case CSQUOTE: 1454 case CSQUOTE:
1318 if (syntax != SQSYNTAX) { 1455 if (syntax != SQSYNTAX) {
1319 if (eofmark == NULL) 1456 if (!magicq)
1320 USTPUTC(CTLQUOTEMARK, out); 1457 USTPUTC(CTLQUOTEMARK, out);
1321 quotef = 1; 1458 quotef = 1;
1322 TS_PUSH(); 1459 TS_PUSH();
1323 syntax = SQSYNTAX; 1460 syntax = SQSYNTAX;
1324 quoted = SQ; 1461 quoted = SQ;
1325 break; 1462 break;
1326 } 1463 }
1327 if (eofmark != NULL && arinest == 0 && 1464 if (magicq && arinest == 0 && varnest == 0) {
1328 varnest == 0) { 
1329 /* Ignore inside quoted here document */ 1465 /* Ignore inside quoted here document */
1330 USTPUTC(c, out); 1466 USTPUTC(c, out);
1331 break; 1467 break;
1332 } 1468 }
1333 /* End of single quotes... */ 1469 /* End of single quotes... */
1334 TS_POP(); 1470 TS_POP();
1335 if (syntax == BASESYNTAX && varnest != 0) 1471 if (syntax == BASESYNTAX && varnest != 0)
1336 USTPUTC(CTLQUOTEEND, out); 1472 USTPUTC(CTLQUOTEEND, out);
1337 break; 1473 break;
1338 case CDQUOTE: 1474 case CDQUOTE:
1339 if (eofmark != NULL && arinest == 0 && 1475 if (magicq && arinest == 0 && varnest == 0) {
1340 varnest == 0) { 
1341 /* Ignore inside here document */ 1476 /* Ignore inside here document */
1342 USTPUTC(c, out); 1477 USTPUTC(c, out);
1343 break; 1478 break;
1344 } 1479 }
1345 quotef = 1; 1480 quotef = 1;
1346 if (arinest) { 1481 if (arinest) {
1347 if (ISDBLQUOTE()) { 1482 if (ISDBLQUOTE()) {
1348 TS_POP(); 1483 TS_POP();
1349 } else { 1484 } else {
1350 TS_PUSH(); 1485 TS_PUSH();
1351 syntax = DQSYNTAX; 1486 syntax = DQSYNTAX;
1352 SETDBLQUOTE(); 1487 SETDBLQUOTE();
1353 USTPUTC(CTLQUOTEMARK, out); 1488 USTPUTC(CTLQUOTEMARK, out);
1354 } 1489 }
1355 break; 1490 break;
1356 } 1491 }
1357 if (eofmark != NULL) 1492 if (magicq)
1358 break; 1493 break;
1359 if (ISDBLQUOTE()) { 1494 if (ISDBLQUOTE()) {
1360 TS_POP(); 1495 TS_POP();
1361 if (varnest != 0) 1496 if (varnest != 0)
1362 USTPUTC(CTLQUOTEEND, out); 1497 USTPUTC(CTLQUOTEEND, out);
1363 } else { 1498 } else {
1364 TS_PUSH(); 1499 TS_PUSH();
1365 syntax = DQSYNTAX; 1500 syntax = DQSYNTAX;
1366 SETDBLQUOTE(); 1501 SETDBLQUOTE();
1367 USTPUTC(CTLQUOTEMARK, out); 1502 USTPUTC(CTLQUOTEMARK, out);
1368 } 1503 }
1369 break; 1504 break;
1370 case CVAR: /* '$' */ 1505 case CVAR: /* '$' */
@@ -1411,96 +1546,59 @@ readtoken1(int firstc, char const *syn,  @@ -1411,96 +1546,59 @@ readtoken1(int firstc, char const *syn,
1411 default: 1546 default:
1412 if (varnest == 0 && !ISDBLQUOTE()) 1547 if (varnest == 0 && !ISDBLQUOTE())
1413 goto endword; /* exit outer loop */ 1548 goto endword; /* exit outer loop */
1414 USTPUTC(c, out); 1549 USTPUTC(c, out);
1415 } 1550 }
1416 c = pgetc_macro(); 1551 c = pgetc_macro();
1417 } 1552 }
1418 } 1553 }
1419endword: 1554endword:
1420 if (syntax == ARISYNTAX) { 1555 if (syntax == ARISYNTAX) {
1421 cleanup_state_stack(stack); 1556 cleanup_state_stack(stack);
1422 synerror("Missing '))'"); 1557 synerror("Missing '))'");
1423 } 1558 }
1424 if (syntax != BASESYNTAX && /* ! parsebackquote && */ eofmark == NULL) { 1559 if (syntax != BASESYNTAX && /* ! parsebackquote && */ !magicq) {
1425 cleanup_state_stack(stack); 1560 cleanup_state_stack(stack);
1426 synerror("Unterminated quoted string"); 1561 synerror("Unterminated quoted string");
1427 } 1562 }
1428 if (varnest != 0) { 1563 if (varnest != 0) {
1429 cleanup_state_stack(stack); 1564 cleanup_state_stack(stack);
1430 startlinno = plinno; 1565 startlinno = plinno;
1431 /* { */ 1566 /* { */
1432 synerror("Missing '}'"); 1567 synerror("Missing '}'");
1433 } 1568 }
1434 USTPUTC('\0', out); 1569 USTPUTC('\0', out);
1435 len = out - stackblock(); 1570 len = out - stackblock();
1436 out = stackblock(); 1571 out = stackblock();
1437 if (eofmark == NULL) { 1572 if (!magicq) {
1438 if ((c == '>' || c == '<') 1573 if ((c == '<' || c == '>')
1439 && quotef == 0 1574 && quotef == 0
1440 && (*out == '\0' || is_number(out))) { 1575 && (*out == '\0' || is_number(out))) {
1441 PARSEREDIR(); 1576 PARSEREDIR();
1442 cleanup_state_stack(stack); 1577 cleanup_state_stack(stack);
1443 return lasttoken = TREDIR; 1578 return lasttoken = TREDIR;
1444 } else { 1579 } else {
1445 pungetc(); 1580 pungetc();
1446 } 1581 }
1447 } 1582 }
1448 quoteflag = quotef; 1583 quoteflag = quotef;
1449 backquotelist = bqlist; 1584 backquotelist = bqlist;
1450 grabstackblock(len); 1585 grabstackblock(len);
1451 wordtext = out; 1586 wordtext = out;
1452 cleanup_state_stack(stack); 1587 cleanup_state_stack(stack);
1453 return lasttoken = TWORD; 1588 return lasttoken = TWORD;
1454/* end of readtoken routine */ 1589/* end of readtoken routine */
1455 1590
1456 1591
1457 
1458/* 
1459 * Check to see whether we are at the end of the here document. When this 
1460 * is called, c is set to the first character of the next input line. If 
1461 * we are at the end of the here document, this routine sets the c to PEOF. 
1462 */ 
1463 
1464checkend: { 
1465 if (eofmark) { 
1466 if (c == PEOF) 
1467 synerror(EOFhere); 
1468 if (striptabs) { 
1469 while (c == '\t') 
1470 c = pgetc(); 
1471 } 
1472 if (c == *eofmark) { 
1473 if (pfgets(line, sizeof line) != NULL) { 
1474 char *p, *q; 
1475 
1476 p = line; 
1477 for (q = eofmark + 1 ; *q && *p == *q ; p++, q++) 
1478 continue; 
1479 if ((*p == '\0' || *p == '\n') && *q == '\0') { 
1480 c = PEOF; 
1481 plinno++; 
1482 needprompt = doprompt; 
1483 } else { 
1484 pushstring(line, strlen(line), NULL); 
1485 } 
1486 } else 
1487 synerror(EOFhere); 
1488 } 
1489 } 
1490 goto checkend_return; 
1491} 
1492 
1493 
1494/* 1592/*
1495 * Parse a redirection operator. The variable "out" points to a string 1593 * Parse a redirection operator. The variable "out" points to a string
1496 * specifying the fd to be redirected. The variable "c" contains the 1594 * specifying the fd to be redirected. The variable "c" contains the
1497 * first character of the redirection operator. 1595 * first character of the redirection operator.
1498 */ 1596 */
1499 1597
1500parseredir: { 1598parseredir: {
1501 char fd[64]; 1599 char fd[64];
1502 union node *np; 1600 union node *np;
1503 strlcpy(fd, out, sizeof(fd)); 1601 strlcpy(fd, out, sizeof(fd));
1504 1602
1505 np = stalloc(sizeof(struct nfile)); 1603 np = stalloc(sizeof(struct nfile));
1506 if (c == '>') { 1604 if (c == '>') {

cvs diff -r1.19 -r1.20 src/bin/sh/parser.h (expand / switch to unified diff)

--- src/bin/sh/parser.h 2016/02/22 20:02:00 1.19
+++ src/bin/sh/parser.h 2016/03/27 14:39:33 1.20
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: parser.h,v 1.19 2016/02/22 20:02:00 christos Exp $ */ 1/* $NetBSD: parser.h,v 1.20 2016/03/27 14:39:33 christos Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1991, 1993 4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved. 5 * The Regents of the University of California. All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to Berkeley by 7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist. 8 * Kenneth Almquist.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
@@ -71,15 +71,16 @@ @@ -71,15 +71,16 @@
71 71
72/* 72/*
73 * NEOF is returned by parsecmd when it encounters an end of file. It 73 * NEOF is returned by parsecmd when it encounters an end of file. It
74 * must be distinct from NULL, so we use the address of a variable that 74 * must be distinct from NULL, so we use the address of a variable that
75 * happens to be handy. 75 * happens to be handy.
76 */ 76 */
77extern int tokpushback; 77extern int tokpushback;
78#define NEOF ((union node *)&tokpushback) 78#define NEOF ((union node *)&tokpushback)
79extern int whichprompt; /* 1 == PS1, 2 == PS2 */ 79extern int whichprompt; /* 1 == PS1, 2 == PS2 */
80 80
81 81
82union node *parsecmd(int); 82union node *parsecmd(int);
83void fixredir(union node *, const char *, int); 83void fixredir(union node *, const char *, int);
 84void parse_heredoc(union node *);
84int goodname(char *); 85int goodname(char *);
85const char *getprompt(void *); 86const char *getprompt(void *);