@@ -1,4 +1,4 @@
-/*	$NetBSD: parser.c,v 1.110 2016/03/27 14:36:29 christos Exp $	*/
+/*	$NetBSD: parser.c,v 1.111 2016/03/27 14:39:33 christos Exp $	*/
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)parser.c	8.7 (Berkeley) 5/16/95";
 #else
-__RCSID("$NetBSD: parser.c,v 1.110 2016/03/27 14:36:29 christos Exp $");
+__RCSID("$NetBSD: parser.c,v 1.111 2016/03/27 14:39:33 christos Exp $");
 #endif
 #endif /* not lint */
 
@@ -69,8 +69,6 @@
  * Shell command parser.
  */
 
-#define EOFMARKLEN 79
-
 /* values returned by readtoken */
 #include "token.h"
 
@@ -111,11 +109,12 @@
 STATIC union node *simplecmd(union node **, union node *);
 STATIC union node *makename(void);
 STATIC void parsefname(void);
-STATIC void parseheredoc(void);
+STATIC void slurp_heredoc(char *const, int, int);
+STATIC void readheredocs(void);
 STATIC int peektoken(void);
 STATIC int readtoken(void);
 STATIC int xxreadtoken(void);
-STATIC int readtoken1(int, char const *, char *, int);
+STATIC int readtoken1(int, char const *, int);
 STATIC int noexpand(char *);
 STATIC void synexpect(int, const char *) __dead;
 STATIC void synerror(const char *) __dead;
@@ -196,7 +195,7 @@
 			/* FALLTHROUGH */
 		case TNL:
 			if (tok == TNL) {
-				parseheredoc();
+				readheredocs();
 				if (nlflag)
 					return n1;
 			} else {
@@ -208,7 +207,7 @@
 			break;
 		case TEOF:
 			if (heredoclist)
-				parseheredoc();
+				readheredocs();
 			else
 				pungetc();	/* push back EOF on input */
 			return n1;
@@ -671,7 +670,6 @@
 	if (n->type == NHERE) {
 		struct heredoc *here = heredoc;
 		struct heredoc *p;
-		int i;
 
 		if (quoteflag == 0)
 			n->type = NXHERE;
@@ -680,8 +678,21 @@
 			while (*wordtext == '\t')
 				wordtext++;
 		}
-		if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN)
+
+		/*
+		 * this test is not really necessary, we are not
+		 * required to expand wordtext, but there's no reason
+		 * it cannot be $$ or something like that - that would
+		 * not mean the pid, but literally two '$' characters.
+		 * There is no need for limits on what the word can be.
+		 * However, it needs to stay literal as entered, not
+		 * have $ converted to CTLVAR or something, which as
+		 * the parser is, at the minute, is impossible to prevent.
+		 * So, leave it like this until the rest of the parser is fixed.
+		 */
+		if (! noexpand(wordtext))
 			synerror("Illegal eof marker for << redirection");
+
 		rmescapes(wordtext);
 		here->eofmark = wordtext;
 		here->next = NULL;
@@ -699,32 +710,142 @@
 	}
 }
 
+/*
+ * Check to see whether we are at the end of the here document.  When this
+ * is called, c is set to the first character of the next input line.  If
+ * we are at the end of the here document, this routine sets the c to PEOF.
+ * The new value of c is returned.
+ */
 
+static int
+checkend(int c, char * const eofmark, const int striptabs)
+{
+	if (striptabs) {
+		while (c == '\t')
+			c = pgetc();
+	}
+	if (c == PEOF) {
+		if (*eofmark == '\0')
+			return (c);
+		synerror(EOFhere);
+	}
+	if (c == *eofmark) {
+		int c2;
+		char *q;
+
+		for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++)
+			;
+		if ((c2 == PEOF || c2 == '\n') && *q == '\0') {
+			c = PEOF;
+			if (c2 == '\n') {
+				plinno++;
+				needprompt = doprompt;
+			}
+		} else {
+			pungetc();
+			pushstring(eofmark + 1, q - (eofmark + 1), NULL);
+		}
+	} else if (c == '\n' && *eofmark == '\0') {
+		c = PEOF;
+		plinno++;
+		needprompt = doprompt;
+	}
+	return (c);
+}
+
+
 /*
  * Input any here documents.
  */
 
 STATIC void
-parseheredoc(void)
+slurp_heredoc(char *const eofmark, int striptabs, int sq)
 {
+	int c;
+	char *out;
+
+	c = pgetc();
+
+	/*
+	 * If we hit EOF on the input, and the eofmark is a null string ('')
+	 * we consider this empty line to be the eofmark, and exit without err.
+	 */
+	if (c == PEOF && *eofmark != '\0')
+		synerror(EOFhere);
+
+	STARTSTACKSTR(out);
+
+	while ((c = checkend(c, eofmark, striptabs)) != PEOF) {
+		do {
+			if (sq) {
+				/*
+				 * in single quoted mode (eofmark quoted)
+				 * all we look for is \n so we can check
+				 * for the epfmark - everything saved literally.
+				 */
+				STPUTC(c, out);
+				if (c == '\n')
+					break;
+				continue;
+			}
+			/*
+			 * In double quoted (non-quoted eofmark)
+			 * we must handle \ followed by \n here
+			 * otherwise we can mismatch the end mark.
+			 * All other uses of \ will be handled later
+			 * when the here doc is expanded.
+			 *
+			 * This also makes sure \\ followed by \n does
+			 * not suppress the newline (the \ quotes itself)
+			 */
+			if (c == '\\') {		/* A backslash */
+				c = pgetc();		/* followed by */
+				if (c == '\n')		/* a newline?  */
+					continue;	/* y:drop both */
+				STPUTC('\\', out);	/* else keep \ */
+			}
+			STPUTC(c, out);			/* keep the char */
+			if (c == '\n')			/* at end of line */
+				break;			/* look for eofmark */
+
+		} while ((c = pgetc()) != PEOF);
+
+		/*
+		 * If we have read a line, and reached EOF, without
+		 * finding the eofmark, whether the EOF comes before
+		 * or immediately after the \n, that is an error.
+		 */
+		if (c == PEOF || (c = pgetc()) == PEOF)
+			synerror(EOFhere);
+	}
+	STPUTC('\0', out);
+
+	c = out - stackblock();
+	out = stackblock();
+	grabstackblock(c);
+	wordtext = out;
+
+	TRACE(("Slurped a heredoc (to '%s')%s: len %d, \"%.16s\"...\n",
+		eofmark, striptabs ? " tab stripped" : "", c, wordtext));
+}
+
+STATIC void
+readheredocs(void)
+{
 	struct heredoc *here;
 	union node *n;
 
 	while (heredoclist) {
-		int c;
-
 		here = heredoclist;
 		heredoclist = here->next;
 		if (needprompt) {
 			setprompt(2);
 			needprompt = 0;
 		}
-		if ((c = pgetc()) == PEOF) {
-			synerror(EOFhere);
-			/* NOTREACHED */
-		}
-		readtoken1(c, here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
-		    here->eofmark, here->striptabs);
+
+		slurp_heredoc(here->eofmark, here->striptabs,
+		    here->here->nhere.type == NHERE);
+
 		n = stalloc(sizeof(struct narg));
 		n->narg.type = NARG;
 		n->narg.next = NULL;
@@ -734,6 +855,25 @@
 	}
 }
 
+void
+parse_heredoc(union node *n)
+{
+	if (n->narg.type != NARG)
+		abort();
+
+	if (n->narg.text[0] == '\0')		/* nothing to do */
+		return;
+
+	setinputstring(n->narg.text, 1);
+
+	readtoken1(pgetc(), DQSYNTAX, 1);
+
+	n->narg.text = wordtext;
+	n->narg.backquote = backquotelist;
+
+	popfile();
+}
+
 STATIC int
 peektoken(void)
 {
@@ -764,7 +904,7 @@
 		if (checkkwd == 2) {
 			checkkwd = 0;
 			while (t == TNL) {
-				parseheredoc();
+				readheredocs();
 				t = xxreadtoken();
 			}
 		} else
@@ -887,7 +1027,7 @@
 			}
 			/* FALLTHROUGH */
 		default:
-			return readtoken1(c, BASESYNTAX, NULL, 0);
+			return readtoken1(c, BASESYNTAX, 0);
 		}
 	}
 #undef RETURN
@@ -1039,7 +1179,6 @@
 	}
 }
 
-#define	CHECKEND()	{goto checkend; checkend_return:;}
 #define	PARSEREDIR()	{goto parseredir; parseredir_return:;}
 #define	PARSESUB()	{goto parsesub; parsesub_return:;}
 #define	PARSEARITH()	{goto parsearith; parsearith_return:;}
@@ -1232,12 +1371,11 @@
 }
 
 STATIC int
-readtoken1(int firstc, char const *syn, char *eofmark, int striptabs)
+readtoken1(int firstc, char const *syn, int magicq)
 {
 	int c = firstc;
 	char * out;
 	int len;
-	char line[EOFMARKLEN + 1];
 	struct nodelist *bqlist;
 	int quotef;
 	VSS static_stack;
@@ -1260,7 +1398,6 @@
 
 	STARTSTACKSTR(out);
 	loop: {	/* for each line, until end of word */
-		CHECKEND();	/* set c to PEOF if at end of here document */
 		for (;;) {	/* until end of line or end of word */
 			CHECKSTRSPACE(4, out);	/* permit 4 calls to USTPUTC */
 			switch(syntax[c]) {
@@ -1279,7 +1416,7 @@
 				USTPUTC(c, out);
 				break;
 			case CCTL:
-				if (eofmark == NULL || ISDBLQUOTE())
+				if (!magicq || ISDBLQUOTE())
 					USTPUTC(CTLESC, out);
 				USTPUTC(c, out);
 				break;
@@ -1301,11 +1438,11 @@
 				quotef = 1;
 				if (ISDBLQUOTE() && c != '\\' &&
 				    c != '`' && c != '$' &&
-				    (c != '"' || eofmark != NULL))
+				    (c != '"' || magicq))
 					USTPUTC('\\', out);
 				if (SQSYNTAX[c] == CCTL)
 					USTPUTC(CTLESC, out);
-				else if (eofmark == NULL) {
+				else if (!magicq) {
 					USTPUTC(CTLQUOTEMARK, out);
 					USTPUTC(c, out);
 					if (varnest != 0)
@@ -1316,7 +1453,7 @@
 				break;
 			case CSQUOTE:
 				if (syntax != SQSYNTAX) {
-					if (eofmark == NULL)
+					if (!magicq)
 						USTPUTC(CTLQUOTEMARK, out);
 					quotef = 1;
 					TS_PUSH();
@@ -1324,8 +1461,7 @@
 					quoted = SQ;
 					break;
 				}
-				if (eofmark != NULL && arinest == 0 &&
-				    varnest == 0) {
+				if (magicq && arinest == 0 && varnest == 0) {
 					/* Ignore inside quoted here document */
 					USTPUTC(c, out);
 					break;
@@ -1336,8 +1472,7 @@
 					USTPUTC(CTLQUOTEEND, out);
 				break;
 			case CDQUOTE:
-				if (eofmark != NULL && arinest == 0 &&
-				    varnest == 0) {
+				if (magicq && arinest == 0 && varnest == 0) {
 					/* Ignore inside here document */
 					USTPUTC(c, out);
 					break;
@@ -1354,7 +1489,7 @@
 					}
 					break;
 				}
-				if (eofmark != NULL)
+				if (magicq)
 					break;
 				if (ISDBLQUOTE()) {
 					TS_POP();
@@ -1421,7 +1556,7 @@
 		cleanup_state_stack(stack);
 		synerror("Missing '))'");
 	}
-	if (syntax != BASESYNTAX && /* ! parsebackquote && */ eofmark == NULL) {
+	if (syntax != BASESYNTAX && /* ! parsebackquote && */ !magicq) {
 		cleanup_state_stack(stack);
 		synerror("Unterminated quoted string");
 	}
@@ -1434,8 +1569,8 @@
 	USTPUTC('\0', out);
 	len = out - stackblock();
 	out = stackblock();
-	if (eofmark == NULL) {
-		if ((c == '>' || c == '<')
+	if (!magicq) {
+		if ((c == '<' || c == '>')
 		 && quotef == 0
 		 && (*out == '\0' || is_number(out))) {
 			PARSEREDIR();
@@ -1452,43 +1587,6 @@
 	cleanup_state_stack(stack);
 	return lasttoken = TWORD;
 /* end of readtoken routine */
-
-
-
-/*
- * Check to see whether we are at the end of the here document.  When this
- * is called, c is set to the first character of the next input line.  If
- * we are at the end of the here document, this routine sets the c to PEOF.
- */
-
-checkend: {
-	if (eofmark) {
-		if (c == PEOF)
-			synerror(EOFhere);
-		if (striptabs) {
-			while (c == '\t')
-				c = pgetc();
-		}
-		if (c == *eofmark) {
-			if (pfgets(line, sizeof line) != NULL) {
-				char *p, *q;
-
-				p = line;
-				for (q = eofmark + 1 ; *q && *p == *q ; p++, q++)
-					continue;
-				if ((*p == '\0' || *p == '\n') && *q == '\0') {
-					c = PEOF;
-					plinno++;
-					needprompt = doprompt;
-				} else {
-					pushstring(line, strlen(line), NULL);
-				}
-			} else
-				synerror(EOFhere);
-		}
-	}
-	goto checkend_return;
-}
 
 
 /*