Thu Mar 11 22:31:19 2021 UTC
remove extra increment; fixes: echo o | sed -e 's/o/\a/'


(christos)
diff -r1.48 -r1.49 src/usr.bin/sed/compile.c

cvs diff -r1.48 -r1.49 src/usr.bin/sed/compile.c

--- src/usr.bin/sed/compile.c 2019/10/05 20:23:55 1.48
+++ src/usr.bin/sed/compile.c 2021/03/11 22:31:19 1.49
@@ -1,1087 +1,1086 @@ @@ -1,1087 +1,1086 @@
1/* $NetBSD: compile.c,v 1.48 2019/10/05 20:23:55 christos Exp $ */ 1/* $NetBSD: compile.c,v 1.49 2021/03/11 22:31:19 christos Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1992 Diomidis Spinellis. 4 * Copyright (c) 1992 Diomidis Spinellis.
5 * Copyright (c) 1992, 1993 5 * Copyright (c) 1992, 1993
6 * The Regents of the University of California. All rights reserved. 6 * The Regents of the University of California. All rights reserved.
7 * 7 *
8 * This code is derived from software contributed to Berkeley by 8 * This code is derived from software contributed to Berkeley by
9 * Diomidis Spinellis of Imperial College, University of London. 9 * Diomidis Spinellis of Imperial College, University of London.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors 19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software 20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission. 21 * without specific prior written permission.
22 * 22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE. 33 * SUCH DAMAGE.
34 */ 34 */
35 35
36#if HAVE_NBTOOL_CONFIG_H 36#if HAVE_NBTOOL_CONFIG_H
37#include "nbtool_config.h" 37#include "nbtool_config.h"
38#endif 38#endif
39 39
40#include <sys/cdefs.h> 40#include <sys/cdefs.h>
41__RCSID("$NetBSD: compile.c,v 1.48 2019/10/05 20:23:55 christos Exp $"); 41__RCSID("$NetBSD: compile.c,v 1.49 2021/03/11 22:31:19 christos Exp $");
42#ifdef __FBSDID 42#ifdef __FBSDID
43__FBSDID("$FreeBSD: head/usr.bin/sed/compile.c 259132 2013-12-09 18:57:20Z eadler $"); 43__FBSDID("$FreeBSD: head/usr.bin/sed/compile.c 259132 2013-12-09 18:57:20Z eadler $");
44#endif 44#endif
45 45
46#if 0 46#if 0
47static const char sccsid[] = "@(#)compile.c 8.1 (Berkeley) 6/6/93"; 47static const char sccsid[] = "@(#)compile.c 8.1 (Berkeley) 6/6/93";
48#endif 48#endif
49 49
50#include <sys/types.h> 50#include <sys/types.h>
51#include <sys/stat.h> 51#include <sys/stat.h>
52 52
53#include <ctype.h> 53#include <ctype.h>
54#include <err.h> 54#include <err.h>
55#include <errno.h> 55#include <errno.h>
56#include <fcntl.h> 56#include <fcntl.h>
57#include <limits.h> 57#include <limits.h>
58#include <regex.h> 58#include <regex.h>
59#include <stdio.h> 59#include <stdio.h>
60#include <stdlib.h> 60#include <stdlib.h>
61#include <string.h> 61#include <string.h>
62#include <wchar.h> 62#include <wchar.h>
63 63
64#include "defs.h" 64#include "defs.h"
65#include "extern.h" 65#include "extern.h"
66 66
67#define LHSZ 128 67#define LHSZ 128
68#define LHMASK (LHSZ - 1) 68#define LHMASK (LHSZ - 1)
69static struct labhash { 69static struct labhash {
70 struct labhash *lh_next; 70 struct labhash *lh_next;
71 u_int lh_hash; 71 u_int lh_hash;
72 struct s_command *lh_cmd; 72 struct s_command *lh_cmd;
73 int lh_ref; 73 int lh_ref;
74} *labels[LHSZ]; 74} *labels[LHSZ];
75 75
76static char *compile_addr(char *, struct s_addr *); 76static char *compile_addr(char *, struct s_addr *);
77static char *compile_ccl(char **, char *); 77static char *compile_ccl(char **, char *);
78static char *compile_delimited(char *, char *, int); 78static char *compile_delimited(char *, char *, int);
79static char *compile_flags(char *, struct s_subst *); 79static char *compile_flags(char *, struct s_subst *);
80static regex_t *compile_re(char *, int); 80static regex_t *compile_re(char *, int);
81static char *compile_subst(char *, struct s_subst *); 81static char *compile_subst(char *, struct s_subst *);
82static char *compile_text(void); 82static char *compile_text(void);
83static char *compile_tr(char *, struct s_tr **); 83static char *compile_tr(char *, struct s_tr **);
84static struct s_command 84static struct s_command
85 **compile_stream(struct s_command **); 85 **compile_stream(struct s_command **);
86static char *duptoeol(char *, const char *); 86static char *duptoeol(char *, const char *);
87static void enterlabel(struct s_command *); 87static void enterlabel(struct s_command *);
88static struct s_command 88static struct s_command
89 *findlabel(char *); 89 *findlabel(char *);
90static void fixuplabel(struct s_command *, struct s_command *); 90static void fixuplabel(struct s_command *, struct s_command *);
91static void uselabel(void); 91static void uselabel(void);
92static void parse_escapes(char *); 92static void parse_escapes(char *);
93 93
94/* 94/*
95 * Command specification. This is used to drive the command parser. 95 * Command specification. This is used to drive the command parser.
96 */ 96 */
97struct s_format { 97struct s_format {
98 char code; /* Command code */ 98 char code; /* Command code */
99 int naddr; /* Number of address args */ 99 int naddr; /* Number of address args */
100 enum e_args args; /* Argument type */ 100 enum e_args args; /* Argument type */
101}; 101};
102 102
103static struct s_format cmd_fmts[] = { 103static struct s_format cmd_fmts[] = {
104 {'{', 2, GROUP}, 104 {'{', 2, GROUP},
105 {'}', 0, ENDGROUP}, 105 {'}', 0, ENDGROUP},
106 {'a', 1, TEXT}, 106 {'a', 1, TEXT},
107 {'b', 2, BRANCH}, 107 {'b', 2, BRANCH},
108 {'c', 2, TEXT}, 108 {'c', 2, TEXT},
109 {'d', 2, EMPTY}, 109 {'d', 2, EMPTY},
110 {'D', 2, EMPTY}, 110 {'D', 2, EMPTY},
111 {'g', 2, EMPTY}, 111 {'g', 2, EMPTY},
112 {'G', 2, EMPTY}, 112 {'G', 2, EMPTY},
113 {'h', 2, EMPTY}, 113 {'h', 2, EMPTY},
114 {'H', 2, EMPTY}, 114 {'H', 2, EMPTY},
115 {'i', 1, TEXT}, 115 {'i', 1, TEXT},
116 {'l', 2, EMPTY}, 116 {'l', 2, EMPTY},
117 {'n', 2, EMPTY}, 117 {'n', 2, EMPTY},
118 {'N', 2, EMPTY}, 118 {'N', 2, EMPTY},
119 {'p', 2, EMPTY}, 119 {'p', 2, EMPTY},
120 {'P', 2, EMPTY}, 120 {'P', 2, EMPTY},
121 {'q', 1, EMPTY}, 121 {'q', 1, EMPTY},
122 {'r', 1, RFILE}, 122 {'r', 1, RFILE},
123 {'s', 2, SUBST}, 123 {'s', 2, SUBST},
124 {'t', 2, BRANCH}, 124 {'t', 2, BRANCH},
125 {'w', 2, WFILE}, 125 {'w', 2, WFILE},
126 {'x', 2, EMPTY}, 126 {'x', 2, EMPTY},
127 {'y', 2, TR}, 127 {'y', 2, TR},
128 {'!', 2, NONSEL}, 128 {'!', 2, NONSEL},
129 {':', 0, LABEL}, 129 {':', 0, LABEL},
130 {'#', 0, COMMENT}, 130 {'#', 0, COMMENT},
131 {'=', 1, EMPTY}, 131 {'=', 1, EMPTY},
132 {'\0', 0, COMMENT}, 132 {'\0', 0, COMMENT},
133}; 133};
134 134
135/* The compiled program. */ 135/* The compiled program. */
136struct s_command *prog; 136struct s_command *prog;
137 137
138/* 138/*
139 * Compile the program into prog. 139 * Compile the program into prog.
140 * Initialise appends. 140 * Initialise appends.
141 */ 141 */
142void 142void
143compile(void) 143compile(void)
144{ 144{
145 *compile_stream(&prog) = NULL; 145 *compile_stream(&prog) = NULL;
146 fixuplabel(prog, NULL); 146 fixuplabel(prog, NULL);
147 uselabel(); 147 uselabel();
148 if (appendnum > 0) 148 if (appendnum > 0)
149 appends = xmalloc(sizeof(struct s_appends) * appendnum); 149 appends = xmalloc(sizeof(struct s_appends) * appendnum);
150 match = xmalloc((maxnsub + 1) * sizeof(regmatch_t)); 150 match = xmalloc((maxnsub + 1) * sizeof(regmatch_t));
151} 151}
152 152
153#define EATSPACE() do { \ 153#define EATSPACE() do { \
154 if (p) \ 154 if (p) \
155 while (*p && isspace((unsigned char)*p)) \ 155 while (*p && isspace((unsigned char)*p)) \
156 p++; \ 156 p++; \
157 } while (0) 157 } while (0)
158 158
159static struct s_command ** 159static struct s_command **
160compile_stream(struct s_command **link) 160compile_stream(struct s_command **link)
161{ 161{
162 char *p; 162 char *p;
163 static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */ 163 static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */
164 struct s_command *cmd, *cmd2, *stack; 164 struct s_command *cmd, *cmd2, *stack;
165 struct s_format *fp; 165 struct s_format *fp;
166 char re[_POSIX2_LINE_MAX + 1]; 166 char re[_POSIX2_LINE_MAX + 1];
167 int naddr; /* Number of addresses */ 167 int naddr; /* Number of addresses */
168 168
169 stack = 0; 169 stack = 0;
170 for (;;) { 170 for (;;) {
171 if ((p = cu_fgets(lbuf, sizeof(lbuf), NULL)) == NULL) { 171 if ((p = cu_fgets(lbuf, sizeof(lbuf), NULL)) == NULL) {
172 if (stack != 0) 172 if (stack != 0)
173 errx(1, "%lu: %s: unexpected EOF (pending }'s)", 173 errx(1, "%lu: %s: unexpected EOF (pending }'s)",
174 linenum, fname); 174 linenum, fname);
175 return (link); 175 return (link);
176 } 176 }
177 177
178semicolon: EATSPACE(); 178semicolon: EATSPACE();
179 if (p) { 179 if (p) {
180 if (*p == '#' || *p == '\0') 180 if (*p == '#' || *p == '\0')
181 continue; 181 continue;
182 else if (*p == ';') { 182 else if (*p == ';') {
183 p++; 183 p++;
184 goto semicolon; 184 goto semicolon;
185 } 185 }
186 } 186 }
187 *link = cmd = xmalloc(sizeof(struct s_command)); 187 *link = cmd = xmalloc(sizeof(struct s_command));
188 link = &cmd->next; 188 link = &cmd->next;
189 cmd->startline = cmd->nonsel = 0; 189 cmd->startline = cmd->nonsel = 0;
190 /* First parse the addresses */ 190 /* First parse the addresses */
191 naddr = 0; 191 naddr = 0;
192 192
193/* Valid characters to start an address */ 193/* Valid characters to start an address */
194#define addrchar(c) (strchr("0123456789/\\$", (c))) 194#define addrchar(c) (strchr("0123456789/\\$", (c)))
195 if (addrchar(*p)) { 195 if (addrchar(*p)) {
196 naddr++; 196 naddr++;
197 cmd->a1 = xmalloc(sizeof(struct s_addr)); 197 cmd->a1 = xmalloc(sizeof(struct s_addr));
198 p = compile_addr(p, cmd->a1); 198 p = compile_addr(p, cmd->a1);
199 EATSPACE(); /* EXTENSION */ 199 EATSPACE(); /* EXTENSION */
200 if (*p == ',') { 200 if (*p == ',') {
201 p++; 201 p++;
202 EATSPACE(); /* EXTENSION */ 202 EATSPACE(); /* EXTENSION */
203 naddr++; 203 naddr++;
204 cmd->a2 = xmalloc(sizeof(struct s_addr)); 204 cmd->a2 = xmalloc(sizeof(struct s_addr));
205 p = compile_addr(p, cmd->a2); 205 p = compile_addr(p, cmd->a2);
206 EATSPACE(); 206 EATSPACE();
207 } else 207 } else
208 cmd->a2 = 0; 208 cmd->a2 = 0;
209 } else 209 } else
210 cmd->a1 = cmd->a2 = 0; 210 cmd->a1 = cmd->a2 = 0;
211 211
212nonsel: /* Now parse the command */ 212nonsel: /* Now parse the command */
213 if (!*p) 213 if (!*p)
214 errx(1, "%lu: %s: command expected", linenum, fname); 214 errx(1, "%lu: %s: command expected", linenum, fname);
215 cmd->code = *p; 215 cmd->code = *p;
216 for (fp = cmd_fmts; fp->code; fp++) 216 for (fp = cmd_fmts; fp->code; fp++)
217 if (fp->code == *p) 217 if (fp->code == *p)
218 break; 218 break;
219 if (!fp->code) 219 if (!fp->code)
220 errx(1, "%lu: %s: invalid command code %c", linenum, fname, *p); 220 errx(1, "%lu: %s: invalid command code %c", linenum, fname, *p);
221 if (naddr > fp->naddr) 221 if (naddr > fp->naddr)
222 errx(1, 222 errx(1,
223 "%lu: %s: command %c expects up to %d address(es), found %d", 223 "%lu: %s: command %c expects up to %d address(es), found %d",
224 linenum, fname, *p, fp->naddr, naddr); 224 linenum, fname, *p, fp->naddr, naddr);
225 switch (fp->args) { 225 switch (fp->args) {
226 case NONSEL: /* ! */ 226 case NONSEL: /* ! */
227 p++; 227 p++;
228 EATSPACE(); 228 EATSPACE();
229 cmd->nonsel = ! cmd->nonsel; 229 cmd->nonsel = ! cmd->nonsel;
230 goto nonsel; 230 goto nonsel;
231 case GROUP: /* { */ 231 case GROUP: /* { */
232 p++; 232 p++;
233 EATSPACE(); 233 EATSPACE();
234 cmd->next = stack; 234 cmd->next = stack;
235 stack = cmd; 235 stack = cmd;
236 link = &cmd->u.c; 236 link = &cmd->u.c;
237 if (*p) 237 if (*p)
238 goto semicolon; 238 goto semicolon;
239 break; 239 break;
240 case ENDGROUP: 240 case ENDGROUP:
241 /* 241 /*
242 * Short-circuit command processing, since end of 242 * Short-circuit command processing, since end of
243 * group is really just a noop. 243 * group is really just a noop.
244 */ 244 */
245 cmd->nonsel = 1; 245 cmd->nonsel = 1;
246 if (stack == 0) 246 if (stack == 0)
247 errx(1, "%lu: %s: unexpected }", linenum, fname); 247 errx(1, "%lu: %s: unexpected }", linenum, fname);
248 cmd2 = stack; 248 cmd2 = stack;
249 stack = cmd2->next; 249 stack = cmd2->next;
250 cmd2->next = cmd; 250 cmd2->next = cmd;
251 /*FALLTHROUGH*/ 251 /*FALLTHROUGH*/
252 case EMPTY: /* d D g G h H l n N p P q x = \0 */ 252 case EMPTY: /* d D g G h H l n N p P q x = \0 */
253 p++; 253 p++;
254 EATSPACE(); 254 EATSPACE();
255 switch (*p) { 255 switch (*p) {
256 case ';': 256 case ';':
257 p++; 257 p++;
258 link = &cmd->next; 258 link = &cmd->next;
259 goto semicolon; 259 goto semicolon;
260 case '}': 260 case '}':
261 goto semicolon; 261 goto semicolon;
262 case '\0': 262 case '\0':
263 break; 263 break;
264 default: 264 default:
265 errx(1, "%lu: %s: extra characters at the end of %c command", 265 errx(1, "%lu: %s: extra characters at the end of %c command",
266 linenum, fname, cmd->code); 266 linenum, fname, cmd->code);
267 } 267 }
268 break; 268 break;
269 case TEXT: /* a c i */ 269 case TEXT: /* a c i */
270 p++; 270 p++;
271 EATSPACE(); 271 EATSPACE();
272 if (*p != '\\') 272 if (*p != '\\')
273 errx(1, 273 errx(1,
274"%lu: %s: command %c expects \\ followed by text", linenum, fname, cmd->code); 274"%lu: %s: command %c expects \\ followed by text", linenum, fname, cmd->code);
275 p++; 275 p++;
276 EATSPACE(); 276 EATSPACE();
277 if (*p) 277 if (*p)
278 errx(1, 278 errx(1,
279 "%lu: %s: extra characters after \\ at the end of %c command", 279 "%lu: %s: extra characters after \\ at the end of %c command",
280 linenum, fname, cmd->code); 280 linenum, fname, cmd->code);
281 cmd->t = compile_text(); 281 cmd->t = compile_text();
282 break; 282 break;
283 case COMMENT: /* \0 # */ 283 case COMMENT: /* \0 # */
284 break; 284 break;
285 case WFILE: /* w */ 285 case WFILE: /* w */
286 p++; 286 p++;
287 EATSPACE(); 287 EATSPACE();
288 if (*p == '\0') 288 if (*p == '\0')
289 errx(1, "%lu: %s: filename expected", linenum, fname); 289 errx(1, "%lu: %s: filename expected", linenum, fname);
290 cmd->t = duptoeol(p, "w command"); 290 cmd->t = duptoeol(p, "w command");
291 if (aflag) 291 if (aflag)
292 cmd->u.fd = -1; 292 cmd->u.fd = -1;
293 else if ((cmd->u.fd = open(p, 293 else if ((cmd->u.fd = open(p,
294 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 294 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
295 DEFFILEMODE)) == -1) 295 DEFFILEMODE)) == -1)
296 err(1, "%s", p); 296 err(1, "%s", p);
297 break; 297 break;
298 case RFILE: /* r */ 298 case RFILE: /* r */
299 p++; 299 p++;
300 EATSPACE(); 300 EATSPACE();
301 if (*p == '\0') 301 if (*p == '\0')
302 errx(1, "%lu: %s: filename expected", linenum, fname); 302 errx(1, "%lu: %s: filename expected", linenum, fname);
303 else 303 else
304 cmd->t = duptoeol(p, "read command"); 304 cmd->t = duptoeol(p, "read command");
305 break; 305 break;
306 case BRANCH: /* b t */ 306 case BRANCH: /* b t */
307 p++; 307 p++;
308 EATSPACE(); 308 EATSPACE();
309 if (*p == '\0') 309 if (*p == '\0')
310 cmd->t = NULL; 310 cmd->t = NULL;
311 else 311 else
312 cmd->t = duptoeol(p, "branch"); 312 cmd->t = duptoeol(p, "branch");
313 break; 313 break;
314 case LABEL: /* : */ 314 case LABEL: /* : */
315 p++; 315 p++;
316 EATSPACE(); 316 EATSPACE();
317 cmd->t = duptoeol(p, "label"); 317 cmd->t = duptoeol(p, "label");
318 if (strlen(p) == 0) 318 if (strlen(p) == 0)
319 errx(1, "%lu: %s: empty label", linenum, fname); 319 errx(1, "%lu: %s: empty label", linenum, fname);
320 enterlabel(cmd); 320 enterlabel(cmd);
321 break; 321 break;
322 case SUBST: /* s */ 322 case SUBST: /* s */
323 p++; 323 p++;
324 if (*p == '\0' || *p == '\\') 324 if (*p == '\0' || *p == '\\')
325 errx(1, 325 errx(1,
326"%lu: %s: substitute pattern can not be delimited by newline or backslash", 326"%lu: %s: substitute pattern can not be delimited by newline or backslash",
327 linenum, fname); 327 linenum, fname);
328 cmd->u.s = xcalloc(1, sizeof(struct s_subst)); 328 cmd->u.s = xcalloc(1, sizeof(struct s_subst));
329 p = compile_delimited(p, re, 0); 329 p = compile_delimited(p, re, 0);
330 if (p == NULL) 330 if (p == NULL)
331 errx(1, 331 errx(1,
332 "%lu: %s: unterminated substitute pattern", linenum, fname); 332 "%lu: %s: unterminated substitute pattern", linenum, fname);
333 333
334 /* Compile RE with no case sensitivity temporarily */ 334 /* Compile RE with no case sensitivity temporarily */
335 if (*re == '\0') 335 if (*re == '\0')
336 cmd->u.s->re = NULL; 336 cmd->u.s->re = NULL;
337 else 337 else
338 cmd->u.s->re = compile_re(re, 0); 338 cmd->u.s->re = compile_re(re, 0);
339 --p; 339 --p;
340 p = compile_subst(p, cmd->u.s); 340 p = compile_subst(p, cmd->u.s);
341 p = compile_flags(p, cmd->u.s); 341 p = compile_flags(p, cmd->u.s);
342 342
343 /* Recompile RE with case sensitivity from "I" flag if any */ 343 /* Recompile RE with case sensitivity from "I" flag if any */
344 if (*re == '\0') 344 if (*re == '\0')
345 cmd->u.s->re = NULL; 345 cmd->u.s->re = NULL;
346 else 346 else
347 cmd->u.s->re = compile_re(re, cmd->u.s->icase); 347 cmd->u.s->re = compile_re(re, cmd->u.s->icase);
348 EATSPACE(); 348 EATSPACE();
349 if (*p == ';') { 349 if (*p == ';') {
350 p++; 350 p++;
351 link = &cmd->next; 351 link = &cmd->next;
352 goto semicolon; 352 goto semicolon;
353 } 353 }
354 break; 354 break;
355 case TR: /* y */ 355 case TR: /* y */
356 p++; 356 p++;
357 p = compile_tr(p, &cmd->u.y); 357 p = compile_tr(p, &cmd->u.y);
358 EATSPACE(); 358 EATSPACE();
359 switch (*p) { 359 switch (*p) {
360 case ';': 360 case ';':
361 p++; 361 p++;
362 link = &cmd->next; 362 link = &cmd->next;
363 goto semicolon; 363 goto semicolon;
364 case '}': 364 case '}':
365 goto semicolon; 365 goto semicolon;
366 case '\0': 366 case '\0':
367 break; 367 break;
368 default: 368 default:
369 errx(1, 369 errx(1,
370"%lu: %s: extra text at the end of a transform command", linenum, fname); 370"%lu: %s: extra text at the end of a transform command", linenum, fname);
371 } 371 }
372 if (*p) 372 if (*p)
373 break; 373 break;
374 } 374 }
375 } 375 }
376} 376}
377 377
378/* 378/*
379 * Get a delimited string. P points to the delimiter of the string; d points 379 * Get a delimited string. P points to the delimiter of the string; d points
380 * to a buffer area. Newline and delimiter escapes are processed; other 380 * to a buffer area. Newline and delimiter escapes are processed; other
381 * escapes are ignored. 381 * escapes are ignored.
382 * 382 *
383 * Returns a pointer to the first character after the final delimiter or NULL 383 * Returns a pointer to the first character after the final delimiter or NULL
384 * in the case of a non-terminated string. The character array d is filled 384 * in the case of a non-terminated string. The character array d is filled
385 * with the processed string. 385 * with the processed string.
386 */ 386 */
387static char * 387static char *
388compile_delimited(char *p, char *d, int is_tr) 388compile_delimited(char *p, char *d, int is_tr)
389{ 389{
390 char c; 390 char c;
391 391
392 c = *p++; 392 c = *p++;
393 if (c == '\0') 393 if (c == '\0')
394 return (NULL); 394 return (NULL);
395 else if (c == '\\') 395 else if (c == '\\')
396 errx(1, "%lu: %s: \\ can not be used as a string delimiter", 396 errx(1, "%lu: %s: \\ can not be used as a string delimiter",
397 linenum, fname); 397 linenum, fname);
398 else if (c == '\n') 398 else if (c == '\n')
399 errx(1, "%lu: %s: newline can not be used as a string delimiter", 399 errx(1, "%lu: %s: newline can not be used as a string delimiter",
400 linenum, fname); 400 linenum, fname);
401 while (*p) { 401 while (*p) {
402 if (*p == '[' && *p != c) { 402 if (*p == '[' && *p != c) {
403 if ((d = compile_ccl(&p, d)) == NULL) 403 if ((d = compile_ccl(&p, d)) == NULL)
404 errx(1, "%lu: %s: unbalanced brackets ([])", linenum, fname); 404 errx(1, "%lu: %s: unbalanced brackets ([])", linenum, fname);
405 continue; 405 continue;
406 } else if (*p == '\\' && p[1] == '[') { 406 } else if (*p == '\\' && p[1] == '[') {
407 *d++ = *p++; 407 *d++ = *p++;
408 } else if (*p == '\\' && p[1] == c) 408 } else if (*p == '\\' && p[1] == c)
409 p++; 409 p++;
410 else if (*p == '\\' && p[1] == 'n') { 410 else if (*p == '\\' && p[1] == 'n') {
411 *d++ = '\n'; 411 *d++ = '\n';
412 p += 2; 412 p += 2;
413 continue; 413 continue;
414 } else if (*p == '\\' && p[1] == '\\') { 414 } else if (*p == '\\' && p[1] == '\\') {
415 if (is_tr) 415 if (is_tr)
416 p++; 416 p++;
417 else 417 else
418 *d++ = *p++; 418 *d++ = *p++;
419 } else if (*p == c) { 419 } else if (*p == c) {
420 *d = '\0'; 420 *d = '\0';
421 return (p + 1); 421 return (p + 1);
422 } 422 }
423 *d++ = *p++; 423 *d++ = *p++;
424 } 424 }
425 return (NULL); 425 return (NULL);
426} 426}
427 427
428 428
429/* compile_ccl: expand a POSIX character class */ 429/* compile_ccl: expand a POSIX character class */
430static char * 430static char *
431compile_ccl(char **sp, char *t) 431compile_ccl(char **sp, char *t)
432{ 432{
433 int c, d; 433 int c, d;
434 char *s = *sp; 434 char *s = *sp;
435 435
436 *t++ = *s++; 436 *t++ = *s++;
437 if (*s == '^') 437 if (*s == '^')
438 *t++ = *s++; 438 *t++ = *s++;
439 if (*s == ']') 439 if (*s == ']')
440 *t++ = *s++; 440 *t++ = *s++;
441 for (; *s && (*t = *s) != ']'; s++, t++) 441 for (; *s && (*t = *s) != ']'; s++, t++)
442 if (*s == '[' && ((d = *(s+1)) == '.' || d == ':' || d == '=')) { 442 if (*s == '[' && ((d = *(s+1)) == '.' || d == ':' || d == '=')) {
443 *++t = *++s, t++, s++; 443 *++t = *++s, t++, s++;
444 for (c = *s; (*t = *s) != ']' || c != d; s++, t++) 444 for (c = *s; (*t = *s) != ']' || c != d; s++, t++)
445 if ((c = *s) == '\0') 445 if ((c = *s) == '\0')
446 return NULL; 446 return NULL;
447 } 447 }
448 return (*s == ']') ? *sp = ++s, ++t : NULL; 448 return (*s == ']') ? *sp = ++s, ++t : NULL;
449} 449}
450 450
451/* 451/*
452 * Compiles the regular expression in RE and returns a pointer to the compiled 452 * Compiles the regular expression in RE and returns a pointer to the compiled
453 * regular expression. 453 * regular expression.
454 * Cflags are passed to regcomp. 454 * Cflags are passed to regcomp.
455 */ 455 */
456static regex_t * 456static regex_t *
457compile_re(char *re, int case_insensitive) 457compile_re(char *re, int case_insensitive)
458{ 458{
459 regex_t *rep; 459 regex_t *rep;
460 int eval, flags; 460 int eval, flags;
461 461
462 462
463 flags = rflags; 463 flags = rflags;
464 if (case_insensitive) 464 if (case_insensitive)
465 flags |= REG_ICASE; 465 flags |= REG_ICASE;
466 rep = xmalloc(sizeof(regex_t)); 466 rep = xmalloc(sizeof(regex_t));
467 parse_escapes(re); 467 parse_escapes(re);
468 if ((eval = regcomp(rep, re, flags)) != 0) 468 if ((eval = regcomp(rep, re, flags)) != 0)
469 errx(1, "%lu: %s: RE error: %s", 469 errx(1, "%lu: %s: RE error: %s",
470 linenum, fname, strregerror(eval, rep)); 470 linenum, fname, strregerror(eval, rep));
471 if (maxnsub < rep->re_nsub) 471 if (maxnsub < rep->re_nsub)
472 maxnsub = rep->re_nsub; 472 maxnsub = rep->re_nsub;
473 return (rep); 473 return (rep);
474} 474}
475 475
476static char 476static char
477cton(char c, int base) 477cton(char c, int base)
478{ 478{
479 switch (c) { 479 switch (c) {
480 case '0': case '1': case '2': case '3': case '4': 480 case '0': case '1': case '2': case '3': case '4':
481 case '5': case '6': case '7': 481 case '5': case '6': case '7':
482 return (char)(c - '0'); 482 return (char)(c - '0');
483 case '8': case '9': 483 case '8': case '9':
484 return base == 8 ? '?' : (char)(c - '0'); 484 return base == 8 ? '?' : (char)(c - '0');
485 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 485 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
486 return base == 16 ? (char)(c - 'a' + 10) : '?';  486 return base == 16 ? (char)(c - 'a' + 10) : '?';
487 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 487 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
488 return base == 16 ? (char)(c - 'A' + 10) : '?';  488 return base == 16 ? (char)(c - 'A' + 10) : '?';
489 default: 489 default:
490 return '?'; 490 return '?';
491 } 491 }
492} 492}
493 493
494static int 494static int
495ston(char **pp, char *sp, int base) 495ston(char **pp, char *sp, int base)
496{ 496{
497 char *p = *pp, n; 497 char *p = *pp, n;
498 int r = cton(p[1], base); 498 int r = cton(p[1], base);
499 499
500 if (r == '?') 500 if (r == '?')
501 return 0; 501 return 0;
502 502
503 p++; 503 p++;
504 while ((n = cton(p[1], base)) != '?' && r < 255) { 504 while ((n = cton(p[1], base)) != '?' && r < 255) {
505 r = r * base + n; 505 r = r * base + n;
506 p++; 506 p++;
507 } 507 }
508 *sp = (char)r; 508 *sp = (char)r;
509 *pp = p; 509 *pp = p;
510 return 1; 510 return 1;
511} 511}
512  512
513static int 513static int
514unescape(char **pp, char **spp) 514unescape(char **pp, char **spp)
515{ 515{
516 char *p = *pp; 516 char *p = *pp;
517 char *sp = *spp; 517 char *sp = *spp;
518 518
519 switch (*p) { 519 switch (*p) {
520 case 'o': 520 case 'o':
521 if (!ston(&p, sp, 8)) 521 if (!ston(&p, sp, 8))
522 return 0; 522 return 0;
523 break; 523 break;
524 case 'd': 524 case 'd':
525 if (!ston(&p, sp, 10)) 525 if (!ston(&p, sp, 10))
526 return 0; 526 return 0;
527 break; 527 break;
528 case 'x': 528 case 'x':
529 if (!ston(&p, sp, 16)) 529 if (!ston(&p, sp, 16))
530 return 0; 530 return 0;
531 break; 531 break;
532 case 'a': 532 case 'a':
533 *sp = '\a'; 533 *sp = '\a';
534 p++; 
535 break; 534 break;
536#if 0 535#if 0
537 // No, \b RE 536 // No, \b RE
538 case 'b': 537 case 'b':
539 *sp = '\b'; 538 *sp = '\b';
540 break; 539 break;
541#endif 540#endif
542 case 'f': 541 case 'f':
543 *sp = '\f'; 542 *sp = '\f';
544 break; 543 break;
545 case 'n': 544 case 'n':
546 *sp = '\n'; 545 *sp = '\n';
547 break; 546 break;
548 case 'r': 547 case 'r':
549 *sp = '\r'; 548 *sp = '\r';
550 break; 549 break;
551 case 'v': 550 case 'v':
552 *sp = '\v'; 551 *sp = '\v';
553 break; 552 break;
554 default: 553 default:
555 return 0; 554 return 0;
556 } 555 }
557 *spp = sp + 1; 556 *spp = sp + 1;
558 *pp = p; 557 *pp = p;
559 return 1; 558 return 1;
560} 559}
561 560
562static void 561static void
563parse_escapes(char *buf) 562parse_escapes(char *buf)
564{ 563{
565 char bracket = '\0'; 564 char bracket = '\0';
566 char *p, *q; 565 char *p, *q;
567 566
568 p = q = buf; 567 p = q = buf;
569 568
570 for (p = q = buf; *p; p++) { 569 for (p = q = buf; *p; p++) {
571 if (*p == '\\' && p[1] && !bracket) { 570 if (*p == '\\' && p[1] && !bracket) {
572 p++; 571 p++;
573 if (unescape(&p, &q)) 572 if (unescape(&p, &q))
574 continue; 573 continue;
575 *q++ = '\\'; 574 *q++ = '\\';
576 } 575 }
577 switch (*p) { 576 switch (*p) {
578 case '[': 577 case '[':
579 if (!bracket) 578 if (!bracket)
580 bracket = *p; 579 bracket = *p;
581 break; 580 break;
582 case '.': 581 case '.':
583 case ':': 582 case ':':
584 case '=': 583 case '=':
585 if (bracket == '[' && p[-1] == '[') 584 if (bracket == '[' && p[-1] == '[')
586 bracket = *p; 585 bracket = *p;
587 break; 586 break;
588 case ']': 587 case ']':
589 if (!bracket) 588 if (!bracket)
590 break; 589 break;
591 if (bracket == '[') 590 if (bracket == '[')
592 bracket = '\0'; 591 bracket = '\0';
593 else if (p[-2] != bracket && p[-1] == bracket) 592 else if (p[-2] != bracket && p[-1] == bracket)
594 bracket = '['; 593 bracket = '[';
595 break; 594 break;
596 default: 595 default:
597 break; 596 break;
598 } 597 }
599 *q++ = *p; 598 *q++ = *p;
600 } 599 }
601 *q = '\0'; 600 *q = '\0';
602} 601}
603 602
604/* 603/*
605 * Compile the substitution string of a regular expression and set res to 604 * Compile the substitution string of a regular expression and set res to
606 * point to a saved copy of it. Nsub is the number of parenthesized regular 605 * point to a saved copy of it. Nsub is the number of parenthesized regular
607 * expressions. 606 * expressions.
608 */ 607 */
609static char * 608static char *
610compile_subst(char *p, struct s_subst *s) 609compile_subst(char *p, struct s_subst *s)
611{ 610{
612 static char lbuf[_POSIX2_LINE_MAX + 1]; 611 static char lbuf[_POSIX2_LINE_MAX + 1];
613 size_t asize, size; 612 size_t asize, size;
614 u_char ref; 613 u_char ref;
615 char c, *text, *op, *sp; 614 char c, *text, *op, *sp;
616 int more = 1, sawesc = 0; 615 int more = 1, sawesc = 0;
617 616
618 c = *p++; /* Terminator character */ 617 c = *p++; /* Terminator character */
619 if (c == '\0') 618 if (c == '\0')
620 return (NULL); 619 return (NULL);
621 620
622 s->maxbref = 0; 621 s->maxbref = 0;
623 s->linenum = linenum; 622 s->linenum = linenum;
624 asize = 2 * _POSIX2_LINE_MAX + 1; 623 asize = 2 * _POSIX2_LINE_MAX + 1;
625 text = xmalloc(asize); 624 text = xmalloc(asize);
626 size = 0; 625 size = 0;
627 do { 626 do {
628 op = sp = text + size; 627 op = sp = text + size;
629 for (; *p; p++) { 628 for (; *p; p++) {
630 if (*p == '\\' || sawesc) { 629 if (*p == '\\' || sawesc) {
631 /* 630 /*
632 * If this is a continuation from the last 631 * If this is a continuation from the last
633 * buffer, we won't have a character to 632 * buffer, we won't have a character to
634 * skip over. 633 * skip over.
635 */ 634 */
636 if (sawesc) 635 if (sawesc)
637 sawesc = 0; 636 sawesc = 0;
638 else 637 else
639 p++; 638 p++;
640 639
641 switch (*p) { 640 switch (*p) {
642 case '\0': 641 case '\0':
643 /* 642 /*
644 * This escaped character is continued 643 * This escaped character is continued
645 * in the next part of the line. Note 644 * in the next part of the line. Note
646 * this fact, then cause the loop to 645 * this fact, then cause the loop to
647 * exit w/ normal EOL case and reenter 646 * exit w/ normal EOL case and reenter
648 * above with the new buffer. 647 * above with the new buffer.
649 */ 648 */
650 sawesc = 1; 649 sawesc = 1;
651 p--; 650 p--;
652 continue; 651 continue;
653 case '0': case '1': case '2': case '3': 652 case '0': case '1': case '2': case '3':
654 case '4': case '5': case '6': case '7': 653 case '4': case '5': case '6': case '7':
655 case '8': case '9': 654 case '8': case '9':
656 *sp++ = '\\'; 655 *sp++ = '\\';
657 ref = (u_char)(*p - '0'); 656 ref = (u_char)(*p - '0');
658 if (s->re != NULL && 657 if (s->re != NULL &&
659 ref > s->re->re_nsub) 658 ref > s->re->re_nsub)
660 errx(1, "%lu: %s: \\%c not defined in the RE", 659 errx(1, "%lu: %s: \\%c not defined in the RE",
661 linenum, fname, *p); 660 linenum, fname, *p);
662 if (s->maxbref < ref) 661 if (s->maxbref < ref)
663 s->maxbref = ref; 662 s->maxbref = ref;
664 break; 663 break;
665 case '&': 664 case '&':
666 case '\\': 665 case '\\':
667 *sp++ = '\\'; 666 *sp++ = '\\';
668 break; 667 break;
669 default: 668 default:
670 if (unescape(&p, &sp)) 669 if (unescape(&p, &sp))
671 continue; 670 continue;
672 break; 671 break;
673 } 672 }
674 } else if (*p == c) { 673 } else if (*p == c) {
675 if (*++p == '\0' && more) { 674 if (*++p == '\0' && more) {
676 if (cu_fgets(lbuf, sizeof(lbuf), &more)) 675 if (cu_fgets(lbuf, sizeof(lbuf), &more))
677 p = lbuf; 676 p = lbuf;
678 } 677 }
679 *sp++ = '\0'; 678 *sp++ = '\0';
680 size += (size_t)(sp - op); 679 size += (size_t)(sp - op);
681 s->new = xrealloc(text, size); 680 s->new = xrealloc(text, size);
682 return (p); 681 return (p);
683 } else if (*p == '\n') { 682 } else if (*p == '\n') {
684 errx(1, 683 errx(1,
685"%lu: %s: unescaped newline inside substitute pattern", linenum, fname); 684"%lu: %s: unescaped newline inside substitute pattern", linenum, fname);
686 /* NOTREACHED */ 685 /* NOTREACHED */
687 } 686 }
688 *sp++ = *p; 687 *sp++ = *p;
689 } 688 }
690 size += (size_t)(sp - op); 689 size += (size_t)(sp - op);
691 if (asize - size < _POSIX2_LINE_MAX + 1) { 690 if (asize - size < _POSIX2_LINE_MAX + 1) {
692 asize *= 2; 691 asize *= 2;
693 text = xrealloc(text, asize); 692 text = xrealloc(text, asize);
694 } 693 }
695 } while (cu_fgets(p = lbuf, sizeof(lbuf), &more)); 694 } while (cu_fgets(p = lbuf, sizeof(lbuf), &more));
696 errx(1, "%lu: %s: unterminated substitute in regular expression", 695 errx(1, "%lu: %s: unterminated substitute in regular expression",
697 linenum, fname); 696 linenum, fname);
698 /* NOTREACHED */ 697 /* NOTREACHED */
699} 698}
700 699
701/* 700/*
702 * Compile the flags of the s command 701 * Compile the flags of the s command
703 */ 702 */
704static char * 703static char *
705compile_flags(char *p, struct s_subst *s) 704compile_flags(char *p, struct s_subst *s)
706{ 705{
707 int gn; /* True if we have seen g or n */ 706 int gn; /* True if we have seen g or n */
708 unsigned long nval; 707 unsigned long nval;
709 char wfile[_POSIX2_LINE_MAX + 1], *q; 708 char wfile[_POSIX2_LINE_MAX + 1], *q;
710 709
711 s->n = 1; /* Default */ 710 s->n = 1; /* Default */
712 s->p = 0; 711 s->p = 0;
713 s->wfile = NULL; 712 s->wfile = NULL;
714 s->wfd = -1; 713 s->wfd = -1;
715 s->icase = 0; 714 s->icase = 0;
716 for (gn = 0;;) { 715 for (gn = 0;;) {
717 EATSPACE(); /* EXTENSION */ 716 EATSPACE(); /* EXTENSION */
718 switch (*p) { 717 switch (*p) {
719 case 'g': 718 case 'g':
720 if (gn) 719 if (gn)
721 errx(1, 720 errx(1,
722"%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); 721"%lu: %s: more than one number or 'g' in substitute flags", linenum, fname);
723 gn = 1; 722 gn = 1;
724 s->n = 0; 723 s->n = 0;
725 break; 724 break;
726 case '\0': 725 case '\0':
727 case '\n': 726 case '\n':
728 case ';': 727 case ';':
729 return (p); 728 return (p);
730 case 'p': 729 case 'p':
731 s->p = 1; 730 s->p = 1;
732 break; 731 break;
733 case 'i': 732 case 'i':
734 case 'I': 733 case 'I':
735 s->icase = 1; 734 s->icase = 1;
736 break; 735 break;
737 case '1': case '2': case '3': 736 case '1': case '2': case '3':
738 case '4': case '5': case '6': 737 case '4': case '5': case '6':
739 case '7': case '8': case '9': 738 case '7': case '8': case '9':
740 if (gn) 739 if (gn)
741 errx(1, 740 errx(1,
742"%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); 741"%lu: %s: more than one number or 'g' in substitute flags", linenum, fname);
743 gn = 1; 742 gn = 1;
744 errno = 0; 743 errno = 0;
745 nval = strtoul(p, &p, 10); 744 nval = strtoul(p, &p, 10);
746 if (errno == ERANGE || nval > INT_MAX) 745 if (errno == ERANGE || nval > INT_MAX)
747 errx(1, 746 errx(1,
748"%lu: %s: overflow in the 'N' substitute flag", linenum, fname); 747"%lu: %s: overflow in the 'N' substitute flag", linenum, fname);
749 s->n = (int)nval; 748 s->n = (int)nval;
750 p--; 749 p--;
751 break; 750 break;
752 case 'w': 751 case 'w':
753 p++; 752 p++;
754#ifdef HISTORIC_PRACTICE 753#ifdef HISTORIC_PRACTICE
755 if (*p != ' ') { 754 if (*p != ' ') {
756 warnx("%lu: %s: space missing before w wfile", linenum, fname); 755 warnx("%lu: %s: space missing before w wfile", linenum, fname);
757 return (p); 756 return (p);
758 } 757 }
759#endif 758#endif
760 EATSPACE(); 759 EATSPACE();
761 q = wfile; 760 q = wfile;
762 while (*p) { 761 while (*p) {
763 if (*p == '\n') 762 if (*p == '\n')
764 break; 763 break;
765 *q++ = *p++; 764 *q++ = *p++;
766 } 765 }
767 *q = '\0'; 766 *q = '\0';
768 if (q == wfile) 767 if (q == wfile)
769 errx(1, "%lu: %s: no wfile specified", linenum, fname); 768 errx(1, "%lu: %s: no wfile specified", linenum, fname);
770 s->wfile = strdup(wfile); 769 s->wfile = strdup(wfile);
771 if (!aflag && (s->wfd = open(wfile, 770 if (!aflag && (s->wfd = open(wfile,
772 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 771 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
773 DEFFILEMODE)) == -1) 772 DEFFILEMODE)) == -1)
774 err(1, "%s", wfile); 773 err(1, "%s", wfile);
775 return (p); 774 return (p);
776 default: 775 default:
777 errx(1, "%lu: %s: bad flag in substitute command: '%c'", 776 errx(1, "%lu: %s: bad flag in substitute command: '%c'",
778 linenum, fname, *p); 777 linenum, fname, *p);
779 break; 778 break;
780 } 779 }
781 p++; 780 p++;
782 } 781 }
783} 782}
784 783
785/* 784/*
786 * Compile a translation set of strings into a lookup table. 785 * Compile a translation set of strings into a lookup table.
787 */ 786 */
788static char * 787static char *
789compile_tr(char *p, struct s_tr **py) 788compile_tr(char *p, struct s_tr **py)
790{ 789{
791 struct s_tr *y; 790 struct s_tr *y;
792 size_t i; 791 size_t i;
793 const char *op, *np; 792 const char *op, *np;
794 char old[_POSIX2_LINE_MAX + 1]; 793 char old[_POSIX2_LINE_MAX + 1];
795 char new[_POSIX2_LINE_MAX + 1]; 794 char new[_POSIX2_LINE_MAX + 1];
796 size_t oclen, oldlen, nclen, newlen; 795 size_t oclen, oldlen, nclen, newlen;
797 mbstate_t mbs1, mbs2; 796 mbstate_t mbs1, mbs2;
798 797
799 *py = y = xmalloc(sizeof(*y)); 798 *py = y = xmalloc(sizeof(*y));
800 y->multis = NULL; 799 y->multis = NULL;
801 y->nmultis = 0; 800 y->nmultis = 0;
802 801
803 if (*p == '\0' || *p == '\\') 802 if (*p == '\0' || *p == '\\')
804 errx(1, 803 errx(1,
805 "%lu: %s: transform pattern can not be delimited by newline or backslash", 804 "%lu: %s: transform pattern can not be delimited by newline or backslash",
806 linenum, fname); 805 linenum, fname);
807 p = compile_delimited(p, old, 1); 806 p = compile_delimited(p, old, 1);
808 if (p == NULL) 807 if (p == NULL)
809 errx(1, "%lu: %s: unterminated transform source string", 808 errx(1, "%lu: %s: unterminated transform source string",
810 linenum, fname); 809 linenum, fname);
811 p = compile_delimited(p - 1, new, 1); 810 p = compile_delimited(p - 1, new, 1);
812 if (p == NULL) 811 if (p == NULL)
813 errx(1, "%lu: %s: unterminated transform target string", 812 errx(1, "%lu: %s: unterminated transform target string",
814 linenum, fname); 813 linenum, fname);
815 EATSPACE(); 814 EATSPACE();
816 op = old; 815 op = old;
817 oldlen = mbsrtowcs(NULL, &op, 0, NULL); 816 oldlen = mbsrtowcs(NULL, &op, 0, NULL);
818 if (oldlen == (size_t)-1) 817 if (oldlen == (size_t)-1)
819 err(1, NULL); 818 err(1, NULL);
820 np = new; 819 np = new;
821 newlen = mbsrtowcs(NULL, &np, 0, NULL); 820 newlen = mbsrtowcs(NULL, &np, 0, NULL);
822 if (newlen == (size_t)-1) 821 if (newlen == (size_t)-1)
823 err(1, NULL); 822 err(1, NULL);
824 if (newlen != oldlen) 823 if (newlen != oldlen)
825 errx(1, "%lu: %s: transform strings are not the same length", 824 errx(1, "%lu: %s: transform strings are not the same length",
826 linenum, fname); 825 linenum, fname);
827 if (MB_CUR_MAX == 1) { 826 if (MB_CUR_MAX == 1) {
828 /* 827 /*
829 * The single-byte encoding case is easy: generate a 828 * The single-byte encoding case is easy: generate a
830 * lookup table. 829 * lookup table.
831 */ 830 */
832 for (i = 0; i <= UCHAR_MAX; i++) 831 for (i = 0; i <= UCHAR_MAX; i++)
833 y->bytetab[i] = (u_char)i; 832 y->bytetab[i] = (u_char)i;
834 for (; *op; op++, np++) 833 for (; *op; op++, np++)
835 y->bytetab[(u_char)*op] = (u_char)*np; 834 y->bytetab[(u_char)*op] = (u_char)*np;
836 } else { 835 } else {
837 /* 836 /*
838 * Multi-byte encoding case: generate a lookup table as 837 * Multi-byte encoding case: generate a lookup table as
839 * above, but only for single-byte characters. The first 838 * above, but only for single-byte characters. The first
840 * bytes of multi-byte characters have their lookup table 839 * bytes of multi-byte characters have their lookup table
841 * entries set to 0, which causes do_tr() to search through 840 * entries set to 0, which causes do_tr() to search through
842 * an auxiliary vector of multi-byte mappings. 841 * an auxiliary vector of multi-byte mappings.
843 */ 842 */
844 memset(&mbs1, 0, sizeof(mbs1)); 843 memset(&mbs1, 0, sizeof(mbs1));
845 memset(&mbs2, 0, sizeof(mbs2)); 844 memset(&mbs2, 0, sizeof(mbs2));
846 for (i = 0; i <= UCHAR_MAX; i++) 845 for (i = 0; i <= UCHAR_MAX; i++)
847 y->bytetab[i] = (u_char)((btowc((int)i) != WEOF) ? i : 0); 846 y->bytetab[i] = (u_char)((btowc((int)i) != WEOF) ? i : 0);
848 while (*op != '\0') { 847 while (*op != '\0') {
849 oclen = mbrlen(op, MB_LEN_MAX, &mbs1); 848 oclen = mbrlen(op, MB_LEN_MAX, &mbs1);
850 if (oclen == (size_t)-1 || oclen == (size_t)-2) 849 if (oclen == (size_t)-1 || oclen == (size_t)-2)
851 errc(1, EILSEQ, NULL); 850 errc(1, EILSEQ, NULL);
852 nclen = mbrlen(np, MB_LEN_MAX, &mbs2); 851 nclen = mbrlen(np, MB_LEN_MAX, &mbs2);
853 if (nclen == (size_t)-1 || nclen == (size_t)-2) 852 if (nclen == (size_t)-1 || nclen == (size_t)-2)
854 errc(1, EILSEQ, NULL); 853 errc(1, EILSEQ, NULL);
855 if (oclen == 1 && nclen == 1) 854 if (oclen == 1 && nclen == 1)
856 y->bytetab[(u_char)*op] = (u_char)*np; 855 y->bytetab[(u_char)*op] = (u_char)*np;
857 else { 856 else {
858 y->bytetab[(u_char)*op] = 0; 857 y->bytetab[(u_char)*op] = 0;
859 y->multis = xrealloc(y->multis, 858 y->multis = xrealloc(y->multis,
860 (y->nmultis + 1) * sizeof(*y->multis)); 859 (y->nmultis + 1) * sizeof(*y->multis));
861 i = y->nmultis++; 860 i = y->nmultis++;
862 y->multis[i].fromlen = oclen; 861 y->multis[i].fromlen = oclen;
863 memcpy(y->multis[i].from, op, oclen); 862 memcpy(y->multis[i].from, op, oclen);
864 y->multis[i].tolen = nclen; 863 y->multis[i].tolen = nclen;
865 memcpy(y->multis[i].to, np, nclen); 864 memcpy(y->multis[i].to, np, nclen);
866 } 865 }
867 op += oclen; 866 op += oclen;
868 np += nclen; 867 np += nclen;
869 } 868 }
870 } 869 }
871 return (p); 870 return (p);
872} 871}
873 872
874/* 873/*
875 * Compile the text following an a or i command. 874 * Compile the text following an a or i command.
876 */ 875 */
877static char * 876static char *
878compile_text(void) 877compile_text(void)
879{ 878{
880 size_t asize, size; 879 size_t asize, size;
881 int esc_nl; 880 int esc_nl;
882 char *text, *p, *op, *s; 881 char *text, *p, *op, *s;
883 char lbuf[_POSIX2_LINE_MAX + 1]; 882 char lbuf[_POSIX2_LINE_MAX + 1];
884 883
885 asize = 2 * _POSIX2_LINE_MAX + 1; 884 asize = 2 * _POSIX2_LINE_MAX + 1;
886 text = xmalloc(asize); 885 text = xmalloc(asize);
887 size = 0; 886 size = 0;
888 while (cu_fgets(lbuf, sizeof(lbuf), NULL)) { 887 while (cu_fgets(lbuf, sizeof(lbuf), NULL)) {
889 op = s = text + size; 888 op = s = text + size;
890 p = lbuf; 889 p = lbuf;
891 for (esc_nl = 0; *p != '\0'; p++) { 890 for (esc_nl = 0; *p != '\0'; p++) {
892 if (*p == '\\' && p[1] != '\0' && *++p == '\n') 891 if (*p == '\\' && p[1] != '\0' && *++p == '\n')
893 esc_nl = 1; 892 esc_nl = 1;
894 *s++ = *p; 893 *s++ = *p;
895 } 894 }
896 size += (size_t)(s - op); 895 size += (size_t)(s - op);
897 if (!esc_nl) { 896 if (!esc_nl) {
898 *s = '\0'; 897 *s = '\0';
899 break; 898 break;
900 } 899 }
901 if (asize - size < _POSIX2_LINE_MAX + 1) { 900 if (asize - size < _POSIX2_LINE_MAX + 1) {
902 asize *= 2; 901 asize *= 2;
903 text = xrealloc(text, asize); 902 text = xrealloc(text, asize);
904 } 903 }
905 } 904 }
906 text[size] = '\0'; 905 text[size] = '\0';
907 p = xrealloc(text, size + 1); 906 p = xrealloc(text, size + 1);
908 return (p); 907 return (p);
909} 908}
910 909
911/* 910/*
912 * Get an address and return a pointer to the first character after 911 * Get an address and return a pointer to the first character after
913 * it. Fill the structure pointed to according to the address. 912 * it. Fill the structure pointed to according to the address.
914 */ 913 */
915static char * 914static char *
916compile_addr(char *p, struct s_addr *a) 915compile_addr(char *p, struct s_addr *a)
917{ 916{
918 char *end, re[_POSIX2_LINE_MAX + 1]; 917 char *end, re[_POSIX2_LINE_MAX + 1];
919 int icase; 918 int icase;
920 919
921 icase = 0; 920 icase = 0;
922 921
923 a->type = 0; 922 a->type = 0;
924 switch (*p) { 923 switch (*p) {
925 case '\\': /* Context address */ 924 case '\\': /* Context address */
926 ++p; 925 ++p;
927 /* FALLTHROUGH */ 926 /* FALLTHROUGH */
928 case '/': /* Context address */ 927 case '/': /* Context address */
929 p = compile_delimited(p, re, 0); 928 p = compile_delimited(p, re, 0);
930 if (p == NULL) 929 if (p == NULL)
931 errx(1, "%lu: %s: unterminated regular expression", linenum, fname); 930 errx(1, "%lu: %s: unterminated regular expression", linenum, fname);
932 /* Check for case insensitive regexp flag */ 931 /* Check for case insensitive regexp flag */
933 if (*p == 'I') { 932 if (*p == 'I') {
934 icase = 1; 933 icase = 1;
935 p++; 934 p++;
936 } 935 }
937 if (*re == '\0') 936 if (*re == '\0')
938 a->u.r = NULL; 937 a->u.r = NULL;
939 else 938 else
940 a->u.r = compile_re(re, icase); 939 a->u.r = compile_re(re, icase);
941 a->type = AT_RE; 940 a->type = AT_RE;
942 return (p); 941 return (p);
943 942
944 case '$': /* Last line */ 943 case '$': /* Last line */
945 a->type = AT_LAST; 944 a->type = AT_LAST;
946 return (p + 1); 945 return (p + 1);
947 946
948 case '+': /* Relative line number */ 947 case '+': /* Relative line number */
949 a->type = AT_RELLINE; 948 a->type = AT_RELLINE;
950 p++; 949 p++;
951 /* FALLTHROUGH */ 950 /* FALLTHROUGH */
952 /* Line number */ 951 /* Line number */
953 case '0': case '1': case '2': case '3': case '4': 952 case '0': case '1': case '2': case '3': case '4':
954 case '5': case '6': case '7': case '8': case '9': 953 case '5': case '6': case '7': case '8': case '9':
955 if (a->type == 0) 954 if (a->type == 0)
956 a->type = AT_LINE; 955 a->type = AT_LINE;
957 a->u.l = strtoul(p, &end, 10); 956 a->u.l = strtoul(p, &end, 10);
958 return (end); 957 return (end);
959 default: 958 default:
960 errx(1, "%lu: %s: expected context address", linenum, fname); 959 errx(1, "%lu: %s: expected context address", linenum, fname);
961 return (NULL); 960 return (NULL);
962 } 961 }
963} 962}
964 963
965/* 964/*
966 * duptoeol -- 965 * duptoeol --
967 * Return a copy of all the characters up to \n or \0. 966 * Return a copy of all the characters up to \n or \0.
968 */ 967 */
969static char * 968static char *
970duptoeol(char *s, const char *ctype) 969duptoeol(char *s, const char *ctype)
971{ 970{
972 size_t len; 971 size_t len;
973 int ws; 972 int ws;
974 char *p, *start; 973 char *p, *start;
975 974
976 ws = 0; 975 ws = 0;
977 for (start = s; *s != '\0' && *s != '\n'; ++s) 976 for (start = s; *s != '\0' && *s != '\n'; ++s)
978 ws = isspace((unsigned char)*s); 977 ws = isspace((unsigned char)*s);
979 *s = '\0'; 978 *s = '\0';
980 if (ws) 979 if (ws)
981 warnx("%lu: %s: whitespace after %s", linenum, fname, ctype); 980 warnx("%lu: %s: whitespace after %s", linenum, fname, ctype);
982 len = (size_t)(s - start + 1); 981 len = (size_t)(s - start + 1);
983 p = xmalloc(len); 982 p = xmalloc(len);
984 return (memmove(p, start, len)); 983 return (memmove(p, start, len));
985} 984}
986 985
987/* 986/*
988 * Convert goto label names to addresses, and count a and r commands, in 987 * Convert goto label names to addresses, and count a and r commands, in
989 * the given subset of the script. Free the memory used by labels in b 988 * the given subset of the script. Free the memory used by labels in b
990 * and t commands (but not by :). 989 * and t commands (but not by :).
991 * 990 *
992 * TODO: Remove } nodes 991 * TODO: Remove } nodes
993 */ 992 */
994static void 993static void
995fixuplabel(struct s_command *cp, struct s_command *end) 994fixuplabel(struct s_command *cp, struct s_command *end)
996{ 995{
997 996
998 for (; cp != end; cp = cp->next) 997 for (; cp != end; cp = cp->next)
999 switch (cp->code) { 998 switch (cp->code) {
1000 case 'a': 999 case 'a':
1001 case 'r': 1000 case 'r':
1002 appendnum++; 1001 appendnum++;
1003 break; 1002 break;
1004 case 'b': 1003 case 'b':
1005 case 't': 1004 case 't':
1006 /* Resolve branch target. */ 1005 /* Resolve branch target. */
1007 if (cp->t == NULL) { 1006 if (cp->t == NULL) {
1008 cp->u.c = NULL; 1007 cp->u.c = NULL;
1009 break; 1008 break;
1010 } 1009 }
1011 if ((cp->u.c = findlabel(cp->t)) == NULL) 1010 if ((cp->u.c = findlabel(cp->t)) == NULL)
1012 errx(1, "%lu: %s: undefined label '%s'", linenum, fname, cp->t); 1011 errx(1, "%lu: %s: undefined label '%s'", linenum, fname, cp->t);
1013 free(cp->t); 1012 free(cp->t);
1014 break; 1013 break;
1015 case '{': 1014 case '{':
1016 /* Do interior commands. */ 1015 /* Do interior commands. */
1017 fixuplabel(cp->u.c, cp->next); 1016 fixuplabel(cp->u.c, cp->next);
1018 break; 1017 break;
1019 } 1018 }
1020} 1019}
1021 1020
1022/* 1021/*
1023 * Associate the given command label for later lookup. 1022 * Associate the given command label for later lookup.
1024 */ 1023 */
1025static void 1024static void
1026enterlabel(struct s_command *cp) 1025enterlabel(struct s_command *cp)
1027{ 1026{
1028 struct labhash **lhp, *lh; 1027 struct labhash **lhp, *lh;
1029 u_char *p; 1028 u_char *p;
1030 u_int h, c; 1029 u_int h, c;
1031 1030
1032 for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++) 1031 for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++)
1033 h = (h << 5) + h + c; 1032 h = (h << 5) + h + c;
1034 lhp = &labels[h & LHMASK]; 1033 lhp = &labels[h & LHMASK];
1035 for (lh = *lhp; lh != NULL; lh = lh->lh_next) 1034 for (lh = *lhp; lh != NULL; lh = lh->lh_next)
1036 if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0) 1035 if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0)
1037 errx(1, "%lu: %s: duplicate label '%s'", linenum, fname, cp->t); 1036 errx(1, "%lu: %s: duplicate label '%s'", linenum, fname, cp->t);
1038 lh = xmalloc(sizeof *lh); 1037 lh = xmalloc(sizeof *lh);
1039 lh->lh_next = *lhp; 1038 lh->lh_next = *lhp;
1040 lh->lh_hash = h; 1039 lh->lh_hash = h;
1041 lh->lh_cmd = cp; 1040 lh->lh_cmd = cp;
1042 lh->lh_ref = 0; 1041 lh->lh_ref = 0;
1043 *lhp = lh; 1042 *lhp = lh;
1044} 1043}
1045 1044
1046/* 1045/*
1047 * Find the label contained in the command l in the command linked 1046 * Find the label contained in the command l in the command linked
1048 * list cp. L is excluded from the search. Return NULL if not found. 1047 * list cp. L is excluded from the search. Return NULL if not found.
1049 */ 1048 */
1050static struct s_command * 1049static struct s_command *
1051findlabel(char *name) 1050findlabel(char *name)
1052{ 1051{
1053 struct labhash *lh; 1052 struct labhash *lh;
1054 u_char *p; 1053 u_char *p;
1055 u_int h, c; 1054 u_int h, c;
1056 1055
1057 for (h = 0, p = (u_char *)name; (c = *p) != 0; p++) 1056 for (h = 0, p = (u_char *)name; (c = *p) != 0; p++)
1058 h = (h << 5) + h + c; 1057 h = (h << 5) + h + c;
1059 for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) { 1058 for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) {
1060 if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) { 1059 if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) {
1061 lh->lh_ref = 1; 1060 lh->lh_ref = 1;
1062 return (lh->lh_cmd); 1061 return (lh->lh_cmd);
1063 } 1062 }
1064 } 1063 }
1065 return (NULL); 1064 return (NULL);
1066} 1065}
1067 1066
1068/* 1067/*
1069 * Warn about any unused labels. As a side effect, release the label hash 1068 * Warn about any unused labels. As a side effect, release the label hash
1070 * table space. 1069 * table space.
1071 */ 1070 */
1072static void 1071static void
1073uselabel(void) 1072uselabel(void)
1074{ 1073{
1075 struct labhash *lh, *next; 1074 struct labhash *lh, *next;
1076 int i; 1075 int i;
1077 1076
1078 for (i = 0; i < LHSZ; i++) { 1077 for (i = 0; i < LHSZ; i++) {
1079 for (lh = labels[i]; lh != NULL; lh = next) { 1078 for (lh = labels[i]; lh != NULL; lh = next) {
1080 next = lh->lh_next; 1079 next = lh->lh_next;
1081 if (!lh->lh_ref) 1080 if (!lh->lh_ref)
1082 warnx("%lu: %s: unused label '%s'", 1081 warnx("%lu: %s: unused label '%s'",
1083 linenum, fname, lh->lh_cmd->t); 1082 linenum, fname, lh->lh_cmd->t);
1084 free(lh); 1083 free(lh);
1085 } 1084 }
1086 } 1085 }
1087} 1086}