| @@ -1,1087 +1,1086 @@ | | | @@ -1,1087 +1,1086 @@ |
1 | /* $NetBSD: compile.c,v 1.48 2019/10/05 20:23:55 christos Exp $ */ | | 1 | /* $NetBSD: compile.c,v 1.49 2021/03/11 22:31:19 christos Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 1992 Diomidis Spinellis. | | 4 | * Copyright (c) 1992 Diomidis Spinellis. |
5 | * Copyright (c) 1992, 1993 | | 5 | * Copyright (c) 1992, 1993 |
6 | * The Regents of the University of California. All rights reserved. | | 6 | * The Regents of the University of California. All rights reserved. |
7 | * | | 7 | * |
8 | * This code is derived from software contributed to Berkeley by | | 8 | * This code is derived from software contributed to Berkeley by |
9 | * Diomidis Spinellis of Imperial College, University of London. | | 9 | * Diomidis Spinellis of Imperial College, University of London. |
10 | * | | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without | | 11 | * Redistribution and use in source and binary forms, with or without |
12 | * modification, are permitted provided that the following conditions | | 12 | * modification, are permitted provided that the following conditions |
13 | * are met: | | 13 | * are met: |
14 | * 1. Redistributions of source code must retain the above copyright | | 14 | * 1. Redistributions of source code must retain the above copyright |
15 | * notice, this list of conditions and the following disclaimer. | | 15 | * notice, this list of conditions and the following disclaimer. |
16 | * 2. Redistributions in binary form must reproduce the above copyright | | 16 | * 2. Redistributions in binary form must reproduce the above copyright |
17 | * notice, this list of conditions and the following disclaimer in the | | 17 | * notice, this list of conditions and the following disclaimer in the |
18 | * documentation and/or other materials provided with the distribution. | | 18 | * documentation and/or other materials provided with the distribution. |
19 | * 3. Neither the name of the University nor the names of its contributors | | 19 | * 3. Neither the name of the University nor the names of its contributors |
20 | * may be used to endorse or promote products derived from this software | | 20 | * may be used to endorse or promote products derived from this software |
21 | * without specific prior written permission. | | 21 | * without specific prior written permission. |
22 | * | | 22 | * |
23 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | | 23 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
24 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 24 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
25 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 25 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
26 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 26 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
27 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 27 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
28 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 28 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
29 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 29 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
30 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 30 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
31 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 31 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
32 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 32 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
33 | * SUCH DAMAGE. | | 33 | * SUCH DAMAGE. |
34 | */ | | 34 | */ |
35 | | | 35 | |
36 | #if HAVE_NBTOOL_CONFIG_H | | 36 | #if HAVE_NBTOOL_CONFIG_H |
37 | #include "nbtool_config.h" | | 37 | #include "nbtool_config.h" |
38 | #endif | | 38 | #endif |
39 | | | 39 | |
40 | #include <sys/cdefs.h> | | 40 | #include <sys/cdefs.h> |
41 | __RCSID("$NetBSD: compile.c,v 1.48 2019/10/05 20:23:55 christos Exp $"); | | 41 | __RCSID("$NetBSD: compile.c,v 1.49 2021/03/11 22:31:19 christos Exp $"); |
42 | #ifdef __FBSDID | | 42 | #ifdef __FBSDID |
43 | __FBSDID("$FreeBSD: head/usr.bin/sed/compile.c 259132 2013-12-09 18:57:20Z eadler $"); | | 43 | __FBSDID("$FreeBSD: head/usr.bin/sed/compile.c 259132 2013-12-09 18:57:20Z eadler $"); |
44 | #endif | | 44 | #endif |
45 | | | 45 | |
46 | #if 0 | | 46 | #if 0 |
47 | static const char sccsid[] = "@(#)compile.c 8.1 (Berkeley) 6/6/93"; | | 47 | static const char sccsid[] = "@(#)compile.c 8.1 (Berkeley) 6/6/93"; |
48 | #endif | | 48 | #endif |
49 | | | 49 | |
50 | #include <sys/types.h> | | 50 | #include <sys/types.h> |
51 | #include <sys/stat.h> | | 51 | #include <sys/stat.h> |
52 | | | 52 | |
53 | #include <ctype.h> | | 53 | #include <ctype.h> |
54 | #include <err.h> | | 54 | #include <err.h> |
55 | #include <errno.h> | | 55 | #include <errno.h> |
56 | #include <fcntl.h> | | 56 | #include <fcntl.h> |
57 | #include <limits.h> | | 57 | #include <limits.h> |
58 | #include <regex.h> | | 58 | #include <regex.h> |
59 | #include <stdio.h> | | 59 | #include <stdio.h> |
60 | #include <stdlib.h> | | 60 | #include <stdlib.h> |
61 | #include <string.h> | | 61 | #include <string.h> |
62 | #include <wchar.h> | | 62 | #include <wchar.h> |
63 | | | 63 | |
64 | #include "defs.h" | | 64 | #include "defs.h" |
65 | #include "extern.h" | | 65 | #include "extern.h" |
66 | | | 66 | |
/* Number of slots in the labels[] table, and the matching index mask. */
#define LHSZ	128
#define LHMASK	(LHSZ - 1)
/*
 * Label table: labels[] is an array of LHSZ pointers to struct labhash.
 * NOTE(review): presumably a chained hash table indexed by
 * (lh_hash & LHMASK); the code that fills and walks it is not visible
 * here -- confirm against enterlabel()/findlabel().
 */
static struct labhash {
	struct	labhash *lh_next;	/* next entry of the same type */
	u_int	lh_hash;		/* NOTE(review): presumably the label's hash -- confirm */
	struct	s_command *lh_cmd;	/* command attached to this entry */
	int	lh_ref;			/* NOTE(review): presumably "label was referenced" -- confirm */
} *labels[LHSZ];
75 | | | 75 | |
/* Forward declarations of the file-local helpers. */
static char	 *compile_addr(char *, struct s_addr *);
static char	 *compile_ccl(char **, char *);
static char	 *compile_delimited(char *, char *, int);
static char	 *compile_flags(char *, struct s_subst *);
static regex_t	 *compile_re(char *, int);
static char	 *compile_subst(char *, struct s_subst *);
static char	 *compile_text(void);
static char	 *compile_tr(char *, struct s_tr **);
static struct s_command
		**compile_stream(struct s_command **);
static char	 *duptoeol(char *, const char *);
static void	  enterlabel(struct s_command *);
static struct s_command
		 *findlabel(char *);
static void	  fixuplabel(struct s_command *, struct s_command *);
static void	  uselabel(void);
static void	  parse_escapes(char *);
93 | | | 93 | |
/*
 * Command specification.  This is used to drive the command parser.
 */
struct s_format {
	char code;				/* Command code */
	int naddr;				/* Maximum number of address args accepted */
	enum e_args args;			/* Argument type */
};

/*
 * One entry per command character.  compile_stream() scans this table
 * linearly and stops at the first entry whose code is '\0', so the
 * terminating entry must remain last.
 */
static struct s_format cmd_fmts[] = {
	{'{', 2, GROUP},
	{'}', 0, ENDGROUP},
	{'a', 1, TEXT},
	{'b', 2, BRANCH},
	{'c', 2, TEXT},
	{'d', 2, EMPTY},
	{'D', 2, EMPTY},
	{'g', 2, EMPTY},
	{'G', 2, EMPTY},
	{'h', 2, EMPTY},
	{'H', 2, EMPTY},
	{'i', 1, TEXT},
	{'l', 2, EMPTY},
	{'n', 2, EMPTY},
	{'N', 2, EMPTY},
	{'p', 2, EMPTY},
	{'P', 2, EMPTY},
	{'q', 1, EMPTY},
	{'r', 1, RFILE},
	{'s', 2, SUBST},
	{'t', 2, BRANCH},
	{'w', 2, WFILE},
	{'x', 2, EMPTY},
	{'y', 2, TR},
	{'!', 2, NONSEL},
	{':', 0, LABEL},
	{'#', 0, COMMENT},
	{'=', 1, EMPTY},
	{'\0', 0, COMMENT},		/* terminator */
};
134 | | | 134 | |
135 | /* The compiled program. */ | | 135 | /* The compiled program. */ |
136 | struct s_command *prog; | | 136 | struct s_command *prog; |
137 | | | 137 | |
138 | /* | | 138 | /* |
139 | * Compile the program into prog. | | 139 | * Compile the program into prog. |
140 | * Initialise appends. | | 140 | * Initialise appends. |
141 | */ | | 141 | */ |
142 | void | | 142 | void |
143 | compile(void) | | 143 | compile(void) |
144 | { | | 144 | { |
145 | *compile_stream(&prog) = NULL; | | 145 | *compile_stream(&prog) = NULL; |
146 | fixuplabel(prog, NULL); | | 146 | fixuplabel(prog, NULL); |
147 | uselabel(); | | 147 | uselabel(); |
148 | if (appendnum > 0) | | 148 | if (appendnum > 0) |
149 | appends = xmalloc(sizeof(struct s_appends) * appendnum); | | 149 | appends = xmalloc(sizeof(struct s_appends) * appendnum); |
150 | match = xmalloc((maxnsub + 1) * sizeof(regmatch_t)); | | 150 | match = xmalloc((maxnsub + 1) * sizeof(regmatch_t)); |
151 | } | | 151 | } |
152 | | | 152 | |
/*
 * Advance the pointer named `p' in the enclosing scope past any
 * whitespace.  A NULL `p' is left untouched.
 */
#define EATSPACE() do {							\
	if (p)								\
		while (*p && isspace((unsigned char)*p))		\
			p++;						\
	} while (0)

/*
 * Read script lines with cu_fgets() and compile each command into a newly
 * allocated struct s_command, chaining the commands through `link'.
 *
 * Returns the link slot that follows the last command once cu_fgets()
 * reports end of input; the caller is expected to terminate the list
 * through it.  Every syntax error is fatal (errx()/err()).
 *
 * Open '{' groups are kept on `stack', chained through their next field,
 * and popped by the matching '}'.
 */
static struct s_command **
compile_stream(struct s_command **link)
{
	char *p;
	static char lbuf[_POSIX2_LINE_MAX + 1];	/* To save stack */
	struct s_command *cmd, *cmd2, *stack;
	struct s_format *fp;
	char re[_POSIX2_LINE_MAX + 1];
	int naddr;				/* Number of addresses */

	stack = 0;
	for (;;) {
		if ((p = cu_fgets(lbuf, sizeof(lbuf), NULL)) == NULL) {
			/* End of script: every '{' must have been closed. */
			if (stack != 0)
				errx(1, "%lu: %s: unexpected EOF (pending }'s)",
				    linenum, fname);
			return (link);
		}

		/* Re-entry point for a further command on the same line. */
semicolon:	EATSPACE();
		if (p) {
			/* Comment or nothing left: fetch the next line. */
			if (*p == '#' || *p == '\0')
				continue;
			else if (*p == ';') {
				p++;
				goto semicolon;
			}
		}
		/* Append a fresh command and move the link slot past it. */
		*link = cmd = xmalloc(sizeof(struct s_command));
		link = &cmd->next;
		cmd->startline = cmd->nonsel = 0;
		/* First parse the addresses */
		naddr = 0;

/* Valid characters to start an address */
#define	addrchar(c)	(strchr("0123456789/\\$", (c)))
		if (addrchar(*p)) {
			naddr++;
			cmd->a1 = xmalloc(sizeof(struct s_addr));
			p = compile_addr(p, cmd->a1);
			EATSPACE();				/* EXTENSION */
			if (*p == ',') {
				p++;
				EATSPACE();			/* EXTENSION */
				naddr++;
				cmd->a2 = xmalloc(sizeof(struct s_addr));
				p = compile_addr(p, cmd->a2);
				EATSPACE();
			} else
				cmd->a2 = 0;
		} else
			cmd->a1 = cmd->a2 = 0;

nonsel:		/* Now parse the command */
		if (!*p)
			errx(1, "%lu: %s: command expected", linenum, fname);
		cmd->code = *p;
		/* Linear lookup; the table ends at the entry with code '\0'. */
		for (fp = cmd_fmts; fp->code; fp++)
			if (fp->code == *p)
				break;
		if (!fp->code)
			errx(1, "%lu: %s: invalid command code %c", linenum, fname, *p);
		if (naddr > fp->naddr)
			errx(1,
			    "%lu: %s: command %c expects up to %d address(es), found %d",
			    linenum, fname, *p, fp->naddr, naddr);
		switch (fp->args) {
		case NONSEL:			/* ! */
			p++;
			EATSPACE();
			/* Each '!' toggles the flag, then the real command is parsed. */
			cmd->nonsel = ! cmd->nonsel;
			goto nonsel;
		case GROUP:			/* { */
			p++;
			EATSPACE();
			/* Push this command; the group's body is linked from u.c. */
			cmd->next = stack;
			stack = cmd;
			link = &cmd->u.c;
			if (*p)
				goto semicolon;
			break;
		case ENDGROUP:
			/*
			 * Short-circuit command processing, since end of
			 * group is really just a noop.
			 */
			cmd->nonsel = 1;
			if (stack == 0)
				errx(1, "%lu: %s: unexpected }", linenum, fname);
			/* Pop the matching '{' and point its next at this '}'. */
			cmd2 = stack;
			stack = cmd2->next;
			cmd2->next = cmd;
			/*FALLTHROUGH*/
		case EMPTY:		/* d D g G h H l n N p P q x = \0 */
			p++;
			EATSPACE();
			switch (*p) {
			case ';':
				p++;
				link = &cmd->next;
				goto semicolon;
			case '}':
				/* Leave the '}' in place; it is parsed as the next command. */
				goto semicolon;
			case '\0':
				break;
			default:
				errx(1, "%lu: %s: extra characters at the end of %c command",
				    linenum, fname, cmd->code);
			}
			break;
		case TEXT:			/* a c i */
			p++;
			EATSPACE();
			if (*p != '\\')
				errx(1,
				    "%lu: %s: command %c expects \\ followed by text", linenum, fname, cmd->code);
			p++;
			EATSPACE();
			if (*p)
				errx(1,
				    "%lu: %s: extra characters after \\ at the end of %c command",
				    linenum, fname, cmd->code);
			cmd->t = compile_text();
			break;
		case COMMENT:			/* \0 # */
			break;
		case WFILE:			/* w */
			p++;
			EATSPACE();
			if (*p == '\0')
				errx(1, "%lu: %s: filename expected", linenum, fname);
			cmd->t = duptoeol(p, "w command");
			/* With aflag set the file is not opened here; fd stays -1. */
			if (aflag)
				cmd->u.fd = -1;
			else if ((cmd->u.fd = open(p,
			    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
			    DEFFILEMODE)) == -1)
				err(1, "%s", p);
			break;
		case RFILE:			/* r */
			p++;
			EATSPACE();
			if (*p == '\0')
				errx(1, "%lu: %s: filename expected", linenum, fname);
			else
				cmd->t = duptoeol(p, "read command");
			break;
		case BRANCH:			/* b t */
			p++;
			EATSPACE();
			/* A branch without a label stores NULL in cmd->t. */
			if (*p == '\0')
				cmd->t = NULL;
			else
				cmd->t = duptoeol(p, "branch");
			break;
		case LABEL:			/* : */
			p++;
			EATSPACE();
			cmd->t = duptoeol(p, "label");
			/* The emptiness check runs after the label text was duplicated. */
			if (strlen(p) == 0)
				errx(1, "%lu: %s: empty label", linenum, fname);
			enterlabel(cmd);
			break;
		case SUBST:			/* s */
			p++;
			if (*p == '\0' || *p == '\\')
				errx(1,
"%lu: %s: substitute pattern can not be delimited by newline or backslash",
				    linenum, fname);
			cmd->u.s = xcalloc(1, sizeof(struct s_subst));
			p = compile_delimited(p, re, 0);
			if (p == NULL)
				errx(1,
				    "%lu: %s: unterminated substitute pattern", linenum, fname);

			/* Compile RE with no case sensitivity temporarily */
			if (*re == '\0')
				cmd->u.s->re = NULL;
			else
				cmd->u.s->re = compile_re(re, 0);
			/* Step back onto the delimiter before parsing the replacement. */
			--p;
			p = compile_subst(p, cmd->u.s);
			p = compile_flags(p, cmd->u.s);

			/* Recompile RE with case sensitivity from "I" flag if any */
			/*
			 * NOTE(review): the regex_t returned by the first
			 * compile_re() call is overwritten here without being
			 * released, which looks like a leak.  compile_re() also
			 * runs parse_escapes() on `re' a second time -- confirm
			 * both are harmless.
			 */
			if (*re == '\0')
				cmd->u.s->re = NULL;
			else
				cmd->u.s->re = compile_re(re, cmd->u.s->icase);
			EATSPACE();
			if (*p == ';') {
				p++;
				link = &cmd->next;
				goto semicolon;
			}
			break;
		case TR:			/* y */
			p++;
			p = compile_tr(p, &cmd->u.y);
			EATSPACE();
			switch (*p) {
			case ';':
				p++;
				link = &cmd->next;
				goto semicolon;
			case '}':
				goto semicolon;
			case '\0':
				break;
			default:
				errx(1,
"%lu: %s: extra text at the end of a transform command", linenum, fname);
			}
			/*
			 * NOTE(review): every non-NUL case above leaves through
			 * goto or errx(), so this test appears never to be true.
			 */
			if (*p)
				break;
		}
	}
}
377 | | | 377 | |
378 | /* | | 378 | /* |
379 | * Get a delimited string. P points to the delimeter of the string; d points | | 379 | * Get a delimited string. P points to the delimeter of the string; d points |
380 | * to a buffer area. Newline and delimiter escapes are processed; other | | 380 | * to a buffer area. Newline and delimiter escapes are processed; other |
381 | * escapes are ignored. | | 381 | * escapes are ignored. |
382 | * | | 382 | * |
383 | * Returns a pointer to the first character after the final delimiter or NULL | | 383 | * Returns a pointer to the first character after the final delimiter or NULL |
384 | * in the case of a non-terminated string. The character array d is filled | | 384 | * in the case of a non-terminated string. The character array d is filled |
385 | * with the processed string. | | 385 | * with the processed string. |
386 | */ | | 386 | */ |
387 | static char * | | 387 | static char * |
388 | compile_delimited(char *p, char *d, int is_tr) | | 388 | compile_delimited(char *p, char *d, int is_tr) |
389 | { | | 389 | { |
390 | char c; | | 390 | char c; |
391 | | | 391 | |
392 | c = *p++; | | 392 | c = *p++; |
393 | if (c == '\0') | | 393 | if (c == '\0') |
394 | return (NULL); | | 394 | return (NULL); |
395 | else if (c == '\\') | | 395 | else if (c == '\\') |
396 | errx(1, "%lu: %s: \\ can not be used as a string delimiter", | | 396 | errx(1, "%lu: %s: \\ can not be used as a string delimiter", |
397 | linenum, fname); | | 397 | linenum, fname); |
398 | else if (c == '\n') | | 398 | else if (c == '\n') |
399 | errx(1, "%lu: %s: newline can not be used as a string delimiter", | | 399 | errx(1, "%lu: %s: newline can not be used as a string delimiter", |
400 | linenum, fname); | | 400 | linenum, fname); |
401 | while (*p) { | | 401 | while (*p) { |
402 | if (*p == '[' && *p != c) { | | 402 | if (*p == '[' && *p != c) { |
403 | if ((d = compile_ccl(&p, d)) == NULL) | | 403 | if ((d = compile_ccl(&p, d)) == NULL) |
404 | errx(1, "%lu: %s: unbalanced brackets ([])", linenum, fname); | | 404 | errx(1, "%lu: %s: unbalanced brackets ([])", linenum, fname); |
405 | continue; | | 405 | continue; |
406 | } else if (*p == '\\' && p[1] == '[') { | | 406 | } else if (*p == '\\' && p[1] == '[') { |
407 | *d++ = *p++; | | 407 | *d++ = *p++; |
408 | } else if (*p == '\\' && p[1] == c) | | 408 | } else if (*p == '\\' && p[1] == c) |
409 | p++; | | 409 | p++; |
410 | else if (*p == '\\' && p[1] == 'n') { | | 410 | else if (*p == '\\' && p[1] == 'n') { |
411 | *d++ = '\n'; | | 411 | *d++ = '\n'; |
412 | p += 2; | | 412 | p += 2; |
413 | continue; | | 413 | continue; |
414 | } else if (*p == '\\' && p[1] == '\\') { | | 414 | } else if (*p == '\\' && p[1] == '\\') { |
415 | if (is_tr) | | 415 | if (is_tr) |
416 | p++; | | 416 | p++; |
417 | else | | 417 | else |
418 | *d++ = *p++; | | 418 | *d++ = *p++; |
419 | } else if (*p == c) { | | 419 | } else if (*p == c) { |
420 | *d = '\0'; | | 420 | *d = '\0'; |
421 | return (p + 1); | | 421 | return (p + 1); |
422 | } | | 422 | } |
423 | *d++ = *p++; | | 423 | *d++ = *p++; |
424 | } | | 424 | } |
425 | return (NULL); | | 425 | return (NULL); |
426 | } | | 426 | } |
427 | | | 427 | |
428 | | | 428 | |
/*
 * compile_ccl: copy one bracket expression verbatim.
 *
 * *sp points at the opening '['; t is the output position.  A leading '^'
 * and a ']' directly after the opening are copied as ordinary members.
 * Nested "[.", "[:" and "[=" constructs are copied up to their matching
 * ".]", ":]" or "=]".
 *
 * On success *sp is advanced past the closing ']' and the output position
 * just past the copied ']' is returned.  Returns NULL, leaving *sp
 * untouched, when the expression is not terminated.
 */
static char *
compile_ccl(char **sp, char *t)
{
	char *src = *sp;
	char *dst = t;
	int kind, prev;

	*dst++ = *src++;			/* opening '[' */
	if (*src == '^')
		*dst++ = *src++;
	if (*src == ']')
		*dst++ = *src++;

	while (*src != '\0') {
		*dst = *src;
		if (*src == ']')
			break;
		if (*src == '[' &&
		    (src[1] == '.' || src[1] == ':' || src[1] == '=')) {
			kind = src[1];
			dst[1] = src[1];
			src += 2;
			dst += 2;
			/* Copy until a ']' that directly follows `kind'. */
			prev = *src;
			for (;;) {
				*dst = *src;
				if (*src == ']' && prev == kind)
					break;
				prev = *src;
				if (prev == '\0')
					return NULL;
				src++;
				dst++;
			}
		}
		src++;
		dst++;
	}

	if (*src != ']')
		return NULL;
	*sp = src + 1;
	return dst + 1;
}
450 | | | 450 | |
451 | /* | | 451 | /* |
452 | * Compiles the regular expression in RE and returns a pointer to the compiled | | 452 | * Compiles the regular expression in RE and returns a pointer to the compiled |
453 | * regular expression. | | 453 | * regular expression. |
454 | * Cflags are passed to regcomp. | | 454 | * Cflags are passed to regcomp. |
455 | */ | | 455 | */ |
456 | static regex_t * | | 456 | static regex_t * |
457 | compile_re(char *re, int case_insensitive) | | 457 | compile_re(char *re, int case_insensitive) |
458 | { | | 458 | { |
459 | regex_t *rep; | | 459 | regex_t *rep; |
460 | int eval, flags; | | 460 | int eval, flags; |
461 | | | 461 | |
462 | | | 462 | |
463 | flags = rflags; | | 463 | flags = rflags; |
464 | if (case_insensitive) | | 464 | if (case_insensitive) |
465 | flags |= REG_ICASE; | | 465 | flags |= REG_ICASE; |
466 | rep = xmalloc(sizeof(regex_t)); | | 466 | rep = xmalloc(sizeof(regex_t)); |
467 | parse_escapes(re); | | 467 | parse_escapes(re); |
468 | if ((eval = regcomp(rep, re, flags)) != 0) | | 468 | if ((eval = regcomp(rep, re, flags)) != 0) |
469 | errx(1, "%lu: %s: RE error: %s", | | 469 | errx(1, "%lu: %s: RE error: %s", |
470 | linenum, fname, strregerror(eval, rep)); | | 470 | linenum, fname, strregerror(eval, rep)); |
471 | if (maxnsub < rep->re_nsub) | | 471 | if (maxnsub < rep->re_nsub) |
472 | maxnsub = rep->re_nsub; | | 472 | maxnsub = rep->re_nsub; |
473 | return (rep); | | 473 | return (rep); |
474 | } | | 474 | } |
475 | | | 475 | |
/*
 * cton: convert one digit character to its numeric value.
 *
 * '0'..'7' are always accepted, '8' and '9' are rejected when base is 8,
 * and the letters a-f / A-F are accepted only when base is 16.
 * Returns '?' for anything that is not a valid digit.
 */
static char
cton(char c, int base)
{
	switch (c) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7':
		return (char)(c - '0');
	case '8': case '9':
		return base == 8 ? '?' : (char)(c - '0');
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
		return base == 16 ? (char)(c - 'a' + 10) : '?';
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return base == 16 ? (char)(c - 'A' + 10) : '?';
	default:
		return '?';
	}
}

/*
 * ston: parse the number that follows a radix letter.
 *
 * On entry *pp points at the radix letter, so the digits start at
 * (*pp)[1].  On success the value is stored through sp, *pp is left on
 * the last digit consumed, and 1 is returned.  When no valid digit
 * follows, 0 is returned and neither *pp nor *sp is modified.
 *
 * Digits are consumed only while the accumulated value still fits in one
 * byte.  The previous guard tested the value before multiplying, so an
 * input such as "d256" or "x41B" ran past UCHAR_MAX and was silently
 * truncated by the cast below.  Now the first digit that would overflow
 * is left for the caller to treat as ordinary text.
 */
static int
ston(char **pp, char *sp, int base)
{
	char *p = *pp;
	int digit, value;

	value = cton(p[1], base);
	if (value == '?')
		return 0;

	p++;
	for (;;) {
		digit = cton(p[1], base);
		if (digit == '?' || value * base + digit > UCHAR_MAX)
			break;
		value = value * base + digit;
		p++;
	}
	*sp = (char)value;
	*pp = p;
	return 1;
}
512 | | | 512 | |
/*
 * Translate one backslash escape into the byte it denotes.
 *
 * On entry *pp points at the character following the backslash and *spp
 * at the output position.  Recognised selectors are:
 *	o, d, x		number in octal, decimal or hexadecimal (see ston())
 *	a, f, n, r, v	the corresponding C control character
 *
 * On success the byte is stored at **spp, *spp is advanced by one, *pp is
 * left on the last input character that belongs to the escape (callers
 * step past it with their own loop increment) and 1 is returned.  For any
 * other selector, or a numeric selector without digits, nothing is
 * modified and 0 is returned.
 */
static int
unescape(char **pp, char **spp)
{
	char *p = *pp;
	char *sp = *spp;

	switch (*p) {
	case 'o':
		if (!ston(&p, sp, 8))
			return 0;
		break;
	case 'd':
		if (!ston(&p, sp, 10))
			return 0;
		break;
	case 'x':
		if (!ston(&p, sp, 16))
			return 0;
		break;
	/*
	 * Single-character escapes must not advance p: the selector itself
	 * is the last character of the escape.
	 */
	case 'a':
		*sp = '\a';
		break;
	/*
	 * 'b' is intentionally not translated to a backspace; the original
	 * source notes that \b belongs to the RE syntax.
	 */
	case 'f':
		*sp = '\f';
		break;
	case 'n':
		*sp = '\n';
		break;
	case 'r':
		*sp = '\r';
		break;
	case 'v':
		*sp = '\v';
		break;
	default:
		return 0;
	}
	*spp = sp + 1;
	*pp = p;
	return 1;
}
561 | | | 560 | |
/*
 * Expand, in place, the escapes understood by unescape() in the
 * NUL-terminated string buf.
 *
 * Escapes are only expanded outside bracket expressions.  `bracket'
 * tracks where we are:
 *	'\0'		outside any bracket expression
 *	'['		inside [...]
 *	'.' ':' '='	inside a [. .], [: :] or [= =] element of [...]
 *
 * A backslash sequence that unescape() rejects is copied through
 * unchanged, and the escaped character is not fed to the bracket state
 * machine: "\[" is a literal bracket and must not open an expression.
 *
 * The output never gets ahead of the input, but it may trail it by a
 * single byte, in which case p[-2] has already been overwritten.  The two
 * previous *input* characters are therefore remembered in prev1/prev2
 * rather than being read back from the buffer.
 */
static void
parse_escapes(char *buf)
{
	char bracket = '\0';
	char prev1 = '\0', prev2 = '\0';	/* input chars at p[-1], p[-2] */
	char *p, *q;

	for (p = q = buf; *p; p++) {
		if (*p == '\\' && p[1] && !bracket) {
			p++;
			if (!unescape(&p, &q)) {
				/* Not ours: keep the escape verbatim. */
				*q++ = '\\';
				*q++ = *p;
			}
			continue;
		}
		switch (*p) {
		case '[':
			if (!bracket)
				bracket = *p;
			break;
		case '.':
		case ':':
		case '=':
			if (bracket == '[' && prev1 == '[')
				bracket = *p;
			break;
		case ']':
			if (!bracket)
				break;
			if (bracket == '[')
				bracket = '\0';
			else if (prev2 != bracket && prev1 == bracket)
				bracket = '[';
			break;
		default:
			break;
		}
		prev2 = prev1;
		prev1 = *p;
		*q++ = *p;
	}
	*q = '\0';
}
603 | | | 602 | |
604 | /* | | 603 | /* |
605 | * Compile the substitution string of a regular expression and set res to | | 604 | * Compile the substitution string of a regular expression and set res to |
606 | * point to a saved copy of it. Nsub is the number of parenthesized regular | | 605 | * point to a saved copy of it. Nsub is the number of parenthesized regular |
607 | * expressions. | | 606 | * expressions. |
608 | */ | | 607 | */ |
609 | static char * | | 608 | static char * |
610 | compile_subst(char *p, struct s_subst *s) | | 609 | compile_subst(char *p, struct s_subst *s) |
611 | { | | 610 | { |
612 | static char lbuf[_POSIX2_LINE_MAX + 1]; | | 611 | static char lbuf[_POSIX2_LINE_MAX + 1]; |
613 | size_t asize, size; | | 612 | size_t asize, size; |
614 | u_char ref; | | 613 | u_char ref; |
615 | char c, *text, *op, *sp; | | 614 | char c, *text, *op, *sp; |
616 | int more = 1, sawesc = 0; | | 615 | int more = 1, sawesc = 0; |
617 | | | 616 | |
618 | c = *p++; /* Terminator character */ | | 617 | c = *p++; /* Terminator character */ |
619 | if (c == '\0') | | 618 | if (c == '\0') |
620 | return (NULL); | | 619 | return (NULL); |
621 | | | 620 | |
622 | s->maxbref = 0; | | 621 | s->maxbref = 0; |
623 | s->linenum = linenum; | | 622 | s->linenum = linenum; |
624 | asize = 2 * _POSIX2_LINE_MAX + 1; | | 623 | asize = 2 * _POSIX2_LINE_MAX + 1; |
625 | text = xmalloc(asize); | | 624 | text = xmalloc(asize); |
626 | size = 0; | | 625 | size = 0; |
627 | do { | | 626 | do { |
628 | op = sp = text + size; | | 627 | op = sp = text + size; |
629 | for (; *p; p++) { | | 628 | for (; *p; p++) { |
630 | if (*p == '\\' || sawesc) { | | 629 | if (*p == '\\' || sawesc) { |
631 | /* | | 630 | /* |
632 | * If this is a continuation from the last | | 631 | * If this is a continuation from the last |
633 | * buffer, we won't have a character to | | 632 | * buffer, we won't have a character to |
634 | * skip over. | | 633 | * skip over. |
635 | */ | | 634 | */ |
636 | if (sawesc) | | 635 | if (sawesc) |
637 | sawesc = 0; | | 636 | sawesc = 0; |
638 | else | | 637 | else |
639 | p++; | | 638 | p++; |
640 | | | 639 | |
641 | switch (*p) { | | 640 | switch (*p) { |
642 | case '\0': | | 641 | case '\0': |
643 | /* | | 642 | /* |
644 | * This escaped character is continued | | 643 | * This escaped character is continued |
645 | * in the next part of the line. Note | | 644 | * in the next part of the line. Note |
646 | * this fact, then cause the loop to | | 645 | * this fact, then cause the loop to |
647 | * exit w/ normal EOL case and reenter | | 646 | * exit w/ normal EOL case and reenter |
648 | * above with the new buffer. | | 647 | * above with the new buffer. |
649 | */ | | 648 | */ |
650 | sawesc = 1; | | 649 | sawesc = 1; |
651 | p--; | | 650 | p--; |
652 | continue; | | 651 | continue; |
653 | case '0': case '1': case '2': case '3': | | 652 | case '0': case '1': case '2': case '3': |
654 | case '4': case '5': case '6': case '7': | | 653 | case '4': case '5': case '6': case '7': |
655 | case '8': case '9': | | 654 | case '8': case '9': |
656 | *sp++ = '\\'; | | 655 | *sp++ = '\\'; |
657 | ref = (u_char)(*p - '0'); | | 656 | ref = (u_char)(*p - '0'); |
658 | if (s->re != NULL && | | 657 | if (s->re != NULL && |
659 | ref > s->re->re_nsub) | | 658 | ref > s->re->re_nsub) |
660 | errx(1, "%lu: %s: \\%c not defined in the RE", | | 659 | errx(1, "%lu: %s: \\%c not defined in the RE", |
661 | linenum, fname, *p); | | 660 | linenum, fname, *p); |
662 | if (s->maxbref < ref) | | 661 | if (s->maxbref < ref) |
663 | s->maxbref = ref; | | 662 | s->maxbref = ref; |
664 | break; | | 663 | break; |
665 | case '&': | | 664 | case '&': |
666 | case '\\': | | 665 | case '\\': |
667 | *sp++ = '\\'; | | 666 | *sp++ = '\\'; |
668 | break; | | 667 | break; |
669 | default: | | 668 | default: |
670 | if (unescape(&p, &sp)) | | 669 | if (unescape(&p, &sp)) |
671 | continue; | | 670 | continue; |
672 | break; | | 671 | break; |
673 | } | | 672 | } |
674 | } else if (*p == c) { | | 673 | } else if (*p == c) { |
675 | if (*++p == '\0' && more) { | | 674 | if (*++p == '\0' && more) { |
676 | if (cu_fgets(lbuf, sizeof(lbuf), &more)) | | 675 | if (cu_fgets(lbuf, sizeof(lbuf), &more)) |
677 | p = lbuf; | | 676 | p = lbuf; |
678 | } | | 677 | } |
679 | *sp++ = '\0'; | | 678 | *sp++ = '\0'; |
680 | size += (size_t)(sp - op); | | 679 | size += (size_t)(sp - op); |
681 | s->new = xrealloc(text, size); | | 680 | s->new = xrealloc(text, size); |
682 | return (p); | | 681 | return (p); |
683 | } else if (*p == '\n') { | | 682 | } else if (*p == '\n') { |
684 | errx(1, | | 683 | errx(1, |
685 | "%lu: %s: unescaped newline inside substitute pattern", linenum, fname); | | 684 | "%lu: %s: unescaped newline inside substitute pattern", linenum, fname); |
686 | /* NOTREACHED */ | | 685 | /* NOTREACHED */ |
687 | } | | 686 | } |
688 | *sp++ = *p; | | 687 | *sp++ = *p; |
689 | } | | 688 | } |
690 | size += (size_t)(sp - op); | | 689 | size += (size_t)(sp - op); |
691 | if (asize - size < _POSIX2_LINE_MAX + 1) { | | 690 | if (asize - size < _POSIX2_LINE_MAX + 1) { |
692 | asize *= 2; | | 691 | asize *= 2; |
693 | text = xrealloc(text, asize); | | 692 | text = xrealloc(text, asize); |
694 | } | | 693 | } |
695 | } while (cu_fgets(p = lbuf, sizeof(lbuf), &more)); | | 694 | } while (cu_fgets(p = lbuf, sizeof(lbuf), &more)); |
696 | errx(1, "%lu: %s: unterminated substitute in regular expression", | | 695 | errx(1, "%lu: %s: unterminated substitute in regular expression", |
697 | linenum, fname); | | 696 | linenum, fname); |
698 | /* NOTREACHED */ | | 697 | /* NOTREACHED */ |
699 | } | | 698 | } |
700 | | | 699 | |
701 | /* | | 700 | /* |
702 | * Compile the flags of the s command | | 701 | * Compile the flags of the s command |
703 | */ | | 702 | */ |
704 | static char * | | 703 | static char * |
705 | compile_flags(char *p, struct s_subst *s) | | 704 | compile_flags(char *p, struct s_subst *s) |
706 | { | | 705 | { |
707 | int gn; /* True if we have seen g or n */ | | 706 | int gn; /* True if we have seen g or n */ |
708 | unsigned long nval; | | 707 | unsigned long nval; |
709 | char wfile[_POSIX2_LINE_MAX + 1], *q; | | 708 | char wfile[_POSIX2_LINE_MAX + 1], *q; |
710 | | | 709 | |
711 | s->n = 1; /* Default */ | | 710 | s->n = 1; /* Default */ |
712 | s->p = 0; | | 711 | s->p = 0; |
713 | s->wfile = NULL; | | 712 | s->wfile = NULL; |
714 | s->wfd = -1; | | 713 | s->wfd = -1; |
715 | s->icase = 0; | | 714 | s->icase = 0; |
716 | for (gn = 0;;) { | | 715 | for (gn = 0;;) { |
717 | EATSPACE(); /* EXTENSION */ | | 716 | EATSPACE(); /* EXTENSION */ |
718 | switch (*p) { | | 717 | switch (*p) { |
719 | case 'g': | | 718 | case 'g': |
720 | if (gn) | | 719 | if (gn) |
721 | errx(1, | | 720 | errx(1, |
722 | "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); | | 721 | "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); |
723 | gn = 1; | | 722 | gn = 1; |
724 | s->n = 0; | | 723 | s->n = 0; |
725 | break; | | 724 | break; |
726 | case '\0': | | 725 | case '\0': |
727 | case '\n': | | 726 | case '\n': |
728 | case ';': | | 727 | case ';': |
729 | return (p); | | 728 | return (p); |
730 | case 'p': | | 729 | case 'p': |
731 | s->p = 1; | | 730 | s->p = 1; |
732 | break; | | 731 | break; |
733 | case 'i': | | 732 | case 'i': |
734 | case 'I': | | 733 | case 'I': |
735 | s->icase = 1; | | 734 | s->icase = 1; |
736 | break; | | 735 | break; |
737 | case '1': case '2': case '3': | | 736 | case '1': case '2': case '3': |
738 | case '4': case '5': case '6': | | 737 | case '4': case '5': case '6': |
739 | case '7': case '8': case '9': | | 738 | case '7': case '8': case '9': |
740 | if (gn) | | 739 | if (gn) |
741 | errx(1, | | 740 | errx(1, |
742 | "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); | | 741 | "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); |
743 | gn = 1; | | 742 | gn = 1; |
744 | errno = 0; | | 743 | errno = 0; |
745 | nval = strtoul(p, &p, 10); | | 744 | nval = strtoul(p, &p, 10); |
746 | if (errno == ERANGE || nval > INT_MAX) | | 745 | if (errno == ERANGE || nval > INT_MAX) |
747 | errx(1, | | 746 | errx(1, |
748 | "%lu: %s: overflow in the 'N' substitute flag", linenum, fname); | | 747 | "%lu: %s: overflow in the 'N' substitute flag", linenum, fname); |
749 | s->n = (int)nval; | | 748 | s->n = (int)nval; |
750 | p--; | | 749 | p--; |
751 | break; | | 750 | break; |
752 | case 'w': | | 751 | case 'w': |
753 | p++; | | 752 | p++; |
754 | #ifdef HISTORIC_PRACTICE | | 753 | #ifdef HISTORIC_PRACTICE |
755 | if (*p != ' ') { | | 754 | if (*p != ' ') { |
756 | warnx("%lu: %s: space missing before w wfile", linenum, fname); | | 755 | warnx("%lu: %s: space missing before w wfile", linenum, fname); |
757 | return (p); | | 756 | return (p); |
758 | } | | 757 | } |
759 | #endif | | 758 | #endif |
760 | EATSPACE(); | | 759 | EATSPACE(); |
761 | q = wfile; | | 760 | q = wfile; |
762 | while (*p) { | | 761 | while (*p) { |
763 | if (*p == '\n') | | 762 | if (*p == '\n') |
764 | break; | | 763 | break; |
765 | *q++ = *p++; | | 764 | *q++ = *p++; |
766 | } | | 765 | } |
767 | *q = '\0'; | | 766 | *q = '\0'; |
768 | if (q == wfile) | | 767 | if (q == wfile) |
769 | errx(1, "%lu: %s: no wfile specified", linenum, fname); | | 768 | errx(1, "%lu: %s: no wfile specified", linenum, fname); |
770 | s->wfile = strdup(wfile); | | 769 | s->wfile = strdup(wfile); |
771 | if (!aflag && (s->wfd = open(wfile, | | 770 | if (!aflag && (s->wfd = open(wfile, |
772 | O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, | | 771 | O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, |
773 | DEFFILEMODE)) == -1) | | 772 | DEFFILEMODE)) == -1) |
774 | err(1, "%s", wfile); | | 773 | err(1, "%s", wfile); |
775 | return (p); | | 774 | return (p); |
776 | default: | | 775 | default: |
777 | errx(1, "%lu: %s: bad flag in substitute command: '%c'", | | 776 | errx(1, "%lu: %s: bad flag in substitute command: '%c'", |
778 | linenum, fname, *p); | | 777 | linenum, fname, *p); |
779 | break; | | 778 | break; |
780 | } | | 779 | } |
781 | p++; | | 780 | p++; |
782 | } | | 781 | } |
783 | } | | 782 | } |
784 | | | 783 | |
785 | /* | | 784 | /* |
786 | * Compile a translation set of strings into a lookup table. | | 785 | * Compile a translation set of strings into a lookup table. |
787 | */ | | 786 | */ |
788 | static char * | | 787 | static char * |
789 | compile_tr(char *p, struct s_tr **py) | | 788 | compile_tr(char *p, struct s_tr **py) |
790 | { | | 789 | { |
791 | struct s_tr *y; | | 790 | struct s_tr *y; |
792 | size_t i; | | 791 | size_t i; |
793 | const char *op, *np; | | 792 | const char *op, *np; |
794 | char old[_POSIX2_LINE_MAX + 1]; | | 793 | char old[_POSIX2_LINE_MAX + 1]; |
795 | char new[_POSIX2_LINE_MAX + 1]; | | 794 | char new[_POSIX2_LINE_MAX + 1]; |
796 | size_t oclen, oldlen, nclen, newlen; | | 795 | size_t oclen, oldlen, nclen, newlen; |
797 | mbstate_t mbs1, mbs2; | | 796 | mbstate_t mbs1, mbs2; |
798 | | | 797 | |
799 | *py = y = xmalloc(sizeof(*y)); | | 798 | *py = y = xmalloc(sizeof(*y)); |
800 | y->multis = NULL; | | 799 | y->multis = NULL; |
801 | y->nmultis = 0; | | 800 | y->nmultis = 0; |
802 | | | 801 | |
803 | if (*p == '\0' || *p == '\\') | | 802 | if (*p == '\0' || *p == '\\') |
804 | errx(1, | | 803 | errx(1, |
805 | "%lu: %s: transform pattern can not be delimited by newline or backslash", | | 804 | "%lu: %s: transform pattern can not be delimited by newline or backslash", |
806 | linenum, fname); | | 805 | linenum, fname); |
807 | p = compile_delimited(p, old, 1); | | 806 | p = compile_delimited(p, old, 1); |
808 | if (p == NULL) | | 807 | if (p == NULL) |
809 | errx(1, "%lu: %s: unterminated transform source string", | | 808 | errx(1, "%lu: %s: unterminated transform source string", |
810 | linenum, fname); | | 809 | linenum, fname); |
811 | p = compile_delimited(p - 1, new, 1); | | 810 | p = compile_delimited(p - 1, new, 1); |
812 | if (p == NULL) | | 811 | if (p == NULL) |
813 | errx(1, "%lu: %s: unterminated transform target string", | | 812 | errx(1, "%lu: %s: unterminated transform target string", |
814 | linenum, fname); | | 813 | linenum, fname); |
815 | EATSPACE(); | | 814 | EATSPACE(); |
816 | op = old; | | 815 | op = old; |
817 | oldlen = mbsrtowcs(NULL, &op, 0, NULL); | | 816 | oldlen = mbsrtowcs(NULL, &op, 0, NULL); |
818 | if (oldlen == (size_t)-1) | | 817 | if (oldlen == (size_t)-1) |
819 | err(1, NULL); | | 818 | err(1, NULL); |
820 | np = new; | | 819 | np = new; |
821 | newlen = mbsrtowcs(NULL, &np, 0, NULL); | | 820 | newlen = mbsrtowcs(NULL, &np, 0, NULL); |
822 | if (newlen == (size_t)-1) | | 821 | if (newlen == (size_t)-1) |
823 | err(1, NULL); | | 822 | err(1, NULL); |
824 | if (newlen != oldlen) | | 823 | if (newlen != oldlen) |
825 | errx(1, "%lu: %s: transform strings are not the same length", | | 824 | errx(1, "%lu: %s: transform strings are not the same length", |
826 | linenum, fname); | | 825 | linenum, fname); |
827 | if (MB_CUR_MAX == 1) { | | 826 | if (MB_CUR_MAX == 1) { |
828 | /* | | 827 | /* |
829 | * The single-byte encoding case is easy: generate a | | 828 | * The single-byte encoding case is easy: generate a |
830 | * lookup table. | | 829 | * lookup table. |
831 | */ | | 830 | */ |
832 | for (i = 0; i <= UCHAR_MAX; i++) | | 831 | for (i = 0; i <= UCHAR_MAX; i++) |
833 | y->bytetab[i] = (u_char)i; | | 832 | y->bytetab[i] = (u_char)i; |
834 | for (; *op; op++, np++) | | 833 | for (; *op; op++, np++) |
835 | y->bytetab[(u_char)*op] = (u_char)*np; | | 834 | y->bytetab[(u_char)*op] = (u_char)*np; |
836 | } else { | | 835 | } else { |
837 | /* | | 836 | /* |
838 | * Multi-byte encoding case: generate a lookup table as | | 837 | * Multi-byte encoding case: generate a lookup table as |
839 | * above, but only for single-byte characters. The first | | 838 | * above, but only for single-byte characters. The first |
840 | * bytes of multi-byte characters have their lookup table | | 839 | * bytes of multi-byte characters have their lookup table |
841 | * entries set to 0, which causes do_tr() to search through | | 840 | * entries set to 0, which causes do_tr() to search through |
842 | * an auxiliary vector of multi-byte mappings. | | 841 | * an auxiliary vector of multi-byte mappings. |
843 | */ | | 842 | */ |
844 | memset(&mbs1, 0, sizeof(mbs1)); | | 843 | memset(&mbs1, 0, sizeof(mbs1)); |
845 | memset(&mbs2, 0, sizeof(mbs2)); | | 844 | memset(&mbs2, 0, sizeof(mbs2)); |
846 | for (i = 0; i <= UCHAR_MAX; i++) | | 845 | for (i = 0; i <= UCHAR_MAX; i++) |
847 | y->bytetab[i] = (u_char)((btowc((int)i) != WEOF) ? i : 0); | | 846 | y->bytetab[i] = (u_char)((btowc((int)i) != WEOF) ? i : 0); |
848 | while (*op != '\0') { | | 847 | while (*op != '\0') { |
849 | oclen = mbrlen(op, MB_LEN_MAX, &mbs1); | | 848 | oclen = mbrlen(op, MB_LEN_MAX, &mbs1); |
850 | if (oclen == (size_t)-1 || oclen == (size_t)-2) | | 849 | if (oclen == (size_t)-1 || oclen == (size_t)-2) |
851 | errc(1, EILSEQ, NULL); | | 850 | errc(1, EILSEQ, NULL); |
852 | nclen = mbrlen(np, MB_LEN_MAX, &mbs2); | | 851 | nclen = mbrlen(np, MB_LEN_MAX, &mbs2); |
853 | if (nclen == (size_t)-1 || nclen == (size_t)-2) | | 852 | if (nclen == (size_t)-1 || nclen == (size_t)-2) |
854 | errc(1, EILSEQ, NULL); | | 853 | errc(1, EILSEQ, NULL); |
855 | if (oclen == 1 && nclen == 1) | | 854 | if (oclen == 1 && nclen == 1) |
856 | y->bytetab[(u_char)*op] = (u_char)*np; | | 855 | y->bytetab[(u_char)*op] = (u_char)*np; |
857 | else { | | 856 | else { |
858 | y->bytetab[(u_char)*op] = 0; | | 857 | y->bytetab[(u_char)*op] = 0; |
859 | y->multis = xrealloc(y->multis, | | 858 | y->multis = xrealloc(y->multis, |
860 | (y->nmultis + 1) * sizeof(*y->multis)); | | 859 | (y->nmultis + 1) * sizeof(*y->multis)); |
861 | i = y->nmultis++; | | 860 | i = y->nmultis++; |
862 | y->multis[i].fromlen = oclen; | | 861 | y->multis[i].fromlen = oclen; |
863 | memcpy(y->multis[i].from, op, oclen); | | 862 | memcpy(y->multis[i].from, op, oclen); |
864 | y->multis[i].tolen = nclen; | | 863 | y->multis[i].tolen = nclen; |
865 | memcpy(y->multis[i].to, np, nclen); | | 864 | memcpy(y->multis[i].to, np, nclen); |
866 | } | | 865 | } |
867 | op += oclen; | | 866 | op += oclen; |
868 | np += nclen; | | 867 | np += nclen; |
869 | } | | 868 | } |
870 | } | | 869 | } |
871 | return (p); | | 870 | return (p); |
872 | } | | 871 | } |
873 | | | 872 | |
874 | /* | | 873 | /* |
875 | * Compile the text following an a or i command. | | 874 | * Compile the text following an a or i command. |
876 | */ | | 875 | */ |
877 | static char * | | 876 | static char * |
878 | compile_text(void) | | 877 | compile_text(void) |
879 | { | | 878 | { |
880 | size_t asize, size; | | 879 | size_t asize, size; |
881 | int esc_nl; | | 880 | int esc_nl; |
882 | char *text, *p, *op, *s; | | 881 | char *text, *p, *op, *s; |
883 | char lbuf[_POSIX2_LINE_MAX + 1]; | | 882 | char lbuf[_POSIX2_LINE_MAX + 1]; |
884 | | | 883 | |
885 | asize = 2 * _POSIX2_LINE_MAX + 1; | | 884 | asize = 2 * _POSIX2_LINE_MAX + 1; |
886 | text = xmalloc(asize); | | 885 | text = xmalloc(asize); |
887 | size = 0; | | 886 | size = 0; |
888 | while (cu_fgets(lbuf, sizeof(lbuf), NULL)) { | | 887 | while (cu_fgets(lbuf, sizeof(lbuf), NULL)) { |
889 | op = s = text + size; | | 888 | op = s = text + size; |
890 | p = lbuf; | | 889 | p = lbuf; |
891 | for (esc_nl = 0; *p != '\0'; p++) { | | 890 | for (esc_nl = 0; *p != '\0'; p++) { |
892 | if (*p == '\\' && p[1] != '\0' && *++p == '\n') | | 891 | if (*p == '\\' && p[1] != '\0' && *++p == '\n') |
893 | esc_nl = 1; | | 892 | esc_nl = 1; |
894 | *s++ = *p; | | 893 | *s++ = *p; |
895 | } | | 894 | } |
896 | size += (size_t)(s - op); | | 895 | size += (size_t)(s - op); |
897 | if (!esc_nl) { | | 896 | if (!esc_nl) { |
898 | *s = '\0'; | | 897 | *s = '\0'; |
899 | break; | | 898 | break; |
900 | } | | 899 | } |
901 | if (asize - size < _POSIX2_LINE_MAX + 1) { | | 900 | if (asize - size < _POSIX2_LINE_MAX + 1) { |
902 | asize *= 2; | | 901 | asize *= 2; |
903 | text = xrealloc(text, asize); | | 902 | text = xrealloc(text, asize); |
904 | } | | 903 | } |
905 | } | | 904 | } |
906 | text[size] = '\0'; | | 905 | text[size] = '\0'; |
907 | p = xrealloc(text, size + 1); | | 906 | p = xrealloc(text, size + 1); |
908 | return (p); | | 907 | return (p); |
909 | } | | 908 | } |
910 | | | 909 | |
911 | /* | | 910 | /* |
912 | * Get an address and return a pointer to the first character after | | 911 | * Get an address and return a pointer to the first character after |
913 | * it. Fill the structure pointed to according to the address. | | 912 | * it. Fill the structure pointed to according to the address. |
914 | */ | | 913 | */ |
915 | static char * | | 914 | static char * |
916 | compile_addr(char *p, struct s_addr *a) | | 915 | compile_addr(char *p, struct s_addr *a) |
917 | { | | 916 | { |
918 | char *end, re[_POSIX2_LINE_MAX + 1]; | | 917 | char *end, re[_POSIX2_LINE_MAX + 1]; |
919 | int icase; | | 918 | int icase; |
920 | | | 919 | |
921 | icase = 0; | | 920 | icase = 0; |
922 | | | 921 | |
923 | a->type = 0; | | 922 | a->type = 0; |
924 | switch (*p) { | | 923 | switch (*p) { |
925 | case '\\': /* Context address */ | | 924 | case '\\': /* Context address */ |
926 | ++p; | | 925 | ++p; |
927 | /* FALLTHROUGH */ | | 926 | /* FALLTHROUGH */ |
928 | case '/': /* Context address */ | | 927 | case '/': /* Context address */ |
929 | p = compile_delimited(p, re, 0); | | 928 | p = compile_delimited(p, re, 0); |
930 | if (p == NULL) | | 929 | if (p == NULL) |
931 | errx(1, "%lu: %s: unterminated regular expression", linenum, fname); | | 930 | errx(1, "%lu: %s: unterminated regular expression", linenum, fname); |
932 | /* Check for case insensitive regexp flag */ | | 931 | /* Check for case insensitive regexp flag */ |
933 | if (*p == 'I') { | | 932 | if (*p == 'I') { |
934 | icase = 1; | | 933 | icase = 1; |
935 | p++; | | 934 | p++; |
936 | } | | 935 | } |
937 | if (*re == '\0') | | 936 | if (*re == '\0') |
938 | a->u.r = NULL; | | 937 | a->u.r = NULL; |
939 | else | | 938 | else |
940 | a->u.r = compile_re(re, icase); | | 939 | a->u.r = compile_re(re, icase); |
941 | a->type = AT_RE; | | 940 | a->type = AT_RE; |
942 | return (p); | | 941 | return (p); |
943 | | | 942 | |
944 | case '$': /* Last line */ | | 943 | case '$': /* Last line */ |
945 | a->type = AT_LAST; | | 944 | a->type = AT_LAST; |
946 | return (p + 1); | | 945 | return (p + 1); |
947 | | | 946 | |
948 | case '+': /* Relative line number */ | | 947 | case '+': /* Relative line number */ |
949 | a->type = AT_RELLINE; | | 948 | a->type = AT_RELLINE; |
950 | p++; | | 949 | p++; |
951 | /* FALLTHROUGH */ | | 950 | /* FALLTHROUGH */ |
952 | /* Line number */ | | 951 | /* Line number */ |
953 | case '0': case '1': case '2': case '3': case '4': | | 952 | case '0': case '1': case '2': case '3': case '4': |
954 | case '5': case '6': case '7': case '8': case '9': | | 953 | case '5': case '6': case '7': case '8': case '9': |
955 | if (a->type == 0) | | 954 | if (a->type == 0) |
956 | a->type = AT_LINE; | | 955 | a->type = AT_LINE; |
957 | a->u.l = strtoul(p, &end, 10); | | 956 | a->u.l = strtoul(p, &end, 10); |
958 | return (end); | | 957 | return (end); |
959 | default: | | 958 | default: |
960 | errx(1, "%lu: %s: expected context address", linenum, fname); | | 959 | errx(1, "%lu: %s: expected context address", linenum, fname); |
961 | return (NULL); | | 960 | return (NULL); |
962 | } | | 961 | } |
963 | } | | 962 | } |
964 | | | 963 | |
965 | /* | | 964 | /* |
966 | * duptoeol -- | | 965 | * duptoeol -- |
967 | * Return a copy of all the characters up to \n or \0. | | 966 | * Return a copy of all the characters up to \n or \0. |
968 | */ | | 967 | */ |
969 | static char * | | 968 | static char * |
970 | duptoeol(char *s, const char *ctype) | | 969 | duptoeol(char *s, const char *ctype) |
971 | { | | 970 | { |
972 | size_t len; | | 971 | size_t len; |
973 | int ws; | | 972 | int ws; |
974 | char *p, *start; | | 973 | char *p, *start; |
975 | | | 974 | |
976 | ws = 0; | | 975 | ws = 0; |
977 | for (start = s; *s != '\0' && *s != '\n'; ++s) | | 976 | for (start = s; *s != '\0' && *s != '\n'; ++s) |
978 | ws = isspace((unsigned char)*s); | | 977 | ws = isspace((unsigned char)*s); |
979 | *s = '\0'; | | 978 | *s = '\0'; |
980 | if (ws) | | 979 | if (ws) |
981 | warnx("%lu: %s: whitespace after %s", linenum, fname, ctype); | | 980 | warnx("%lu: %s: whitespace after %s", linenum, fname, ctype); |
982 | len = (size_t)(s - start + 1); | | 981 | len = (size_t)(s - start + 1); |
983 | p = xmalloc(len); | | 982 | p = xmalloc(len); |
984 | return (memmove(p, start, len)); | | 983 | return (memmove(p, start, len)); |
985 | } | | 984 | } |
986 | | | 985 | |
987 | /* | | 986 | /* |
988 | * Convert goto label names to addresses, and count a and r commands, in | | 987 | * Convert goto label names to addresses, and count a and r commands, in |
989 | * the given subset of the script. Free the memory used by labels in b | | 988 | * the given subset of the script. Free the memory used by labels in b |
990 | * and t commands (but not by :). | | 989 | * and t commands (but not by :). |
991 | * | | 990 | * |
992 | * TODO: Remove } nodes | | 991 | * TODO: Remove } nodes |
993 | */ | | 992 | */ |
994 | static void | | 993 | static void |
995 | fixuplabel(struct s_command *cp, struct s_command *end) | | 994 | fixuplabel(struct s_command *cp, struct s_command *end) |
996 | { | | 995 | { |
997 | | | 996 | |
998 | for (; cp != end; cp = cp->next) | | 997 | for (; cp != end; cp = cp->next) |
999 | switch (cp->code) { | | 998 | switch (cp->code) { |
1000 | case 'a': | | 999 | case 'a': |
1001 | case 'r': | | 1000 | case 'r': |
1002 | appendnum++; | | 1001 | appendnum++; |
1003 | break; | | 1002 | break; |
1004 | case 'b': | | 1003 | case 'b': |
1005 | case 't': | | 1004 | case 't': |
1006 | /* Resolve branch target. */ | | 1005 | /* Resolve branch target. */ |
1007 | if (cp->t == NULL) { | | 1006 | if (cp->t == NULL) { |
1008 | cp->u.c = NULL; | | 1007 | cp->u.c = NULL; |
1009 | break; | | 1008 | break; |
1010 | } | | 1009 | } |
1011 | if ((cp->u.c = findlabel(cp->t)) == NULL) | | 1010 | if ((cp->u.c = findlabel(cp->t)) == NULL) |
1012 | errx(1, "%lu: %s: undefined label '%s'", linenum, fname, cp->t); | | 1011 | errx(1, "%lu: %s: undefined label '%s'", linenum, fname, cp->t); |
1013 | free(cp->t); | | 1012 | free(cp->t); |
1014 | break; | | 1013 | break; |
1015 | case '{': | | 1014 | case '{': |
1016 | /* Do interior commands. */ | | 1015 | /* Do interior commands. */ |
1017 | fixuplabel(cp->u.c, cp->next); | | 1016 | fixuplabel(cp->u.c, cp->next); |
1018 | break; | | 1017 | break; |
1019 | } | | 1018 | } |
1020 | } | | 1019 | } |
1021 | | | 1020 | |
1022 | /* | | 1021 | /* |
1023 | * Associate the given command label for later lookup. | | 1022 | * Associate the given command label for later lookup. |
1024 | */ | | 1023 | */ |
1025 | static void | | 1024 | static void |
1026 | enterlabel(struct s_command *cp) | | 1025 | enterlabel(struct s_command *cp) |
1027 | { | | 1026 | { |
1028 | struct labhash **lhp, *lh; | | 1027 | struct labhash **lhp, *lh; |
1029 | u_char *p; | | 1028 | u_char *p; |
1030 | u_int h, c; | | 1029 | u_int h, c; |
1031 | | | 1030 | |
1032 | for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++) | | 1031 | for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++) |
1033 | h = (h << 5) + h + c; | | 1032 | h = (h << 5) + h + c; |
1034 | lhp = &labels[h & LHMASK]; | | 1033 | lhp = &labels[h & LHMASK]; |
1035 | for (lh = *lhp; lh != NULL; lh = lh->lh_next) | | 1034 | for (lh = *lhp; lh != NULL; lh = lh->lh_next) |
1036 | if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0) | | 1035 | if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0) |
1037 | errx(1, "%lu: %s: duplicate label '%s'", linenum, fname, cp->t); | | 1036 | errx(1, "%lu: %s: duplicate label '%s'", linenum, fname, cp->t); |
1038 | lh = xmalloc(sizeof *lh); | | 1037 | lh = xmalloc(sizeof *lh); |
1039 | lh->lh_next = *lhp; | | 1038 | lh->lh_next = *lhp; |
1040 | lh->lh_hash = h; | | 1039 | lh->lh_hash = h; |
1041 | lh->lh_cmd = cp; | | 1040 | lh->lh_cmd = cp; |
1042 | lh->lh_ref = 0; | | 1041 | lh->lh_ref = 0; |
1043 | *lhp = lh; | | 1042 | *lhp = lh; |
1044 | } | | 1043 | } |
1045 | | | 1044 | |
1046 | /* | | 1045 | /* |
1047 | * Find the label contained in the command l in the command linked | | 1046 | * Find the label contained in the command l in the command linked |
1048 | * list cp. L is excluded from the search. Return NULL if not found. | | 1047 | * list cp. L is excluded from the search. Return NULL if not found. |
1049 | */ | | 1048 | */ |
1050 | static struct s_command * | | 1049 | static struct s_command * |
1051 | findlabel(char *name) | | 1050 | findlabel(char *name) |
1052 | { | | 1051 | { |
1053 | struct labhash *lh; | | 1052 | struct labhash *lh; |
1054 | u_char *p; | | 1053 | u_char *p; |
1055 | u_int h, c; | | 1054 | u_int h, c; |
1056 | | | 1055 | |
1057 | for (h = 0, p = (u_char *)name; (c = *p) != 0; p++) | | 1056 | for (h = 0, p = (u_char *)name; (c = *p) != 0; p++) |
1058 | h = (h << 5) + h + c; | | 1057 | h = (h << 5) + h + c; |
1059 | for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) { | | 1058 | for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) { |
1060 | if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) { | | 1059 | if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) { |
1061 | lh->lh_ref = 1; | | 1060 | lh->lh_ref = 1; |
1062 | return (lh->lh_cmd); | | 1061 | return (lh->lh_cmd); |
1063 | } | | 1062 | } |
1064 | } | | 1063 | } |
1065 | return (NULL); | | 1064 | return (NULL); |
1066 | } | | 1065 | } |
1067 | | | 1066 | |
1068 | /* | | 1067 | /* |
1069 | * Warn about any unused labels. As a side effect, release the label hash | | 1068 | * Warn about any unused labels. As a side effect, release the label hash |
1070 | * table space. | | 1069 | * table space. |
1071 | */ | | 1070 | */ |
1072 | static void | | 1071 | static void |
1073 | uselabel(void) | | 1072 | uselabel(void) |
1074 | { | | 1073 | { |
1075 | struct labhash *lh, *next; | | 1074 | struct labhash *lh, *next; |
1076 | int i; | | 1075 | int i; |
1077 | | | 1076 | |
1078 | for (i = 0; i < LHSZ; i++) { | | 1077 | for (i = 0; i < LHSZ; i++) { |
1079 | for (lh = labels[i]; lh != NULL; lh = next) { | | 1078 | for (lh = labels[i]; lh != NULL; lh = next) { |
1080 | next = lh->lh_next; | | 1079 | next = lh->lh_next; |
1081 | if (!lh->lh_ref) | | 1080 | if (!lh->lh_ref) |
1082 | warnx("%lu: %s: unused label '%s'", | | 1081 | warnx("%lu: %s: unused label '%s'", |
1083 | linenum, fname, lh->lh_cmd->t); | | 1082 | linenum, fname, lh->lh_cmd->t); |
1084 | free(lh); | | 1083 | free(lh); |
1085 | } | | 1084 | } |
1086 | } | | 1085 | } |
1087 | } | | 1086 | } |