Sat Mar 7 22:35:16 2020 UTC
PR/55057: Paul Goyette: Don't use % 6 arithmetic that hurts the brain for
the ifdef state machine; use bits and shifts instead. Also, don't forget to
restore the state once an include file ends.


(christos)
diff -r1.27 -r1.28 src/usr.bin/config/scan.l

cvs diff -r1.27 -r1.28 src/usr.bin/config/scan.l (expand / switch to unified diff)

--- src/usr.bin/config/scan.l 2020/03/07 19:26:13 1.27
+++ src/usr.bin/config/scan.l 2020/03/07 22:35:16 1.28
@@ -1,15 +1,15 @@ @@ -1,15 +1,15 @@
1%{ 1%{
2/* $NetBSD: scan.l,v 1.27 2020/03/07 19:26:13 christos Exp $ */ 2/* $NetBSD: scan.l,v 1.28 2020/03/07 22:35:16 christos Exp $ */
3 3
4/* 4/*
5 * Copyright (c) 1992, 1993 5 * Copyright (c) 1992, 1993
6 * The Regents of the University of California. All rights reserved. 6 * The Regents of the University of California. All rights reserved.
7 * 7 *
8 * This software was developed by the Computer Systems Engineering group 8 * This software was developed by the Computer Systems Engineering group
9 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 9 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
10 * contributed to Berkeley. 10 * contributed to Berkeley.
11 * 11 *
12 * All advertising materials mentioning features or use of this software 12 * All advertising materials mentioning features or use of this software
13 * must display the following acknowledgement: 13 * must display the following acknowledgement:
14 * This product includes software developed by the University of 14 * This product includes software developed by the University of
15 * California, Lawrence Berkeley Laboratories. 15 * California, Lawrence Berkeley Laboratories.
@@ -32,85 +32,75 @@ @@ -32,85 +32,75 @@
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE. 39 * SUCH DAMAGE.
40 * 40 *
41 * from: @(#)scan.l 8.1 (Berkeley) 6/6/93 41 * from: @(#)scan.l 8.1 (Berkeley) 6/6/93
42 */ 42 */
43 43
44#include <sys/cdefs.h> 44#include <sys/cdefs.h>
45__RCSID("$NetBSD: scan.l,v 1.27 2020/03/07 19:26:13 christos Exp $"); 45__RCSID("$NetBSD: scan.l,v 1.28 2020/03/07 22:35:16 christos Exp $");
46 46
47#include <sys/param.h> 47#include <sys/param.h>
48#include <errno.h> 48#include <errno.h>
49#include <libgen.h> 49#include <libgen.h>
50#include <stdio.h> 50#include <stdio.h>
51#include <stdlib.h> 51#include <stdlib.h>
52#include <string.h> 52#include <string.h>
53#include <unistd.h> 53#include <unistd.h>
54#include <stddef.h> 54#include <stddef.h>
55#include <ctype.h> 55#include <ctype.h>
56#include <util.h> 56#include <util.h>
57#undef ECHO 57#undef ECHO
58#include "defs.h" 58#include "defs.h"
59#include "gram.h" 59#include "gram.h"
60 60
61int yyline; 61int yyline;
62const char *yyfile; 62const char *yyfile;
63const char *lastfile; 63const char *lastfile;
64char curinclpath[PATH_MAX]; 64char curinclpath[PATH_MAX];
65int ifdefstate = -1; 65uint64_t ifdefstate;
66int st; 66int ifdefshift = -1;
67#define IDS_PARENT_DISABLED \ 67
68 ((ifdefstate > 6) && ((((ifdefstate/6)-1) & 1) == 1)) 68/*
69#define IDS_MAX_DEPTH 362797056 /* 6^11 */ 69 * The state is represented by 3 bits.
70/* States for ifdefstate: 70 */
71 71#define IDS_MATCH 1ll
72 0 -> matched ifdef 72#define IDS_ELIF 2ll
73 1 -> unmatched ifdef 73#define IDS_ELSE 4ll
74 2 -> matched elifdef 74
75 3 -> unmatched elifdef 75#define IDS_BITS 7
76 4 -> matched else 76#define IDS_SHIFT 3
77 5 -> unmatched else 
78 
79 Upon "ifdef", add one and multiply by 6. 
80 Upon "endif", divide by 6, remove 1. 
81 
82 ifdef -> MATCH => continue 
83 MISMATCH => set to 1 
84 elifdef -> if (!1) -> MISMATCH 
85 MATCH => set to 2 
86 MISMATCH => if (2 || 3) set to 3, else set to 1 
87 else -> if (1) -> MATCH 
88 MATCH => set to 4 
89 MISMATCH => set to 5 
90 77
91 in each case, if parent & 1 == 1, MISMATCH 78#define IDS_ISMATCH(st) (((st) & IDS_MATCH) != 0)
92*/ 79#define IDS_PARENT_DISABLED \
 80 (ifdefshift > 0 && !IDS_ISMATCH(ifdefstate >> IDS_SHIFT))
 81#define IDS_MAX_DEPTH 21 /* 64 / 3 */
93  82
94/* 83/*
95 * Data for returning to previous files from include files. 84 * Data for returning to previous files from include files.
96 */ 85 */
97struct incl { 86struct incl {
98 struct incl *in_prev; /* previous includes in effect, if any */ 87 struct incl *in_prev; /* previous includes in effect, if any */
99 YY_BUFFER_STATE in_buf; /* previous lex state */ 88 YY_BUFFER_STATE in_buf; /* previous lex state */
100 struct where in_where; 89 struct where in_where;
101 int in_ateof; /* token to insert at EOF */ 90 int in_ateof; /* token to insert at EOF */
102 int in_interesting; /* previous value for "interesting" */ 91 int in_interesting; /* previous value for "interesting" */
103 int in_ifdefstate; /* conditional level */ 92 uint64_t in_ifdefstate; /* conditional level */
 93 int in_ifdefshift; /* conditional level */
104}; 94};
105static struct incl *incl; 95static struct incl *incl;
106static int endinclude(void); 96static int endinclude(void);
107static int getincludepath(void); 97static int getincludepath(void);
108static int getcurifdef(void); 98static int getcurifdef(void);
109 99
110 100
111%} 101%}
112 102
113%option noyywrap nounput noinput 103%option noyywrap nounput noinput
114 104
115PATH [A-Za-z_0-9]*[./][-A-Za-z_0-9./]* 105PATH [A-Za-z_0-9]*[./][-A-Za-z_0-9./]*
116QCHARS \"(\\.|[^\\"])*\"  106QCHARS \"(\\.|[^\\"])*\"
@@ -170,117 +160,108 @@ pseudo-root return PSEUDO_ROOT; @@ -170,117 +160,108 @@ pseudo-root return PSEUDO_ROOT;
170root return ROOT; 160root return ROOT;
171select return SELECT; 161select return SELECT;
172single return SINGLE; 162single return SINGLE;
173source return SOURCE; 163source return SOURCE;
174type return TYPE; 164type return TYPE;
175vector return VECTOR; 165vector return VECTOR;
176version return VERSION; 166version return VERSION;
177with return WITH; 167with return WITH;
178 168
179\+= return PLUSEQ; 169\+= return PLUSEQ;
180:= return COLONEQ; 170:= return COLONEQ;
181 171
182<*>ifdef[ \t]+{WORD}{RESTOFLINE} { 172<*>ifdef[ \t]+{WORD}{RESTOFLINE} {
183 ifdefstate = (ifdefstate + 1) * 6; 173 ifdefstate <<= IDS_SHIFT;
184 if (ifdefstate >= IDS_MAX_DEPTH) { 174 if (++ifdefshift >= IDS_MAX_DEPTH) {
185 yyerror("too many levels of conditional"); 175 yyerror("too many levels of conditional");
186 } 176 }
187 if (!IDS_PARENT_DISABLED && getcurifdef()) { 177 if (IDS_PARENT_DISABLED || !getcurifdef()) {
188 BEGIN(INITIAL); 
189 } else { 
190 ifdefstate++; 
191 BEGIN(IGNORED); 178 BEGIN(IGNORED);
 179 } else {
 180 ifdefstate |= IDS_MATCH;
 181 BEGIN(INITIAL);
192 } 182 }
193 yyline++; 183 yyline++;
194 } 184 }
195 185
196<*>ifndef[ \t]+{WORD}{RESTOFLINE} { 186<*>ifndef[ \t]+{WORD}{RESTOFLINE} {
197 ifdefstate = (ifdefstate + 1) * 6; 187 ifdefstate <<= IDS_SHIFT;
198 if (ifdefstate >= IDS_MAX_DEPTH) { 188 if (++ifdefshift >= IDS_MAX_DEPTH) {
199 yyerror("too many levels of conditional"); 189 yyerror("too many levels of conditional");
200 } 190 }
201 if (!IDS_PARENT_DISABLED && !getcurifdef()) { 191 if (IDS_PARENT_DISABLED || getcurifdef()) {
202 BEGIN(INITIAL); 
203 } else { 
204 ifdefstate++; 
205 BEGIN(IGNORED); 192 BEGIN(IGNORED);
 193 } else {
 194 ifdefstate |= IDS_MATCH;
 195 BEGIN(INITIAL);
206 } 196 }
207 yyline++; 197 yyline++;
208 } 198 }
209 199
210 200
211<*>elifdef[ \t]+{WORD}{RESTOFLINE} { 201<*>elifdef[ \t]+{WORD}{RESTOFLINE} {
212 st = ifdefstate % 6; 202 int st = ifdefstate & IDS_BITS;
213 if (ifdefstate < 0 || st > 3) { 203 if (ifdefshift == -1 || (st & IDS_ELSE) != 0) {
214 yyerror("mismatched elifdef"); 204 yyerror("mismatched elifdef");
215 } 205 }
216 if (IDS_PARENT_DISABLED || 206 if (IDS_PARENT_DISABLED || IDS_ISMATCH(st) || !getcurifdef()) {
217 st != 1 || !getcurifdef()) { 
218 if (st == 2 || st == 3) { 
219 ifdefstate += 3 - st; 
220 } else { 
221 ifdefstate += 1 - st; 
222 } 
223 BEGIN(IGNORED); 207 BEGIN(IGNORED);
224 } else { 208 } else {
225 ifdefstate++; 209 ifdefstate |= IDS_MATCH;
226 BEGIN(INITIAL); 210 BEGIN(INITIAL);
227 } 211 }
 212 ifdefstate |= IDS_ELIF;
228 yyline++; 213 yyline++;
229 } 214 }
230 215
231<*>elifndef[ \t]+{WORD}{RESTOFLINE} { 216<*>elifndef[ \t]+{WORD}{RESTOFLINE} {
232 st = ifdefstate % 6; 217 int st = ifdefstate & IDS_BITS;
233 if (ifdefstate < 0 || st > 3) { 218 if (ifdefshift == -1 || (st & IDS_ELSE) != 0) {
234 yyerror("mismatched elifndef"); 219 yyerror("mismatched elifndef");
235 } 220 }
236 if (IDS_PARENT_DISABLED || 221 if (IDS_PARENT_DISABLED || IDS_ISMATCH(st) || getcurifdef()) {
237 st != 1 || getcurifdef()) { 
238 if (st == 2 || st == 3) { 
239 ifdefstate += 3 - st; 
240 } else { 
241 ifdefstate += 1 - st; 
242 } 
243 BEGIN(IGNORED); 222 BEGIN(IGNORED);
244 } else { 223 } else {
245 ifdefstate++; 224 ifdefstate |= IDS_MATCH;
246 BEGIN(INITIAL); 225 BEGIN(INITIAL);
247 } 226 }
 227 ifdefstate |= IDS_ELIF;
248 yyline++; 228 yyline++;
249 } 229 }
250 230
251<*>else{RESTOFLINE} { 231<*>else{RESTOFLINE} {
252 st = ifdefstate % 6; 232 int st = ifdefstate & IDS_BITS;
253 if (ifdefstate < 0 || st > 3) { 233 if (ifdefshift == -1 || (st & IDS_ELSE) != 0) {
254 yyerror("mismatched else"); 234 yyerror("mismatched else");
255 } 235 }
256 if (!IDS_PARENT_DISABLED && (st == 1)) { 236 if (IDS_PARENT_DISABLED || IDS_ISMATCH(st)) {
257 ifdefstate += 3; 
258 BEGIN(INITIAL); 
259 } else { 
260 ifdefstate += 5 - st; 
261 BEGIN(IGNORED); 237 BEGIN(IGNORED);
 238 } else {
 239 ifdefstate |= IDS_MATCH;
 240 BEGIN(INITIAL);
262 } 241 }
 242 ifdefstate |= IDS_ELSE;
263 yyline++; 243 yyline++;
264 } 244 }
265 245
266<*>endif{RESTOFLINE} { 246<*>endif{RESTOFLINE} {
267 if (ifdefstate < 0) { 247 if (ifdefshift == -1) {
268 yyerror("mismatched endif"); 248 yyerror("mismatched endif");
269 } 249 }
270 if (!IDS_PARENT_DISABLED) { 250 if (!IDS_PARENT_DISABLED) {
271 BEGIN(INITIAL); 251 BEGIN(INITIAL);
272 } 252 }
273 ifdefstate = (ifdefstate/6) - 1; 253 ifdefshift--;
 254 ifdefstate >>= IDS_SHIFT;
274 yyline++; 255 yyline++;
275 } 256 }
276 257
277<IGNORED>\n { 258<IGNORED>\n {
278 yyline++; 259 yyline++;
279 } 260 }
280 261
281<IGNORED>. /* ignore */ 262<IGNORED>. /* ignore */
282 263
283include[ \t]+{FILENAME}{RESTOFLINE} { 264include[ \t]+{FILENAME}{RESTOFLINE} {
284 yyline++; 265 yyline++;
285 if (getincludepath()) { 266 if (getincludepath()) {
286 include(curinclpath, 0, 0, 1); 267 include(curinclpath, 0, 0, 1);
@@ -359,27 +340,27 @@ package[ \t]+{FILENAME}{RESTOFLINE} { @@ -359,27 +340,27 @@ package[ \t]+{FILENAME}{RESTOFLINE} {
359 yyline++; 340 yyline++;
360 return '\n'; 341 return '\n';
361 } 342 }
362\00 { 343\00 {
363 /* Detect NUL characters in the config file and 344 /* Detect NUL characters in the config file and
364 * error out. 345 * error out.
365 */ 346 */
366 cfgerror("NUL character detected at line %i", yyline); 347 cfgerror("NUL character detected at line %i", yyline);
367 } 348 }
368#.* { /* ignored (comment) */; } 349#.* { /* ignored (comment) */; }
369[ \t]+ { /* ignored (white space) */; } 350[ \t]+ { /* ignored (white space) */; }
370. { return yytext[0]; } 351. { return yytext[0]; }
371<*><<EOF>> { 352<*><<EOF>> {
372 if (ifdefstate > (incl == NULL ? -1 : incl->in_ifdefstate)) { 353 if (ifdefshift > (incl == NULL ? -1 : incl->in_ifdefshift)) {
373 yyerror("reached EOF while looking for endif"); 354 yyerror("reached EOF while looking for endif");
374 } 355 }
375 if (incl == NULL) 356 if (incl == NULL)
376 return YY_NULL; 357 return YY_NULL;
377 tok = endinclude(); 358 tok = endinclude();
378 if (tok) 359 if (tok)
379 return tok; 360 return tok;
380 /* otherwise continue scanning */ 361 /* otherwise continue scanning */
381 } 362 }
382 363
383%% 364%%
384 365
385int interesting = 1; 366int interesting = 1;
@@ -520,26 +501,27 @@ include(const char *fname, int ateof, in @@ -520,26 +501,27 @@ include(const char *fname, int ateof, in
520 cfgerror("cannot record current working directory for %s", s); 501 cfgerror("cannot record current working directory for %s", s);
521 fclose(fp); 502 fclose(fp);
522 free(s); 503 free(s);
523 return (-1); 504 return (-1);
524 } 505 }
525 in = ecalloc(1, sizeof *in); 506 in = ecalloc(1, sizeof *in);
526 in->in_prev = incl; 507 in->in_prev = incl;
527 in->in_buf = YY_CURRENT_BUFFER; 508 in->in_buf = YY_CURRENT_BUFFER;
528 in->in_where.w_srcfile = yyfile; 509 in->in_where.w_srcfile = yyfile;
529 in->in_where.w_srcline = (u_short)yyline; 510 in->in_where.w_srcline = (u_short)yyline;
530 in->in_ateof = ateof; 511 in->in_ateof = ateof;
531 in->in_interesting = interesting; 512 in->in_interesting = interesting;
532 in->in_ifdefstate = ifdefstate; 513 in->in_ifdefstate = ifdefstate;
 514 in->in_ifdefshift = ifdefshift;
533 interesting = direct & interesting; 515 interesting = direct & interesting;
534 if (interesting) 516 if (interesting)
535 logconfig_include(fp, fname); 517 logconfig_include(fp, fname);
536 incl = in; 518 incl = in;
537 CFGDBG(1, "include `%s' from `%s' line %d", fname, yyfile, yyline); 519 CFGDBG(1, "include `%s' from `%s' line %d", fname, yyfile, yyline);
538 yy_switch_to_buffer(yy_create_buffer(fp, YY_BUF_SIZE)); 520 yy_switch_to_buffer(yy_create_buffer(fp, YY_BUF_SIZE));
539 yyfile = intern(s); 521 yyfile = intern(s);
540 yyline = 1; 522 yyline = 1;
541 free(s); 523 free(s);
542 includedepth++; 524 includedepth++;
543 return (0); 525 return (0);
544} 526}
545 527
@@ -590,26 +572,28 @@ endinclude(void) @@ -590,26 +572,28 @@ endinclude(void)
590 572
591 curdir_pop(); 573 curdir_pop();
592 if ((in = incl) == NULL) 574 if ((in = incl) == NULL)
593 panic("endinclude"); 575 panic("endinclude");
594 incl = in->in_prev; 576 incl = in->in_prev;
595 lastfile = yyfile; 577 lastfile = yyfile;
596 yy_delete_buffer(YY_CURRENT_BUFFER); 578 yy_delete_buffer(YY_CURRENT_BUFFER);
597 (void)fclose(yyin); 579 (void)fclose(yyin);
598 yy_switch_to_buffer(in->in_buf); 580 yy_switch_to_buffer(in->in_buf);
599 yyfile = in->in_where.w_srcfile; 581 yyfile = in->in_where.w_srcfile;
600 yyline = in->in_where.w_srcline; 582 yyline = in->in_where.w_srcline;
601 ateof = in->in_ateof; 583 ateof = in->in_ateof;
602 interesting = in->in_interesting; 584 interesting = in->in_interesting;
 585 ifdefstate = in->in_ifdefstate;
 586 ifdefshift = in->in_ifdefshift;
603 free(in); 587 free(in);
604 588
605 includedepth--; 589 includedepth--;
606 590
607 return (ateof); 591 return (ateof);
608} 592}
609 593
610/* 594/*
611 * Return the current line number. If yacc has looked ahead and caused 595 * Return the current line number. If yacc has looked ahead and caused
612 * us to consume a newline, we have to subtract one. yychar is yacc's 596 * us to consume a newline, we have to subtract one. yychar is yacc's
613 * token lookahead, so we can tell. 597 * token lookahead, so we can tell.
614 */ 598 */
615u_short 599u_short