| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | /* $NetBSD: lex.c,v 1.103 2022/02/27 18:29:14 rillig Exp $ */ | | 1 | /* $NetBSD: lex.c,v 1.104 2022/02/27 22:26:12 rillig Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved. | | 4 | * Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved. |
5 | * Copyright (c) 1994, 1995 Jochen Pohl | | 5 | * Copyright (c) 1994, 1995 Jochen Pohl |
6 | * All Rights Reserved. | | 6 | * All Rights Reserved. |
7 | * | | 7 | * |
8 | * Redistribution and use in source and binary forms, with or without | | 8 | * Redistribution and use in source and binary forms, with or without |
9 | * modification, are permitted provided that the following conditions | | 9 | * modification, are permitted provided that the following conditions |
10 | * are met: | | 10 | * are met: |
11 | * 1. Redistributions of source code must retain the above copyright | | 11 | * 1. Redistributions of source code must retain the above copyright |
12 | * notice, this list of conditions and the following disclaimer. | | 12 | * notice, this list of conditions and the following disclaimer. |
13 | * 2. Redistributions in binary form must reproduce the above copyright | | 13 | * 2. Redistributions in binary form must reproduce the above copyright |
14 | * notice, this list of conditions and the following disclaimer in the | | 14 | * notice, this list of conditions and the following disclaimer in the |
| @@ -28,27 +28,27 @@ | | | @@ -28,27 +28,27 @@ |
28 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | | 28 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
29 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | | 29 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
30 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | | 30 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | | 31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
32 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | | 32 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
33 | */ | | 33 | */ |
34 | | | 34 | |
35 | #if HAVE_NBTOOL_CONFIG_H | | 35 | #if HAVE_NBTOOL_CONFIG_H |
36 | #include "nbtool_config.h" | | 36 | #include "nbtool_config.h" |
37 | #endif | | 37 | #endif |
38 | | | 38 | |
39 | #include <sys/cdefs.h> | | 39 | #include <sys/cdefs.h> |
40 | #if defined(__RCSID) && !defined(lint) | | 40 | #if defined(__RCSID) && !defined(lint) |
41 | __RCSID("$NetBSD: lex.c,v 1.103 2022/02/27 18:29:14 rillig Exp $"); | | 41 | __RCSID("$NetBSD: lex.c,v 1.104 2022/02/27 22:26:12 rillig Exp $"); |
42 | #endif | | 42 | #endif |
43 | | | 43 | |
44 | #include <ctype.h> | | 44 | #include <ctype.h> |
45 | #include <errno.h> | | 45 | #include <errno.h> |
46 | #include <float.h> | | 46 | #include <float.h> |
47 | #include <limits.h> | | 47 | #include <limits.h> |
48 | #include <math.h> | | 48 | #include <math.h> |
49 | #include <stdlib.h> | | 49 | #include <stdlib.h> |
50 | #include <string.h> | | 50 | #include <string.h> |
51 | | | 51 | |
52 | #include "lint1.h" | | 52 | #include "lint1.h" |
53 | #include "cgram.h" | | 53 | #include "cgram.h" |
54 | | | 54 | |
| @@ -58,28 +58,26 @@ __RCSID("$NetBSD: lex.c,v 1.103 2022/02/ | | | @@ -58,28 +58,26 @@ __RCSID("$NetBSD: lex.c,v 1.103 2022/02/ |
58 | /* Current position (it's also updated when an included file is parsed) */ | | 58 | /* Current position (it's also updated when an included file is parsed) */ |
59 | pos_t curr_pos = { "", 1, 0 }; | | 59 | pos_t curr_pos = { "", 1, 0 }; |
60 | | | 60 | |
61 | /* | | 61 | /* |
62 | * Current position in C source (not updated when an included file is | | 62 | * Current position in C source (not updated when an included file is |
63 | * parsed). | | 63 | * parsed). |
64 | */ | | 64 | */ |
65 | pos_t csrc_pos = { "", 1, 0 }; | | 65 | pos_t csrc_pos = { "", 1, 0 }; |
66 | | | 66 | |
67 | bool in_gcc_attribute; | | 67 | bool in_gcc_attribute; |
68 | bool in_system_header; | | 68 | bool in_system_header; |
69 | | | 69 | |
70 | static int inpc(void); | | 70 | static int inpc(void); |
71 | static unsigned int hash(const char *); | | | |
72 | static sym_t * search(sbuf_t *); | | | |
73 | static int keyw(sym_t *); | | 71 | static int keyw(sym_t *); |
74 | static int get_escaped_char(int); | | 72 | static int get_escaped_char(int); |
75 | | | 73 | |
76 | void | | 74 | void |
77 | lex_next_line(void) | | 75 | lex_next_line(void) |
78 | { | | 76 | { |
79 | curr_pos.p_line++; | | 77 | curr_pos.p_line++; |
80 | curr_pos.p_uniq = 0; | | 78 | curr_pos.p_uniq = 0; |
81 | debug_step("parsing %s:%d", curr_pos.p_file, curr_pos.p_line); | | 79 | debug_step("parsing %s:%d", curr_pos.p_file, curr_pos.p_line); |
82 | if (curr_pos.p_file == csrc_pos.p_file) { | | 80 | if (curr_pos.p_file == csrc_pos.p_file) { |
83 | csrc_pos.p_line++; | | 81 | csrc_pos.p_line++; |
84 | csrc_pos.p_uniq = 0; | | 82 | csrc_pos.p_uniq = 0; |
85 | } | | 83 | } |
| @@ -250,47 +248,95 @@ static struct keyword { | | | @@ -250,47 +248,95 @@ static struct keyword { |
250 | #undef kwdef_type | | 248 | #undef kwdef_type |
251 | #undef kwdef_tqual | | 249 | #undef kwdef_tqual |
252 | #undef kwdef_keyword | | 250 | #undef kwdef_keyword |
253 | #undef kwdef_gcc_attr | | 251 | #undef kwdef_gcc_attr |
254 | }; | | 252 | }; |
255 | | | 253 | |
256 | /* Symbol table */ | | 254 | /* Symbol table */ |
257 | static sym_t *symtab[HSHSIZ1]; | | 255 | static sym_t *symtab[HSHSIZ1]; |
258 | | | 256 | |
259 | /* type of next expected symbol */ | | 257 | /* type of next expected symbol */ |
260 | symt_t symtyp; | | 258 | symt_t symtyp; |
261 | | | 259 | |
262 | | | 260 | |
| | | 261 | static unsigned int |
| | | 262 | hash(const char *s) |
| | | 263 | { |
| | | 264 | unsigned int v; |
| | | 265 | const char *p; |
| | | 266 | |
| | | 267 | v = 0; |
| | | 268 | for (p = s; *p != '\0'; p++) { |
| | | 269 | v = (v << 4) + (unsigned char)*p; |
| | | 270 | v ^= v >> 28; |
| | | 271 | } |
| | | 272 | return v % HSHSIZ1; |
| | | 273 | } |
| | | 274 | |
263 | static void | | 275 | static void |
264 | symtab_add(sym_t *sym) | | 276 | symtab_add(sym_t *sym) |
265 | { | | 277 | { |
266 | size_t h; | | 278 | unsigned int h; |
267 | | | 279 | |
268 | h = hash(sym->s_name); | | 280 | h = hash(sym->s_name); |
269 | if ((sym->s_symtab_next = symtab[h]) != NULL) | | 281 | if ((sym->s_symtab_next = symtab[h]) != NULL) |
270 | symtab[h]->s_symtab_ref = &sym->s_symtab_next; | | 282 | symtab[h]->s_symtab_ref = &sym->s_symtab_next; |
271 | sym->s_symtab_ref = &symtab[h]; | | 283 | sym->s_symtab_ref = &symtab[h]; |
272 | symtab[h] = sym; | | 284 | symtab[h] = sym; |
273 | } | | 285 | } |
274 | | | 286 | |
| | | 287 | static sym_t * |
| | | 288 | symtab_search(sbuf_t *sb) |
| | | 289 | { |
| | | 290 | |
| | | 291 | unsigned int h = hash(sb->sb_name); |
| | | 292 | for (sym_t *sym = symtab[h]; sym != NULL; sym = sym->s_symtab_next) { |
| | | 293 | if (strcmp(sym->s_name, sb->sb_name) != 0) |
| | | 294 | continue; |
| | | 295 | |
| | | 296 | const struct keyword *kw = sym->s_keyword; |
| | | 297 | if (kw != NULL && !kw->kw_attr) |
| | | 298 | return sym; |
| | | 299 | if (kw != NULL && in_gcc_attribute) |
| | | 300 | return sym; |
| | | 301 | if (kw == NULL && !in_gcc_attribute && sym->s_kind == symtyp) |
| | | 302 | return sym; |
| | | 303 | } |
| | | 304 | |
| | | 305 | return NULL; |
| | | 306 | } |
| | | 307 | |
275 | static void | | 308 | static void |
276 | symtab_remove(sym_t *sym) | | 309 | symtab_remove(sym_t *sym) |
277 | { | | 310 | { |
278 | | | 311 | |
279 | if ((*sym->s_symtab_ref = sym->s_symtab_next) != NULL) | | 312 | if ((*sym->s_symtab_ref = sym->s_symtab_next) != NULL) |
280 | sym->s_symtab_next->s_symtab_ref = sym->s_symtab_ref; | | 313 | sym->s_symtab_next->s_symtab_ref = sym->s_symtab_ref; |
281 | sym->s_symtab_next = NULL; | | 314 | sym->s_symtab_next = NULL; |
282 | } | | 315 | } |
283 | | | 316 | |
| | | 317 | static void |
| | | 318 | symtab_remove_locals(void) |
| | | 319 | { |
| | | 320 | |
| | | 321 | for (size_t i = 0; i < HSHSIZ1; i++) { |
| | | 322 | for (sym_t *sym = symtab[i]; sym != NULL; ) { |
| | | 323 | sym_t *next = sym->s_symtab_next; |
| | | 324 | if (sym->s_block_level >= 1) |
| | | 325 | symtab_remove(sym); |
| | | 326 | sym = next; |
| | | 327 | } |
| | | 328 | } |
| | | 329 | } |
284 | | | 330 | |
285 | static void | | 331 | static void |
286 | add_keyword(const struct keyword *kw, bool leading, bool trailing) | | 332 | add_keyword(const struct keyword *kw, bool leading, bool trailing) |
287 | { | | 333 | { |
288 | sym_t *sym; | | 334 | sym_t *sym; |
289 | char buf[256]; | | 335 | char buf[256]; |
290 | const char *name; | | 336 | const char *name; |
291 | | | 337 | |
292 | if (!leading && !trailing) { | | 338 | if (!leading && !trailing) { |
293 | name = kw->kw_name; | | 339 | name = kw->kw_name; |
294 | } else { | | 340 | } else { |
295 | (void)snprintf(buf, sizeof(buf), "%s%s%s", | | 341 | (void)snprintf(buf, sizeof(buf), "%s%s%s", |
296 | leading ? "__" : "", kw->kw_name, trailing ? "__" : ""); | | 342 | leading ? "__" : "", kw->kw_name, trailing ? "__" : ""); |
| @@ -346,112 +392,74 @@ inpc(void) | | | @@ -346,112 +392,74 @@ inpc(void) |
346 | { | | 392 | { |
347 | int c; | | 393 | int c; |
348 | | | 394 | |
349 | if ((c = lex_input()) == EOF) | | 395 | if ((c = lex_input()) == EOF) |
350 | return c; | | 396 | return c; |
351 | c &= CHAR_MASK; | | 397 | c &= CHAR_MASK; |
352 | if (c == '\0') | | 398 | if (c == '\0') |
353 | return EOF; /* lex returns 0 on EOF. */ | | 399 | return EOF; /* lex returns 0 on EOF. */ |
354 | if (c == '\n') | | 400 | if (c == '\n') |
355 | lex_next_line(); | | 401 | lex_next_line(); |
356 | return c; | | 402 | return c; |
357 | } | | 403 | } |
358 | | | 404 | |
359 | static unsigned int | | | |
360 | hash(const char *s) | | | |
361 | { | | | |
362 | unsigned int v; | | | |
363 | const char *p; | | | |
364 | | | | |
365 | v = 0; | | | |
366 | for (p = s; *p != '\0'; p++) { | | | |
367 | v = (v << 4) + (unsigned char)*p; | | | |
368 | v ^= v >> 28; | | | |
369 | } | | | |
370 | return v % HSHSIZ1; | | | |
371 | } | | | |
372 | | | | |
373 | /* | | 405 | /* |
374 | * Lex has found a letter followed by zero or more letters or digits. | | 406 | * Lex has found a letter followed by zero or more letters or digits. |
375 | * It looks for a symbol in the symbol table with the same name. This | | 407 | * It looks for a symbol in the symbol table with the same name. This |
376 | * symbol must either be a keyword or a symbol of the type required by | | 408 | * symbol must either be a keyword or a symbol of the type required by |
377 | * symtyp (label, member, tag, ...). | | 409 | * symtyp (label, member, tag, ...). |
378 | * | | 410 | * |
379 | * If it is a keyword, the token is returned. In some cases it is described | | 411 | * If it is a keyword, the token is returned. In some cases it is described |
380 | * more deeply by data written to yylval. | | 412 | * more deeply by data written to yylval. |
381 | * | | 413 | * |
382 | * If it is a symbol, T_NAME is returned and the name is stored in yylval. | | 414 | * If it is a symbol, T_NAME is returned and the name is stored in yylval. |
383 | * If there is already a symbol of the same name and type in the symbol | | 415 | * If there is already a symbol of the same name and type in the symbol |
384 | * table, yylval.y_name->sb_sym points there. | | 416 | * table, yylval.y_name->sb_sym points there. |
385 | */ | | 417 | */ |
386 | extern int | | 418 | extern int |
387 | lex_name(const char *yytext, size_t yyleng) | | 419 | lex_name(const char *yytext, size_t yyleng) |
388 | { | | 420 | { |
389 | char *s; | | 421 | char *s; |
390 | sbuf_t *sb; | | 422 | sbuf_t *sb; |
391 | sym_t *sym; | | 423 | sym_t *sym; |
392 | int tok; | | 424 | int tok; |
393 | | | 425 | |
394 | sb = xmalloc(sizeof(*sb)); | | 426 | sb = xmalloc(sizeof(*sb)); |
395 | sb->sb_name = yytext; | | 427 | sb->sb_name = yytext; |
396 | sb->sb_len = yyleng; | | 428 | sb->sb_len = yyleng; |
397 | if ((sym = search(sb)) != NULL && sym->s_keyword != NULL) { | | 429 | if ((sym = symtab_search(sb)) != NULL && sym->s_keyword != NULL) { |
398 | free(sb); | | 430 | free(sb); |
399 | return keyw(sym); | | 431 | return keyw(sym); |
400 | } | | 432 | } |
401 | | | 433 | |
402 | sb->sb_sym = sym; | | 434 | sb->sb_sym = sym; |
403 | | | 435 | |
404 | if (sym != NULL) { | | 436 | if (sym != NULL) { |
405 | lint_assert(block_level >= sym->s_block_level); | | 437 | lint_assert(block_level >= sym->s_block_level); |
406 | sb->sb_name = sym->s_name; | | 438 | sb->sb_name = sym->s_name; |
407 | sb->sb_len = strlen(sym->s_name); | | 439 | sb->sb_len = strlen(sym->s_name); |
408 | tok = sym->s_scl == TYPEDEF ? T_TYPENAME : T_NAME; | | 440 | tok = sym->s_scl == TYPEDEF ? T_TYPENAME : T_NAME; |
409 | } else { | | 441 | } else { |
410 | s = block_zero_alloc(yyleng + 1); | | 442 | s = block_zero_alloc(yyleng + 1); |
411 | (void)memcpy(s, yytext, yyleng + 1); | | 443 | (void)memcpy(s, yytext, yyleng + 1); |
412 | sb->sb_name = s; | | 444 | sb->sb_name = s; |
413 | sb->sb_len = yyleng; | | 445 | sb->sb_len = yyleng; |
414 | tok = T_NAME; | | 446 | tok = T_NAME; |
415 | } | | 447 | } |
416 | | | 448 | |
417 | yylval.y_name = sb; | | 449 | yylval.y_name = sb; |
418 | return tok; | | 450 | return tok; |
419 | } | | 451 | } |
420 | | | 452 | |
421 | static sym_t * | | | |
422 | search(sbuf_t *sb) | | | |
423 | { | | | |
424 | unsigned int h; | | | |
425 | sym_t *sym; | | | |
426 | const struct keyword *kw; | | | |
427 | | | | |
428 | h = hash(sb->sb_name); | | | |
429 | for (sym = symtab[h]; sym != NULL; sym = sym->s_symtab_next) { | | | |
430 | if (strcmp(sym->s_name, sb->sb_name) != 0) | | | |
431 | continue; | | | |
432 | kw = sym->s_keyword; | | | |
433 | | | | |
434 | if (kw != NULL && !kw->kw_attr) | | | |
435 | return sym; | | | |
436 | if (kw != NULL && in_gcc_attribute) | | | |
437 | return sym; | | | |
438 | if (kw == NULL && !in_gcc_attribute && sym->s_kind == symtyp) | | | |
439 | return sym; | | | |
440 | } | | | |
441 | | | | |
442 | return NULL; | | | |
443 | } | | | |
444 | | | | |
445 | static int | | 453 | static int |
446 | keyw(sym_t *sym) | | 454 | keyw(sym_t *sym) |
447 | { | | 455 | { |
448 | int t; | | 456 | int t; |
449 | | | 457 | |
450 | if ((t = (int)sym->s_value.v_quad) == T_SCLASS) { | | 458 | if ((t = (int)sym->s_value.v_quad) == T_SCLASS) { |
451 | yylval.y_scl = sym->s_scl; | | 459 | yylval.y_scl = sym->s_scl; |
452 | } else if (t == T_TYPE || t == T_STRUCT_OR_UNION) { | | 460 | } else if (t == T_TYPE || t == T_STRUCT_OR_UNION) { |
453 | yylval.y_tspec = sym->s_tspec; | | 461 | yylval.y_tspec = sym->s_tspec; |
454 | } else if (t == T_QUAL) { | | 462 | } else if (t == T_QUAL) { |
455 | yylval.y_tqual = sym->s_tqual; | | 463 | yylval.y_tqual = sym->s_tqual; |
456 | } | | 464 | } |
457 | return t; | | 465 | return t; |
| @@ -1328,27 +1336,27 @@ getsym(sbuf_t *sb) | | | @@ -1328,27 +1336,27 @@ getsym(sbuf_t *sb) |
1328 | char *s; | | 1336 | char *s; |
1329 | sym_t *sym; | | 1337 | sym_t *sym; |
1330 | | | 1338 | |
1331 | sym = sb->sb_sym; | | 1339 | sym = sb->sb_sym; |
1332 | | | 1340 | |
1333 | /* | | 1341 | /* |
1334 | * During member declaration it is possible that name() looked | | 1342 | * During member declaration it is possible that name() looked |
1335 | * for symbols of type FVFT, although it should have looked for | | 1343 | * for symbols of type FVFT, although it should have looked for |
1336 | * symbols of type FTAG. Same can happen for labels. Both cases | | 1344 | * symbols of type FTAG. Same can happen for labels. Both cases |
1337 | * are compensated here. | | 1345 | * are compensated here. |
1338 | */ | | 1346 | */ |
1339 | if (symtyp == FMEMBER || symtyp == FLABEL) { | | 1347 | if (symtyp == FMEMBER || symtyp == FLABEL) { |
1340 | if (sym == NULL || sym->s_kind == FVFT) | | 1348 | if (sym == NULL || sym->s_kind == FVFT) |
1341 | sym = search(sb); | | 1349 | sym = symtab_search(sb); |
1342 | } | | 1350 | } |
1343 | | | 1351 | |
1344 | if (sym != NULL) { | | 1352 | if (sym != NULL) { |
1345 | lint_assert(sym->s_kind == symtyp); | | 1353 | lint_assert(sym->s_kind == symtyp); |
1346 | symtyp = FVFT; | | 1354 | symtyp = FVFT; |
1347 | free(sb); | | 1355 | free(sb); |
1348 | return sym; | | 1356 | return sym; |
1349 | } | | 1357 | } |
1350 | | | 1358 | |
1351 | /* create a new symbol table entry */ | | 1359 | /* create a new symbol table entry */ |
1352 | | | 1360 | |
1353 | /* labels must always be allocated at level 1 (outermost block) */ | | 1361 | /* labels must always be allocated at level 1 (outermost block) */ |
1354 | if (symtyp == FLABEL) { | | 1362 | if (symtyp == FLABEL) { |
| @@ -1465,38 +1473,30 @@ inssym(int bl, sym_t *sym) | | | @@ -1465,38 +1473,30 @@ inssym(int bl, sym_t *sym) |
1465 | sym->s_block_level >= sym->s_symtab_next->s_block_level); | | 1473 | sym->s_block_level >= sym->s_symtab_next->s_block_level); |
1466 | } | | 1474 | } |
1467 | | | 1475 | |
1468 | /* | | 1476 | /* |
1469 | * Called at level 0 after syntax errors. | | 1477 | * Called at level 0 after syntax errors. |
1470 | * | | 1478 | * |
1471 | * Removes all symbols which are not declared at level 0 from the | | 1479 | * Removes all symbols which are not declared at level 0 from the |
1472 | * symbol table. Also frees all memory which is not associated with | | 1480 | * symbol table. Also frees all memory which is not associated with |
1473 | * level 0. | | 1481 | * level 0. |
1474 | */ | | 1482 | */ |
1475 | void | | 1483 | void |
1476 | cleanup(void) | | 1484 | cleanup(void) |
1477 | { | | 1485 | { |
1478 | sym_t *sym, *nsym; | | | |
1479 | size_t i; | | | |
1480 | | | 1486 | |
1481 | for (i = 0; i < HSHSIZ1; i++) { | | 1487 | symtab_remove_locals(); |
1482 | for (sym = symtab[i]; sym != NULL; sym = nsym) { | | | |
1483 | nsym = sym->s_symtab_next; | | | |
1484 | if (sym->s_block_level >= 1) | | | |
1485 | symtab_remove(sym); | | | |
1486 | } | | | |
1487 | } | | | |
1488 | | | 1488 | |
1489 | for (i = mem_block_level; i > 0; i--) | | 1489 | for (size_t i = mem_block_level; i > 0; i--) |
1490 | level_free_all(i); | | 1490 | level_free_all(i); |
1491 | } | | 1491 | } |
1492 | | | 1492 | |
1493 | /* | | 1493 | /* |
1494 | * Create a new symbol with the name of an existing symbol. | | 1494 | * Create a new symbol with the name of an existing symbol. |
1495 | */ | | 1495 | */ |
1496 | sym_t * | | 1496 | sym_t * |
1497 | pushdown(const sym_t *sym) | | 1497 | pushdown(const sym_t *sym) |
1498 | { | | 1498 | { |
1499 | sym_t *nsym; | | 1499 | sym_t *nsym; |
1500 | | | 1500 | |
1501 | debug_step("pushdown '%s' %s '%s'", | | 1501 | debug_step("pushdown '%s' %s '%s'", |
1502 | sym->s_name, symt_name(sym->s_kind), type_name(sym->s_type)); | | 1502 | sym->s_name, symt_name(sym->s_kind), type_name(sym->s_type)); |