indent: remove a redundant flag from the parser state. No functional change.
diff -r1.11 -r1.12 src/tests/usr.bin/indent/lsym_binary_op.c
(rillig)
--- src/tests/usr.bin/indent/lsym_binary_op.c 2023/06/04 22:57:18 1.11
+++ src/tests/usr.bin/indent/lsym_binary_op.c 2023/06/14 08:25:15 1.12
@@ -1,205 +1,205 @@ | @@ -1,205 +1,205 @@ | |||
1 | /* $NetBSD: lsym_binary_op.c,v 1.11 2023/06/04 22:57:18 rillig Exp $ */ | 1 | /* $NetBSD: lsym_binary_op.c,v 1.12 2023/06/14 08:25:15 rillig Exp $ */ | |
2 | 2 | |||
3 | /* | 3 | /* | |
4 | * Tests for the token lsym_binary_op, which represents a binary operator in | 4 | * Tests for the token lsym_binary_op, which represents a binary operator in | |
5 | * an expression. Examples for binary operators are '>>', '=', '+', '&&'. | 5 | * an expression. Examples for binary operators are '>>', '=', '+', '&&'. | |
6 | * | 6 | * | |
7 | * Binary operators are surrounded by blanks. | 7 | * Binary operators are surrounded by blanks. | |
8 | * | 8 | * | |
9 | * Some tokens like '+', '*' or '&' can be either binary or unary operators, | 9 | * Some tokens like '+', '*' or '&' can be either binary or unary operators, | |
10 | * with an entirely different meaning. | 10 | * with an entirely different meaning. | |
11 | * | 11 | * | |
12 | * The token '*' is not only a binary or a unary operator, it is used in types | 12 | * The token '*' is not only a binary or a unary operator, it is used in types | |
13 | * as well, to derive a pointer type. | 13 | * as well, to derive a pointer type. | |
14 | * | 14 | * | |
15 | * See also: | 15 | * See also: | |
16 | * lsym_postfix_op.c for postfix unary operators | 16 | * lsym_postfix_op.c for postfix unary operators | |
17 | * lsym_unary_op.c for prefix unary operators | 17 | * lsym_unary_op.c for prefix unary operators | |
18 | * lsym_colon.c for ':' | 18 | * lsym_colon.c for ':' | |
19 | * lsym_question.c for '?' | 19 | * lsym_question.c for '?' | |
20 | * lsym_comma.c for ',' | 20 | * lsym_comma.c for ',' | |
21 | * C99 6.4.6 "Punctuators" | 21 | * C99 6.4.6 "Punctuators" | |
22 | */ | 22 | */ | |
23 | 23 | |||
24 | //indent input | 24 | //indent input | |
25 | void | 25 | void | |
26 | binary_operators(void) | 26 | binary_operators(void) | |
27 | { | 27 | { | |
28 | /* In the order of appearance in C11 6.5. */ | 28 | /* In the order of appearance in C11 6.5. */ | |
29 | a = a * a; | 29 | a = a * a; | |
30 | a = a / a; | 30 | a = a / a; | |
31 | a = a % a; | 31 | a = a % a; | |
32 | a = a + a; | 32 | a = a + a; | |
33 | a = a - a; | 33 | a = a - a; | |
34 | a = a << a; | 34 | a = a << a; | |
35 | a = a >> a; | 35 | a = a >> a; | |
36 | a = a < a; | 36 | a = a < a; | |
37 | a = a > a; | 37 | a = a > a; | |
38 | a = a <= a; | 38 | a = a <= a; | |
39 | a = a >= a; | 39 | a = a >= a; | |
40 | a = a == a; | 40 | a = a == a; | |
41 | a = a != a; | 41 | a = a != a; | |
42 | a = a & a; | 42 | a = a & a; | |
43 | a = a ^ a; | 43 | a = a ^ a; | |
44 | a = a | a; | 44 | a = a | a; | |
45 | a = a && a; | 45 | a = a && a; | |
46 | a = a || a; | 46 | a = a || a; | |
47 | a = a ? a : a; | 47 | a = a ? a : a; | |
48 | a = a; | 48 | a = a; | |
49 | a *= a; | 49 | a *= a; | |
50 | a /= a; | 50 | a /= a; | |
51 | a %= a; | 51 | a %= a; | |
52 | a += a; | 52 | a += a; | |
53 | a -= a; | 53 | a -= a; | |
54 | a <<= a; | 54 | a <<= a; | |
55 | a >>= a; | 55 | a >>= a; | |
56 | a &= a; | 56 | a &= a; | |
57 | a ^= a; | 57 | a ^= a; | |
58 | a |= a; | 58 | a |= a; | |
59 | a = a, a; | 59 | a = a, a; | |
60 | } | 60 | } | |
61 | //indent end | 61 | //indent end | |
62 | 62 | |||
63 | //indent run-equals-input | 63 | //indent run-equals-input | |
64 | 64 | |||
65 | 65 | |||
66 | /* | 66 | /* | |
67 | * If a '*' is immediately followed by another '*', they still form separate | 67 | * If a '*' is immediately followed by another '*', they still form separate | |
68 | * operators. The first is a binary operator, the second is unary. | 68 | * operators. The first is a binary operator, the second is unary. | |
69 | */ | 69 | */ | |
70 | //indent input | 70 | //indent input | |
71 | int var = expr**ptr; | 71 | int var = expr**ptr; | |
72 | //indent end | 72 | //indent end | |
73 | 73 | |||
74 | //indent run -di0 | 74 | //indent run -di0 | |
75 | int var = expr * *ptr; | 75 | int var = expr * *ptr; | |
76 | //indent end | 76 | //indent end | |
77 | 77 | |||
78 | 78 | |||
79 | /* | 79 | /* | |
80 | * Before 2023-06-04, indent allowed for arbitrary repetitions of some operator | 80 | * Before 2023-06-04, indent allowed for arbitrary repetitions of some operator | |
81 | * characters, followed by an arbitrary amount of '='. This could be used for | 81 | * characters, followed by an arbitrary amount of '='. This could be used for | |
82 | * operators like '&&' or '|||==='. | 82 | * operators like '&&' or '|||==='. | |
83 | * | 83 | * | |
84 | * Before 2021-03-07 22:11:01, the comment '//' was treated as a binary | 84 | * Before 2021-03-07 22:11:01, the comment '//' was treated as a binary | |
85 | * operator as well, and so was the comment '/////', leading to unexpected | 85 | * operator as well, and so was the comment '/////', leading to unexpected | |
86 | * spacing. | 86 | * spacing. | |
87 | * | 87 | * | |
88 | * See lexi.c, lexi, "default:". | 88 | * See lexi.c, lexi, "default:". | |
89 | */ | 89 | */ | |
90 | //indent input | 90 | //indent input | |
91 | void | 91 | void | |
92 | long_run_of_operators(void) | 92 | long_run_of_operators(void) | |
93 | { | 93 | { | |
94 | if (a &&&&&&& b) | 94 | if (a &&&&&&& b) | |
95 | return; | 95 | return; | |
96 | if (a |||=== b) | 96 | if (a |||=== b) | |
97 | return; | 97 | return; | |
98 | } | 98 | } | |
99 | //indent end | 99 | //indent end | |
100 | 100 | |||
101 | //indent run | 101 | //indent run | |
102 | void | 102 | void | |
103 | long_run_of_operators(void) | 103 | long_run_of_operators(void) | |
104 | { | 104 | { | |
105 | if (a && && && &b) | 105 | if (a && && && &b) | |
106 | return; | 106 | return; | |
107 | if (a || |= == b) | 107 | if (a || |= == b) | |
108 | return; | 108 | return; | |
109 | } | 109 | } | |
110 | //indent end | 110 | //indent end | |
111 | 111 | |||
112 | 112 | |||
113 | /* | 113 | /* | |
114 | * Long chains of '+' and '-' must be split into several operators as the | 114 | * Long chains of '+' and '-' must be split into several operators as the | |
115 | * lexer has to distinguish between '++' and '+' early. The following | 115 | * lexer has to distinguish between '++' and '+' early. The following | |
116 | * sequence is thus tokenized as: | 116 | * sequence is thus tokenized as: | |
117 | * | 117 | * | |
118 | * word "a" | 118 | * word "a" | |
119 | * postfix_op "++" | 119 | * postfix_op "++" | |
120 | * binary_op "++" | 120 | * binary_op "++" | |
121 | * unary_op "++" | 121 | * unary_op "++" | |
122 | * unary_op "+" | 122 | * unary_op "+" | |
123 | * word "b" | 123 | * word "b" | |
124 | * | 124 | * | |
125 | * See lexi.c, lexi, "case '+':". | 125 | * See lexi.c, lexi, "case '+':". | |
126 | */ | 126 | */ | |
127 | //indent input | 127 | //indent input | |
128 | void | 128 | void | |
129 | joined_unary_and_binary_operators(void) | 129 | joined_unary_and_binary_operators(void) | |
130 | { | 130 | { | |
131 | if (a +++++++ b) | 131 | if (a +++++++ b) | |
132 | return; | 132 | return; | |
133 | } | 133 | } | |
134 | //indent end | 134 | //indent end | |
135 | 135 | |||
136 | //indent run | 136 | //indent run | |
137 | void | 137 | void | |
138 | joined_unary_and_binary_operators(void) | 138 | joined_unary_and_binary_operators(void) | |
139 | { | 139 | { | |
140 | if (a++ ++ ++ +b) | 140 | if (a++ ++ ++ +b) | |
141 | return; | 141 | return; | |
142 | } | 142 | } | |
143 | //indent end | 143 | //indent end | |
144 | 144 | |||
145 | 145 | |||
146 | /* | 146 | /* | |
147 | * Ensure that the result of the indentation does not depend on whether a | 147 | * Ensure that the result of the indentation does not depend on whether a | |
148 | * token from the input starts in column 1 or 9. | 148 | * token from the input starts in column 1 or 9. | |
149 | * | 149 | * | |
150 | * See process_binary_op, ps.curr_col_1. | 150 | * See process_binary_op. | |
151 | */ | 151 | */ | |
152 | //indent input | 152 | //indent input | |
153 | int col_1 // | 153 | int col_1 // | |
154 | = // | 154 | = // | |
155 | 1; | 155 | 1; | |
156 | 156 | |||
157 | int col_9 // | 157 | int col_9 // | |
158 | = // | 158 | = // | |
159 | 9; | 159 | 9; | |
160 | //indent end | 160 | //indent end | |
161 | 161 | |||
162 | //indent run | 162 | //indent run | |
163 | int col_1 // | 163 | int col_1 // | |
164 | = // | 164 | = // | |
165 | 1; | 165 | 1; | |
166 | 166 | |||
167 | int col_9 // | 167 | int col_9 // | |
168 | = // | 168 | = // | |
169 | 9; | 169 | 9; | |
170 | //indent end | 170 | //indent end | |
171 | 171 | |||
172 | 172 | |||
173 | /* | 173 | /* | |
174 | * The ternary conditional operator is not a binary operator, but both its | 174 | * The ternary conditional operator is not a binary operator, but both its | |
175 | * components '?' and ':' follow the same spacing rules. | 175 | * components '?' and ':' follow the same spacing rules. | |
176 | */ | 176 | */ | |
177 | //indent input | 177 | //indent input | |
178 | int conditional = condition ? number : number; | 178 | int conditional = condition ? number : number; | |
179 | //indent end | 179 | //indent end | |
180 | 180 | |||
181 | //indent run-equals-input -di0 | 181 | //indent run-equals-input -di0 | |
182 | 182 | |||
183 | 183 | |||
184 | // After a ']', a '*' is a binary operator. | 184 | // After a ']', a '*' is a binary operator. | |
185 | //indent input | 185 | //indent input | |
186 | int x = arr[3]*y; | 186 | int x = arr[3]*y; | |
187 | //indent end | 187 | //indent end | |
188 | 188 | |||
189 | //indent run -di0 | 189 | //indent run -di0 | |
190 | int x = arr[3] * y; | 190 | int x = arr[3] * y; | |
191 | //indent end | 191 | //indent end | |
192 | 192 | |||
193 | 193 | |||
194 | /* | 194 | /* | |
195 | * Ensure that after an assignment, a '*=' operator is properly spaced, like | 195 | * Ensure that after an assignment, a '*=' operator is properly spaced, like | |
196 | * any other binary operator. | 196 | * any other binary operator. | |
197 | */ | 197 | */ | |
198 | //indent input | 198 | //indent input | |
199 | { | 199 | { | |
200 | a = a; | 200 | a = a; | |
201 | a *= b *= c; | 201 | a *= b *= c; | |
202 | } | 202 | } | |
203 | //indent end | 203 | //indent end | |
204 | 204 | |||
205 | //indent run-equals-input -di0 | 205 | //indent run-equals-input -di0 |
--- src/usr.bin/indent/debug.c 2023/06/14 07:20:55 1.54
+++ src/usr.bin/indent/debug.c 2023/06/14 08:25:15 1.55
@@ -1,398 +1,397 @@ | @@ -1,398 +1,397 @@ | |||
1 | /* $NetBSD: debug.c,v 1.54 2023/06/14 07:20:55 rillig Exp $ */ | 1 | /* $NetBSD: debug.c,v 1.55 2023/06/14 08:25:15 rillig Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 2023 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 2023 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * This code is derived from software contributed to The NetBSD Foundation | 7 | * This code is derived from software contributed to The NetBSD Foundation | |
8 | * by Roland Illig <rillig@NetBSD.org>. | 8 | * by Roland Illig <rillig@NetBSD.org>. | |
9 | * | 9 | * | |
10 | * Redistribution and use in source and binary forms, with or without | 10 | * Redistribution and use in source and binary forms, with or without | |
11 | * modification, are permitted provided that the following conditions | 11 | * modification, are permitted provided that the following conditions | |
12 | * are met: | 12 | * are met: | |
13 | * 1. Redistributions of source code must retain the above copyright | 13 | * 1. Redistributions of source code must retain the above copyright | |
14 | * notice, this list of conditions and the following disclaimer. | 14 | * notice, this list of conditions and the following disclaimer. | |
15 | * 2. Redistributions in binary form must reproduce the above copyright | 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
16 | * notice, this list of conditions and the following disclaimer in the | 16 | * notice, this list of conditions and the following disclaimer in the | |
17 | * documentation and/or other materials provided with the distribution. | 17 | * documentation and/or other materials provided with the distribution. | |
18 | * | 18 | * | |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
29 | * POSSIBILITY OF SUCH DAMAGE. | 29 | * POSSIBILITY OF SUCH DAMAGE. | |
30 | */ | 30 | */ | |
31 | 31 | |||
32 | #include <sys/cdefs.h> | 32 | #include <sys/cdefs.h> | |
33 | __RCSID("$NetBSD: debug.c,v 1.54 2023/06/14 07:20:55 rillig Exp $"); | 33 | __RCSID("$NetBSD: debug.c,v 1.55 2023/06/14 08:25:15 rillig Exp $"); | |
34 | 34 | |||
35 | #include <stdarg.h> | 35 | #include <stdarg.h> | |
36 | #include <string.h> | 36 | #include <string.h> | |
37 | 37 | |||
38 | #include "indent.h" | 38 | #include "indent.h" | |
39 | 39 | |||
40 | #ifdef debug | 40 | #ifdef debug | |
41 | 41 | |||
42 | static struct { | 42 | static struct { | |
43 | /*- | 43 | /*- | |
44 | * false show only the changes to the parser state | 44 | * false show only the changes to the parser state | |
45 | * true show unchanged parts of the parser state as well | 45 | * true show unchanged parts of the parser state as well | |
46 | */ | 46 | */ | |
47 | bool full_parser_state; | 47 | bool full_parser_state; | |
48 | } config = { | 48 | } config = { | |
49 | .full_parser_state = false, | 49 | .full_parser_state = false, | |
50 | }; | 50 | }; | |
51 | 51 | |||
52 | const char *const lsym_name[] = { | 52 | const char *const lsym_name[] = { | |
53 | "eof", | 53 | "eof", | |
54 | "preprocessing", | 54 | "preprocessing", | |
55 | "newline", | 55 | "newline", | |
56 | "comment", | 56 | "comment", | |
57 | "lparen", | 57 | "lparen", | |
58 | "rparen", | 58 | "rparen", | |
59 | "lbracket", | 59 | "lbracket", | |
60 | "rbracket", | 60 | "rbracket", | |
61 | "lbrace", | 61 | "lbrace", | |
62 | "rbrace", | 62 | "rbrace", | |
63 | "period", | 63 | "period", | |
64 | "unary_op", | 64 | "unary_op", | |
65 | "sizeof", | 65 | "sizeof", | |
66 | "offsetof", | 66 | "offsetof", | |
67 | "postfix_op", | 67 | "postfix_op", | |
68 | "binary_op", | 68 | "binary_op", | |
69 | "question", | 69 | "question", | |
70 | "question_colon", | 70 | "question_colon", | |
71 | "comma", | 71 | "comma", | |
72 | "typedef", | 72 | "typedef", | |
73 | "modifier", | 73 | "modifier", | |
74 | "tag", | 74 | "tag", | |
75 | "type_outside_parentheses", | 75 | "type_outside_parentheses", | |
76 | "type_in_parentheses", | 76 | "type_in_parentheses", | |
77 | "word", | 77 | "word", | |
78 | "funcname", | 78 | "funcname", | |
79 | "label_colon", | 79 | "label_colon", | |
80 | "other_colon", | 80 | "other_colon", | |
81 | "semicolon", | 81 | "semicolon", | |
82 | "case", | 82 | "case", | |
83 | "default", | 83 | "default", | |
84 | "do", | 84 | "do", | |
85 | "else", | 85 | "else", | |
86 | "for", | 86 | "for", | |
87 | "if", | 87 | "if", | |
88 | "switch", | 88 | "switch", | |
89 | "while", | 89 | "while", | |
90 | "return", | 90 | "return", | |
91 | }; | 91 | }; | |
92 | 92 | |||
93 | const char *const psym_name[] = { | 93 | const char *const psym_name[] = { | |
94 | "-", | 94 | "-", | |
95 | "{block", | 95 | "{block", | |
96 | "{struct", | 96 | "{struct", | |
97 | "{union", | 97 | "{union", | |
98 | "{enum", | 98 | "{enum", | |
99 | "}", | 99 | "}", | |
100 | "decl", | 100 | "decl", | |
101 | "stmt", | 101 | "stmt", | |
102 | "for_exprs", | 102 | "for_exprs", | |
103 | "if_expr", | 103 | "if_expr", | |
104 | "if_expr_stmt", | 104 | "if_expr_stmt", | |
105 | "if_expr_stmt_else", | 105 | "if_expr_stmt_else", | |
106 | "else", | 106 | "else", | |
107 | "switch_expr", | 107 | "switch_expr", | |
108 | "do", | 108 | "do", | |
109 | "do_stmt", | 109 | "do_stmt", | |
110 | "while_expr", | 110 | "while_expr", | |
111 | }; | 111 | }; | |
112 | 112 | |||
113 | static const char *const declaration_name[] = { | 113 | static const char *const declaration_name[] = { | |
114 | "no", | 114 | "no", | |
115 | "begin", | 115 | "begin", | |
116 | "end", | 116 | "end", | |
117 | }; | 117 | }; | |
118 | 118 | |||
119 | const char *const paren_level_cast_name[] = { | 119 | const char *const paren_level_cast_name[] = { | |
120 | "(unknown cast)", | 120 | "(unknown cast)", | |
121 | "(maybe cast)", | 121 | "(maybe cast)", | |
122 | "(no cast)", | 122 | "(no cast)", | |
123 | }; | 123 | }; | |
124 | 124 | |||
125 | const char *const line_kind_name[] = { | 125 | const char *const line_kind_name[] = { | |
126 | "other", | 126 | "other", | |
127 | "blank", | 127 | "blank", | |
128 | "#if", | 128 | "#if", | |
129 | "#endif", | 129 | "#endif", | |
130 | "stmt head", | 130 | "stmt head", | |
131 | "}", | 131 | "}", | |
132 | "block comment", | 132 | "block comment", | |
133 | "case/default", | 133 | "case/default", | |
134 | }; | 134 | }; | |
135 | 135 | |||
136 | static const char *const extra_expr_indent_name[] = { | 136 | static const char *const extra_expr_indent_name[] = { | |
137 | "no", | 137 | "no", | |
138 | "maybe", | 138 | "maybe", | |
139 | "last", | 139 | "last", | |
140 | }; | 140 | }; | |
141 | 141 | |||
142 | static struct { | 142 | static struct { | |
143 | struct parser_state prev_ps; | 143 | struct parser_state prev_ps; | |
144 | bool ps_first; | 144 | bool ps_first; | |
145 | const char *heading; | 145 | const char *heading; | |
146 | unsigned wrote_newlines; | 146 | unsigned wrote_newlines; | |
147 | } state = { | 147 | } state = { | |
148 | .ps_first = true, | 148 | .ps_first = true, | |
149 | .wrote_newlines = 1, | 149 | .wrote_newlines = 1, | |
150 | }; | 150 | }; | |
151 | 151 | |||
152 | void | 152 | void | |
153 | debug_printf(const char *fmt, ...) | 153 | debug_printf(const char *fmt, ...) | |
154 | { | 154 | { | |
155 | FILE *f = output == stdout ? stderr : stdout; | 155 | FILE *f = output == stdout ? stderr : stdout; | |
156 | va_list ap; | 156 | va_list ap; | |
157 | 157 | |||
158 | if (state.heading != NULL) { | 158 | if (state.heading != NULL) { | |
159 | fprintf(f, "%s\n", state.heading); | 159 | fprintf(f, "%s\n", state.heading); | |
160 | state.heading = NULL; | 160 | state.heading = NULL; | |
161 | } | 161 | } | |
162 | va_start(ap, fmt); | 162 | va_start(ap, fmt); | |
163 | vfprintf(f, fmt, ap); | 163 | vfprintf(f, fmt, ap); | |
164 | va_end(ap); | 164 | va_end(ap); | |
165 | state.wrote_newlines = 0; | 165 | state.wrote_newlines = 0; | |
166 | } | 166 | } | |
167 | 167 | |||
168 | void | 168 | void | |
169 | debug_println(const char *fmt, ...) | 169 | debug_println(const char *fmt, ...) | |
170 | { | 170 | { | |
171 | FILE *f = output == stdout ? stderr : stdout; | 171 | FILE *f = output == stdout ? stderr : stdout; | |
172 | va_list ap; | 172 | va_list ap; | |
173 | 173 | |||
174 | if (state.heading != NULL) { | 174 | if (state.heading != NULL) { | |
175 | fprintf(f, "%s\n", state.heading); | 175 | fprintf(f, "%s\n", state.heading); | |
176 | state.heading = NULL; | 176 | state.heading = NULL; | |
177 | state.wrote_newlines = 1; | 177 | state.wrote_newlines = 1; | |
178 | } | 178 | } | |
179 | va_start(ap, fmt); | 179 | va_start(ap, fmt); | |
180 | vfprintf(f, fmt, ap); | 180 | vfprintf(f, fmt, ap); | |
181 | va_end(ap); | 181 | va_end(ap); | |
182 | fprintf(f, "\n"); | 182 | fprintf(f, "\n"); | |
183 | state.wrote_newlines = fmt[0] == '\0' ? state.wrote_newlines + 1 : 1; | 183 | state.wrote_newlines = fmt[0] == '\0' ? state.wrote_newlines + 1 : 1; | |
184 | } | 184 | } | |
185 | 185 | |||
186 | void | 186 | void | |
187 | debug_blank_line(void) | 187 | debug_blank_line(void) | |
188 | { | 188 | { | |
189 | while (state.wrote_newlines < 2) | 189 | while (state.wrote_newlines < 2) | |
190 | debug_println(""); | 190 | debug_println(""); | |
191 | } | 191 | } | |
192 | 192 | |||
193 | void | 193 | void | |
194 | debug_vis_range(const char *prefix, const char *s, size_t len, | 194 | debug_vis_range(const char *prefix, const char *s, size_t len, | |
195 | const char *suffix) | 195 | const char *suffix) | |
196 | { | 196 | { | |
197 | debug_printf("%s", prefix); | 197 | debug_printf("%s", prefix); | |
198 | for (size_t i = 0; i < len; i++) { | 198 | for (size_t i = 0; i < len; i++) { | |
199 | const char *p = s + i; | 199 | const char *p = s + i; | |
200 | if (*p == '\\' || *p == '"') | 200 | if (*p == '\\' || *p == '"') | |
201 | debug_printf("\\%c", *p); | 201 | debug_printf("\\%c", *p); | |
202 | else if (isprint((unsigned char)*p)) | 202 | else if (isprint((unsigned char)*p)) | |
203 | debug_printf("%c", *p); | 203 | debug_printf("%c", *p); | |
204 | else if (*p == '\n') | 204 | else if (*p == '\n') | |
205 | debug_printf("\\n"); | 205 | debug_printf("\\n"); | |
206 | else if (*p == '\t') | 206 | else if (*p == '\t') | |
207 | debug_printf("\\t"); | 207 | debug_printf("\\t"); | |
208 | else | 208 | else | |
209 | debug_printf("\\x%02x", (unsigned char)*p); | 209 | debug_printf("\\x%02x", (unsigned char)*p); | |
210 | } | 210 | } | |
211 | debug_printf("%s", suffix); | 211 | debug_printf("%s", suffix); | |
212 | } | 212 | } | |
213 | 213 | |||
214 | void | 214 | void | |
215 | debug_print_buf(const char *name, const struct buffer *buf) | 215 | debug_print_buf(const char *name, const struct buffer *buf) | |
216 | { | 216 | { | |
217 | if (buf->len > 0) { | 217 | if (buf->len > 0) { | |
218 | debug_printf(" %s ", name); | 218 | debug_printf(" %s ", name); | |
219 | debug_vis_range("\"", buf->s, buf->len, "\""); | 219 | debug_vis_range("\"", buf->s, buf->len, "\""); | |
220 | } | 220 | } | |
221 | } | 221 | } | |
222 | 222 | |||
223 | void | 223 | void | |
224 | debug_buffers(void) | 224 | debug_buffers(void) | |
225 | { | 225 | { | |
226 | debug_print_buf("label", &lab); | 226 | debug_print_buf("label", &lab); | |
227 | debug_print_buf("code", &code); | 227 | debug_print_buf("code", &code); | |
228 | debug_print_buf("comment", &com); | 228 | debug_print_buf("comment", &com); | |
229 | debug_blank_line(); | 229 | debug_blank_line(); | |
230 | } | 230 | } | |
231 | 231 | |||
232 | static void | 232 | static void | |
233 | write_ps_bool(const char *name, bool prev, bool curr) | 233 | write_ps_bool(const char *name, bool prev, bool curr) | |
234 | { | 234 | { | |
235 | if (!state.ps_first && curr != prev) { | 235 | if (!state.ps_first && curr != prev) { | |
236 | char diff = " -+x"[(prev ? 1 : 0) + (curr ? 2 : 0)]; | 236 | char diff = " -+x"[(prev ? 1 : 0) + (curr ? 2 : 0)]; | |
237 | debug_println(" [%c] ps.%s", diff, name); | 237 | debug_println(" [%c] ps.%s", diff, name); | |
238 | } else if (config.full_parser_state || state.ps_first) | 238 | } else if (config.full_parser_state || state.ps_first) | |
239 | debug_println(" [%c] ps.%s", curr ? 'x' : ' ', name); | 239 | debug_println(" [%c] ps.%s", curr ? 'x' : ' ', name); | |
240 | } | 240 | } | |
241 | 241 | |||
242 | static void | 242 | static void | |
243 | write_ps_int(const char *name, int prev, int curr) | 243 | write_ps_int(const char *name, int prev, int curr) | |
244 | { | 244 | { | |
245 | if (!state.ps_first && curr != prev) | 245 | if (!state.ps_first && curr != prev) | |
246 | debug_println(" %3d -> %3d ps.%s", prev, curr, name); | 246 | debug_println(" %3d -> %3d ps.%s", prev, curr, name); | |
247 | else if (config.full_parser_state || state.ps_first) | 247 | else if (config.full_parser_state || state.ps_first) | |
248 | debug_println(" %3d ps.%s", curr, name); | 248 | debug_println(" %3d ps.%s", curr, name); | |
249 | } | 249 | } | |
250 | 250 | |||
251 | static void | 251 | static void | |
252 | write_ps_enum(const char *name, const char *prev, const char *curr) | 252 | write_ps_enum(const char *name, const char *prev, const char *curr) | |
253 | { | 253 | { | |
254 | if (!state.ps_first && strcmp(prev, curr) != 0) | 254 | if (!state.ps_first && strcmp(prev, curr) != 0) | |
255 | debug_println(" %3s -> %3s ps.%s", prev, curr, name); | 255 | debug_println(" %3s -> %3s ps.%s", prev, curr, name); | |
256 | else if (config.full_parser_state || state.ps_first) | 256 | else if (config.full_parser_state || state.ps_first) | |
257 | debug_println(" %10s ps.%s", curr, name); | 257 | debug_println(" %10s ps.%s", curr, name); | |
258 | } | 258 | } | |
259 | 259 | |||
260 | static bool | 260 | static bool | |
261 | ps_paren_has_changed(void) | 261 | ps_paren_has_changed(void) | |
262 | { | 262 | { | |
263 | if (state.prev_ps.nparen != ps.nparen) | 263 | if (state.prev_ps.nparen != ps.nparen) | |
264 | return true; | 264 | return true; | |
265 | 265 | |||
266 | const struct paren_level *prev = state.prev_ps.paren, *curr = ps.paren; | 266 | const struct paren_level *prev = state.prev_ps.paren, *curr = ps.paren; | |
267 | for (int i = 0; i < ps.nparen; i++) | 267 | for (int i = 0; i < ps.nparen; i++) | |
268 | if (curr[i].indent != prev[i].indent | 268 | if (curr[i].indent != prev[i].indent | |
269 | || curr[i].cast != prev[i].cast) | 269 | || curr[i].cast != prev[i].cast) | |
270 | return true; | 270 | return true; | |
271 | return false; | 271 | return false; | |
272 | } | 272 | } | |
273 | 273 | |||
274 | static void | 274 | static void | |
275 | debug_ps_paren(void) | 275 | debug_ps_paren(void) | |
276 | { | 276 | { | |
277 | if (!config.full_parser_state && !ps_paren_has_changed() | 277 | if (!config.full_parser_state && !ps_paren_has_changed() | |
278 | && !state.ps_first) | 278 | && !state.ps_first) | |
279 | return; | 279 | return; | |
280 | 280 | |||
281 | debug_printf(" ps.paren:"); | 281 | debug_printf(" ps.paren:"); | |
282 | for (int i = 0; i < ps.nparen; i++) { | 282 | for (int i = 0; i < ps.nparen; i++) { | |
283 | debug_printf(" %s%d", | 283 | debug_printf(" %s%d", | |
284 | paren_level_cast_name[ps.paren[i].cast], | 284 | paren_level_cast_name[ps.paren[i].cast], | |
285 | ps.paren[i].indent); | 285 | ps.paren[i].indent); | |
286 | } | 286 | } | |
287 | if (ps.nparen == 0) | 287 | if (ps.nparen == 0) | |
288 | debug_printf(" none"); | 288 | debug_printf(" none"); | |
289 | debug_println(""); | 289 | debug_println(""); | |
290 | } | 290 | } | |
291 | 291 | |||
292 | static bool | 292 | static bool | |
293 | ps_di_stack_has_changed(void) | 293 | ps_di_stack_has_changed(void) | |
294 | { | 294 | { | |
295 | if (state.prev_ps.decl_level != ps.decl_level) | 295 | if (state.prev_ps.decl_level != ps.decl_level) | |
296 | return true; | 296 | return true; | |
297 | for (int i = 0; i < ps.decl_level; i++) | 297 | for (int i = 0; i < ps.decl_level; i++) | |
298 | if (state.prev_ps.di_stack[i] != ps.di_stack[i]) | 298 | if (state.prev_ps.di_stack[i] != ps.di_stack[i]) | |
299 | return true; | 299 | return true; | |
300 | return false; | 300 | return false; | |
301 | } | 301 | } | |
302 | 302 | |||
303 | static void | 303 | static void | |
304 | debug_ps_di_stack(void) | 304 | debug_ps_di_stack(void) | |
305 | { | 305 | { | |
306 | bool changed = ps_di_stack_has_changed(); | 306 | bool changed = ps_di_stack_has_changed(); | |
307 | if (!config.full_parser_state && !changed && !state.ps_first) | 307 | if (!config.full_parser_state && !changed && !state.ps_first) | |
308 | return; | 308 | return; | |
309 | 309 | |||
310 | debug_printf(" %s ps.di_stack:", changed ? "->" : " "); | 310 | debug_printf(" %s ps.di_stack:", changed ? "->" : " "); | |
311 | for (int i = 0; i < ps.decl_level; i++) | 311 | for (int i = 0; i < ps.decl_level; i++) | |
312 | debug_printf(" %d", ps.di_stack[i]); | 312 | debug_printf(" %d", ps.di_stack[i]); | |
313 | if (ps.decl_level == 0) | 313 | if (ps.decl_level == 0) | |
314 | debug_printf(" none"); | 314 | debug_printf(" none"); | |
315 | debug_println(""); | 315 | debug_println(""); | |
316 | } | 316 | } | |
317 | 317 | |||
318 | #define debug_ps_bool(name) \ | 318 | #define debug_ps_bool(name) \ | |
319 | write_ps_bool(#name, state.prev_ps.name, ps.name) | 319 | write_ps_bool(#name, state.prev_ps.name, ps.name) | |
320 | #define debug_ps_int(name) \ | 320 | #define debug_ps_int(name) \ | |
321 | write_ps_int(#name, state.prev_ps.name, ps.name) | 321 | write_ps_int(#name, state.prev_ps.name, ps.name) | |
322 | #define debug_ps_enum(name, names) \ | 322 | #define debug_ps_enum(name, names) \ | |
323 | write_ps_enum(#name, (names)[state.prev_ps.name], (names)[ps.name]) | 323 | write_ps_enum(#name, (names)[state.prev_ps.name], (names)[ps.name]) | |
324 | 324 | |||
325 | void | 325 | void | |
326 | debug_parser_state(void) | 326 | debug_parser_state(void) | |
327 | { | 327 | { | |
328 | debug_blank_line(); | 328 | debug_blank_line(); | |
329 | 329 | |||
330 | state.heading = "token classification"; | 330 | state.heading = "token classification"; | |
331 | debug_ps_enum(prev_lsym, lsym_name); | 331 | debug_ps_enum(prev_lsym, lsym_name); | |
332 | debug_ps_bool(in_stmt_or_decl); | 332 | debug_ps_bool(in_stmt_or_decl); | |
333 | debug_ps_bool(in_decl); | 333 | debug_ps_bool(in_decl); | |
334 | debug_ps_bool(in_var_decl); | 334 | debug_ps_bool(in_var_decl); | |
335 | debug_ps_bool(in_init); | 335 | debug_ps_bool(in_init); | |
336 | debug_ps_int(init_level); | 336 | debug_ps_int(init_level); | |
337 | debug_ps_bool(line_has_func_def); | 337 | debug_ps_bool(line_has_func_def); | |
338 | debug_ps_bool(in_func_def_params); | 338 | debug_ps_bool(in_func_def_params); | |
339 | debug_ps_bool(line_has_decl); | 339 | debug_ps_bool(line_has_decl); | |
340 | debug_ps_enum(lbrace_kind, psym_name); | 340 | debug_ps_enum(lbrace_kind, psym_name); | |
341 | debug_ps_enum(spaced_expr_psym, psym_name); | 341 | debug_ps_enum(spaced_expr_psym, psym_name); | |
342 | debug_ps_bool(seen_case); | 342 | debug_ps_bool(seen_case); | |
343 | debug_ps_bool(prev_paren_was_cast); | 343 | debug_ps_bool(prev_paren_was_cast); | |
344 | debug_ps_int(quest_level); | 344 | debug_ps_int(quest_level); | |
345 | 345 | |||
346 | state.heading = "indentation of statements and declarations"; | 346 | state.heading = "indentation of statements and declarations"; | |
347 | debug_ps_int(ind_level); | 347 | debug_ps_int(ind_level); | |
348 | debug_ps_int(ind_level_follow); | 348 | debug_ps_int(ind_level_follow); | |
349 | debug_ps_bool(in_stmt_cont); | 349 | debug_ps_bool(in_stmt_cont); | |
350 | debug_ps_int(decl_level); | 350 | debug_ps_int(decl_level); | |
351 | debug_ps_di_stack(); | 351 | debug_ps_di_stack(); | |
352 | debug_ps_bool(decl_indent_done); | 352 | debug_ps_bool(decl_indent_done); | |
353 | debug_ps_int(decl_ind); | 353 | debug_ps_int(decl_ind); | |
354 | debug_ps_bool(tabs_to_var); | 354 | debug_ps_bool(tabs_to_var); | |
355 | debug_ps_enum(extra_expr_indent, extra_expr_indent_name); | 355 | debug_ps_enum(extra_expr_indent, extra_expr_indent_name); | |
356 | 356 | |||
357 | // The parser symbol stack is printed in debug_psyms_stack instead. | 357 | // The parser symbol stack is printed in debug_psyms_stack instead. | |
358 | 358 | |||
359 | state.heading = "spacing inside a statement or declaration"; | 359 | state.heading = "spacing inside a statement or declaration"; | |
360 | debug_ps_bool(next_unary); | 360 | debug_ps_bool(next_unary); | |
361 | debug_ps_bool(want_blank); | 361 | debug_ps_bool(want_blank); | |
362 | debug_ps_int(ind_paren_level); | 362 | debug_ps_int(ind_paren_level); | |
363 | debug_ps_int(nparen); | 363 | debug_ps_int(nparen); | |
364 | debug_ps_paren(); | 364 | debug_ps_paren(); | |
365 | 365 | |||
366 | state.heading = "horizontal spacing for comments"; | 366 | state.heading = "horizontal spacing for comments"; | |
367 | debug_ps_int(comment_delta); | 367 | debug_ps_int(comment_delta); | |
368 | debug_ps_int(n_comment_delta); | 368 | debug_ps_int(n_comment_delta); | |
369 | debug_ps_int(com_ind); | 369 | debug_ps_int(com_ind); | |
370 | 370 | |||
371 | state.heading = "vertical spacing"; | 371 | state.heading = "vertical spacing"; | |
372 | debug_ps_bool(break_after_comma); | 372 | debug_ps_bool(break_after_comma); | |
373 | debug_ps_bool(force_nl); | 373 | debug_ps_bool(force_nl); | |
374 | debug_ps_enum(declaration, declaration_name); | 374 | debug_ps_enum(declaration, declaration_name); | |
375 | debug_ps_bool(blank_line_after_decl); | 375 | debug_ps_bool(blank_line_after_decl); | |
376 | 376 | |||
377 | state.heading = "comments"; | 377 | state.heading = "comments"; | |
378 | debug_ps_bool(curr_col_1); | |||
379 | debug_ps_bool(next_col_1); | 378 | debug_ps_bool(next_col_1); | |
380 | 379 | |||
381 | state.heading = NULL; | 380 | state.heading = NULL; | |
382 | debug_blank_line(); | 381 | debug_blank_line(); | |
383 | 382 | |||
384 | state.prev_ps = ps; | 383 | state.prev_ps = ps; | |
385 | state.ps_first = false; | 384 | state.ps_first = false; | |
386 | } | 385 | } | |
387 | 386 | |||
388 | void | 387 | void | |
389 | debug_psyms_stack(const char *situation) | 388 | debug_psyms_stack(const char *situation) | |
390 | { | 389 | { | |
391 | debug_printf("parse stack %s:", situation); | 390 | debug_printf("parse stack %s:", situation); | |
392 | const struct psym_stack *psyms = &ps.psyms; | 391 | const struct psym_stack *psyms = &ps.psyms; | |
393 | for (int i = 0; i <= psyms->top; ++i) | 392 | for (int i = 0; i <= psyms->top; ++i) | |
394 | debug_printf(" %d %s", | 393 | debug_printf(" %d %s", | |
395 | psyms->ind_level[i], psym_name[psyms->sym[i]]); | 394 | psyms->ind_level[i], psym_name[psyms->sym[i]]); | |
396 | debug_println(""); | 395 | debug_println(""); | |
397 | } | 396 | } | |
398 | #endif | 397 | #endif |
--- src/usr.bin/indent/indent.h 2023/06/14 07:20:55 1.189
+++ src/usr.bin/indent/indent.h 2023/06/14 08:25:15 1.190
@@ -1,534 +1,533 @@ | @@ -1,534 +1,533 @@ | |||
1 | /* $NetBSD: indent.h,v 1.189 2023/06/14 07:20:55 rillig Exp $ */ | 1 | /* $NetBSD: indent.h,v 1.190 2023/06/14 08:25:15 rillig Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * SPDX-License-Identifier: BSD-2-Clause-FreeBSD | 4 | * SPDX-License-Identifier: BSD-2-Clause-FreeBSD | |
5 | * | 5 | * | |
6 | * Copyright (c) 2001 Jens Schweikhardt | 6 | * Copyright (c) 2001 Jens Schweikhardt | |
7 | * All rights reserved. | 7 | * All rights reserved. | |
8 | * | 8 | * | |
9 | * Redistribution and use in source and binary forms, with or without | 9 | * Redistribution and use in source and binary forms, with or without | |
10 | * modification, are permitted provided that the following conditions | 10 | * modification, are permitted provided that the following conditions | |
11 | * are met: | 11 | * are met: | |
12 | * 1. Redistributions of source code must retain the above copyright | 12 | * 1. Redistributions of source code must retain the above copyright | |
13 | * notice, this list of conditions and the following disclaimer. | 13 | * notice, this list of conditions and the following disclaimer. | |
14 | * 2. Redistributions in binary form must reproduce the above copyright | 14 | * 2. Redistributions in binary form must reproduce the above copyright | |
15 | * notice, this list of conditions and the following disclaimer in the | 15 | * notice, this list of conditions and the following disclaimer in the | |
16 | * documentation and/or other materials provided with the distribution. | 16 | * documentation and/or other materials provided with the distribution. | |
17 | * | 17 | * | |
18 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND | 18 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND | |
19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
20 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 20 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
21 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE | 21 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE | |
22 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 22 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
23 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 23 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
24 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 24 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
25 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 25 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
27 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 27 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
28 | * SUCH DAMAGE. | 28 | * SUCH DAMAGE. | |
29 | */ | 29 | */ | |
30 | /*- | 30 | /*- | |
31 | * SPDX-License-Identifier: BSD-4-Clause | 31 | * SPDX-License-Identifier: BSD-4-Clause | |
32 | * | 32 | * | |
33 | * Copyright (c) 1985 Sun Microsystems, Inc. | 33 | * Copyright (c) 1985 Sun Microsystems, Inc. | |
34 | * Copyright (c) 1980, 1993 | 34 | * Copyright (c) 1980, 1993 | |
35 | * The Regents of the University of California. All rights reserved. | 35 | * The Regents of the University of California. All rights reserved. | |
36 | * All rights reserved. | 36 | * All rights reserved. | |
37 | * | 37 | * | |
38 | * Redistribution and use in source and binary forms, with or without | 38 | * Redistribution and use in source and binary forms, with or without | |
39 | * modification, are permitted provided that the following conditions | 39 | * modification, are permitted provided that the following conditions | |
40 | * are met: | 40 | * are met: | |
41 | * 1. Redistributions of source code must retain the above copyright | 41 | * 1. Redistributions of source code must retain the above copyright | |
42 | * notice, this list of conditions and the following disclaimer. | 42 | * notice, this list of conditions and the following disclaimer. | |
43 | * 2. Redistributions in binary form must reproduce the above copyright | 43 | * 2. Redistributions in binary form must reproduce the above copyright | |
44 | * notice, this list of conditions and the following disclaimer in the | 44 | * notice, this list of conditions and the following disclaimer in the | |
45 | * documentation and/or other materials provided with the distribution. | 45 | * documentation and/or other materials provided with the distribution. | |
46 | * 3. All advertising materials mentioning features or use of this software | 46 | * 3. All advertising materials mentioning features or use of this software | |
47 | * must display the following acknowledgement: | 47 | * must display the following acknowledgement: | |
48 | * This product includes software developed by the University of | 48 | * This product includes software developed by the University of | |
49 | * California, Berkeley and its contributors. | 49 | * California, Berkeley and its contributors. | |
50 | * 4. Neither the name of the University nor the names of its contributors | 50 | * 4. Neither the name of the University nor the names of its contributors | |
51 | * may be used to endorse or promote products derived from this software | 51 | * may be used to endorse or promote products derived from this software | |
52 | * without specific prior written permission. | 52 | * without specific prior written permission. | |
53 | * | 53 | * | |
54 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 54 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
55 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 55 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
56 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 56 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
57 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 57 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
58 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 58 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
59 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 59 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
60 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 60 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
61 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 61 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
62 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 62 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
63 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 63 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
64 | * SUCH DAMAGE. | 64 | * SUCH DAMAGE. | |
65 | */ | 65 | */ | |
66 | 66 | |||
67 | #include <ctype.h> | 67 | #include <ctype.h> | |
68 | #include <stdbool.h> | 68 | #include <stdbool.h> | |
69 | #include <stdio.h> | 69 | #include <stdio.h> | |
70 | 70 | |||
71 | typedef enum lexer_symbol { | 71 | typedef enum lexer_symbol { | |
72 | lsym_eof, | 72 | lsym_eof, | |
73 | lsym_preprocessing, /* the initial '#' of a preprocessing line */ | 73 | lsym_preprocessing, /* the initial '#' of a preprocessing line */ | |
74 | lsym_newline, | 74 | lsym_newline, | |
75 | lsym_comment, /* the initial '/ *' or '//' of a comment */ | 75 | lsym_comment, /* the initial '/ *' or '//' of a comment */ | |
76 | 76 | |||
77 | lsym_lparen, | 77 | lsym_lparen, | |
78 | lsym_rparen, | 78 | lsym_rparen, | |
79 | lsym_lbracket, | 79 | lsym_lbracket, | |
80 | lsym_rbracket, | 80 | lsym_rbracket, | |
81 | lsym_lbrace, | 81 | lsym_lbrace, | |
82 | lsym_rbrace, | 82 | lsym_rbrace, | |
83 | 83 | |||
84 | lsym_period, | 84 | lsym_period, | |
85 | lsym_unary_op, /* e.g. '*', '&', '-' or leading '++' */ | 85 | lsym_unary_op, /* e.g. '*', '&', '-' or leading '++' */ | |
86 | lsym_sizeof, | 86 | lsym_sizeof, | |
87 | lsym_offsetof, | 87 | lsym_offsetof, | |
88 | lsym_postfix_op, /* trailing '++' or '--' */ | 88 | lsym_postfix_op, /* trailing '++' or '--' */ | |
89 | lsym_binary_op, /* e.g. '*', '&', '<<', '&&' or '/=' */ | 89 | lsym_binary_op, /* e.g. '*', '&', '<<', '&&' or '/=' */ | |
90 | lsym_question, /* the '?' from a '?:' expression */ | 90 | lsym_question, /* the '?' from a '?:' expression */ | |
91 | lsym_question_colon, /* the ':' from a '?:' expression */ | 91 | lsym_question_colon, /* the ':' from a '?:' expression */ | |
92 | lsym_comma, | 92 | lsym_comma, | |
93 | 93 | |||
94 | lsym_typedef, | 94 | lsym_typedef, | |
95 | lsym_modifier, /* modifiers for types, functions, variables */ | 95 | lsym_modifier, /* modifiers for types, functions, variables */ | |
96 | lsym_tag, /* 'struct', 'union' or 'enum' */ | 96 | lsym_tag, /* 'struct', 'union' or 'enum' */ | |
97 | lsym_type_outside_parentheses, | 97 | lsym_type_outside_parentheses, | |
98 | lsym_type_in_parentheses, | 98 | lsym_type_in_parentheses, | |
99 | lsym_word, /* identifier, constant or string */ | 99 | lsym_word, /* identifier, constant or string */ | |
100 | lsym_funcname, /* name of a function being defined */ | 100 | lsym_funcname, /* name of a function being defined */ | |
101 | lsym_label_colon, /* the ':' after a label */ | 101 | lsym_label_colon, /* the ':' after a label */ | |
102 | lsym_other_colon, /* bit-fields, generic-association (C11), | 102 | lsym_other_colon, /* bit-fields, generic-association (C11), | |
103 | * enum-type-specifier (C23), | 103 | * enum-type-specifier (C23), | |
104 | * attribute-prefixed-token (C23), | 104 | * attribute-prefixed-token (C23), | |
105 | * pp-prefixed-parameter (C23 6.10) */ | 105 | * pp-prefixed-parameter (C23 6.10) */ | |
106 | lsym_semicolon, | 106 | lsym_semicolon, | |
107 | 107 | |||
108 | lsym_case, | 108 | lsym_case, | |
109 | lsym_default, | 109 | lsym_default, | |
110 | lsym_do, | 110 | lsym_do, | |
111 | lsym_else, | 111 | lsym_else, | |
112 | lsym_for, | 112 | lsym_for, | |
113 | lsym_if, | 113 | lsym_if, | |
114 | lsym_switch, | 114 | lsym_switch, | |
115 | lsym_while, | 115 | lsym_while, | |
116 | lsym_return, | 116 | lsym_return, | |
117 | } lexer_symbol; | 117 | } lexer_symbol; | |
118 | 118 | |||
119 | /* | 119 | /* | |
120 | * Structure of the source code, in terms of declarations, statements and | 120 | * Structure of the source code, in terms of declarations, statements and | |
121 | * braces; used to determine the indentation level of these parts. | 121 | * braces; used to determine the indentation level of these parts. | |
122 | */ | 122 | */ | |
123 | typedef enum parser_symbol { | 123 | typedef enum parser_symbol { | |
124 | psym_0, /* a placeholder; not stored on the stack */ | 124 | psym_0, /* a placeholder; not stored on the stack */ | |
125 | psym_lbrace_block, /* '{' for a block of code */ | 125 | psym_lbrace_block, /* '{' for a block of code */ | |
126 | psym_lbrace_struct, /* '{' in 'struct ... { ... }' */ | 126 | psym_lbrace_struct, /* '{' in 'struct ... { ... }' */ | |
127 | psym_lbrace_union, /* '{' in 'union ... { ... }' */ | 127 | psym_lbrace_union, /* '{' in 'union ... { ... }' */ | |
128 | psym_lbrace_enum, /* '{' in 'enum ... { ... }' */ | 128 | psym_lbrace_enum, /* '{' in 'enum ... { ... }' */ | |
129 | psym_rbrace, /* not stored on the stack */ | 129 | psym_rbrace, /* not stored on the stack */ | |
130 | psym_decl, | 130 | psym_decl, | |
131 | psym_stmt, | 131 | psym_stmt, | |
132 | psym_for_exprs, /* 'for' '(' ... ')' */ | 132 | psym_for_exprs, /* 'for' '(' ... ')' */ | |
133 | psym_if_expr, /* 'if' '(' expr ')' */ | 133 | psym_if_expr, /* 'if' '(' expr ')' */ | |
134 | psym_if_expr_stmt, /* 'if' '(' expr ')' stmt */ | 134 | psym_if_expr_stmt, /* 'if' '(' expr ')' stmt */ | |
135 | psym_if_expr_stmt_else, /* 'if' '(' expr ')' stmt 'else' */ | 135 | psym_if_expr_stmt_else, /* 'if' '(' expr ')' stmt 'else' */ | |
136 | psym_else, /* 'else'; not stored on the stack */ | 136 | psym_else, /* 'else'; not stored on the stack */ | |
137 | psym_switch_expr, /* 'switch' '(' expr ')' */ | 137 | psym_switch_expr, /* 'switch' '(' expr ')' */ | |
138 | psym_do, /* 'do' */ | 138 | psym_do, /* 'do' */ | |
139 | psym_do_stmt, /* 'do' stmt */ | 139 | psym_do_stmt, /* 'do' stmt */ | |
140 | psym_while_expr, /* 'while' '(' expr ')' */ | 140 | psym_while_expr, /* 'while' '(' expr ')' */ | |
141 | } parser_symbol; | 141 | } parser_symbol; | |
142 | 142 | |||
143 | /* A range of characters, only null-terminated in debug mode. */ | 143 | /* A range of characters, only null-terminated in debug mode. */ | |
144 | struct buffer { | 144 | struct buffer { | |
145 | char *s; | 145 | char *s; | |
146 | size_t len; | 146 | size_t len; | |
147 | size_t cap; | 147 | size_t cap; | |
148 | }; | 148 | }; | |
149 | 149 | |||
150 | extern FILE *input; | 150 | extern FILE *input; | |
151 | extern FILE *output; | 151 | extern FILE *output; | |
152 | 152 | |||
153 | /* | 153 | /* | |
154 | * The current line from the input file, used by the lexer to generate tokens. | 154 | * The current line from the input file, used by the lexer to generate tokens. | |
155 | * To read from the line, start at inp_p and continue up to and including the | 155 | * To read from the line, start at inp_p and continue up to and including the | |
156 | * next '\n'. To read beyond the '\n', call inp_skip or inp_next, which will | 156 | * next '\n'. To read beyond the '\n', call inp_skip or inp_next, which will | |
157 | * make the next line available, invalidating any pointers into the previous | 157 | * make the next line available, invalidating any pointers into the previous | |
158 | * line. | 158 | * line. | |
159 | */ | 159 | */ | |
160 | extern struct buffer inp; | 160 | extern struct buffer inp; | |
161 | extern const char *inp_p; | 161 | extern const char *inp_p; | |
162 | 162 | |||
163 | extern struct buffer token; /* the current token to be processed, is | 163 | extern struct buffer token; /* the current token to be processed, is | |
164 | * typically copied to the buffer 'code', or in | 164 | * typically copied to the buffer 'code', or in | |
165 | * some cases to 'lab'. */ | 165 | * some cases to 'lab'. */ | |
166 | 166 | |||
167 | extern struct buffer lab; /* the label or preprocessor directive */ | 167 | extern struct buffer lab; /* the label or preprocessor directive */ | |
168 | extern struct buffer code; /* the main part of the current line of code, | 168 | extern struct buffer code; /* the main part of the current line of code, | |
169 | * containing declarations or statements */ | 169 | * containing declarations or statements */ | |
170 | extern struct buffer com; /* the trailing comment of the line, or the | 170 | extern struct buffer com; /* the trailing comment of the line, or the | |
171 | * start or end of a multi-line comment, or | 171 | * start or end of a multi-line comment, or | |
172 | * while in process_comment, a single line of a | 172 | * while in process_comment, a single line of a | |
173 | * multi-line comment */ | 173 | * multi-line comment */ | |
174 | 174 | |||
175 | extern struct options { | 175 | extern struct options { | |
176 | bool blank_line_around_conditional_compilation; | 176 | bool blank_line_around_conditional_compilation; | |
177 | bool blank_line_after_decl_at_top; /* this is vaguely similar to | 177 | bool blank_line_after_decl_at_top; /* this is vaguely similar to | |
178 | * blank_line_after_decl except | 178 | * blank_line_after_decl except | |
179 | * that it only applies to the | 179 | * that it only applies to the | |
180 | * first set of declarations in | 180 | * first set of declarations in | |
181 | * a procedure (just after the | 181 | * a procedure (just after the | |
182 | * first '{') and it causes a | 182 | * first '{') and it causes a | |
183 | * blank line to be generated | 183 | * blank line to be generated | |
184 | * even if there are no | 184 | * even if there are no | |
185 | * declarations */ | 185 | * declarations */ | |
186 | bool blank_line_after_decl; | 186 | bool blank_line_after_decl; | |
187 | bool blank_line_after_proc; | 187 | bool blank_line_after_proc; | |
188 | bool blank_line_before_block_comment; | 188 | bool blank_line_before_block_comment; | |
189 | bool break_after_comma; /* whether to add a line break after each | 189 | bool break_after_comma; /* whether to add a line break after each | |
190 | * declarator */ | 190 | * declarator */ | |
191 | bool brace_same_line; /* whether a brace should be on same line as an | 191 | bool brace_same_line; /* whether a brace should be on same line as an | |
192 | * if, while, etc. */ | 192 | * if, while, etc. */ | |
193 | bool blank_after_sizeof; | 193 | bool blank_after_sizeof; | |
194 | bool comment_delimiter_on_blank_line; | 194 | bool comment_delimiter_on_blank_line; | |
195 | int decl_comment_column; /* the column in which comments after | 195 | int decl_comment_column; /* the column in which comments after | |
196 | * declarations should be put */ | 196 | * declarations should be put */ | |
197 | bool cuddle_else; /* whether 'else' should cuddle up to '}' */ | 197 | bool cuddle_else; /* whether 'else' should cuddle up to '}' */ | |
198 | int continuation_indent; /* the indentation between the edge of | 198 | int continuation_indent; /* the indentation between the edge of | |
199 | * code and continuation lines */ | 199 | * code and continuation lines */ | |
200 | float case_indent; /* the distance (measured in indentation | 200 | float case_indent; /* the distance (measured in indentation | |
201 | * levels) to indent case labels from the | 201 | * levels) to indent case labels from the | |
202 | * switch statement */ | 202 | * switch statement */ | |
203 | int comment_column; /* the column in which comments to the right of | 203 | int comment_column; /* the column in which comments to the right of | |
204 | * code should start */ | 204 | * code should start */ | |
205 | int decl_indent; /* indentation of identifier in declaration */ | 205 | int decl_indent; /* indentation of identifier in declaration */ | |
206 | bool left_justify_decl; | 206 | bool left_justify_decl; | |
207 | int unindent_displace; /* comments not to the right of code will be | 207 | int unindent_displace; /* comments not to the right of code will be | |
208 | * placed this many indentation levels to the | 208 | * placed this many indentation levels to the | |
209 | * left of code */ | 209 | * left of code */ | |
210 | bool extra_expr_indent; /* whether continuation lines from the | 210 | bool extra_expr_indent; /* whether continuation lines from the | |
211 | * expression part of "if (e)", "while (e)", | 211 | * expression part of "if (e)", "while (e)", | |
212 | * "for (e; e; e)" should be indented an extra | 212 | * "for (e; e; e)" should be indented an extra | |
213 | * tab stop so that they are not confused with | 213 | * tab stop so that they are not confused with | |
214 | * the code that follows */ | 214 | * the code that follows */ | |
215 | bool else_if_in_same_line; | 215 | bool else_if_in_same_line; | |
216 | bool function_brace_split; /* split function declaration and brace | 216 | bool function_brace_split; /* split function declaration and brace | |
217 | * onto separate lines */ | 217 | * onto separate lines */ | |
218 | bool format_col1_comments; /* whether comments that start in | 218 | bool format_col1_comments; /* whether comments that start in | |
219 | * column 1 are to be reformatted (just | 219 | * column 1 are to be reformatted (just | |
220 | * like comments that begin in later | 220 | * like comments that begin in later | |
221 | * columns) */ | 221 | * columns) */ | |
222 | bool format_block_comments; /* whether to reformat comments that | 222 | bool format_block_comments; /* whether to reformat comments that | |
223 | * begin with '/ * \n' */ | 223 | * begin with '/ * \n' */ | |
224 | bool indent_parameters; | 224 | bool indent_parameters; | |
225 | int indent_size; /* the size of one indentation level */ | 225 | int indent_size; /* the size of one indentation level */ | |
226 | int block_comment_max_line_length; | 226 | int block_comment_max_line_length; | |
227 | int local_decl_indent; /* like decl_indent but for locals */ | 227 | int local_decl_indent; /* like decl_indent but for locals */ | |
228 | bool lineup_to_parens_always; /* whether to not(?) attempt to keep | 228 | bool lineup_to_parens_always; /* whether to not(?) attempt to keep | |
229 | * lined-up code within the margin */ | 229 | * lined-up code within the margin */ | |
230 | bool lineup_to_parens; /* whether continued code within parens will be | 230 | bool lineup_to_parens; /* whether continued code within parens will be | |
231 | * lined up to the open paren */ | 231 | * lined up to the open paren */ | |
232 | bool proc_calls_space; /* whether function calls look like: foo (bar) | 232 | bool proc_calls_space; /* whether function calls look like: foo (bar) | |
233 | * rather than foo(bar) */ | 233 | * rather than foo(bar) */ | |
234 | bool procnames_start_line; /* whether the names of functions being | 234 | bool procnames_start_line; /* whether the names of functions being | |
235 | * defined get placed in column 1 (i.e. | 235 | * defined get placed in column 1 (i.e. | |
236 | * a newline is placed between the type | 236 | * a newline is placed between the type | |
237 | * of the function and its name) */ | 237 | * of the function and its name) */ | |
238 | bool space_after_cast; /* "b = (int) a" vs. "b = (int)a" */ | 238 | bool space_after_cast; /* "b = (int) a" vs. "b = (int)a" */ | |
239 | bool star_comment_cont; /* whether comment continuation lines should | 239 | bool star_comment_cont; /* whether comment continuation lines should | |
240 | * have stars at the beginning of each line */ | 240 | * have stars at the beginning of each line */ | |
241 | bool swallow_optional_blank_lines; | 241 | bool swallow_optional_blank_lines; | |
242 | bool auto_typedefs; /* whether to recognize identifiers ending in | 242 | bool auto_typedefs; /* whether to recognize identifiers ending in | |
243 | * "_t" like typedefs */ | 243 | * "_t" like typedefs */ | |
244 | int tabsize; /* the size of a tab */ | 244 | int tabsize; /* the size of a tab */ | |
245 | int max_line_length; | 245 | int max_line_length; | |
246 | bool use_tabs; /* set true to use tabs for spacing, false uses | 246 | bool use_tabs; /* set true to use tabs for spacing, false uses | |
247 | * all spaces */ | 247 | * all spaces */ | |
248 | bool verbose; /* print configuration to stderr */ | 248 | bool verbose; /* print configuration to stderr */ | |
249 | } opt; | 249 | } opt; | |
250 | 250 | |||
251 | extern bool found_err; | 251 | extern bool found_err; | |
252 | extern bool had_eof; /* whether input is exhausted */ | 252 | extern bool had_eof; /* whether input is exhausted */ | |
253 | extern int line_no; /* the current input line number */ | 253 | extern int line_no; /* the current input line number */ | |
254 | extern enum indent_enabled { | 254 | extern enum indent_enabled { | |
255 | indent_on, | 255 | indent_on, | |
256 | indent_off, | 256 | indent_off, | |
257 | indent_last_off_line, | 257 | indent_last_off_line, | |
258 | } indent_enabled; | 258 | } indent_enabled; | |
259 | 259 | |||
260 | #define STACKSIZE 256 | 260 | #define STACKSIZE 256 | |
261 | 261 | |||
262 | /* Properties of each level of parentheses or brackets. */ | 262 | /* Properties of each level of parentheses or brackets. */ | |
263 | struct paren_level { | 263 | struct paren_level { | |
264 | int indent; /* indentation of the operand/argument, | 264 | int indent; /* indentation of the operand/argument, | |
265 | * relative to the enclosing statement; if | 265 | * relative to the enclosing statement; if | |
266 | * negative, reflected at -1 */ | 266 | * negative, reflected at -1 */ | |
267 | enum paren_level_cast { | 267 | enum paren_level_cast { | |
268 | cast_unknown, | 268 | cast_unknown, | |
269 | cast_maybe, | 269 | cast_maybe, | |
270 | cast_no, | 270 | cast_no, | |
271 | } cast; /* whether the parentheses form a type cast */ | 271 | } cast; /* whether the parentheses form a type cast */ | |
272 | }; | 272 | }; | |
273 | 273 | |||
274 | struct psym_stack { | 274 | struct psym_stack { | |
275 | int top; /* pointer to top of stack */ | 275 | int top; /* pointer to top of stack */ | |
276 | parser_symbol sym[STACKSIZE]; | 276 | parser_symbol sym[STACKSIZE]; | |
277 | int ind_level[STACKSIZE]; | 277 | int ind_level[STACKSIZE]; | |
278 | }; | 278 | }; | |
279 | 279 | |||
280 | /* | 280 | /* | |
281 | * The parser state determines the layout of the formatted text. | 281 | * The parser state determines the layout of the formatted text. | |
282 | * | 282 | * | |
283 | * At each '#if', the parser state is copied so that the corresponding '#else' | 283 | * At each '#if', the parser state is copied so that the corresponding '#else' | |
284 | * lines start in the same state. | 284 | * lines start in the same state. | |
285 | * | 285 | * | |
286 | * In a function body, the number of block braces determines the indentation | 286 | * In a function body, the number of block braces determines the indentation | |
287 | * of statements and declarations. | 287 | * of statements and declarations. | |
288 | * | 288 | * | |
289 | * In a statement, the number of parentheses or brackets determines the | 289 | * In a statement, the number of parentheses or brackets determines the | |
290 | * indentation of follow-up lines. | 290 | * indentation of follow-up lines. | |
291 | * | 291 | * | |
292 | * In an expression, the token type determines whether to put spaces around. | 292 | * In an expression, the token type determines whether to put spaces around. | |
293 | * | 293 | * | |
294 | * In a source file, the types of line determine the vertical spacing, such as | 294 | * In a source file, the types of line determine the vertical spacing, such as | |
295 | * around preprocessing directives or function bodies, or above block | 295 | * around preprocessing directives or function bodies, or above block | |
296 | * comments. | 296 | * comments. | |
297 | */ | 297 | */ | |
298 | extern struct parser_state { | 298 | extern struct parser_state { | |
299 | lexer_symbol prev_lsym; /* the previous token, but never comment, | 299 | lexer_symbol prev_lsym; /* the previous token, but never comment, | |
300 | * newline or preprocessing line */ | 300 | * newline or preprocessing line */ | |
301 | 301 | |||
302 | /* Token classification */ | 302 | /* Token classification */ | |
303 | 303 | |||
304 | bool in_stmt_or_decl; /* whether in a statement or a struct | 304 | bool in_stmt_or_decl; /* whether in a statement or a struct | |
305 | * declaration or a plain declaration */ | 305 | * declaration or a plain declaration */ | |
306 | bool in_decl; /* XXX: double-check the exact meaning */ | 306 | bool in_decl; /* XXX: double-check the exact meaning */ | |
307 | bool in_var_decl; /* starts at a type name or a '){' from a | 307 | bool in_var_decl; /* starts at a type name or a '){' from a | |
308 | * compound literal; ends at the '(' from a | 308 | * compound literal; ends at the '(' from a | |
309 | * function definition or a ';' outside '{}'; | 309 | * function definition or a ';' outside '{}'; | |
310 | * when active, '{}' form struct or union | 310 | * when active, '{}' form struct or union | |
311 | * declarations, ':' marks a bit-field, and '=' | 311 | * declarations, ':' marks a bit-field, and '=' | |
312 | * starts an initializer */ | 312 | * starts an initializer */ | |
313 | bool in_init; /* whether inside an initializer */ | 313 | bool in_init; /* whether inside an initializer */ | |
314 | int init_level; /* the number of '{}' in an initializer */ | 314 | int init_level; /* the number of '{}' in an initializer */ | |
315 | bool line_has_func_def; /* starts either at the 'name(' from a function | 315 | bool line_has_func_def; /* starts either at the 'name(' from a function | |
316 | * definition if it occurs at the beginning of | 316 | * definition if it occurs at the beginning of | |
317 | * a line, or at the first '*' from inside a | 317 | * a line, or at the first '*' from inside a | |
318 | * declaration when the line starts with words | 318 | * declaration when the line starts with words | |
319 | * followed by a '(' */ | 319 | * followed by a '(' */ | |
320 | bool in_func_def_params; /* for old-style functions */ | 320 | bool in_func_def_params; /* for old-style functions */ | |
321 | bool line_has_decl; /* whether this line of code has part of a | 321 | bool line_has_decl; /* whether this line of code has part of a | |
322 | * declaration on it; used for indenting | 322 | * declaration on it; used for indenting | |
323 | * comments */ | 323 | * comments */ | |
324 | parser_symbol lbrace_kind; /* the kind of brace to be pushed to | 324 | parser_symbol lbrace_kind; /* the kind of brace to be pushed to | |
325 | * the parser symbol stack next */ | 325 | * the parser symbol stack next */ | |
326 | parser_symbol spaced_expr_psym; /* the parser symbol to be shifted | 326 | parser_symbol spaced_expr_psym; /* the parser symbol to be shifted | |
327 | * after the parenthesized expression | 327 | * after the parenthesized expression | |
328 | * from a 'for', 'if', 'switch' or | 328 | * from a 'for', 'if', 'switch' or | |
329 | * 'while'; or psym_0 */ | 329 | * 'while'; or psym_0 */ | |
330 | bool seen_case; /* whether there was a 'case' or 'default', to | 330 | bool seen_case; /* whether there was a 'case' or 'default', to | |
331 | * properly space the following ':' */ | 331 | * properly space the following ':' */ | |
332 | bool prev_paren_was_cast; | 332 | bool prev_paren_was_cast; | |
333 | int quest_level; /* when this is positive, we have seen a '?' | 333 | int quest_level; /* when this is positive, we have seen a '?' | |
334 | * without the matching ':' in a '?:' | 334 | * without the matching ':' in a '?:' | |
335 | * expression */ | 335 | * expression */ | |
336 | 336 | |||
337 | /* Indentation of statements and declarations */ | 337 | /* Indentation of statements and declarations */ | |
338 | 338 | |||
339 | int ind_level; /* the indentation level for the line that is | 339 | int ind_level; /* the indentation level for the line that is | |
340 | * currently prepared for output */ | 340 | * currently prepared for output */ | |
341 | int ind_level_follow; /* the level to which ind_level should be set | 341 | int ind_level_follow; /* the level to which ind_level should be set | |
342 | * after the current line is printed */ | 342 | * after the current line is printed */ | |
343 | bool in_stmt_cont; /* whether the current line should have an | 343 | bool in_stmt_cont; /* whether the current line should have an | |
344 | * extra indentation level because we are in | 344 | * extra indentation level because we are in | |
345 | * the middle of a statement */ | 345 | * the middle of a statement */ | |
346 | int decl_level; /* current nesting level for a structure | 346 | int decl_level; /* current nesting level for a structure | |
347 | * declaration or an initializer */ | 347 | * declaration or an initializer */ | |
348 | int di_stack[20]; /* a stack of structure indentation levels */ | 348 | int di_stack[20]; /* a stack of structure indentation levels */ | |
349 | bool decl_indent_done; /* whether the indentation for a declaration | 349 | bool decl_indent_done; /* whether the indentation for a declaration | |
350 | * has been added to the code buffer. */ | 350 | * has been added to the code buffer. */ | |
351 | int decl_ind; /* current indentation for declarations */ | 351 | int decl_ind; /* current indentation for declarations */ | |
352 | bool tabs_to_var; /* true if using tabs to indent to var name */ | 352 | bool tabs_to_var; /* true if using tabs to indent to var name */ | |
353 | 353 | |||
354 | enum { | 354 | enum { | |
355 | eei_no, | 355 | eei_no, | |
356 | eei_maybe, | 356 | eei_maybe, | |
357 | eei_last | 357 | eei_last | |
358 | } extra_expr_indent; | 358 | } extra_expr_indent; | |
359 | 359 | |||
360 | struct psym_stack psyms; | 360 | struct psym_stack psyms; | |
361 | 361 | |||
362 | /* Spacing inside a statement or declaration */ | 362 | /* Spacing inside a statement or declaration */ | |
363 | 363 | |||
364 | bool next_unary; /* whether the following operator should be | 364 | bool next_unary; /* whether the following operator should be | |
365 | * unary; is used in declarations for '*', as | 365 | * unary; is used in declarations for '*', as | |
366 | * well as in expressions */ | 366 | * well as in expressions */ | |
367 | bool want_blank; /* whether the following token should be | 367 | bool want_blank; /* whether the following token should be | |
368 | * prefixed by a blank. (Said prefixing is | 368 | * prefixed by a blank. (Said prefixing is | |
369 | * ignored in some cases.) */ | 369 | * ignored in some cases.) */ | |
370 | int ind_paren_level; /* the number of parentheses or brackets that | 370 | int ind_paren_level; /* the number of parentheses or brackets that | |
371 | * is used for indenting a continuation line of | 371 | * is used for indenting a continuation line of | |
372 | * a declaration, initializer or statement */ | 372 | * a declaration, initializer or statement */ | |
373 | int nparen; /* the number of parentheses or brackets that | 373 | int nparen; /* the number of parentheses or brackets that | |
374 | * are currently open; used to indent the | 374 | * are currently open; used to indent the | |
375 | * remaining lines of the statement, | 375 | * remaining lines of the statement, | |
376 | * initializer or declaration */ | 376 | * initializer or declaration */ | |
377 | struct paren_level paren[20]; | 377 | struct paren_level paren[20]; | |
378 | 378 | |||
379 | /* Horizontal spacing for comments */ | 379 | /* Horizontal spacing for comments */ | |
380 | 380 | |||
381 | int comment_delta; /* used to set up indentation for all lines of | 381 | int comment_delta; /* used to set up indentation for all lines of | |
382 | * a boxed comment after the first one */ | 382 | * a boxed comment after the first one */ | |
383 | int n_comment_delta; /* remembers how many columns there were before | 383 | int n_comment_delta; /* remembers how many columns there were before | |
384 | * the start of a box comment so that | 384 | * the start of a box comment so that | |
385 | * forthcoming lines of the comment are | 385 | * forthcoming lines of the comment are | |
386 | * indented properly */ | 386 | * indented properly */ | |
387 | int com_ind; /* indentation of the current comment */ | 387 | int com_ind; /* indentation of the current comment */ | |
388 | 388 | |||
389 | /* Vertical spacing */ | 389 | /* Vertical spacing */ | |
390 | 390 | |||
391 | bool break_after_comma; /* whether to add a newline after the next | 391 | bool break_after_comma; /* whether to add a newline after the next | |
392 | * comma; used in declarations but not in | 392 | * comma; used in declarations but not in | |
393 | * initializer lists */ | 393 | * initializer lists */ | |
394 | bool force_nl; /* whether the next token is forced to go to a | 394 | bool force_nl; /* whether the next token is forced to go to a | |
395 | * new line; used after 'if (expr)' and in | 395 | * new line; used after 'if (expr)' and in | |
396 | * similar situations; tokens like '{' may | 396 | * similar situations; tokens like '{' may | |
397 | * ignore this */ | 397 | * ignore this */ | |
398 | 398 | |||
399 | enum declaration { | 399 | enum declaration { | |
400 | decl_no, /* no declaration anywhere nearby */ | 400 | decl_no, /* no declaration anywhere nearby */ | |
401 | decl_begin, /* collecting tokens of a declaration */ | 401 | decl_begin, /* collecting tokens of a declaration */ | |
402 | decl_end, /* finished a declaration */ | 402 | decl_end, /* finished a declaration */ | |
403 | } declaration; | 403 | } declaration; | |
404 | bool blank_line_after_decl; | 404 | bool blank_line_after_decl; | |
405 | 405 | |||
406 | /* Comments */ | 406 | /* Comments */ | |
407 | 407 | |||
408 | bool curr_col_1; /* whether the current token started in column | 408 | bool next_col_1; /* whether the next token starts in column 1 of | |
409 | * 1 of the original input */ | 409 | * the original input */ | |
410 | bool next_col_1; | |||
411 | } ps; | 410 | } ps; | |
412 | 411 | |||
413 | extern struct output_state { | 412 | extern struct output_state { | |
414 | enum line_kind { | 413 | enum line_kind { | |
415 | lk_other, | 414 | lk_other, | |
416 | lk_blank, | 415 | lk_blank, | |
417 | lk_if, /* #if, #ifdef, #ifndef */ | 416 | lk_if, /* #if, #ifdef, #ifndef */ | |
418 | lk_endif, /* #endif */ | 417 | lk_endif, /* #endif */ | |
419 | lk_stmt_head, /* the ')' of an incomplete statement such as | 418 | lk_stmt_head, /* the ')' of an incomplete statement such as | |
420 | * 'if (expr)' or 'for (expr; expr; expr)' */ | 419 | * 'if (expr)' or 'for (expr; expr; expr)' */ | |
421 | lk_func_end, /* the last '}' of a function body */ | 420 | lk_func_end, /* the last '}' of a function body */ | |
422 | lk_block_comment, | 421 | lk_block_comment, | |
423 | lk_case_or_default, | 422 | lk_case_or_default, | |
424 | } line_kind; /* kind of the line that is being prepared for | 423 | } line_kind; /* kind of the line that is being prepared for | |
425 | * output; is reset to lk_other each time after | 424 | * output; is reset to lk_other each time after | |
426 | * trying to send a line to the output, even if | 425 | * trying to send a line to the output, even if | |
427 | * that line was a suppressed blank line; used | 426 | * that line was a suppressed blank line; used | |
428 | * for inserting or removing blank lines */ | 427 | * for inserting or removing blank lines */ | |
429 | enum line_kind prev_line_kind; /* the kind of line that was actually | 428 | enum line_kind prev_line_kind; /* the kind of line that was actually | |
430 | * sent to the output */ | 429 | * sent to the output */ | |
431 | 430 | |||
432 | struct buffer indent_off_text; /* text from between 'INDENT OFF' and | 431 | struct buffer indent_off_text; /* text from between 'INDENT OFF' and | |
433 | * 'INDENT ON', both inclusive */ | 432 | * 'INDENT ON', both inclusive */ | |
434 | } out; | 433 | } out; | |
435 | 434 | |||
436 | 435 | |||
437 | #define array_length(array) (sizeof(array) / sizeof((array)[0])) | 436 | #define array_length(array) (sizeof(array) / sizeof((array)[0])) | |
438 | 437 | |||
439 | #ifdef debug | 438 | #ifdef debug | |
440 | void debug_printf(const char *, ...) __printflike(1, 2); | 439 | void debug_printf(const char *, ...) __printflike(1, 2); | |
441 | void debug_println(const char *, ...) __printflike(1, 2); | 440 | void debug_println(const char *, ...) __printflike(1, 2); | |
442 | void debug_blank_line(void); | 441 | void debug_blank_line(void); | |
443 | void debug_vis_range(const char *, const char *, size_t, const char *); | 442 | void debug_vis_range(const char *, const char *, size_t, const char *); | |
444 | void debug_parser_state(void); | 443 | void debug_parser_state(void); | |
445 | void debug_psyms_stack(const char *); | 444 | void debug_psyms_stack(const char *); | |
446 | void debug_print_buf(const char *, const struct buffer *); | 445 | void debug_print_buf(const char *, const struct buffer *); | |
447 | void debug_buffers(void); | 446 | void debug_buffers(void); | |
448 | extern const char *const lsym_name[]; | 447 | extern const char *const lsym_name[]; | |
449 | extern const char *const psym_name[]; | 448 | extern const char *const psym_name[]; | |
450 | extern const char *const paren_level_cast_name[]; | 449 | extern const char *const paren_level_cast_name[]; | |
451 | extern const char *const line_kind_name[]; | 450 | extern const char *const line_kind_name[]; | |
452 | #else | 451 | #else | |
453 | #define debug_noop() do { } while (false) | 452 | #define debug_noop() do { } while (false) | |
454 | #define debug_printf(fmt, ...) debug_noop() | 453 | #define debug_printf(fmt, ...) debug_noop() | |
455 | #define debug_println(fmt, ...) debug_noop() | 454 | #define debug_println(fmt, ...) debug_noop() | |
456 | #define debug_blank_line() debug_noop() | 455 | #define debug_blank_line() debug_noop() | |
457 | #define debug_vis_range(prefix, s, e, suffix) debug_noop() | 456 | #define debug_vis_range(prefix, s, e, suffix) debug_noop() | |
458 | #define debug_parser_state() debug_noop() | 457 | #define debug_parser_state() debug_noop() | |
459 | #define debug_psyms_stack(situation) debug_noop() | 458 | #define debug_psyms_stack(situation) debug_noop() | |
460 | #define debug_print_buf(name, buf) debug_noop() | 459 | #define debug_print_buf(name, buf) debug_noop() | |
461 | #define debug_buffers() debug_noop() | 460 | #define debug_buffers() debug_noop() | |
462 | #endif | 461 | #endif | |
463 | 462 | |||
464 | void register_typename(const char *); | 463 | void register_typename(const char *); | |
465 | int compute_code_indent(void); | 464 | int compute_code_indent(void); | |
466 | int compute_label_indent(void); | 465 | int compute_label_indent(void); | |
467 | int ind_add(int, const char *, size_t); | 466 | int ind_add(int, const char *, size_t); | |
468 | 467 | |||
469 | void inp_skip(void); | 468 | void inp_skip(void); | |
470 | char inp_next(void); | 469 | char inp_next(void); | |
471 | void finish_output(void); | 470 | void finish_output(void); | |
472 | 471 | |||
473 | lexer_symbol lexi(void); | 472 | lexer_symbol lexi(void); | |
474 | void diag(int, const char *, ...) __printflike(2, 3); | 473 | void diag(int, const char *, ...) __printflike(2, 3); | |
475 | void output_line(void); | 474 | void output_line(void); | |
476 | void inp_read_line(void); | 475 | void inp_read_line(void); | |
477 | void parse(parser_symbol); | 476 | void parse(parser_symbol); | |
478 | void process_comment(void); | 477 | void process_comment(void); | |
479 | void set_option(const char *, const char *); | 478 | void set_option(const char *, const char *); | |
480 | void load_profile_files(const char *); | 479 | void load_profile_files(const char *); | |
481 | 480 | |||
482 | void *nonnull(void *); | 481 | void *nonnull(void *); | |
483 | 482 | |||
484 | void buf_add_char(struct buffer *, char); | 483 | void buf_add_char(struct buffer *, char); | |
485 | void buf_add_chars(struct buffer *, const char *, size_t); | 484 | void buf_add_chars(struct buffer *, const char *, size_t); | |
486 | 485 | |||
487 | static inline bool | 486 | static inline bool | |
488 | ch_isalnum(char ch) | 487 | ch_isalnum(char ch) | |
489 | { | 488 | { | |
490 | return isalnum((unsigned char)ch) != 0; | 489 | return isalnum((unsigned char)ch) != 0; | |
491 | } | 490 | } | |
492 | 491 | |||
493 | static inline bool | 492 | static inline bool | |
494 | ch_isalpha(char ch) | 493 | ch_isalpha(char ch) | |
495 | { | 494 | { | |
496 | return isalpha((unsigned char)ch) != 0; | 495 | return isalpha((unsigned char)ch) != 0; | |
497 | } | 496 | } | |
498 | 497 | |||
499 | static inline bool | 498 | static inline bool | |
500 | ch_isblank(char ch) | 499 | ch_isblank(char ch) | |
501 | { | 500 | { | |
502 | return ch == ' ' || ch == '\t'; | 501 | return ch == ' ' || ch == '\t'; | |
503 | } | 502 | } | |
504 | 503 | |||
505 | static inline bool | 504 | static inline bool | |
506 | ch_isdigit(char ch) | 505 | ch_isdigit(char ch) | |
507 | { | 506 | { | |
508 | return '0' <= ch && ch <= '9'; | 507 | return '0' <= ch && ch <= '9'; | |
509 | } | 508 | } | |
510 | 509 | |||
511 | static inline bool | 510 | static inline bool | |
512 | ch_isspace(char ch) | 511 | ch_isspace(char ch) | |
513 | { | 512 | { | |
514 | return isspace((unsigned char)ch) != 0; | 513 | return isspace((unsigned char)ch) != 0; | |
515 | } | 514 | } | |
516 | 515 | |||
517 | static inline int | 516 | static inline int | |
518 | next_tab(int ind) | 517 | next_tab(int ind) | |
519 | { | 518 | { | |
520 | return ind - ind % opt.tabsize + opt.tabsize; | 519 | return ind - ind % opt.tabsize + opt.tabsize; | |
521 | } | 520 | } | |
522 | 521 | |||
523 | #ifdef debug | 522 | #ifdef debug | |
524 | void buf_terminate(struct buffer *); | 523 | void buf_terminate(struct buffer *); | |
525 | #else | 524 | #else | |
526 | #define buf_terminate(buf) debug_noop() | 525 | #define buf_terminate(buf) debug_noop() | |
527 | #endif | 526 | #endif | |
528 | 527 | |||
529 | static inline void | 528 | static inline void | |
530 | buf_clear(struct buffer *buf) | 529 | buf_clear(struct buffer *buf) | |
531 | { | 530 | { | |
532 | buf->len = 0; | 531 | buf->len = 0; | |
533 | buf_terminate(buf); | 532 | buf_terminate(buf); | |
534 | } | 533 | } |
--- src/usr.bin/indent/lexi.c 2023/06/10 16:43:56 1.225
+++ src/usr.bin/indent/lexi.c 2023/06/14 08:25:15 1.226
@@ -1,687 +1,685 @@ | @@ -1,687 +1,685 @@ | |||
1 | /* $NetBSD: lexi.c,v 1.225 2023/06/10 16:43:56 rillig Exp $ */ | 1 | /* $NetBSD: lexi.c,v 1.226 2023/06/14 08:25:15 rillig Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * SPDX-License-Identifier: BSD-4-Clause | 4 | * SPDX-License-Identifier: BSD-4-Clause | |
5 | * | 5 | * | |
6 | * Copyright (c) 1985 Sun Microsystems, Inc. | 6 | * Copyright (c) 1985 Sun Microsystems, Inc. | |
7 | * Copyright (c) 1980, 1993 | 7 | * Copyright (c) 1980, 1993 | |
8 | * The Regents of the University of California. All rights reserved. | 8 | * The Regents of the University of California. All rights reserved. | |
9 | * All rights reserved. | 9 | * All rights reserved. | |
10 | * | 10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | 11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | 12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | 13 | * are met: | |
14 | * 1. Redistributions of source code must retain the above copyright | 14 | * 1. Redistributions of source code must retain the above copyright | |
15 | * notice, this list of conditions and the following disclaimer. | 15 | * notice, this list of conditions and the following disclaimer. | |
16 | * 2. Redistributions in binary form must reproduce the above copyright | 16 | * 2. Redistributions in binary form must reproduce the above copyright | |
17 | * notice, this list of conditions and the following disclaimer in the | 17 | * notice, this list of conditions and the following disclaimer in the | |
18 | * documentation and/or other materials provided with the distribution. | 18 | * documentation and/or other materials provided with the distribution. | |
19 | * 3. All advertising materials mentioning features or use of this software | 19 | * 3. All advertising materials mentioning features or use of this software | |
20 | * must display the following acknowledgement: | 20 | * must display the following acknowledgement: | |
21 | * This product includes software developed by the University of | 21 | * This product includes software developed by the University of | |
22 | * California, Berkeley and its contributors. | 22 | * California, Berkeley and its contributors. | |
23 | * 4. Neither the name of the University nor the names of its contributors | 23 | * 4. Neither the name of the University nor the names of its contributors | |
24 | * may be used to endorse or promote products derived from this software | 24 | * may be used to endorse or promote products derived from this software | |
25 | * without specific prior written permission. | 25 | * without specific prior written permission. | |
26 | * | 26 | * | |
27 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 27 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
28 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 28 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
29 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 29 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
30 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 30 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
31 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 31 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
32 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 32 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
33 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 33 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
34 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 34 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
35 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 35 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
36 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 36 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
37 | * SUCH DAMAGE. | 37 | * SUCH DAMAGE. | |
38 | */ | 38 | */ | |
39 | 39 | |||
40 | #include <sys/cdefs.h> | 40 | #include <sys/cdefs.h> | |
41 | __RCSID("$NetBSD: lexi.c,v 1.225 2023/06/10 16:43:56 rillig Exp $"); | 41 | __RCSID("$NetBSD: lexi.c,v 1.226 2023/06/14 08:25:15 rillig Exp $"); | |
42 | 42 | |||
43 | #include <stdlib.h> | 43 | #include <stdlib.h> | |
44 | #include <string.h> | 44 | #include <string.h> | |
45 | 45 | |||
46 | #include "indent.h" | 46 | #include "indent.h" | |
47 | 47 | |||
48 | /* In lexi_alnum, this constant marks a type, independent of parentheses. */ | 48 | /* In lexi_alnum, this constant marks a type, independent of parentheses. */ | |
49 | #define lsym_type lsym_type_outside_parentheses | 49 | #define lsym_type lsym_type_outside_parentheses | |
50 | 50 | |||
51 | /* must be sorted alphabetically, is used in binary search */ | 51 | /* must be sorted alphabetically, is used in binary search */ | |
52 | static const struct keyword { | 52 | static const struct keyword { | |
53 | const char name[12]; | 53 | const char name[12]; | |
54 | lexer_symbol lsym; | 54 | lexer_symbol lsym; | |
55 | } keywords[] = { | 55 | } keywords[] = { | |
56 | {"_Bool", lsym_type}, | 56 | {"_Bool", lsym_type}, | |
57 | {"_Complex", lsym_type}, | 57 | {"_Complex", lsym_type}, | |
58 | {"_Imaginary", lsym_type}, | 58 | {"_Imaginary", lsym_type}, | |
59 | {"auto", lsym_modifier}, | 59 | {"auto", lsym_modifier}, | |
60 | {"bool", lsym_type}, | 60 | {"bool", lsym_type}, | |
61 | {"break", lsym_word}, | 61 | {"break", lsym_word}, | |
62 | {"case", lsym_case}, | 62 | {"case", lsym_case}, | |
63 | {"char", lsym_type}, | 63 | {"char", lsym_type}, | |
64 | {"complex", lsym_type}, | 64 | {"complex", lsym_type}, | |
65 | {"const", lsym_modifier}, | 65 | {"const", lsym_modifier}, | |
66 | {"continue", lsym_word}, | 66 | {"continue", lsym_word}, | |
67 | {"default", lsym_default}, | 67 | {"default", lsym_default}, | |
68 | {"do", lsym_do}, | 68 | {"do", lsym_do}, | |
69 | {"double", lsym_type}, | 69 | {"double", lsym_type}, | |
70 | {"else", lsym_else}, | 70 | {"else", lsym_else}, | |
71 | {"enum", lsym_tag}, | 71 | {"enum", lsym_tag}, | |
72 | {"extern", lsym_modifier}, | 72 | {"extern", lsym_modifier}, | |
73 | {"float", lsym_type}, | 73 | {"float", lsym_type}, | |
74 | {"for", lsym_for}, | 74 | {"for", lsym_for}, | |
75 | {"goto", lsym_word}, | 75 | {"goto", lsym_word}, | |
76 | {"if", lsym_if}, | 76 | {"if", lsym_if}, | |
77 | {"imaginary", lsym_type}, | 77 | {"imaginary", lsym_type}, | |
78 | {"inline", lsym_modifier}, | 78 | {"inline", lsym_modifier}, | |
79 | {"int", lsym_type}, | 79 | {"int", lsym_type}, | |
80 | {"long", lsym_type}, | 80 | {"long", lsym_type}, | |
81 | {"offsetof", lsym_offsetof}, | 81 | {"offsetof", lsym_offsetof}, | |
82 | {"register", lsym_modifier}, | 82 | {"register", lsym_modifier}, | |
83 | {"restrict", lsym_word}, | 83 | {"restrict", lsym_word}, | |
84 | {"return", lsym_return}, | 84 | {"return", lsym_return}, | |
85 | {"short", lsym_type}, | 85 | {"short", lsym_type}, | |
86 | {"signed", lsym_type}, | 86 | {"signed", lsym_type}, | |
87 | {"sizeof", lsym_sizeof}, | 87 | {"sizeof", lsym_sizeof}, | |
88 | {"static", lsym_modifier}, | 88 | {"static", lsym_modifier}, | |
89 | {"struct", lsym_tag}, | 89 | {"struct", lsym_tag}, | |
90 | {"switch", lsym_switch}, | 90 | {"switch", lsym_switch}, | |
91 | {"typedef", lsym_typedef}, | 91 | {"typedef", lsym_typedef}, | |
92 | {"union", lsym_tag}, | 92 | {"union", lsym_tag}, | |
93 | {"unsigned", lsym_type}, | 93 | {"unsigned", lsym_type}, | |
94 | {"void", lsym_type}, | 94 | {"void", lsym_type}, | |
95 | {"volatile", lsym_modifier}, | 95 | {"volatile", lsym_modifier}, | |
96 | {"while", lsym_while} | 96 | {"while", lsym_while} | |
97 | }; | 97 | }; | |
98 | 98 | |||
99 | static struct { | 99 | static struct { | |
100 | const char **items; | 100 | const char **items; | |
101 | unsigned int len; | 101 | unsigned int len; | |
102 | unsigned int cap; | 102 | unsigned int cap; | |
103 | } typenames; | 103 | } typenames; | |
104 | 104 | |||
105 | /*- | 105 | /*- | |
106 | * The transition table below was rewritten by hand from lx's output, given | 106 | * The transition table below was rewritten by hand from lx's output, given | |
107 | * the following definitions. lx is Katherine Flavel's lexer generator. | 107 | * the following definitions. lx is Katherine Flavel's lexer generator. | |
108 | * | 108 | * | |
109 | * O = /[0-7]/; D = /[0-9]/; NZ = /[1-9]/; | 109 | * O = /[0-7]/; D = /[0-9]/; NZ = /[1-9]/; | |
110 | * H = /[a-f0-9]/i; B = /[0-1]/; HP = /0x/i; | 110 | * H = /[a-f0-9]/i; B = /[0-1]/; HP = /0x/i; | |
111 | * BP = /0b/i; E = /e[+\-]?/i D+; P = /p[+\-]?/i D+; | 111 | * BP = /0b/i; E = /e[+\-]?/i D+; P = /p[+\-]?/i D+; | |
112 | * FS = /[fl]/i; IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?; | 112 | * FS = /[fl]/i; IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?; | |
113 | * | 113 | * | |
114 | * D+ E FS? -> $float; | 114 | * D+ E FS? -> $float; | |
115 | * D* "." D+ E? FS? -> $float; | 115 | * D* "." D+ E? FS? -> $float; | |
116 | * D+ "." E? FS? -> $float; HP H+ IS? -> $int; | 116 | * D+ "." E? FS? -> $float; HP H+ IS? -> $int; | |
117 | * HP H+ P FS? -> $float; NZ D* IS? -> $int; | 117 | * HP H+ P FS? -> $float; NZ D* IS? -> $int; | |
118 | * HP H* "." H+ P FS? -> $float; "0" O* IS? -> $int; | 118 | * HP H* "." H+ P FS? -> $float; "0" O* IS? -> $int; | |
119 | * HP H+ "." P FS -> $float; BP B+ IS? -> $int; | 119 | * HP H+ "." P FS -> $float; BP B+ IS? -> $int; | |
120 | */ | 120 | */ | |
121 | /* INDENT OFF */ | 121 | /* INDENT OFF */ | |
122 | static const unsigned char lex_number_state[][26] = { | 122 | static const unsigned char lex_number_state[][26] = { | |
123 | /* examples: | 123 | /* examples: | |
124 | 00 | 124 | 00 | |
125 | s 0xx | 125 | s 0xx | |
126 | t 00xaa | 126 | t 00xaa | |
127 | a 11 101100xxa.. | 127 | a 11 101100xxa.. | |
128 | r 11ee0001101lbuuxx.a.pp | 128 | r 11ee0001101lbuuxx.a.pp | |
129 | t.01.e+008bLuxll0Ll.aa.p+0 | 129 | t.01.e+008bLuxll0Ll.aa.p+0 | |
130 | states: ABCDEFGHIJKLMNOPQRSTUVWXYZ */ | 130 | states: ABCDEFGHIJKLMNOPQRSTUVWXYZ */ | |
131 | [0] = "uuiifuufiuuiiuiiiiiuiuuuuu", /* (other) */ | 131 | [0] = "uuiifuufiuuiiuiiiiiuiuuuuu", /* (other) */ | |
132 | [1] = "CEIDEHHHIJQ U Q VUVVZZZ", /* 0 */ | 132 | [1] = "CEIDEHHHIJQ U Q VUVVZZZ", /* 0 */ | |
133 | [2] = "DEIDEHHHIJQ U Q VUVVZZZ", /* 1 */ | 133 | [2] = "DEIDEHHHIJQ U Q VUVVZZZ", /* 1 */ | |
134 | [3] = "DEIDEHHHIJ U VUVVZZZ", /* 2 3 4 5 6 7 */ | 134 | [3] = "DEIDEHHHIJ U VUVVZZZ", /* 2 3 4 5 6 7 */ | |
135 | [4] = "DEJDEHHHJJ U VUVVZZZ", /* 8 9 */ | 135 | [4] = "DEJDEHHHJJ U VUVVZZZ", /* 8 9 */ | |
136 | [5] = " U VUVV ", /* A a C c D d */ | 136 | [5] = " U VUVV ", /* A a C c D d */ | |
137 | [6] = " K U VUVV ", /* B b */ | 137 | [6] = " K U VUVV ", /* B b */ | |
138 | [7] = " FFF FF U VUVV ", /* E e */ | 138 | [7] = " FFF FF U VUVV ", /* E e */ | |
139 | [8] = " f f U VUVV f", /* F f */ | 139 | [8] = " f f U VUVV f", /* F f */ | |
140 | [9] = " LLf fL PR Li L f", /* L */ | 140 | [9] = " LLf fL PR Li L f", /* L */ | |
141 | [10] = " OOf fO S P O i O f", /* l */ | 141 | [10] = " OOf fO S P O i O f", /* l */ | |
142 | [11] = " FFX ", /* P p */ | 142 | [11] = " FFX ", /* P p */ | |
143 | [12] = " MM M i iiM M ", /* U u */ | 143 | [12] = " MM M i iiM M ", /* U u */ | |
144 | [13] = " N ", /* X x */ | 144 | [13] = " N ", /* X x */ | |
145 | [14] = " G Y ", /* + - */ | 145 | [14] = " G Y ", /* + - */ | |
146 | [15] = "B EE EE T W ", /* . */ | 146 | [15] = "B EE EE T W ", /* . */ | |
147 | /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ | 147 | /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ | |
148 | }; | 148 | }; | |
149 | /* INDENT ON */ | 149 | /* INDENT ON */ | |
150 | 150 | |||
151 | static const unsigned char lex_number_row[] = { | 151 | static const unsigned char lex_number_row[] = { | |
152 | ['0'] = 1, | 152 | ['0'] = 1, | |
153 | ['1'] = 2, | 153 | ['1'] = 2, | |
154 | ['2'] = 3, ['3'] = 3, ['4'] = 3, ['5'] = 3, ['6'] = 3, ['7'] = 3, | 154 | ['2'] = 3, ['3'] = 3, ['4'] = 3, ['5'] = 3, ['6'] = 3, ['7'] = 3, | |
155 | ['8'] = 4, ['9'] = 4, | 155 | ['8'] = 4, ['9'] = 4, | |
156 | ['A'] = 5, ['a'] = 5, ['C'] = 5, ['c'] = 5, ['D'] = 5, ['d'] = 5, | 156 | ['A'] = 5, ['a'] = 5, ['C'] = 5, ['c'] = 5, ['D'] = 5, ['d'] = 5, | |
157 | ['B'] = 6, ['b'] = 6, | 157 | ['B'] = 6, ['b'] = 6, | |
158 | ['E'] = 7, ['e'] = 7, | 158 | ['E'] = 7, ['e'] = 7, | |
159 | ['F'] = 8, ['f'] = 8, | 159 | ['F'] = 8, ['f'] = 8, | |
160 | ['L'] = 9, | 160 | ['L'] = 9, | |
161 | ['l'] = 10, | 161 | ['l'] = 10, | |
162 | ['P'] = 11, ['p'] = 11, | 162 | ['P'] = 11, ['p'] = 11, | |
163 | ['U'] = 12, ['u'] = 12, | 163 | ['U'] = 12, ['u'] = 12, | |
164 | ['X'] = 13, ['x'] = 13, | 164 | ['X'] = 13, ['x'] = 13, | |
165 | ['+'] = 14, ['-'] = 14, | 165 | ['+'] = 14, ['-'] = 14, | |
166 | ['.'] = 15, | 166 | ['.'] = 15, | |
167 | }; | 167 | }; | |
168 | 168 | |||
169 | 169 | |||
170 | static bool | 170 | static bool | |
171 | is_identifier_start(char ch) | 171 | is_identifier_start(char ch) | |
172 | { | 172 | { | |
173 | return ch_isalpha(ch) || ch == '_' || ch == '$'; | 173 | return ch_isalpha(ch) || ch == '_' || ch == '$'; | |
174 | } | 174 | } | |
175 | 175 | |||
176 | static bool | 176 | static bool | |
177 | is_identifier_part(char ch) | 177 | is_identifier_part(char ch) | |
178 | { | 178 | { | |
179 | return ch_isalnum(ch) || ch == '_' || ch == '$'; | 179 | return ch_isalnum(ch) || ch == '_' || ch == '$'; | |
180 | } | 180 | } | |
181 | 181 | |||
182 | static void | 182 | static void | |
183 | token_add_char(char ch) | 183 | token_add_char(char ch) | |
184 | { | 184 | { | |
185 | buf_add_char(&token, ch); | 185 | buf_add_char(&token, ch); | |
186 | } | 186 | } | |
187 | 187 | |||
188 | static void | 188 | static void | |
189 | lex_number(void) | 189 | lex_number(void) | |
190 | { | 190 | { | |
191 | for (unsigned char s = 'A'; s != 'f' && s != 'i' && s != 'u';) { | 191 | for (unsigned char s = 'A'; s != 'f' && s != 'i' && s != 'u';) { | |
192 | unsigned char ch = (unsigned char)inp_p[0]; | 192 | unsigned char ch = (unsigned char)inp_p[0]; | |
193 | if (ch == '\\' && inp_p[1] == '\n') { | 193 | if (ch == '\\' && inp_p[1] == '\n') { | |
194 | inp_p++; | 194 | inp_p++; | |
195 | inp_skip(); | 195 | inp_skip(); | |
196 | line_no++; | 196 | line_no++; | |
197 | continue; | 197 | continue; | |
198 | } | 198 | } | |
199 | if (ch >= array_length(lex_number_row) | 199 | if (ch >= array_length(lex_number_row) | |
200 | || lex_number_row[ch] == 0) | 200 | || lex_number_row[ch] == 0) | |
201 | break; | 201 | break; | |
202 | 202 | |||
203 | unsigned char row = lex_number_row[ch]; | 203 | unsigned char row = lex_number_row[ch]; | |
204 | if (lex_number_state[row][s - 'A'] == ' ') { | 204 | if (lex_number_state[row][s - 'A'] == ' ') { | |
205 | /*- | 205 | /*- | |
206 | * lex_number_state[0][s - 'A'] now indicates the type: | 206 | * lex_number_state[0][s - 'A'] now indicates the type: | |
207 | * f = floating, i = integer, u = unknown | 207 | * f = floating, i = integer, u = unknown | |
208 | */ | 208 | */ | |
209 | return; | 209 | return; | |
210 | } | 210 | } | |
211 | 211 | |||
212 | s = lex_number_state[row][s - 'A']; | 212 | s = lex_number_state[row][s - 'A']; | |
213 | token_add_char(inp_next()); | 213 | token_add_char(inp_next()); | |
214 | } | 214 | } | |
215 | } | 215 | } | |
216 | 216 | |||
217 | static void | 217 | static void | |
218 | lex_word(void) | 218 | lex_word(void) | |
219 | { | 219 | { | |
220 | for (;;) { | 220 | for (;;) { | |
221 | if (is_identifier_part(inp_p[0])) | 221 | if (is_identifier_part(inp_p[0])) | |
222 | token_add_char(*inp_p++); | 222 | token_add_char(*inp_p++); | |
223 | else if (inp_p[0] == '\\' && inp_p[1] == '\n') { | 223 | else if (inp_p[0] == '\\' && inp_p[1] == '\n') { | |
224 | inp_p++; | 224 | inp_p++; | |
225 | inp_skip(); | 225 | inp_skip(); | |
226 | line_no++; | 226 | line_no++; | |
227 | } else | 227 | } else | |
228 | return; | 228 | return; | |
229 | } | 229 | } | |
230 | } | 230 | } | |
231 | 231 | |||
232 | static void | 232 | static void | |
233 | lex_char_or_string(void) | 233 | lex_char_or_string(void) | |
234 | { | 234 | { | |
235 | for (char delim = token.s[token.len - 1];;) { | 235 | for (char delim = token.s[token.len - 1];;) { | |
236 | if (inp_p[0] == '\n') { | 236 | if (inp_p[0] == '\n') { | |
237 | diag(1, "Unterminated literal"); | 237 | diag(1, "Unterminated literal"); | |
238 | return; | 238 | return; | |
239 | } | 239 | } | |
240 | 240 | |||
241 | token_add_char(*inp_p++); | 241 | token_add_char(*inp_p++); | |
242 | if (token.s[token.len - 1] == delim) | 242 | if (token.s[token.len - 1] == delim) | |
243 | return; | 243 | return; | |
244 | 244 | |||
245 | if (token.s[token.len - 1] == '\\') { | 245 | if (token.s[token.len - 1] == '\\') { | |
246 | if (inp_p[0] == '\n') | 246 | if (inp_p[0] == '\n') | |
247 | ++line_no; | 247 | ++line_no; | |
248 | token_add_char(inp_next()); | 248 | token_add_char(inp_next()); | |
249 | } | 249 | } | |
250 | } | 250 | } | |
251 | } | 251 | } | |
252 | 252 | |||
253 | /* Guess whether the current token is a declared type. */ | 253 | /* Guess whether the current token is a declared type. */ | |
254 | static bool | 254 | static bool | |
255 | probably_typename(void) | 255 | probably_typename(void) | |
256 | { | 256 | { | |
257 | if (ps.prev_lsym == lsym_modifier) | 257 | if (ps.prev_lsym == lsym_modifier) | |
258 | return true; | 258 | return true; | |
259 | if (ps.in_init) | 259 | if (ps.in_init) | |
260 | return false; | 260 | return false; | |
261 | if (ps.in_stmt_or_decl) /* XXX: this condition looks incorrect */ | 261 | if (ps.in_stmt_or_decl) /* XXX: this condition looks incorrect */ | |
262 | return false; | 262 | return false; | |
263 | if (ps.prev_lsym == lsym_semicolon | 263 | if (ps.prev_lsym == lsym_semicolon | |
264 | || ps.prev_lsym == lsym_lbrace | 264 | || ps.prev_lsym == lsym_lbrace | |
265 | || ps.prev_lsym == lsym_rbrace) { | 265 | || ps.prev_lsym == lsym_rbrace) { | |
266 | if (inp_p[0] == '*' && inp_p[1] != '=') | 266 | if (inp_p[0] == '*' && inp_p[1] != '=') | |
267 | return true; | 267 | return true; | |
268 | /* XXX: is_identifier_start */ | 268 | /* XXX: is_identifier_start */ | |
269 | if (ch_isalpha(inp_p[0])) | 269 | if (ch_isalpha(inp_p[0])) | |
270 | return true; | 270 | return true; | |
271 | } | 271 | } | |
272 | return false; | 272 | return false; | |
273 | } | 273 | } | |
274 | 274 | |||
275 | static int | 275 | static int | |
276 | bsearch_typenames(const char *key) | 276 | bsearch_typenames(const char *key) | |
277 | { | 277 | { | |
278 | const char **arr = typenames.items; | 278 | const char **arr = typenames.items; | |
279 | unsigned lo = 0; | 279 | unsigned lo = 0; | |
280 | unsigned hi = typenames.len; | 280 | unsigned hi = typenames.len; | |
281 | 281 | |||
282 | while (lo < hi) { | 282 | while (lo < hi) { | |
283 | unsigned mid = (lo + hi) / 2; | 283 | unsigned mid = (lo + hi) / 2; | |
284 | int cmp = strcmp(arr[mid], key); | 284 | int cmp = strcmp(arr[mid], key); | |
285 | if (cmp < 0) | 285 | if (cmp < 0) | |
286 | lo = mid + 1; | 286 | lo = mid + 1; | |
287 | else if (cmp > 0) | 287 | else if (cmp > 0) | |
288 | hi = mid; | 288 | hi = mid; | |
289 | else | 289 | else | |
290 | return (int)mid; | 290 | return (int)mid; | |
291 | } | 291 | } | |
292 | return -1 - (int)lo; | 292 | return -1 - (int)lo; | |
293 | } | 293 | } | |
294 | 294 | |||
295 | static bool | 295 | static bool | |
296 | is_typename(void) | 296 | is_typename(void) | |
297 | { | 297 | { | |
298 | if (opt.auto_typedefs && | 298 | if (opt.auto_typedefs && | |
299 | token.len >= 2 && memcmp(token.s + token.len - 2, "_t", 2) == 0) | 299 | token.len >= 2 && memcmp(token.s + token.len - 2, "_t", 2) == 0) | |
300 | return true; | 300 | return true; | |
301 | 301 | |||
302 | return bsearch_typenames(token.s) >= 0; | 302 | return bsearch_typenames(token.s) >= 0; | |
303 | } | 303 | } | |
304 | 304 | |||
305 | void | 305 | void | |
306 | register_typename(const char *name) | 306 | register_typename(const char *name) | |
307 | { | 307 | { | |
308 | if (typenames.len >= typenames.cap) { | 308 | if (typenames.len >= typenames.cap) { | |
309 | typenames.cap = 16 + 2 * typenames.cap; | 309 | typenames.cap = 16 + 2 * typenames.cap; | |
310 | typenames.items = nonnull(realloc(typenames.items, | 310 | typenames.items = nonnull(realloc(typenames.items, | |
311 | sizeof(typenames.items[0]) * typenames.cap)); | 311 | sizeof(typenames.items[0]) * typenames.cap)); | |
312 | } | 312 | } | |
313 | 313 | |||
314 | int pos = bsearch_typenames(name); | 314 | int pos = bsearch_typenames(name); | |
315 | if (pos >= 0) | 315 | if (pos >= 0) | |
316 | return; /* already in the list */ | 316 | return; /* already in the list */ | |
317 | 317 | |||
318 | pos = -1 - pos; | 318 | pos = -1 - pos; | |
319 | memmove(typenames.items + pos + 1, typenames.items + pos, | 319 | memmove(typenames.items + pos + 1, typenames.items + pos, | |
320 | sizeof(typenames.items[0]) * (typenames.len++ - (unsigned)pos)); | 320 | sizeof(typenames.items[0]) * (typenames.len++ - (unsigned)pos)); | |
321 | typenames.items[pos] = nonnull(strdup(name)); | 321 | typenames.items[pos] = nonnull(strdup(name)); | |
322 | } | 322 | } | |
323 | 323 | |||
324 | static int | 324 | static int | |
325 | cmp_keyword_by_name(const void *key, const void *elem) | 325 | cmp_keyword_by_name(const void *key, const void *elem) | |
326 | { | 326 | { | |
327 | return strcmp(key, ((const struct keyword *)elem)->name); | 327 | return strcmp(key, ((const struct keyword *)elem)->name); | |
328 | } | 328 | } | |
329 | 329 | |||
330 | /* | 330 | /* | |
331 | * Looking at something like 'function_name(...)' in a line, guess whether | 331 | * Looking at something like 'function_name(...)' in a line, guess whether | |
332 | * this starts a function definition or a declaration. | 332 | * this starts a function definition or a declaration. | |
333 | */ | 333 | */ | |
334 | static bool | 334 | static bool | |
335 | probably_function_definition(void) | 335 | probably_function_definition(void) | |
336 | { | 336 | { | |
337 | int paren_level = 0; | 337 | int paren_level = 0; | |
338 | for (const char *p = inp_p; *p != '\n'; p++) { | 338 | for (const char *p = inp_p; *p != '\n'; p++) { | |
339 | if (*p == '(') | 339 | if (*p == '(') | |
340 | paren_level++; | 340 | paren_level++; | |
341 | if (*p == ')' && --paren_level == 0) { | 341 | if (*p == ')' && --paren_level == 0) { | |
342 | p++; | 342 | p++; | |
343 | 343 | |||
344 | while (*p != '\n' | 344 | while (*p != '\n' | |
345 | && (ch_isspace(*p) || is_identifier_part(*p))) | 345 | && (ch_isspace(*p) || is_identifier_part(*p))) | |
346 | p++; /* '__dead' or '__unused' */ | 346 | p++; /* '__dead' or '__unused' */ | |
347 | 347 | |||
348 | if (*p == '\n') /* func(...) */ | 348 | if (*p == '\n') /* func(...) */ | |
349 | break; | 349 | break; | |
350 | if (*p == ';') /* func(...); */ | 350 | if (*p == ';') /* func(...); */ | |
351 | return false; | 351 | return false; | |
352 | if (*p == ',') /* double abs(), pi; */ | 352 | if (*p == ',') /* double abs(), pi; */ | |
353 | return false; | 353 | return false; | |
354 | if (*p == '(') /* func(...) __attribute__((...)) */ | 354 | if (*p == '(') /* func(...) __attribute__((...)) */ | |
355 | paren_level++; /* func(...) __printflike(...) | 355 | paren_level++; /* func(...) __printflike(...) | |
356 | */ | 356 | */ | |
357 | else | 357 | else | |
358 | break; /* func(...) { ... */ | 358 | break; /* func(...) { ... */ | |
359 | } | 359 | } | |
360 | 360 | |||
361 | if (paren_level == 1 && p[0] == '*' && p[1] == ',') | 361 | if (paren_level == 1 && p[0] == '*' && p[1] == ',') | |
362 | return false; | 362 | return false; | |
363 | } | 363 | } | |
364 | 364 | |||
365 | /* To further reduce the cases where indent wrongly treats an | 365 | /* To further reduce the cases where indent wrongly treats an | |
366 | * incomplete function declaration as a function definition, thus | 366 | * incomplete function declaration as a function definition, thus | |
367 | * adding a newline before the function name, it may be worth looking | 367 | * adding a newline before the function name, it may be worth looking | |
368 | * for parameter names, as these are often omitted in function | 368 | * for parameter names, as these are often omitted in function | |
369 | * declarations and only included in function definitions. Or just | 369 | * declarations and only included in function definitions. Or just | |
370 | * increase the lookahead to more than just the current line of input, | 370 | * increase the lookahead to more than just the current line of input, | |
371 | * until the next '{'. */ | 371 | * until the next '{'. */ | |
372 | return true; | 372 | return true; | |
373 | } | 373 | } | |
374 | 374 | |||
375 | static lexer_symbol | 375 | static lexer_symbol | |
376 | lexi_alnum(void) | 376 | lexi_alnum(void) | |
377 | { | 377 | { | |
378 | if (ch_isdigit(inp_p[0]) || | 378 | if (ch_isdigit(inp_p[0]) || | |
379 | (inp_p[0] == '.' && ch_isdigit(inp_p[1]))) { | 379 | (inp_p[0] == '.' && ch_isdigit(inp_p[1]))) { | |
380 | lex_number(); | 380 | lex_number(); | |
381 | } else if (is_identifier_start(inp_p[0])) { | 381 | } else if (is_identifier_start(inp_p[0])) { | |
382 | lex_word(); | 382 | lex_word(); | |
383 | 383 | |||
384 | if (token.len == 1 && token.s[0] == 'L' && | 384 | if (token.len == 1 && token.s[0] == 'L' && | |
385 | (inp_p[0] == '"' || inp_p[0] == '\'')) { | 385 | (inp_p[0] == '"' || inp_p[0] == '\'')) { | |
386 | token_add_char(*inp_p++); | 386 | token_add_char(*inp_p++); | |
387 | lex_char_or_string(); | 387 | lex_char_or_string(); | |
388 | ps.next_unary = false; | 388 | ps.next_unary = false; | |
389 | return lsym_word; | 389 | return lsym_word; | |
390 | } | 390 | } | |
391 | } else | 391 | } else | |
392 | return lsym_eof; /* just as a placeholder */ | 392 | return lsym_eof; /* just as a placeholder */ | |
393 | 393 | |||
394 | while (ch_isblank(inp_p[0])) | 394 | while (ch_isblank(inp_p[0])) | |
395 | inp_p++; | 395 | inp_p++; | |
396 | 396 | |||
397 | ps.next_unary = ps.prev_lsym == lsym_tag | 397 | ps.next_unary = ps.prev_lsym == lsym_tag | |
398 | || ps.prev_lsym == lsym_typedef; | 398 | || ps.prev_lsym == lsym_typedef; | |
399 | 399 | |||
400 | if (ps.prev_lsym == lsym_tag && ps.nparen == 0) | 400 | if (ps.prev_lsym == lsym_tag && ps.nparen == 0) | |
401 | return lsym_type_outside_parentheses; | 401 | return lsym_type_outside_parentheses; | |
402 | 402 | |||
403 | token_add_char('\0'); | 403 | token_add_char('\0'); | |
404 | token.len--; | 404 | token.len--; | |
405 | const struct keyword *kw = bsearch(token.s, keywords, | 405 | const struct keyword *kw = bsearch(token.s, keywords, | |
406 | array_length(keywords), sizeof(keywords[0]), cmp_keyword_by_name); | 406 | array_length(keywords), sizeof(keywords[0]), cmp_keyword_by_name); | |
407 | lexer_symbol lsym = lsym_word; | 407 | lexer_symbol lsym = lsym_word; | |
408 | if (kw != NULL) { | 408 | if (kw != NULL) { | |
409 | if (kw->lsym == lsym_type) | 409 | if (kw->lsym == lsym_type) | |
410 | lsym = lsym_type_in_parentheses; | 410 | lsym = lsym_type_in_parentheses; | |
411 | ps.next_unary = true; | 411 | ps.next_unary = true; | |
412 | if (kw->lsym == lsym_tag || kw->lsym == lsym_type) | 412 | if (kw->lsym == lsym_tag || kw->lsym == lsym_type) | |
413 | goto found_typename; | 413 | goto found_typename; | |
414 | return kw->lsym; | 414 | return kw->lsym; | |
415 | } | 415 | } | |
416 | 416 | |||
417 | if (is_typename()) { | 417 | if (is_typename()) { | |
418 | lsym = lsym_type_in_parentheses; | 418 | lsym = lsym_type_in_parentheses; | |
419 | ps.next_unary = true; | 419 | ps.next_unary = true; | |
420 | found_typename: | 420 | found_typename: | |
421 | if (ps.nparen > 0) { | 421 | if (ps.nparen > 0) { | |
422 | /* inside parentheses: cast, param list, offsetof or | 422 | /* inside parentheses: cast, param list, offsetof or | |
423 | * sizeof */ | 423 | * sizeof */ | |
424 | if (ps.paren[ps.nparen - 1].cast == cast_unknown) | 424 | if (ps.paren[ps.nparen - 1].cast == cast_unknown) | |
425 | ps.paren[ps.nparen - 1].cast = cast_maybe; | 425 | ps.paren[ps.nparen - 1].cast = cast_maybe; | |
426 | } | 426 | } | |
427 | if (ps.prev_lsym != lsym_period | 427 | if (ps.prev_lsym != lsym_period | |
428 | && ps.prev_lsym != lsym_unary_op) { | 428 | && ps.prev_lsym != lsym_unary_op) { | |
429 | if (kw != NULL && kw->lsym == lsym_tag) | 429 | if (kw != NULL && kw->lsym == lsym_tag) | |
430 | return lsym_tag; | 430 | return lsym_tag; | |
431 | if (ps.nparen == 0) | 431 | if (ps.nparen == 0) | |
432 | return lsym_type_outside_parentheses; | 432 | return lsym_type_outside_parentheses; | |
433 | } | 433 | } | |
434 | } | 434 | } | |
435 | 435 | |||
436 | if (inp_p[0] == '(' && ps.psyms.top <= 1 && ps.ind_level == 0 && | 436 | if (inp_p[0] == '(' && ps.psyms.top <= 1 && ps.ind_level == 0 && | |
437 | !ps.in_func_def_params && !ps.in_init) { | 437 | !ps.in_func_def_params && !ps.in_init) { | |
438 | 438 | |||
439 | if (ps.nparen == 0 && probably_function_definition()) { | 439 | if (ps.nparen == 0 && probably_function_definition()) { | |
440 | ps.line_has_func_def = true; | 440 | ps.line_has_func_def = true; | |
441 | if (ps.in_decl) | 441 | if (ps.in_decl) | |
442 | ps.in_func_def_params = true; | 442 | ps.in_func_def_params = true; | |
443 | return lsym_funcname; | 443 | return lsym_funcname; | |
444 | } | 444 | } | |
445 | 445 | |||
446 | } else if (ps.nparen == 0 && probably_typename()) { | 446 | } else if (ps.nparen == 0 && probably_typename()) { | |
447 | ps.next_unary = true; | 447 | ps.next_unary = true; | |
448 | return lsym_type_outside_parentheses; | 448 | return lsym_type_outside_parentheses; | |
449 | } | 449 | } | |
450 | 450 | |||
451 | return lsym; | 451 | return lsym; | |
452 | } | 452 | } | |
453 | 453 | |||
454 | static bool | 454 | static bool | |
455 | is_asterisk_pointer(void) | 455 | is_asterisk_pointer(void) | |
456 | { | 456 | { | |
457 | if (inp_p[strspn(inp_p, "* \t")] == ')') | 457 | if (inp_p[strspn(inp_p, "* \t")] == ')') | |
458 | return true; | 458 | return true; | |
459 | if (ps.next_unary || ps.in_func_def_params) | 459 | if (ps.next_unary || ps.in_func_def_params) | |
460 | return true; | 460 | return true; | |
461 | if (ps.prev_lsym == lsym_word || | 461 | if (ps.prev_lsym == lsym_word || | |
462 | ps.prev_lsym == lsym_rparen || | 462 | ps.prev_lsym == lsym_rparen || | |
463 | ps.prev_lsym == lsym_rbracket) | 463 | ps.prev_lsym == lsym_rbracket) | |
464 | return false; | 464 | return false; | |
465 | return ps.in_decl && ps.nparen > 0; | 465 | return ps.in_decl && ps.nparen > 0; | |
466 | } | 466 | } | |
467 | 467 | |||
468 | static bool | 468 | static bool | |
469 | probably_in_function_definition(void) | 469 | probably_in_function_definition(void) | |
470 | { | 470 | { | |
471 | for (const char *tp = inp_p; *tp != '\n';) { | 471 | for (const char *tp = inp_p; *tp != '\n';) { | |
472 | if (ch_isspace(*tp)) | 472 | if (ch_isspace(*tp)) | |
473 | tp++; | 473 | tp++; | |
474 | else if (is_identifier_start(*tp)) { | 474 | else if (is_identifier_start(*tp)) { | |
475 | tp++; | 475 | tp++; | |
476 | while (is_identifier_part(*tp)) | 476 | while (is_identifier_part(*tp)) | |
477 | tp++; | 477 | tp++; | |
478 | } else | 478 | } else | |
479 | return *tp == '('; | 479 | return *tp == '('; | |
480 | } | 480 | } | |
481 | return false; | 481 | return false; | |
482 | } | 482 | } | |
483 | 483 | |||
484 | static void | 484 | static void | |
485 | lex_asterisk_pointer(void) | 485 | lex_asterisk_pointer(void) | |
486 | { | 486 | { | |
487 | while (inp_p[0] == '*' || ch_isspace(inp_p[0])) { | 487 | while (inp_p[0] == '*' || ch_isspace(inp_p[0])) { | |
488 | if (inp_p[0] == '*') | 488 | if (inp_p[0] == '*') | |
489 | token_add_char('*'); | 489 | token_add_char('*'); | |
490 | inp_skip(); | 490 | inp_skip(); | |
491 | } | 491 | } | |
492 | 492 | |||
493 | if (ps.in_decl && probably_in_function_definition()) | 493 | if (ps.in_decl && probably_in_function_definition()) | |
494 | ps.line_has_func_def = true; | 494 | ps.line_has_func_def = true; | |
495 | } | 495 | } | |
496 | 496 | |||
497 | static bool | 497 | static bool | |
498 | skip(const char **pp, const char *s) | 498 | skip(const char **pp, const char *s) | |
499 | { | 499 | { | |
500 | size_t len = strlen(s); | 500 | size_t len = strlen(s); | |
501 | while (ch_isblank(**pp)) | 501 | while (ch_isblank(**pp)) | |
502 | (*pp)++; | 502 | (*pp)++; | |
503 | if (strncmp(*pp, s, len) == 0) { | 503 | if (strncmp(*pp, s, len) == 0) { | |
504 | *pp += len; | 504 | *pp += len; | |
505 | return true; | 505 | return true; | |
506 | } | 506 | } | |
507 | return false; | 507 | return false; | |
508 | } | 508 | } | |
509 | 509 | |||
510 | static void | 510 | static void | |
511 | lex_indent_comment(void) | 511 | lex_indent_comment(void) | |
512 | { | 512 | { | |
513 | const char *p = inp.s; | 513 | const char *p = inp.s; | |
514 | if (skip(&p, "/*") && skip(&p, "INDENT")) { | 514 | if (skip(&p, "/*") && skip(&p, "INDENT")) { | |
515 | enum indent_enabled enabled; | 515 | enum indent_enabled enabled; | |
516 | if (skip(&p, "ON") || *p == '*') | 516 | if (skip(&p, "ON") || *p == '*') | |
517 | enabled = indent_last_off_line; | 517 | enabled = indent_last_off_line; | |
518 | else if (skip(&p, "OFF")) | 518 | else if (skip(&p, "OFF")) | |
519 | enabled = indent_off; | 519 | enabled = indent_off; | |
520 | else | 520 | else | |
521 | return; | 521 | return; | |
522 | if (skip(&p, "*/\n")) { | 522 | if (skip(&p, "*/\n")) { | |
523 | if (lab.len > 0 || code.len > 0 || com.len > 0) | 523 | if (lab.len > 0 || code.len > 0 || com.len > 0) | |
524 | output_line(); | 524 | output_line(); | |
525 | indent_enabled = enabled; | 525 | indent_enabled = enabled; | |
526 | } | 526 | } | |
527 | } | 527 | } | |
528 | } | 528 | } | |
529 | 529 | |||
530 | /* Reads the next token, placing it in the global variable "token". */ | 530 | /* Reads the next token, placing it in the global variable "token". */ | |
531 | lexer_symbol | 531 | lexer_symbol | |
532 | lexi(void) | 532 | lexi(void) | |
533 | { | 533 | { | |
534 | buf_clear(&token); | 534 | buf_clear(&token); | |
535 | ps.curr_col_1 = ps.next_col_1; | |||
536 | ps.next_col_1 = false; | 535 | ps.next_col_1 = false; | |
537 | 536 | |||
538 | for (;;) { | 537 | for (;;) { | |
539 | if (ch_isblank(inp_p[0])) { | 538 | if (ch_isblank(inp_p[0])) | |
540 | ps.curr_col_1 = false; | |||
541 | inp_p++; | 539 | inp_p++; | |
542 | } else if (inp_p[0] == '\\' && inp_p[1] == '\n') { | 540 | else if (inp_p[0] == '\\' && inp_p[1] == '\n') { | |
543 | inp_p++; | 541 | inp_p++; | |
544 | inp_skip(); | 542 | inp_skip(); | |
545 | line_no++; | 543 | line_no++; | |
546 | } else | 544 | } else | |
547 | break; | 545 | break; | |
548 | } | 546 | } | |
549 | 547 | |||
550 | lexer_symbol alnum_lsym = lexi_alnum(); | 548 | lexer_symbol alnum_lsym = lexi_alnum(); | |
551 | if (alnum_lsym != lsym_eof) | 549 | if (alnum_lsym != lsym_eof) | |
552 | return alnum_lsym; | 550 | return alnum_lsym; | |
553 | 551 | |||
554 | /* Scan a non-alphanumeric token */ | 552 | /* Scan a non-alphanumeric token */ | |
555 | 553 | |||
556 | token_add_char(inp_next()); | 554 | token_add_char(inp_next()); | |
557 | 555 | |||
558 | lexer_symbol lsym; | 556 | lexer_symbol lsym; | |
559 | bool next_unary; | 557 | bool next_unary; | |
560 | 558 | |||
561 | switch (token.s[token.len - 1]) { | 559 | switch (token.s[token.len - 1]) { | |
562 | 560 | |||
563 | case '#': | 561 | case '#': | |
564 | lsym = lsym_preprocessing; | 562 | lsym = lsym_preprocessing; | |
565 | next_unary = ps.next_unary; | 563 | next_unary = ps.next_unary; | |
566 | break; | 564 | break; | |
567 | 565 | |||
568 | case '\n': | 566 | case '\n': | |
569 | /* if data has been exhausted, the '\n' is a dummy. */ | 567 | /* if data has been exhausted, the '\n' is a dummy. */ | |
570 | lsym = had_eof ? lsym_eof : lsym_newline; | 568 | lsym = had_eof ? lsym_eof : lsym_newline; | |
571 | next_unary = ps.next_unary; | 569 | next_unary = ps.next_unary; | |
572 | ps.next_col_1 = true; | 570 | ps.next_col_1 = true; | |
573 | break; | 571 | break; | |
574 | 572 | |||
575 | /* INDENT OFF */ | 573 | /* INDENT OFF */ | |
576 | case '(': lsym = lsym_lparen; next_unary = true; break; | 574 | case '(': lsym = lsym_lparen; next_unary = true; break; | |
577 | case ')': lsym = lsym_rparen; next_unary = false; break; | 575 | case ')': lsym = lsym_rparen; next_unary = false; break; | |
578 | case '[': lsym = lsym_lbracket; next_unary = true; break; | 576 | case '[': lsym = lsym_lbracket; next_unary = true; break; | |
579 | case ']': lsym = lsym_rbracket; next_unary = false; break; | 577 | case ']': lsym = lsym_rbracket; next_unary = false; break; | |
580 | case '{': lsym = lsym_lbrace; next_unary = true; break; | 578 | case '{': lsym = lsym_lbrace; next_unary = true; break; | |
581 | case '}': lsym = lsym_rbrace; next_unary = true; break; | 579 | case '}': lsym = lsym_rbrace; next_unary = true; break; | |
582 | case '.': lsym = lsym_period; next_unary = false; break; | 580 | case '.': lsym = lsym_period; next_unary = false; break; | |
583 | case '?': lsym = lsym_question; next_unary = true; break; | 581 | case '?': lsym = lsym_question; next_unary = true; break; | |
584 | case ',': lsym = lsym_comma; next_unary = true; break; | 582 | case ',': lsym = lsym_comma; next_unary = true; break; | |
585 | case ';': lsym = lsym_semicolon; next_unary = true; break; | 583 | case ';': lsym = lsym_semicolon; next_unary = true; break; | |
586 | /* INDENT ON */ | 584 | /* INDENT ON */ | |
587 | 585 | |||
588 | case '-': | 586 | case '-': | |
589 | case '+': | 587 | case '+': | |
590 | lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; | 588 | lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; | |
591 | next_unary = true; | 589 | next_unary = true; | |
592 | 590 | |||
593 | /* '++' or '--' */ | 591 | /* '++' or '--' */ | |
594 | if (inp_p[0] == token.s[token.len - 1]) { | 592 | if (inp_p[0] == token.s[token.len - 1]) { | |
595 | token_add_char(*inp_p++); | 593 | token_add_char(*inp_p++); | |
596 | if (ps.prev_lsym == lsym_word || | 594 | if (ps.prev_lsym == lsym_word || | |
597 | ps.prev_lsym == lsym_rparen || | 595 | ps.prev_lsym == lsym_rparen || | |
598 | ps.prev_lsym == lsym_rbracket) { | 596 | ps.prev_lsym == lsym_rbracket) { | |
599 | lsym = ps.next_unary | 597 | lsym = ps.next_unary | |
600 | ? lsym_unary_op : lsym_postfix_op; | 598 | ? lsym_unary_op : lsym_postfix_op; | |
601 | next_unary = false; | 599 | next_unary = false; | |
602 | } | 600 | } | |
603 | 601 | |||
604 | } else if (inp_p[0] == '=') { /* '+=' or '-=' */ | 602 | } else if (inp_p[0] == '=') { /* '+=' or '-=' */ | |
605 | token_add_char(*inp_p++); | 603 | token_add_char(*inp_p++); | |
606 | 604 | |||
607 | } else if (inp_p[0] == '>') { /* '->' */ | 605 | } else if (inp_p[0] == '>') { /* '->' */ | |
608 | token_add_char(*inp_p++); | 606 | token_add_char(*inp_p++); | |
609 | lsym = lsym_unary_op; | 607 | lsym = lsym_unary_op; | |
610 | next_unary = false; | 608 | next_unary = false; | |
611 | ps.want_blank = false; | 609 | ps.want_blank = false; | |
612 | } | 610 | } | |
613 | break; | 611 | break; | |
614 | 612 | |||
615 | case ':': | 613 | case ':': | |
616 | lsym = ps.quest_level > 0 | 614 | lsym = ps.quest_level > 0 | |
617 | ? (ps.quest_level--, lsym_question_colon) | 615 | ? (ps.quest_level--, lsym_question_colon) | |
618 | : ps.in_var_decl ? lsym_other_colon : lsym_label_colon; | 616 | : ps.in_var_decl ? lsym_other_colon : lsym_label_colon; | |
619 | next_unary = true; | 617 | next_unary = true; | |
620 | break; | 618 | break; | |
621 | 619 | |||
622 | case '*': | 620 | case '*': | |
623 | if (inp_p[0] == '=') { | 621 | if (inp_p[0] == '=') { | |
624 | token_add_char(*inp_p++); | 622 | token_add_char(*inp_p++); | |
625 | lsym = lsym_binary_op; | 623 | lsym = lsym_binary_op; | |
626 | } else if (is_asterisk_pointer()) { | 624 | } else if (is_asterisk_pointer()) { | |
627 | lex_asterisk_pointer(); | 625 | lex_asterisk_pointer(); | |
628 | lsym = lsym_unary_op; | 626 | lsym = lsym_unary_op; | |
629 | } else | 627 | } else | |
630 | lsym = lsym_binary_op; | 628 | lsym = lsym_binary_op; | |
631 | next_unary = true; | 629 | next_unary = true; | |
632 | break; | 630 | break; | |
633 | 631 | |||
634 | case '=': | 632 | case '=': | |
635 | if (ps.in_var_decl) | 633 | if (ps.in_var_decl) | |
636 | ps.in_init = true; | 634 | ps.in_init = true; | |
637 | if (inp_p[0] == '=') | 635 | if (inp_p[0] == '=') | |
638 | token_add_char(*inp_p++); | 636 | token_add_char(*inp_p++); | |
639 | lsym = lsym_binary_op; | 637 | lsym = lsym_binary_op; | |
640 | next_unary = true; | 638 | next_unary = true; | |
641 | break; | 639 | break; | |
642 | 640 | |||
643 | case '>': | 641 | case '>': | |
644 | case '<': | 642 | case '<': | |
645 | case '!': /* ops like <, <<, <=, !=, etc. */ | 643 | case '!': /* ops like <, <<, <=, !=, etc. */ | |
646 | if (inp_p[0] == '>' || inp_p[0] == '<' || inp_p[0] == '=') | 644 | if (inp_p[0] == '>' || inp_p[0] == '<' || inp_p[0] == '=') | |
647 | token_add_char(*inp_p++); | 645 | token_add_char(*inp_p++); | |
648 | if (inp_p[0] == '=') | 646 | if (inp_p[0] == '=') | |
649 | token_add_char(*inp_p++); | 647 | token_add_char(*inp_p++); | |
650 | lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; | 648 | lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; | |
651 | next_unary = true; | 649 | next_unary = true; | |
652 | break; | 650 | break; | |
653 | 651 | |||
654 | case '\'': | 652 | case '\'': | |
655 | case '"': | 653 | case '"': | |
656 | lex_char_or_string(); | 654 | lex_char_or_string(); | |
657 | lsym = lsym_word; | 655 | lsym = lsym_word; | |
658 | next_unary = false; | 656 | next_unary = false; | |
659 | break; | 657 | break; | |
660 | 658 | |||
661 | default: | 659 | default: | |
662 | if (token.s[token.len - 1] == '/' | 660 | if (token.s[token.len - 1] == '/' | |
663 | && (inp_p[0] == '*' || inp_p[0] == '/')) { | 661 | && (inp_p[0] == '*' || inp_p[0] == '/')) { | |
664 | enum indent_enabled prev = indent_enabled; | 662 | enum indent_enabled prev = indent_enabled; | |
665 | lex_indent_comment(); | 663 | lex_indent_comment(); | |
666 | if (prev == indent_on && indent_enabled == indent_off) | 664 | if (prev == indent_on && indent_enabled == indent_off) | |
667 | buf_clear(&out.indent_off_text); | 665 | buf_clear(&out.indent_off_text); | |
668 | token_add_char(*inp_p++); | 666 | token_add_char(*inp_p++); | |
669 | lsym = lsym_comment; | 667 | lsym = lsym_comment; | |
670 | next_unary = ps.next_unary; | 668 | next_unary = ps.next_unary; | |
671 | break; | 669 | break; | |
672 | } | 670 | } | |
673 | 671 | |||
674 | /* punctuation like '%', '&&', '/', '^', '||', '~' */ | 672 | /* punctuation like '%', '&&', '/', '^', '||', '~' */ | |
675 | lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; | 673 | lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; | |
676 | if (inp_p[0] == token.s[token.len - 1]) | 674 | if (inp_p[0] == token.s[token.len - 1]) | |
677 | token_add_char(*inp_p++), lsym = lsym_binary_op; | 675 | token_add_char(*inp_p++), lsym = lsym_binary_op; | |
678 | if (inp_p[0] == '=') | 676 | if (inp_p[0] == '=') | |
679 | token_add_char(*inp_p++), lsym = lsym_binary_op; | 677 | token_add_char(*inp_p++), lsym = lsym_binary_op; | |
680 | 678 | |||
681 | next_unary = true; | 679 | next_unary = true; | |
682 | } | 680 | } | |
683 | 681 | |||
684 | ps.next_unary = next_unary; | 682 | ps.next_unary = next_unary; | |
685 | 683 | |||
686 | return lsym; | 684 | return lsym; | |
687 | } | 685 | } |
--- src/usr.bin/indent/pr_comment.c 2023/06/10 16:43:56 1.161
+++ src/usr.bin/indent/pr_comment.c 2023/06/14 08:25:15 1.162
@@ -1,357 +1,357 @@ | @@ -1,357 +1,357 @@ | |||
1 | /* $NetBSD: pr_comment.c,v 1.161 2023/06/10 16:43:56 rillig Exp $ */ | 1 | /* $NetBSD: pr_comment.c,v 1.162 2023/06/14 08:25:15 rillig Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * SPDX-License-Identifier: BSD-4-Clause | 4 | * SPDX-License-Identifier: BSD-4-Clause | |
5 | * | 5 | * | |
6 | * Copyright (c) 1985 Sun Microsystems, Inc. | 6 | * Copyright (c) 1985 Sun Microsystems, Inc. | |
7 | * Copyright (c) 1980, 1993 | 7 | * Copyright (c) 1980, 1993 | |
8 | * The Regents of the University of California. All rights reserved. | 8 | * The Regents of the University of California. All rights reserved. | |
9 | * All rights reserved. | 9 | * All rights reserved. | |
10 | * | 10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | 11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | 12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | 13 | * are met: | |
14 | * 1. Redistributions of source code must retain the above copyright | 14 | * 1. Redistributions of source code must retain the above copyright | |
15 | * notice, this list of conditions and the following disclaimer. | 15 | * notice, this list of conditions and the following disclaimer. | |
16 | * 2. Redistributions in binary form must reproduce the above copyright | 16 | * 2. Redistributions in binary form must reproduce the above copyright | |
17 | * notice, this list of conditions and the following disclaimer in the | 17 | * notice, this list of conditions and the following disclaimer in the | |
18 | * documentation and/or other materials provided with the distribution. | 18 | * documentation and/or other materials provided with the distribution. | |
19 | * 3. All advertising materials mentioning features or use of this software | 19 | * 3. All advertising materials mentioning features or use of this software | |
20 | * must display the following acknowledgement: | 20 | * must display the following acknowledgement: | |
21 | * This product includes software developed by the University of | 21 | * This product includes software developed by the University of | |
22 | * California, Berkeley and its contributors. | 22 | * California, Berkeley and its contributors. | |
23 | * 4. Neither the name of the University nor the names of its contributors | 23 | * 4. Neither the name of the University nor the names of its contributors | |
24 | * may be used to endorse or promote products derived from this software | 24 | * may be used to endorse or promote products derived from this software | |
25 | * without specific prior written permission. | 25 | * without specific prior written permission. | |
26 | * | 26 | * | |
27 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 27 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
28 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 28 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
29 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 29 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
30 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 30 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
31 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 31 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
32 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 32 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
33 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 33 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
34 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 34 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
35 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 35 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
36 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 36 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
37 | * SUCH DAMAGE. | 37 | * SUCH DAMAGE. | |
38 | */ | 38 | */ | |
39 | 39 | |||
40 | #include <sys/cdefs.h> | 40 | #include <sys/cdefs.h> | |
41 | __RCSID("$NetBSD: pr_comment.c,v 1.161 2023/06/10 16:43:56 rillig Exp $"); | 41 | __RCSID("$NetBSD: pr_comment.c,v 1.162 2023/06/14 08:25:15 rillig Exp $"); | |
42 | 42 | |||
43 | #include <string.h> | 43 | #include <string.h> | |
44 | 44 | |||
45 | #include "indent.h" | 45 | #include "indent.h" | |
46 | 46 | |||
47 | static void | 47 | static void | |
48 | com_add_char(char ch) | 48 | com_add_char(char ch) | |
49 | { | 49 | { | |
50 | buf_add_char(&com, ch); | 50 | buf_add_char(&com, ch); | |
51 | } | 51 | } | |
52 | 52 | |||
53 | static void | 53 | static void | |
54 | com_add_delim(void) | 54 | com_add_delim(void) | |
55 | { | 55 | { | |
56 | if (opt.star_comment_cont) | 56 | if (opt.star_comment_cont) | |
57 | buf_add_chars(&com, " * ", 3); | 57 | buf_add_chars(&com, " * ", 3); | |
58 | } | 58 | } | |
59 | 59 | |||
60 | static bool | 60 | static bool | |
61 | fits_in_one_line(int com_ind, int max_line_length) | 61 | fits_in_one_line(int com_ind, int max_line_length) | |
62 | { | 62 | { | |
63 | for (const char *start = inp_p, *p = start; *p != '\n'; p++) { | 63 | for (const char *start = inp_p, *p = start; *p != '\n'; p++) { | |
64 | if (p[0] == '*' && p[1] == '/') { | 64 | if (p[0] == '*' && p[1] == '/') { | |
65 | while (p - inp_p >= 2 | 65 | while (p - inp_p >= 2 | |
66 | && ch_isblank(p[-1]) | 66 | && ch_isblank(p[-1]) | |
67 | && ch_isblank(p[-2])) | 67 | && ch_isblank(p[-2])) | |
68 | p--; | 68 | p--; | |
69 | int len = ind_add(com_ind + 3, | 69 | int len = ind_add(com_ind + 3, | |
70 | start, (size_t)(p - start)); | 70 | start, (size_t)(p - start)); | |
71 | len += p == start || ch_isblank(p[-1]) ? 2 : 3; | 71 | len += p == start || ch_isblank(p[-1]) ? 2 : 3; | |
72 | return len <= max_line_length; | 72 | return len <= max_line_length; | |
73 | } | 73 | } | |
74 | } | 74 | } | |
75 | return false; | 75 | return false; | |
76 | } | 76 | } | |
77 | 77 | |||
78 | static void | 78 | static void | |
79 | analyze_comment(bool *p_may_wrap, bool *p_delim, | 79 | analyze_comment(bool *p_may_wrap, bool *p_delim, | |
80 | int *p_ind, int *p_line_length) | 80 | int *p_ind, int *p_line_length) | |
81 | { | 81 | { | |
82 | bool may_wrap = true; | 82 | bool may_wrap = true; | |
83 | bool delim = false; | 83 | bool delim = false; | |
84 | int ind; | 84 | int ind; | |
85 | int line_length = opt.max_line_length; | 85 | int line_length = opt.max_line_length; | |
86 | 86 | |||
87 | if (ps.curr_col_1 && !opt.format_col1_comments) { | 87 | if (inp_p - inp.s == 2 && !opt.format_col1_comments) { | |
88 | may_wrap = false; | 88 | may_wrap = false; | |
89 | ind = 0; | 89 | ind = 0; | |
90 | } else { | 90 | } else { | |
91 | if (inp_p[0] == '-' || inp_p[0] == '*' || | 91 | if (inp_p[0] == '-' || inp_p[0] == '*' || | |
92 | token.s[token.len - 1] == '/' || | 92 | token.s[token.len - 1] == '/' || | |
93 | (inp_p[0] == '\n' && !opt.format_block_comments)) | 93 | (inp_p[0] == '\n' && !opt.format_block_comments)) | |
94 | may_wrap = false; | 94 | may_wrap = false; | |
95 | if (code.len == 0 && inp_p[strspn(inp_p, "*")] == '\n') | 95 | if (code.len == 0 && inp_p[strspn(inp_p, "*")] == '\n') | |
96 | out.line_kind = lk_block_comment; | 96 | out.line_kind = lk_block_comment; | |
97 | 97 | |||
98 | if (com.len > 0) | 98 | if (com.len > 0) | |
99 | output_line(); | 99 | output_line(); | |
100 | if (lab.len == 0 && code.len == 0) { | 100 | if (lab.len == 0 && code.len == 0) { | |
101 | ind = (ps.ind_level - opt.unindent_displace) | 101 | ind = (ps.ind_level - opt.unindent_displace) | |
102 | * opt.indent_size; | 102 | * opt.indent_size; | |
103 | if (ind <= 0) | 103 | if (ind <= 0) | |
104 | ind = opt.format_col1_comments ? 0 : 1; | 104 | ind = opt.format_col1_comments ? 0 : 1; | |
105 | line_length = opt.block_comment_max_line_length; | 105 | line_length = opt.block_comment_max_line_length; | |
106 | if (may_wrap && inp_p[0] == '\n') | 106 | if (may_wrap && inp_p[0] == '\n') | |
107 | delim = true; | 107 | delim = true; | |
108 | if (may_wrap && opt.comment_delimiter_on_blank_line) | 108 | if (may_wrap && opt.comment_delimiter_on_blank_line) | |
109 | delim = true; | 109 | delim = true; | |
110 | } else { | 110 | } else { | |
111 | int target_ind = code.len > 0 | 111 | int target_ind = code.len > 0 | |
112 | ? ind_add(compute_code_indent(), code.s, code.len) | 112 | ? ind_add(compute_code_indent(), code.s, code.len) | |
113 | : ind_add(compute_label_indent(), lab.s, lab.len); | 113 | : ind_add(compute_label_indent(), lab.s, lab.len); | |
114 | 114 | |||
115 | ind = ps.line_has_decl || ps.ind_level == 0 | 115 | ind = ps.line_has_decl || ps.ind_level == 0 | |
116 | ? opt.decl_comment_column - 1 | 116 | ? opt.decl_comment_column - 1 | |
117 | : opt.comment_column - 1; | 117 | : opt.comment_column - 1; | |
118 | if (ind <= target_ind) | 118 | if (ind <= target_ind) | |
119 | ind = next_tab(target_ind); | 119 | ind = next_tab(target_ind); | |
120 | if (ind + 25 > line_length) | 120 | if (ind + 25 > line_length) | |
121 | line_length = ind + 25; | 121 | line_length = ind + 25; | |
122 | } | 122 | } | |
123 | } | 123 | } | |
124 | 124 | |||
125 | ps.com_ind = ind; | 125 | ps.com_ind = ind; | |
126 | 126 | |||
127 | if (!may_wrap) { | 127 | if (!may_wrap) { | |
128 | /* Find out how much indentation there was originally, because | 128 | /* Find out how much indentation there was originally, because | |
129 | * that much will have to be ignored by output_line. */ | 129 | * that much will have to be ignored by output_line. */ | |
130 | size_t len = (size_t)(inp_p - 2 - inp.s); | 130 | size_t len = (size_t)(inp_p - 2 - inp.s); | |
131 | ps.n_comment_delta = -ind_add(0, inp.s, len); | 131 | ps.n_comment_delta = -ind_add(0, inp.s, len); | |
132 | } else { | 132 | } else { | |
133 | ps.n_comment_delta = 0; | 133 | ps.n_comment_delta = 0; | |
134 | if (!(inp_p[0] == '\t' && !ch_isblank(inp_p[1]))) | 134 | if (!(inp_p[0] == '\t' && !ch_isblank(inp_p[1]))) | |
135 | while (ch_isblank(inp_p[0])) | 135 | while (ch_isblank(inp_p[0])) | |
136 | inp_p++; | 136 | inp_p++; | |
137 | } | 137 | } | |
138 | 138 | |||
139 | *p_may_wrap = may_wrap; | 139 | *p_may_wrap = may_wrap; | |
140 | *p_delim = delim; | 140 | *p_delim = delim; | |
141 | *p_ind = ind; | 141 | *p_ind = ind; | |
142 | *p_line_length = line_length; | 142 | *p_line_length = line_length; | |
143 | } | 143 | } | |
144 | 144 | |||
145 | static void | 145 | static void | |
146 | copy_comment_start(bool may_wrap, bool *delim, int ind, int line_length) | 146 | copy_comment_start(bool may_wrap, bool *delim, int ind, int line_length) | |
147 | { | 147 | { | |
148 | ps.comment_delta = 0; | 148 | ps.comment_delta = 0; | |
149 | com_add_char('/'); | 149 | com_add_char('/'); | |
150 | com_add_char(token.s[token.len - 1]); /* either '*' or '/' */ | 150 | com_add_char(token.s[token.len - 1]); /* either '*' or '/' */ | |
151 | 151 | |||
152 | if (may_wrap) { | 152 | if (may_wrap) { | |
153 | if (!ch_isblank(inp_p[0])) | 153 | if (!ch_isblank(inp_p[0])) | |
154 | com_add_char(' '); | 154 | com_add_char(' '); | |
155 | 155 | |||
156 | if (*delim && fits_in_one_line(ind, line_length)) | 156 | if (*delim && fits_in_one_line(ind, line_length)) | |
157 | *delim = false; | 157 | *delim = false; | |
158 | if (*delim) { | 158 | if (*delim) { | |
159 | output_line(); | 159 | output_line(); | |
160 | com_add_delim(); | 160 | com_add_delim(); | |
161 | } | 161 | } | |
162 | } | 162 | } | |
163 | } | 163 | } | |
164 | 164 | |||
165 | static void | 165 | static void | |
166 | copy_comment_wrap_text(int line_length, ssize_t *last_blank) | 166 | copy_comment_wrap_text(int line_length, ssize_t *last_blank) | |
167 | { | 167 | { | |
168 | int now_len = ind_add(ps.com_ind, com.s, com.len); | 168 | int now_len = ind_add(ps.com_ind, com.s, com.len); | |
169 | for (;;) { | 169 | for (;;) { | |
170 | char ch = inp_next(); | 170 | char ch = inp_next(); | |
171 | if (ch_isblank(ch)) | 171 | if (ch_isblank(ch)) | |
172 | *last_blank = (ssize_t)com.len; | 172 | *last_blank = (ssize_t)com.len; | |
173 | com_add_char(ch); | 173 | com_add_char(ch); | |
174 | now_len++; | 174 | now_len++; | |
175 | if (memchr("*\n\r\b\t", inp_p[0], 6) != NULL) | 175 | if (memchr("*\n\r\b\t", inp_p[0], 6) != NULL) | |
176 | break; | 176 | break; | |
177 | if (now_len >= line_length && *last_blank != -1) | 177 | if (now_len >= line_length && *last_blank != -1) | |
178 | break; | 178 | break; | |
179 | } | 179 | } | |
180 | 180 | |||
181 | ps.next_col_1 = false; | 181 | ps.next_col_1 = false; | |
182 | 182 | |||
183 | if (now_len <= line_length) | 183 | if (now_len <= line_length) | |
184 | return; | 184 | return; | |
185 | if (ch_isspace(com.s[com.len - 1])) | 185 | if (ch_isspace(com.s[com.len - 1])) | |
186 | return; | 186 | return; | |
187 | 187 | |||
188 | if (*last_blank == -1) { | 188 | if (*last_blank == -1) { | |
189 | /* only a single word in this line */ | 189 | /* only a single word in this line */ | |
190 | output_line(); | 190 | output_line(); | |
191 | com_add_delim(); | 191 | com_add_delim(); | |
192 | return; | 192 | return; | |
193 | } | 193 | } | |
194 | 194 | |||
195 | const char *last_word_s = com.s + *last_blank + 1; | 195 | const char *last_word_s = com.s + *last_blank + 1; | |
196 | size_t last_word_len = com.len - (size_t)(*last_blank + 1); | 196 | size_t last_word_len = com.len - (size_t)(*last_blank + 1); | |
197 | com.len = (size_t)*last_blank; | 197 | com.len = (size_t)*last_blank; | |
198 | output_line(); | 198 | output_line(); | |
199 | com_add_delim(); | 199 | com_add_delim(); | |
200 | 200 | |||
201 | /* Assume that output_line and com_add_delim don't invalidate the | 201 | /* Assume that output_line and com_add_delim don't invalidate the | |
202 | * "unused" part of the buffer beyond com.s + com.len. */ | 202 | * "unused" part of the buffer beyond com.s + com.len. */ | |
203 | memmove(com.s + com.len, last_word_s, last_word_len); | 203 | memmove(com.s + com.len, last_word_s, last_word_len); | |
204 | com.len += last_word_len; | 204 | com.len += last_word_len; | |
205 | *last_blank = -1; | 205 | *last_blank = -1; | |
206 | } | 206 | } | |
207 | 207 | |||
208 | static bool | 208 | static bool | |
209 | copy_comment_wrap_newline(ssize_t *last_blank) | 209 | copy_comment_wrap_newline(ssize_t *last_blank) | |
210 | { | 210 | { | |
211 | *last_blank = -1; | 211 | *last_blank = -1; | |
212 | if (ps.next_col_1) { | 212 | if (ps.next_col_1) { | |
213 | if (com.len == 0) | 213 | if (com.len == 0) | |
214 | com_add_char(' '); /* force empty output line */ | 214 | com_add_char(' '); /* force empty output line */ | |
215 | if (com.len > 3) { | 215 | if (com.len > 3) { | |
216 | output_line(); | 216 | output_line(); | |
217 | com_add_delim(); | 217 | com_add_delim(); | |
218 | } | 218 | } | |
219 | output_line(); | 219 | output_line(); | |
220 | com_add_delim(); | 220 | com_add_delim(); | |
221 | } else { | 221 | } else { | |
222 | ps.next_col_1 = true; | 222 | ps.next_col_1 = true; | |
223 | if (!(com.len > 0 && ch_isblank(com.s[com.len - 1]))) | 223 | if (!(com.len > 0 && ch_isblank(com.s[com.len - 1]))) | |
224 | com_add_char(' '); | 224 | com_add_char(' '); | |
225 | *last_blank = (int)com.len - 1; | 225 | *last_blank = (int)com.len - 1; | |
226 | } | 226 | } | |
227 | ++line_no; | 227 | ++line_no; | |
228 | 228 | |||
229 | /* flush any blanks and/or tabs at start of next line */ | 229 | /* flush any blanks and/or tabs at start of next line */ | |
230 | inp_skip(); /* '\n' */ | 230 | inp_skip(); /* '\n' */ | |
231 | while (ch_isblank(inp_p[0])) | 231 | while (ch_isblank(inp_p[0])) | |
232 | inp_p++; | 232 | inp_p++; | |
233 | if (inp_p[0] == '*' && inp_p[1] == '/') | 233 | if (inp_p[0] == '*' && inp_p[1] == '/') | |
234 | return false; | 234 | return false; | |
235 | if (inp_p[0] == '*') { | 235 | if (inp_p[0] == '*') { | |
236 | inp_p++; | 236 | inp_p++; | |
237 | while (ch_isblank(inp_p[0])) | 237 | while (ch_isblank(inp_p[0])) | |
238 | inp_p++; | 238 | inp_p++; | |
239 | } | 239 | } | |
240 | 240 | |||
241 | return true; | 241 | return true; | |
242 | } | 242 | } | |
243 | 243 | |||
244 | static void | 244 | static void | |
245 | copy_comment_wrap_finish(int line_length, bool delim) | 245 | copy_comment_wrap_finish(int line_length, bool delim) | |
246 | { | 246 | { | |
247 | if (delim) { | 247 | if (delim) { | |
248 | if (com.len > 3) | 248 | if (com.len > 3) | |
249 | output_line(); | 249 | output_line(); | |
250 | else | 250 | else | |
251 | buf_clear(&com); | 251 | buf_clear(&com); | |
252 | com_add_char(' '); | 252 | com_add_char(' '); | |
253 | } else { | 253 | } else { | |
254 | size_t len = com.len; | 254 | size_t len = com.len; | |
255 | while (ch_isblank(com.s[len - 1])) | 255 | while (ch_isblank(com.s[len - 1])) | |
256 | len--; | 256 | len--; | |
257 | int end_ind = ind_add(ps.com_ind, com.s, len); | 257 | int end_ind = ind_add(ps.com_ind, com.s, len); | |
258 | if (end_ind + 3 > line_length) | 258 | if (end_ind + 3 > line_length) | |
259 | output_line(); | 259 | output_line(); | |
260 | } | 260 | } | |
261 | 261 | |||
262 | while (com.len >= 2 | 262 | while (com.len >= 2 | |
263 | && ch_isblank(com.s[com.len - 1]) | 263 | && ch_isblank(com.s[com.len - 1]) | |
264 | && ch_isblank(com.s[com.len - 2])) | 264 | && ch_isblank(com.s[com.len - 2])) | |
265 | com.len--; | 265 | com.len--; | |
266 | buf_terminate(&com); | 266 | buf_terminate(&com); | |
267 | 267 | |||
268 | inp_p += 2; | 268 | inp_p += 2; | |
269 | if (com.len > 0 && ch_isblank(com.s[com.len - 1])) | 269 | if (com.len > 0 && ch_isblank(com.s[com.len - 1])) | |
270 | buf_add_chars(&com, "*/", 2); | 270 | buf_add_chars(&com, "*/", 2); | |
271 | else | 271 | else | |
272 | buf_add_chars(&com, " */", 3); | 272 | buf_add_chars(&com, " */", 3); | |
273 | } | 273 | } | |
274 | 274 | |||
275 | /* | 275 | /* | |
276 | * Copy characters from 'inp' to 'com'. Try to keep comments from going over | 276 | * Copy characters from 'inp' to 'com'. Try to keep comments from going over | |
277 | * the maximum line length. To do that, remember where the last blank, tab, or | 277 | * the maximum line length. To do that, remember where the last blank, tab, or | |
278 | * newline was. When a line is filled, print up to the last blank and continue | 278 | * newline was. When a line is filled, print up to the last blank and continue | |
279 | * copying. | 279 | * copying. | |
280 | */ | 280 | */ | |
281 | static void | 281 | static void | |
282 | copy_comment_wrap(int line_length, bool delim) | 282 | copy_comment_wrap(int line_length, bool delim) | |
283 | { | 283 | { | |
284 | ssize_t last_blank = -1; /* index of the last blank in 'com' */ | 284 | ssize_t last_blank = -1; /* index of the last blank in 'com' */ | |
285 | 285 | |||
286 | for (;;) { | 286 | for (;;) { | |
287 | if (inp_p[0] == '\n') { | 287 | if (inp_p[0] == '\n') { | |
288 | if (had_eof) | 288 | if (had_eof) | |
289 | goto unterminated_comment; | 289 | goto unterminated_comment; | |
290 | if (!copy_comment_wrap_newline(&last_blank)) | 290 | if (!copy_comment_wrap_newline(&last_blank)) | |
291 | goto end_of_comment; | 291 | goto end_of_comment; | |
292 | } else if (inp_p[0] == '*' && inp_p[1] == '/') | 292 | } else if (inp_p[0] == '*' && inp_p[1] == '/') | |
293 | goto end_of_comment; | 293 | goto end_of_comment; | |
294 | else | 294 | else | |
295 | copy_comment_wrap_text(line_length, &last_blank); | 295 | copy_comment_wrap_text(line_length, &last_blank); | |
296 | } | 296 | } | |
297 | 297 | |||
298 | end_of_comment: | 298 | end_of_comment: | |
299 | copy_comment_wrap_finish(line_length, delim); | 299 | copy_comment_wrap_finish(line_length, delim); | |
300 | return; | 300 | return; | |
301 | 301 | |||
302 | unterminated_comment: | 302 | unterminated_comment: | |
303 | diag(1, "Unterminated comment"); | 303 | diag(1, "Unterminated comment"); | |
304 | output_line(); | 304 | output_line(); | |
305 | } | 305 | } | |
306 | 306 | |||
307 | static void | 307 | static void | |
308 | copy_comment_nowrap(void) | 308 | copy_comment_nowrap(void) | |
309 | { | 309 | { | |
310 | char kind = token.s[token.len - 1]; | 310 | char kind = token.s[token.len - 1]; | |
311 | 311 | |||
312 | for (;;) { | 312 | for (;;) { | |
313 | if (inp_p[0] == '\n') { | 313 | if (inp_p[0] == '\n') { | |
314 | if (kind == '/') | 314 | if (kind == '/') | |
315 | return; | 315 | return; | |
316 | 316 | |||
317 | if (had_eof) { | 317 | if (had_eof) { | |
318 | diag(1, "Unterminated comment"); | 318 | diag(1, "Unterminated comment"); | |
319 | output_line(); | 319 | output_line(); | |
320 | return; | 320 | return; | |
321 | } | 321 | } | |
322 | 322 | |||
323 | if (com.len == 0) | 323 | if (com.len == 0) | |
324 | com_add_char(' '); /* force output of an | 324 | com_add_char(' '); /* force output of an | |
325 | * empty line */ | 325 | * empty line */ | |
326 | output_line(); | 326 | output_line(); | |
327 | ++line_no; | 327 | ++line_no; | |
328 | inp_skip(); | 328 | inp_skip(); | |
329 | continue; | 329 | continue; | |
330 | } | 330 | } | |
331 | 331 | |||
332 | com_add_char(*inp_p++); | 332 | com_add_char(*inp_p++); | |
333 | if (com.len >= 2 | 333 | if (com.len >= 2 | |
334 | && com.s[com.len - 2] == '*' | 334 | && com.s[com.len - 2] == '*' | |
335 | && com.s[com.len - 1] == '/' | 335 | && com.s[com.len - 1] == '/' | |
336 | && kind == '*') | 336 | && kind == '*') | |
337 | return; | 337 | return; | |
338 | } | 338 | } | |
339 | } | 339 | } | |
340 | 340 | |||
341 | /* | 341 | /* | |
342 | * Scan, reformat and output a single comment, which is either a block comment | 342 | * Scan, reformat and output a single comment, which is either a block comment | |
343 | * starting with '/' '*' or an end-of-line comment starting with '//'. | 343 | * starting with '/' '*' or an end-of-line comment starting with '//'. | |
344 | */ | 344 | */ | |
345 | void | 345 | void | |
346 | process_comment(void) | 346 | process_comment(void) | |
347 | { | 347 | { | |
348 | bool may_wrap, delim; | 348 | bool may_wrap, delim; | |
349 | int ind, line_length; | 349 | int ind, line_length; | |
350 | 350 | |||
351 | analyze_comment(&may_wrap, &delim, &ind, &line_length); | 351 | analyze_comment(&may_wrap, &delim, &ind, &line_length); | |
352 | copy_comment_start(may_wrap, &delim, ind, line_length); | 352 | copy_comment_start(may_wrap, &delim, ind, line_length); | |
353 | if (may_wrap) | 353 | if (may_wrap) | |
354 | copy_comment_wrap(line_length, delim); | 354 | copy_comment_wrap(line_length, delim); | |
355 | else | 355 | else | |
356 | copy_comment_nowrap(); | 356 | copy_comment_nowrap(); | |
357 | } | 357 | } |