Fri Jun 9 07:20:31 2023 UTC ()
indent: format its own code


(rillig)
diff -r1.341 -r1.342 src/usr.bin/indent/indent.c
diff -r1.205 -r1.206 src/usr.bin/indent/io.c
diff -r1.217 -r1.218 src/usr.bin/indent/lexi.c
diff -r1.69 -r1.70 src/usr.bin/indent/parse.c
diff -r1.157 -r1.158 src/usr.bin/indent/pr_comment.c

cvs diff -r1.341 -r1.342 src/usr.bin/indent/indent.c (switch to unified diff)

--- src/usr.bin/indent/indent.c 2023/06/08 21:18:54 1.341
+++ src/usr.bin/indent/indent.c 2023/06/09 07:20:30 1.342
@@ -1,1111 +1,1111 @@ @@ -1,1111 +1,1111 @@
1/* $NetBSD: indent.c,v 1.341 2023/06/08 21:18:54 rillig Exp $ */ 1/* $NetBSD: indent.c,v 1.342 2023/06/09 07:20:30 rillig Exp $ */
2 2
3/*- 3/*-
4 * SPDX-License-Identifier: BSD-4-Clause 4 * SPDX-License-Identifier: BSD-4-Clause
5 * 5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc. 6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 7 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
8 * Copyright (c) 1980, 1993 8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved. 9 * The Regents of the University of California. All rights reserved.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software 19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement: 20 * must display the following acknowledgement:
21 * This product includes software developed by the University of 21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors. 22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors 23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software 24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission. 25 * without specific prior written permission.
26 * 26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE. 37 * SUCH DAMAGE.
38 */ 38 */
39 39
40#include <sys/cdefs.h> 40#include <sys/cdefs.h>
41__RCSID("$NetBSD: indent.c,v 1.341 2023/06/08 21:18:54 rillig Exp $"); 41__RCSID("$NetBSD: indent.c,v 1.342 2023/06/09 07:20:30 rillig Exp $");
42 42
43#include <sys/param.h> 43#include <sys/param.h>
44#include <err.h> 44#include <err.h>
45#include <fcntl.h> 45#include <fcntl.h>
46#include <stdarg.h> 46#include <stdarg.h>
47#include <stdio.h> 47#include <stdio.h>
48#include <stdlib.h> 48#include <stdlib.h>
49#include <string.h> 49#include <string.h>
50#include <unistd.h> 50#include <unistd.h>
51 51
52#include "indent.h" 52#include "indent.h"
53 53
54struct options opt = { 54struct options opt = {
55 .brace_same_line = true, 55 .brace_same_line = true,
56 .comment_delimiter_on_blankline = true, 56 .comment_delimiter_on_blankline = true,
57 .cuddle_else = true, 57 .cuddle_else = true,
58 .comment_column = 33, 58 .comment_column = 33,
59 .decl_indent = 16, 59 .decl_indent = 16,
60 .else_if_in_same_line = true, 60 .else_if_in_same_line = true,
61 .function_brace_split = true, 61 .function_brace_split = true,
62 .format_col1_comments = true, 62 .format_col1_comments = true,
63 .format_block_comments = true, 63 .format_block_comments = true,
64 .indent_parameters = true, 64 .indent_parameters = true,
65 .indent_size = 8, 65 .indent_size = 8,
66 .local_decl_indent = -1, 66 .local_decl_indent = -1,
67 .lineup_to_parens = true, 67 .lineup_to_parens = true,
68 .procnames_start_line = true, 68 .procnames_start_line = true,
69 .star_comment_cont = true, 69 .star_comment_cont = true,
70 .tabsize = 8, 70 .tabsize = 8,
71 .max_line_length = 78, 71 .max_line_length = 78,
72 .use_tabs = true, 72 .use_tabs = true,
73}; 73};
74 74
75struct parser_state ps; 75struct parser_state ps;
76 76
77struct buffer token; 77struct buffer token;
78 78
79struct buffer lab; 79struct buffer lab;
80struct buffer code; 80struct buffer code;
81struct buffer com; 81struct buffer com;
82 82
83bool found_err; 83bool found_err;
84bool had_eof; 84bool had_eof;
85int line_no = 1; 85int line_no = 1;
86enum indent_enabled indent_enabled; 86enum indent_enabled indent_enabled;
87 87
88static int ifdef_level; 88static int ifdef_level;
89static struct parser_state state_stack[5]; 89static struct parser_state state_stack[5];
90 90
91FILE *input; 91FILE *input;
92FILE *output; 92FILE *output;
93 93
94static const char *in_name = "Standard Input"; 94static const char *in_name = "Standard Input";
95static const char *out_name = "Standard Output"; 95static const char *out_name = "Standard Output";
96static const char *backup_suffix = ".BAK"; 96static const char *backup_suffix = ".BAK";
97static char bakfile[MAXPATHLEN] = ""; 97static char bakfile[MAXPATHLEN] = "";
98 98
99 99
100void * 100void *
101nonnull(void *p) 101nonnull(void *p)
102{ 102{
103 if (p == NULL) 103 if (p == NULL)
104 err(EXIT_FAILURE, NULL); 104 err(EXIT_FAILURE, NULL);
105 return p; 105 return p;
106} 106}
107 107
108static void 108static void
109buf_expand(struct buffer *buf, size_t add_size) 109buf_expand(struct buffer *buf, size_t add_size)
110{ 110{
111 buf->cap = buf->cap + add_size + 400; 111 buf->cap = buf->cap + add_size + 400;
112 buf->s = nonnull(realloc(buf->s, buf->cap)); 112 buf->s = nonnull(realloc(buf->s, buf->cap));
113} 113}
114 114
115void 115void
116buf_add_char(struct buffer *buf, char ch) 116buf_add_char(struct buffer *buf, char ch)
117{ 117{
118 if (buf->len == buf->cap) 118 if (buf->len == buf->cap)
119 buf_expand(buf, 1); 119 buf_expand(buf, 1);
120 buf->s[buf->len++] = ch; 120 buf->s[buf->len++] = ch;
121} 121}
122 122
123void 123void
124buf_add_chars(struct buffer *buf, const char *s, size_t len) 124buf_add_chars(struct buffer *buf, const char *s, size_t len)
125{ 125{
126 if (len == 0) 126 if (len == 0)
127 return; 127 return;
128 if (len > buf->cap - buf->len) 128 if (len > buf->cap - buf->len)
129 buf_expand(buf, len); 129 buf_expand(buf, len);
130 memcpy(buf->s + buf->len, s, len); 130 memcpy(buf->s + buf->len, s, len);
131 buf->len += len; 131 buf->len += len;
132} 132}
133 133
134static void 134static void
135buf_add_buf(struct buffer *buf, const struct buffer *add) 135buf_add_buf(struct buffer *buf, const struct buffer *add)
136{ 136{
137 buf_add_chars(buf, add->s, add->len); 137 buf_add_chars(buf, add->s, add->len);
138} 138}
139 139
140void 140void
141diag(int level, const char *msg, ...) 141diag(int level, const char *msg, ...)
142{ 142{
143 va_list ap; 143 va_list ap;
144 144
145 if (level != 0) 145 if (level != 0)
146 found_err = true; 146 found_err = true;
147 147
148 va_start(ap, msg); 148 va_start(ap, msg);
149 fprintf(stderr, "%s: %s:%d: ", 149 fprintf(stderr, "%s: %s:%d: ",
150 level == 0 ? "warning" : "error", in_name, line_no); 150 level == 0 ? "warning" : "error", in_name, line_no);
151 vfprintf(stderr, msg, ap); 151 vfprintf(stderr, msg, ap);
152 fprintf(stderr, "\n"); 152 fprintf(stderr, "\n");
153 va_end(ap); 153 va_end(ap);
154} 154}
155 155
156/* 156/*
157 * Compute the indentation from starting at 'ind' and adding the text starting 157 * Compute the indentation from starting at 'ind' and adding the text starting
158 * at 's'. 158 * at 's'.
159 */ 159 */
160int 160int
161ind_add(int ind, const char *s, size_t len) 161ind_add(int ind, const char *s, size_t len)
162{ 162{
163 for (const char *p = s; len > 0; p++, len--) { 163 for (const char *p = s; len > 0; p++, len--) {
164 if (*p == '\n') 164 if (*p == '\n')
165 ind = 0; 165 ind = 0;
166 else if (*p == '\t') 166 else if (*p == '\t')
167 ind = next_tab(ind); 167 ind = next_tab(ind);
168 else if (*p == '\b') 168 else if (*p == '\b')
169 --ind; 169 --ind;
170 else 170 else
171 ++ind; 171 ++ind;
172 } 172 }
173 return ind; 173 return ind;
174} 174}
175 175
176static void 176static void
177init_globals(void) 177init_globals(void)
178{ 178{
179 ps.psyms.sym[0] = psym_stmt_list; 179 ps.psyms.sym[0] = psym_stmt_list;
180 ps.prev_lsym = lsym_semicolon; 180 ps.prev_lsym = lsym_semicolon;
181 ps.next_col_1 = true; 181 ps.next_col_1 = true;
182 ps.lbrace_kind = psym_lbrace_block; 182 ps.lbrace_kind = psym_lbrace_block;
183 183
184 const char *suffix = getenv("SIMPLE_BACKUP_SUFFIX"); 184 const char *suffix = getenv("SIMPLE_BACKUP_SUFFIX");
185 if (suffix != NULL) 185 if (suffix != NULL)
186 backup_suffix = suffix; 186 backup_suffix = suffix;
187} 187}
188 188
189/* 189/*
190 * Copy the input file to the backup file, then make the backup file the input 190 * Copy the input file to the backup file, then make the backup file the input
191 * and the original input file the output. 191 * and the original input file the output.
192 */ 192 */
193static void 193static void
194bakcopy(void) 194bakcopy(void)
195{ 195{
196 ssize_t n; 196 ssize_t n;
197 int bak_fd; 197 int bak_fd;
198 char buff[8 * 1024]; 198 char buff[8 * 1024];
199 199
200 const char *last_slash = strrchr(in_name, '/'); 200 const char *last_slash = strrchr(in_name, '/');
201 snprintf(bakfile, sizeof(bakfile), "%s%s", 201 snprintf(bakfile, sizeof(bakfile), "%s%s",
202 last_slash != NULL ? last_slash + 1 : in_name, backup_suffix); 202 last_slash != NULL ? last_slash + 1 : in_name, backup_suffix);
203 203
204 /* copy in_name to backup file */ 204 /* copy in_name to backup file */
205 bak_fd = creat(bakfile, 0600); 205 bak_fd = creat(bakfile, 0600);
206 if (bak_fd < 0) 206 if (bak_fd < 0)
207 err(1, "%s", bakfile); 207 err(1, "%s", bakfile);
208 208
209 while ((n = read(fileno(input), buff, sizeof(buff))) > 0) 209 while ((n = read(fileno(input), buff, sizeof(buff))) > 0)
210 if (write(bak_fd, buff, (size_t)n) != n) 210 if (write(bak_fd, buff, (size_t)n) != n)
211 err(1, "%s", bakfile); 211 err(1, "%s", bakfile);
212 if (n < 0) 212 if (n < 0)
213 err(1, "%s", in_name); 213 err(1, "%s", in_name);
214 214
215 close(bak_fd); 215 close(bak_fd);
216 (void)fclose(input); 216 (void)fclose(input);
217 217
218 /* re-open backup file as the input file */ 218 /* re-open backup file as the input file */
219 input = fopen(bakfile, "r"); 219 input = fopen(bakfile, "r");
220 if (input == NULL) 220 if (input == NULL)
221 err(1, "%s", bakfile); 221 err(1, "%s", bakfile);
222 /* now the original input file will be the output */ 222 /* now the original input file will be the output */
223 output = fopen(in_name, "w"); 223 output = fopen(in_name, "w");
224 if (output == NULL) { 224 if (output == NULL) {
225 unlink(bakfile); 225 unlink(bakfile);
226 err(1, "%s", in_name); 226 err(1, "%s", in_name);
227 } 227 }
228} 228}
229 229
230static void 230static void
231load_profiles(int argc, char **argv) 231load_profiles(int argc, char **argv)
232{ 232{
233 const char *profile_name = NULL; 233 const char *profile_name = NULL;
234 234
235 for (int i = 1; i < argc; ++i) { 235 for (int i = 1; i < argc; ++i) {
236 const char *arg = argv[i]; 236 const char *arg = argv[i];
237 237
238 if (strcmp(arg, "-npro") == 0) 238 if (strcmp(arg, "-npro") == 0)
239 return; 239 return;
240 if (arg[0] == '-' && arg[1] == 'P' && arg[2] != '\0') 240 if (arg[0] == '-' && arg[1] == 'P' && arg[2] != '\0')
241 profile_name = arg + 2; 241 profile_name = arg + 2;
242 } 242 }
243 243
244 load_profile_files(profile_name); 244 load_profile_files(profile_name);
245} 245}
246 246
247static void 247static void
248parse_command_line(int argc, char **argv) 248parse_command_line(int argc, char **argv)
249{ 249{
250 for (int i = 1; i < argc; ++i) { 250 for (int i = 1; i < argc; ++i) {
251 const char *arg = argv[i]; 251 const char *arg = argv[i];
252 252
253 if (arg[0] == '-') { 253 if (arg[0] == '-') {
254 set_option(arg, "Command line"); 254 set_option(arg, "Command line");
255 255
256 } else if (input == NULL) { 256 } else if (input == NULL) {
257 in_name = arg; 257 in_name = arg;
258 if ((input = fopen(in_name, "r")) == NULL) 258 if ((input = fopen(in_name, "r")) == NULL)
259 err(1, "%s", in_name); 259 err(1, "%s", in_name);
260 260
261 } else if (output == NULL) { 261 } else if (output == NULL) {
262 out_name = arg; 262 out_name = arg;
263 if (strcmp(in_name, out_name) == 0) 263 if (strcmp(in_name, out_name) == 0)
264 errx(1, "input and output files " 264 errx(1, "input and output files "
265 "must be different"); 265 "must be different");
266 if ((output = fopen(out_name, "w")) == NULL) 266 if ((output = fopen(out_name, "w")) == NULL)
267 err(1, "%s", out_name); 267 err(1, "%s", out_name);
268 268
269 } else 269 } else
270 errx(1, "too many arguments: %s", arg); 270 errx(1, "too many arguments: %s", arg);
271 } 271 }
272 272
273 if (input == NULL) { 273 if (input == NULL) {
274 input = stdin; 274 input = stdin;
275 output = stdout; 275 output = stdout;
276 } else if (output == NULL) { 276 } else if (output == NULL) {
277 out_name = in_name; 277 out_name = in_name;
278 bakcopy(); 278 bakcopy();
279 } 279 }
280 280
281 if (opt.comment_column <= 1) 281 if (opt.comment_column <= 1)
282 opt.comment_column = 2; /* don't put normal comments in column 282 opt.comment_column = 2; /* don't put normal comments in column
283 * 1, see opt.format_col1_comments */ 283 * 1, see opt.format_col1_comments */
284 if (opt.block_comment_max_line_length <= 0) 284 if (opt.block_comment_max_line_length <= 0)
285 opt.block_comment_max_line_length = opt.max_line_length; 285 opt.block_comment_max_line_length = opt.max_line_length;
286 if (opt.local_decl_indent < 0) 286 if (opt.local_decl_indent < 0)
287 opt.local_decl_indent = opt.decl_indent; 287 opt.local_decl_indent = opt.decl_indent;
288 if (opt.decl_comment_column <= 0) 288 if (opt.decl_comment_column <= 0)
289 opt.decl_comment_column = opt.left_justify_decl 289 opt.decl_comment_column = opt.left_justify_decl
290 ? (opt.comment_column <= 10 ? 2 : opt.comment_column - 8) 290 ? (opt.comment_column <= 10 ? 2 : opt.comment_column - 8)
291 : opt.comment_column; 291 : opt.comment_column;
292 if (opt.continuation_indent == 0) 292 if (opt.continuation_indent == 0)
293 opt.continuation_indent = opt.indent_size; 293 opt.continuation_indent = opt.indent_size;
294} 294}
295 295
296static void 296static void
297set_initial_indentation(void) 297set_initial_indentation(void)
298{ 298{
299 inp_read_line(); 299 inp_read_line();
300 300
301 int ind = 0; 301 int ind = 0;
302 for (const char *p = inp_p;; p++) { 302 for (const char *p = inp_p;; p++) {
303 if (*p == ' ') 303 if (*p == ' ')
304 ind++; 304 ind++;
305 else if (*p == '\t') 305 else if (*p == '\t')
306 ind = next_tab(ind); 306 ind = next_tab(ind);
307 else 307 else
308 break; 308 break;
309 } 309 }
310 310
311 ps.ind_level = ps.ind_level_follow = ind / opt.indent_size; 311 ps.ind_level = ps.ind_level_follow = ind / opt.indent_size;
312} 312}
313 313
314static void 314static void
315indent_declarator(int decl_ind, bool tabs_to_var) 315indent_declarator(int decl_ind, bool tabs_to_var)
316{ 316{
317 int base = ps.ind_level * opt.indent_size; 317 int base = ps.ind_level * opt.indent_size;
318 int ind = base + (int)code.len; 318 int ind = base + (int)code.len;
319 int target = base + decl_ind; 319 int target = base + decl_ind;
320 size_t orig_code_len = code.len; 320 size_t orig_code_len = code.len;
321 321
322 if (tabs_to_var) 322 if (tabs_to_var)
323 for (int next; (next = next_tab(ind)) <= target; ind = next) 323 for (int next; (next = next_tab(ind)) <= target; ind = next)
324 buf_add_char(&code, '\t'); 324 buf_add_char(&code, '\t');
325 325
326 for (; ind < target; ind++) 326 for (; ind < target; ind++)
327 buf_add_char(&code, ' '); 327 buf_add_char(&code, ' ');
328 328
329 if (code.len == orig_code_len && ps.want_blank) { 329 if (code.len == orig_code_len && ps.want_blank) {
330 buf_add_char(&code, ' '); 330 buf_add_char(&code, ' ');
331 ps.want_blank = false; 331 ps.want_blank = false;
332 } 332 }
333 ps.decl_indent_done = true; 333 ps.decl_indent_done = true;
334} 334}
335 335
336static void 336static void
337update_ps_lbrace_kind(lexer_symbol lsym) 337update_ps_lbrace_kind(lexer_symbol lsym)
338{ 338{
339 if (lsym == lsym_tag) { 339 if (lsym == lsym_tag) {
340 ps.lbrace_kind = token.s[0] == 's' ? psym_lbrace_struct : 340 ps.lbrace_kind = token.s[0] == 's' ? psym_lbrace_struct :
341 token.s[0] == 'u' ? psym_lbrace_union : 341 token.s[0] == 'u' ? psym_lbrace_union :
342 psym_lbrace_enum; 342 psym_lbrace_enum;
343 } else if (lsym != lsym_type_outside_parentheses 343 } else if (lsym != lsym_type_outside_parentheses
344 && lsym != lsym_word 344 && lsym != lsym_word
345 && lsym != lsym_lbrace) 345 && lsym != lsym_lbrace)
346 ps.lbrace_kind = psym_lbrace_block; 346 ps.lbrace_kind = psym_lbrace_block;
347} 347}
348 348
349static int 349static int
350process_eof(void) 350process_eof(void)
351{ 351{
352 if (lab.len > 0 || code.len > 0 || com.len > 0) 352 if (lab.len > 0 || code.len > 0 || com.len > 0)
353 output_line(); 353 output_line();
354 if (indent_enabled != indent_on) { 354 if (indent_enabled != indent_on) {
355 indent_enabled = indent_last_off_line; 355 indent_enabled = indent_last_off_line;
356 output_line(); 356 output_line();
357 } 357 }
358 358
359 if (ps.psyms.top > 1) /* check for balanced braces */ 359 if (ps.psyms.top > 1) /* check for balanced braces */
360 diag(1, "Stuff missing from end of file"); 360 diag(1, "Stuff missing from end of file");
361 361
362 fflush(output); 362 fflush(output);
363 return found_err ? EXIT_FAILURE : EXIT_SUCCESS; 363 return found_err ? EXIT_FAILURE : EXIT_SUCCESS;
364} 364}
365 365
366static void 366static void
367maybe_break_line(lexer_symbol lsym) 367maybe_break_line(lexer_symbol lsym)
368{ 368{
369 if (!ps.force_nl) 369 if (!ps.force_nl)
370 return; 370 return;
371 if (lsym == lsym_semicolon) 371 if (lsym == lsym_semicolon)
372 return; 372 return;
373 if (lsym == lsym_lbrace && opt.brace_same_line 373 if (lsym == lsym_lbrace && opt.brace_same_line
374 && ps.prev_lsym != lsym_lbrace) 374 && ps.prev_lsym != lsym_lbrace)
375 return; 375 return;
376 376
377 output_line(); 377 output_line();
378 ps.force_nl = false; 378 ps.force_nl = false;
379} 379}
380 380
381static void 381static void
382move_com_to_code(lexer_symbol lsym) 382move_com_to_code(lexer_symbol lsym)
383{ 383{
384 if (ps.want_blank) 384 if (ps.want_blank)
385 buf_add_char(&code, ' '); 385 buf_add_char(&code, ' ');
386 buf_add_buf(&code, &com); 386 buf_add_buf(&code, &com);
387 com.len = 0; 387 com.len = 0;
388 ps.want_blank = lsym != lsym_rparen && lsym != lsym_rbracket; 388 ps.want_blank = lsym != lsym_rparen && lsym != lsym_rbracket;
389} 389}
390 390
391static void 391static void
392process_newline(void) 392process_newline(void)
393{ 393{
394 if (ps.prev_lsym == lsym_comma 394 if (ps.prev_lsym == lsym_comma
395 && ps.nparen == 0 && !ps.block_init 395 && ps.nparen == 0 && !ps.block_init
396 && !opt.break_after_comma && ps.break_after_comma 396 && !opt.break_after_comma && ps.break_after_comma
397 && lab.len == 0 /* for preprocessing lines */ 397 && lab.len == 0 /* for preprocessing lines */
398 && com.len == 0) 398 && com.len == 0)
399 goto stay_in_line; 399 goto stay_in_line;
400 if (ps.psyms.sym[ps.psyms.top] == psym_switch_expr 400 if (ps.psyms.sym[ps.psyms.top] == psym_switch_expr
401 && opt.brace_same_line) { 401 && opt.brace_same_line) {
402 ps.force_nl = true; 402 ps.force_nl = true;
403 goto stay_in_line; 403 goto stay_in_line;
404 } 404 }
405 405
406 output_line(); 406 output_line();
407 407
408stay_in_line: 408stay_in_line:
409 ++line_no; 409 ++line_no;
410} 410}
411 411
412static bool 412static bool
413is_function_pointer_declaration(void) 413is_function_pointer_declaration(void)
414{ 414{
415 return ps.in_decl 415 return ps.in_decl
416 && !ps.block_init 416 && !ps.block_init
417 && !ps.decl_indent_done 417 && !ps.decl_indent_done
418 && !ps.is_function_definition 418 && !ps.is_function_definition
419 && ps.line_start_nparen == 0; 419 && ps.line_start_nparen == 0;
420} 420}
421 421
422static bool 422static bool
423want_blank_before_lparen(void) 423want_blank_before_lparen(void)
424{ 424{
425 if (!ps.want_blank) 425 if (!ps.want_blank)
426 return false; 426 return false;
427 if (opt.proc_calls_space) 427 if (opt.proc_calls_space)
428 return true; 428 return true;
429 if (ps.prev_lsym == lsym_rparen || ps.prev_lsym == lsym_rbracket) 429 if (ps.prev_lsym == lsym_rparen || ps.prev_lsym == lsym_rbracket)
430 return false; 430 return false;
431 if (ps.prev_lsym == lsym_offsetof) 431 if (ps.prev_lsym == lsym_offsetof)
432 return false; 432 return false;
433 if (ps.prev_lsym == lsym_sizeof) 433 if (ps.prev_lsym == lsym_sizeof)
434 return opt.blank_after_sizeof; 434 return opt.blank_after_sizeof;
435 if (ps.prev_lsym == lsym_word || ps.prev_lsym == lsym_funcname) 435 if (ps.prev_lsym == lsym_word || ps.prev_lsym == lsym_funcname)
436 return false; 436 return false;
437 return true; 437 return true;
438} 438}
439 439
440static void 440static void
441process_lparen(void) 441process_lparen(void)
442{ 442{
443 if (++ps.nparen == array_length(ps.paren)) { 443 if (++ps.nparen == array_length(ps.paren)) {
444 diag(0, "Reached internal limit of %zu unclosed parentheses", 444 diag(0, "Reached internal limit of %zu unclosed parentheses",
445 array_length(ps.paren)); 445 array_length(ps.paren));
446 ps.nparen--; 446 ps.nparen--;
447 } 447 }
448 448
449 if (is_function_pointer_declaration()) 449 if (is_function_pointer_declaration())
450 indent_declarator(ps.decl_ind, ps.tabs_to_var); 450 indent_declarator(ps.decl_ind, ps.tabs_to_var);
451 else if (want_blank_before_lparen()) 451 else if (want_blank_before_lparen())
452 buf_add_char(&code, ' '); 452 buf_add_char(&code, ' ');
453 ps.want_blank = false; 453 ps.want_blank = false;
454 buf_add_char(&code, token.s[0]); 454 buf_add_char(&code, token.s[0]);
455 455
456 if (opt.extra_expr_indent && !opt.lineup_to_parens 456 if (opt.extra_expr_indent && !opt.lineup_to_parens
457 && ps.spaced_expr_psym != psym_0 && ps.nparen == 1 457 && ps.spaced_expr_psym != psym_0 && ps.nparen == 1
458 && opt.continuation_indent == opt.indent_size) 458 && opt.continuation_indent == opt.indent_size)
459 ps.extra_expr_indent = eei_yes; 459 ps.extra_expr_indent = eei_yes;
460 460
461 if (ps.init_or_struct && ps.psyms.top <= 2) { 461 if (ps.init_or_struct && ps.psyms.top <= 2) {
462 /* A kludge to correctly align function definitions. */ 462 /* A kludge to correctly align function definitions. */
463 parse(psym_stmt); 463 parse(psym_stmt);
464 ps.init_or_struct = false; 464 ps.init_or_struct = false;
465 } 465 }
466 466
467 int indent = ind_add(0, code.s, code.len); 467 int indent = ind_add(0, code.s, code.len);
468 if (opt.extra_expr_indent && ps.spaced_expr_psym != psym_0 468 if (opt.extra_expr_indent && ps.spaced_expr_psym != psym_0
469 && ps.nparen == 1 && indent < 2 * opt.indent_size) 469 && ps.nparen == 1 && indent < 2 * opt.indent_size)
470 indent = 2 * opt.indent_size; 470 indent = 2 * opt.indent_size;
471 471
472 enum paren_level_cast cast = cast_unknown; 472 enum paren_level_cast cast = cast_unknown;
473 if (ps.prev_lsym == lsym_offsetof 473 if (ps.prev_lsym == lsym_offsetof
474 || ps.prev_lsym == lsym_sizeof 474 || ps.prev_lsym == lsym_sizeof
475 || ps.prev_lsym == lsym_for 475 || ps.prev_lsym == lsym_for
476 || ps.prev_lsym == lsym_if 476 || ps.prev_lsym == lsym_if
477 || ps.prev_lsym == lsym_switch 477 || ps.prev_lsym == lsym_switch
478 || ps.prev_lsym == lsym_while 478 || ps.prev_lsym == lsym_while
479 || ps.is_function_definition) 479 || ps.is_function_definition)
480 cast = cast_no; 480 cast = cast_no;
481 481
482 ps.paren[ps.nparen - 1].indent = indent; 482 ps.paren[ps.nparen - 1].indent = indent;
483 ps.paren[ps.nparen - 1].cast = cast; 483 ps.paren[ps.nparen - 1].cast = cast;
484 debug_println("paren_indents[%d] is now %s%d", 484 debug_println("paren_indents[%d] is now %s%d",
485 ps.nparen - 1, paren_level_cast_name[cast], indent); 485 ps.nparen - 1, paren_level_cast_name[cast], indent);
486} 486}
487 487
488static void 488static void
489process_lbracket(void) 489process_lbracket(void)
490{ 490{
491 if (++ps.nparen == array_length(ps.paren)) { 491 if (++ps.nparen == array_length(ps.paren)) {
492 diag(0, "Reached internal limit of %zu unclosed parentheses", 492 diag(0, "Reached internal limit of %zu unclosed parentheses",
493 array_length(ps.paren)); 493 array_length(ps.paren));
494 ps.nparen--; 494 ps.nparen--;
495 } 495 }
496 496
497 if (code.len > 0 497 if (code.len > 0
498 && (ps.prev_lsym == lsym_comma || ps.prev_lsym == lsym_binary_op)) 498 && (ps.prev_lsym == lsym_comma || ps.prev_lsym == lsym_binary_op))
499 buf_add_char(&code, ' '); 499 buf_add_char(&code, ' ');
500 ps.want_blank = false; 500 ps.want_blank = false;
501 buf_add_char(&code, token.s[0]); 501 buf_add_char(&code, token.s[0]);
502 502
503 int indent = ind_add(0, code.s, code.len); 503 int indent = ind_add(0, code.s, code.len);
504 504
505 ps.paren[ps.nparen - 1].indent = indent; 505 ps.paren[ps.nparen - 1].indent = indent;
506 ps.paren[ps.nparen - 1].cast = cast_no; 506 ps.paren[ps.nparen - 1].cast = cast_no;
507 debug_println("paren_indents[%d] is now %d", ps.nparen - 1, indent); 507 debug_println("paren_indents[%d] is now %d", ps.nparen - 1, indent);
508} 508}
509 509
510static void 510static void
511process_rparen(void) 511process_rparen(void)
512{ 512{
513 if (ps.nparen == 0) { 513 if (ps.nparen == 0) {
514 diag(0, "Extra '%c'", *token.s); 514 diag(0, "Extra '%c'", *token.s);
515 goto unbalanced; 515 goto unbalanced;
516 } 516 }
517 517
518 enum paren_level_cast cast = ps.paren[--ps.nparen].cast; 518 enum paren_level_cast cast = ps.paren[--ps.nparen].cast;
519 if (ps.decl_on_line && !ps.block_init) 519 if (ps.decl_on_line && !ps.block_init)
520 cast = cast_no; 520 cast = cast_no;
521 521
522 if (cast == cast_maybe) { 522 if (cast == cast_maybe) {
523 ps.next_unary = true; 523 ps.next_unary = true;
524 ps.want_blank = opt.space_after_cast; 524 ps.want_blank = opt.space_after_cast;
525 } else 525 } else
526 ps.want_blank = true; 526 ps.want_blank = true;
527 527
528 if (code.len == 0) 528 if (code.len == 0)
529 ps.line_start_nparen = ps.nparen; 529 ps.line_start_nparen = ps.nparen;
530 530
531unbalanced: 531unbalanced:
532 buf_add_char(&code, token.s[0]); 532 buf_add_char(&code, token.s[0]);
533 533
534 if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) { 534 if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) {
535 if (ps.extra_expr_indent == eei_yes) 535 if (ps.extra_expr_indent == eei_yes)
536 ps.extra_expr_indent = eei_last; 536 ps.extra_expr_indent = eei_last;
537 ps.force_nl = true; 537 ps.force_nl = true;
538 ps.next_unary = true; 538 ps.next_unary = true;
539 ps.in_stmt_or_decl = false; 539 ps.in_stmt_or_decl = false;
540 parse(ps.spaced_expr_psym); 540 parse(ps.spaced_expr_psym);
541 ps.spaced_expr_psym = psym_0; 541 ps.spaced_expr_psym = psym_0;
542 ps.want_blank = true; 542 ps.want_blank = true;
543 out.line_kind = lk_stmt_head; 543 out.line_kind = lk_stmt_head;
544 } 544 }
545} 545}
546 546
547static void 547static void
548process_rbracket(void) 548process_rbracket(void)
549{ 549{
550 if (ps.nparen == 0) { 550 if (ps.nparen == 0) {
551 diag(0, "Extra '%c'", *token.s); 551 diag(0, "Extra '%c'", *token.s);
552 goto unbalanced; 552 goto unbalanced;
553 } 553 }
554 --ps.nparen; 554 --ps.nparen;
555 555
556 ps.want_blank = true; 556 ps.want_blank = true;
557 if (code.len == 0) 557 if (code.len == 0)
558 ps.line_start_nparen = ps.nparen; 558 ps.line_start_nparen = ps.nparen;
559 559
560unbalanced: 560unbalanced:
561 buf_add_char(&code, token.s[0]); 561 buf_add_char(&code, token.s[0]);
562} 562}
563 563
564static void 564static void
565process_unary_op(void) 565process_unary_op(void)
566{ 566{
567 if (is_function_pointer_declaration()) { 567 if (is_function_pointer_declaration()) {
568 int ind = ps.decl_ind - (int)token.len; 568 int ind = ps.decl_ind - (int)token.len;
569 indent_declarator(ind, ps.tabs_to_var); 569 indent_declarator(ind, ps.tabs_to_var);
570 ps.want_blank = false; 570 ps.want_blank = false;
571 } else if ((token.s[0] == '+' || token.s[0] == '-') 571 } else if ((token.s[0] == '+' || token.s[0] == '-')
572 && code.len > 0 && code.s[code.len - 1] == token.s[0]) 572 && code.len > 0 && code.s[code.len - 1] == token.s[0])
573 ps.want_blank = true; 573 ps.want_blank = true;
574 574
575 if (ps.want_blank) 575 if (ps.want_blank)
576 buf_add_char(&code, ' '); 576 buf_add_char(&code, ' ');
577 buf_add_buf(&code, &token); 577 buf_add_buf(&code, &token);
578 ps.want_blank = false; 578 ps.want_blank = false;
579} 579}
580 580
581static void 581static void
582process_postfix_op(void) 582process_postfix_op(void)
583{ 583{
584 buf_add_buf(&code, &token); 584 buf_add_buf(&code, &token);
585 ps.want_blank = true; 585 ps.want_blank = true;
586} 586}
587 587
588static void 588static void
589process_question(void) 589process_question(void)
590{ 590{
591 ps.quest_level++; 591 ps.quest_level++;
592 if (code.len == 0) { 592 if (code.len == 0) {
593 ps.in_stmt_cont = true; 593 ps.in_stmt_cont = true;
594 ps.in_stmt_or_decl = true; 594 ps.in_stmt_or_decl = true;
595 ps.in_decl = false; 595 ps.in_decl = false;
596 } 596 }
597} 597}
598 598
599static void 599static void
600process_colon_question(void) 600process_colon_question(void)
601{ 601{
602 if (code.len == 0) { 602 if (code.len == 0) {
603 ps.in_stmt_cont = true; 603 ps.in_stmt_cont = true;
604 ps.in_stmt_or_decl = true; 604 ps.in_stmt_or_decl = true;
605 ps.in_decl = false; 605 ps.in_decl = false;
606 } 606 }
607} 607}
608 608
609static void 609static void
610process_colon_label(void) 610process_colon_label(void)
611{ 611{
612 buf_add_buf(&lab, &code); 612 buf_add_buf(&lab, &code);
613 buf_add_char(&lab, ':'); 613 buf_add_char(&lab, ':');
614 code.len = 0; 614 code.len = 0;
615 615
616 if (ps.seen_case) 616 if (ps.seen_case)
617 out.line_kind = lk_case_or_default; 617 out.line_kind = lk_case_or_default;
618 ps.in_stmt_or_decl = false; 618 ps.in_stmt_or_decl = false;
619 ps.force_nl = ps.seen_case; 619 ps.force_nl = ps.seen_case;
620 ps.seen_case = false; 620 ps.seen_case = false;
621 ps.want_blank = false; 621 ps.want_blank = false;
622} 622}
623 623
624static void 624static void
625process_colon_other(void) 625process_colon_other(void)
626{ 626{
627 buf_add_char(&code, ':'); 627 buf_add_char(&code, ':');
628 ps.want_blank = false; 628 ps.want_blank = false;
629} 629}
630 630
631static void 631static void
632process_semicolon(void) 632process_semicolon(void)
633{ 633{
634 if (out.line_kind == lk_stmt_head) 634 if (out.line_kind == lk_stmt_head)
635 out.line_kind = lk_other; 635 out.line_kind = lk_other;
636 if (ps.decl_level == 0) 636 if (ps.decl_level == 0)
637 ps.init_or_struct = false; 637 ps.init_or_struct = false;
638 ps.seen_case = false; /* only needs to be reset on error */ 638 ps.seen_case = false; /* only needs to be reset on error */
639 ps.quest_level = 0; /* only needs to be reset on error */ 639 ps.quest_level = 0; /* only needs to be reset on error */
640 if (ps.prev_lsym == lsym_rparen) 640 if (ps.prev_lsym == lsym_rparen)
641 ps.in_func_def_params = false; 641 ps.in_func_def_params = false;
642 ps.block_init = false; 642 ps.block_init = false;
643 ps.block_init_level = 0; 643 ps.block_init_level = 0;
644 ps.declaration = ps.declaration == decl_begin ? decl_end : decl_no; 644 ps.declaration = ps.declaration == decl_begin ? decl_end : decl_no;
645 645
646 if (ps.in_decl && code.len == 0 && !ps.block_init && 646 if (ps.in_decl && code.len == 0 && !ps.block_init &&
647 !ps.decl_indent_done && ps.line_start_nparen == 0) { 647 !ps.decl_indent_done && ps.line_start_nparen == 0) {
648 /* indent stray semicolons in declarations */ 648 /* indent stray semicolons in declarations */
649 indent_declarator(ps.decl_ind - 1, ps.tabs_to_var); 649 indent_declarator(ps.decl_ind - 1, ps.tabs_to_var);
650 } 650 }
651 651
652 ps.in_decl = ps.decl_level > 0; /* if we were in a first level 652 ps.in_decl = ps.decl_level > 0; /* if we were in a first level
653 * structure declaration before, we 653 * structure declaration before, we
654 * aren't anymore */ 654 * aren't anymore */
655 655
656 if (ps.nparen > 0 && ps.spaced_expr_psym != psym_for_exprs) { 656 if (ps.nparen > 0 && ps.spaced_expr_psym != psym_for_exprs) {
657 /* There were unbalanced parentheses in the statement. It is a 657 /* There were unbalanced parentheses in the statement. It is a
658 * bit complicated, because the semicolon might be in a for 658 * bit complicated, because the semicolon might be in a for
659 * statement. */ 659 * statement. */
660 diag(1, "Unbalanced parentheses"); 660 diag(1, "Unbalanced parentheses");
661 ps.nparen = 0; 661 ps.nparen = 0;
662 if (ps.spaced_expr_psym != psym_0) { 662 if (ps.spaced_expr_psym != psym_0) {
663 parse(ps.spaced_expr_psym); 663 parse(ps.spaced_expr_psym);
664 ps.spaced_expr_psym = psym_0; 664 ps.spaced_expr_psym = psym_0;
665 } 665 }
666 } 666 }
667 buf_add_char(&code, ';'); 667 buf_add_char(&code, ';');
668 ps.want_blank = true; 668 ps.want_blank = true;
669 ps.in_stmt_or_decl = ps.nparen > 0; 669 ps.in_stmt_or_decl = ps.nparen > 0;
670 ps.decl_ind = 0; 670 ps.decl_ind = 0;
671 671
672 if (ps.spaced_expr_psym == psym_0) { 672 if (ps.spaced_expr_psym == psym_0) {
673 parse(psym_stmt); 673 parse(psym_stmt);
674 ps.force_nl = true; 674 ps.force_nl = true;
675 } 675 }
676} 676}
677 677
678static void 678static void
679process_lbrace(void) 679process_lbrace(void)
680{ 680{
681 parser_symbol psym = ps.psyms.sym[ps.psyms.top]; 681 parser_symbol psym = ps.psyms.sym[ps.psyms.top];
682 if (ps.prev_lsym == lsym_rparen 682 if (ps.prev_lsym == lsym_rparen
683 && ps.psyms.top >= 2 683 && ps.psyms.top >= 2
684 && !(psym == psym_for_exprs || psym == psym_if_expr 684 && !(psym == psym_for_exprs || psym == psym_if_expr
685 || psym == psym_switch_expr || psym == psym_while_expr)) { 685 || psym == psym_switch_expr || psym == psym_while_expr)) {
686 ps.block_init = true; 686 ps.block_init = true;
687 ps.init_or_struct = true; 687 ps.init_or_struct = true;
688 } 688 }
689 689
690 if (out.line_kind == lk_stmt_head) 690 if (out.line_kind == lk_stmt_head)
691 out.line_kind = lk_other; 691 out.line_kind = lk_other;
692 692
693 ps.in_stmt_or_decl = false; /* don't indent the {} */ 693 ps.in_stmt_or_decl = false; /* don't indent the {} */
694 694
695 if (!ps.block_init) 695 if (!ps.block_init)
696 ps.force_nl = true; 696 ps.force_nl = true;
697 else 697 else
698 ps.block_init_level++; 698 ps.block_init_level++;
699 699
700 if (code.len > 0 && !ps.block_init) { 700 if (code.len > 0 && !ps.block_init) {
701 if (!opt.brace_same_line || 701 if (!opt.brace_same_line ||
702 (code.len > 0 && code.s[code.len - 1] == '}')) 702 (code.len > 0 && code.s[code.len - 1] == '}'))
703 output_line(); 703 output_line();
704 else if (ps.in_func_def_params && !ps.init_or_struct) { 704 else if (ps.in_func_def_params && !ps.init_or_struct) {
705 ps.ind_level_follow = 0; 705 ps.ind_level_follow = 0;
706 if (opt.function_brace_split) 706 if (opt.function_brace_split)
707 output_line(); 707 output_line();
708 else 708 else
709 ps.want_blank = true; 709 ps.want_blank = true;
710 } 710 }
711 } 711 }
712 712
713 if (ps.nparen > 0) { 713 if (ps.nparen > 0) {
714 diag(1, "Unbalanced parentheses"); 714 diag(1, "Unbalanced parentheses");
715 ps.nparen = 0; 715 ps.nparen = 0;
716 if (ps.spaced_expr_psym != psym_0) { 716 if (ps.spaced_expr_psym != psym_0) {
717 parse(ps.spaced_expr_psym); 717 parse(ps.spaced_expr_psym);
718 ps.spaced_expr_psym = psym_0; 718 ps.spaced_expr_psym = psym_0;
719 ps.ind_level = ps.ind_level_follow; 719 ps.ind_level = ps.ind_level_follow;
720 } 720 }
721 } 721 }
722 722
723 if (code.len == 0) 723 if (code.len == 0)
724 ps.in_stmt_cont = false; /* don't indent the '{' itself 724 ps.in_stmt_cont = false; /* don't indent the '{' itself
725 */ 725 */
726 if (ps.in_decl && ps.init_or_struct) { 726 if (ps.in_decl && ps.init_or_struct) {
727 ps.di_stack[ps.decl_level] = ps.decl_ind; 727 ps.di_stack[ps.decl_level] = ps.decl_ind;
728 if (++ps.decl_level == (int)array_length(ps.di_stack)) { 728 if (++ps.decl_level == (int)array_length(ps.di_stack)) {
729 diag(0, "Reached internal limit of %d struct levels", 729 diag(0, "Reached internal limit of %d struct levels",
730 (int)array_length(ps.di_stack)); 730 (int)array_length(ps.di_stack));
731 ps.decl_level--; 731 ps.decl_level--;
732 } 732 }
733 } else { 733 } else {
734 ps.decl_on_line = false; /* we can't be in the middle of 734 ps.decl_on_line = false; /* we can't be in the middle of
735 * a declaration, so don't do 735 * a declaration, so don't do
736 * special indentation of 736 * special indentation of
737 * comments */ 737 * comments */
738 ps.in_func_def_params = false; 738 ps.in_func_def_params = false;
739 ps.in_decl = false; 739 ps.in_decl = false;
740 } 740 }
741 741
742 ps.decl_ind = 0; 742 ps.decl_ind = 0;
743 parse(ps.lbrace_kind); 743 parse(ps.lbrace_kind);
744 if (ps.want_blank) 744 if (ps.want_blank)
745 buf_add_char(&code, ' '); 745 buf_add_char(&code, ' ');
746 ps.want_blank = false; 746 ps.want_blank = false;
747 buf_add_char(&code, '{'); 747 buf_add_char(&code, '{');
748 ps.declaration = decl_no; 748 ps.declaration = decl_no;
749} 749}
750 750
751static void 751static void
752process_rbrace(void) 752process_rbrace(void)
753{ 753{
754 if (ps.nparen > 0) { /* check for unclosed if, for, else. */ 754 if (ps.nparen > 0) { /* check for unclosed if, for, else. */
755 diag(1, "Unbalanced parentheses"); 755 diag(1, "Unbalanced parentheses");
756 ps.nparen = 0; 756 ps.nparen = 0;
757 ps.spaced_expr_psym = psym_0; 757 ps.spaced_expr_psym = psym_0;
758 } 758 }
759 759
760 ps.declaration = decl_no; 760 ps.declaration = decl_no;
761 if (ps.block_init_level > 0) 761 if (ps.block_init_level > 0)
762 ps.block_init_level--; 762 ps.block_init_level--;
763 763
764 if (code.len > 0 && !ps.block_init) 764 if (code.len > 0 && !ps.block_init)
765 output_line(); 765 output_line();
766 766
767 buf_add_char(&code, '}'); 767 buf_add_char(&code, '}');
768 ps.want_blank = true; 768 ps.want_blank = true;
769 ps.in_stmt_or_decl = false; // XXX: Initializers don't end a stmt 769 ps.in_stmt_or_decl = false; // XXX: Initializers don't end a stmt
770 ps.in_stmt_cont = false; 770 ps.in_stmt_cont = false;
771 771
772 if (ps.decl_level > 0) { /* multi-level structure declaration */ 772 if (ps.decl_level > 0) { /* multi-level structure declaration */
773 ps.decl_ind = ps.di_stack[--ps.decl_level]; 773 ps.decl_ind = ps.di_stack[--ps.decl_level];
774 if (ps.decl_level == 0 && !ps.in_func_def_params) { 774 if (ps.decl_level == 0 && !ps.in_func_def_params) {
775 ps.declaration = decl_begin; 775 ps.declaration = decl_begin;
776 ps.decl_ind = ps.ind_level == 0 776 ps.decl_ind = ps.ind_level == 0
777 ? opt.decl_indent : opt.local_decl_indent; 777 ? opt.decl_indent : opt.local_decl_indent;
778 } 778 }
779 ps.in_decl = true; 779 ps.in_decl = true;
780 } 780 }
781 781
782 if (ps.psyms.top == 2) 782 if (ps.psyms.top == 2)
783 out.line_kind = lk_func_end; 783 out.line_kind = lk_func_end;
784 784
785 parse(psym_rbrace); 785 parse(psym_rbrace);
786 786
787 if (!ps.init_or_struct 787 if (!ps.init_or_struct
788 && ps.psyms.sym[ps.psyms.top] != psym_do_stmt 788 && ps.psyms.sym[ps.psyms.top] != psym_do_stmt
789 && ps.psyms.sym[ps.psyms.top] != psym_if_expr_stmt) 789 && ps.psyms.sym[ps.psyms.top] != psym_if_expr_stmt)
790 ps.force_nl = true; 790 ps.force_nl = true;
791} 791}
792 792
793static void 793static void
794process_do(void) 794process_do(void)
795{ 795{
796 ps.in_stmt_or_decl = false; 796 ps.in_stmt_or_decl = false;
797 ps.in_decl = false; 797 ps.in_decl = false;
798 798
799 if (code.len > 0) 799 if (code.len > 0)
800 output_line(); 800 output_line();
801 801
802 ps.force_nl = true; 802 ps.force_nl = true;
803 parse(psym_do); 803 parse(psym_do);
804} 804}
805 805
806static void 806static void
807process_else(void) 807process_else(void)
808{ 808{
809 ps.in_stmt_or_decl = false; 809 ps.in_stmt_or_decl = false;
810 810
811 if (code.len > 0 811 if (code.len > 0
812 && !(opt.cuddle_else && code.s[code.len - 1] == '}')) 812 && !(opt.cuddle_else && code.s[code.len - 1] == '}'))
813 output_line(); 813 output_line();
814 814
815 ps.force_nl = true; 815 ps.force_nl = true;
816 parse(psym_else); 816 parse(psym_else);
817} 817}
818 818
819static void 819static void
820process_type(void) 820process_type(void)
821{ 821{
822 parse(psym_decl); /* let the parser worry about indentation */ 822 parse(psym_decl); /* let the parser worry about indentation */
823 823
824 if (ps.prev_lsym == lsym_rparen && ps.psyms.top <= 1 && code.len > 0) 824 if (ps.prev_lsym == lsym_rparen && ps.psyms.top <= 1 && code.len > 0)
825 output_line(); 825 output_line();
826 826
827 if (ps.in_func_def_params && opt.indent_parameters && 827 if (ps.in_func_def_params && opt.indent_parameters &&
828 ps.decl_level == 0) { 828 ps.decl_level == 0) {
829 ps.ind_level = ps.ind_level_follow = 1; 829 ps.ind_level = ps.ind_level_follow = 1;
830 ps.in_stmt_cont = false; 830 ps.in_stmt_cont = false;
831 } 831 }
832 832
833 ps.init_or_struct = /* maybe */ true; 833 ps.init_or_struct = /* maybe */ true;
834 ps.in_decl = ps.decl_on_line = ps.prev_lsym != lsym_typedef; 834 ps.in_decl = ps.decl_on_line = ps.prev_lsym != lsym_typedef;
835 if (ps.decl_level <= 0) 835 if (ps.decl_level <= 0)
836 ps.declaration = decl_begin; 836 ps.declaration = decl_begin;
837 837
838 int len = (int)token.len + 1; 838 int len = (int)token.len + 1;
839 int ind = ps.ind_level == 0 || ps.decl_level > 0 839 int ind = ps.ind_level == 0 || ps.decl_level > 0
840 ? opt.decl_indent /* global variable or local member */ 840 ? opt.decl_indent /* global variable or local member */
841 : opt.local_decl_indent; /* local variable */ 841 : opt.local_decl_indent; /* local variable */
842 ps.decl_ind = ind > 0 ? ind : len; 842 ps.decl_ind = ind > 0 ? ind : len;
843 ps.tabs_to_var = opt.use_tabs && ind > 0; 843 ps.tabs_to_var = opt.use_tabs && ind > 0;
844} 844}
845 845
846static void 846static void
847process_ident(lexer_symbol lsym) 847process_ident(lexer_symbol lsym)
848{ 848{
849 if (ps.in_decl) { 849 if (ps.in_decl) {
850 if (lsym == lsym_funcname) { 850 if (lsym == lsym_funcname) {
851 ps.in_decl = false; 851 ps.in_decl = false;
852 if (opt.procnames_start_line && code.len > 0) 852 if (opt.procnames_start_line && code.len > 0)
853 output_line(); 853 output_line();
854 else if (ps.want_blank) 854 else if (ps.want_blank)
855 buf_add_char(&code, ' '); 855 buf_add_char(&code, ' ');
856 ps.want_blank = false; 856 ps.want_blank = false;
857 857
858 } else if (!ps.block_init && !ps.decl_indent_done && 858 } else if (!ps.block_init && !ps.decl_indent_done &&
859 ps.line_start_nparen == 0) { 859 ps.line_start_nparen == 0) {
860 if (opt.decl_indent == 0 860 if (opt.decl_indent == 0
861 && code.len > 0 && code.s[code.len - 1] == '}') 861 && code.len > 0 && code.s[code.len - 1] == '}')
862 ps.decl_ind = ind_add(0, code.s, code.len) + 1; 862 ps.decl_ind = ind_add(0, code.s, code.len) + 1;
863 indent_declarator(ps.decl_ind, ps.tabs_to_var); 863 indent_declarator(ps.decl_ind, ps.tabs_to_var);
864 ps.want_blank = false; 864 ps.want_blank = false;
865 } 865 }
866 866
867 } else if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) { 867 } else if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) {
868 ps.force_nl = true; 868 ps.force_nl = true;
869 ps.next_unary = true; 869 ps.next_unary = true;
870 ps.in_stmt_or_decl = false; 870 ps.in_stmt_or_decl = false;
871 parse(ps.spaced_expr_psym); 871 parse(ps.spaced_expr_psym);
872 ps.spaced_expr_psym = psym_0; 872 ps.spaced_expr_psym = psym_0;
873 } 873 }
874} 874}
875 875
876static void 876static void
877process_period(void) 877process_period(void)
878{ 878{
879 if (code.len > 0 && code.s[code.len - 1] == ',') 879 if (code.len > 0 && code.s[code.len - 1] == ',')
880 buf_add_char(&code, ' '); 880 buf_add_char(&code, ' ');
881 buf_add_char(&code, '.'); 881 buf_add_char(&code, '.');
882 ps.want_blank = false; 882 ps.want_blank = false;
883} 883}
884 884
885static void 885static void
886process_comma(void) 886process_comma(void)
887{ 887{
888 ps.want_blank = code.len > 0; /* only put blank after comma if comma 888 ps.want_blank = code.len > 0; /* only put blank after comma if comma
889 * does not start the line */ 889 * does not start the line */
890 890
891 if (ps.in_decl && !ps.is_function_definition && !ps.block_init && 891 if (ps.in_decl && !ps.is_function_definition && !ps.block_init &&
892 !ps.decl_indent_done && ps.line_start_nparen == 0) { 892 !ps.decl_indent_done && ps.line_start_nparen == 0) {
893 /* indent leading commas and not the actual identifiers */ 893 /* indent leading commas and not the actual identifiers */
894 indent_declarator(ps.decl_ind - 1, ps.tabs_to_var); 894 indent_declarator(ps.decl_ind - 1, ps.tabs_to_var);
895 } 895 }
896 896
897 buf_add_char(&code, ','); 897 buf_add_char(&code, ',');
898 898
899 if (ps.nparen == 0) { 899 if (ps.nparen == 0) {
900 if (ps.block_init_level == 0) 900 if (ps.block_init_level == 0)
901 ps.block_init = false; 901 ps.block_init = false;
902 int typical_varname_length = 8; 902 int typical_varname_length = 8;
903 if (ps.break_after_comma && (opt.break_after_comma || 903 if (ps.break_after_comma && (opt.break_after_comma ||
904 ind_add(compute_code_indent(), code.s, code.len) 904 ind_add(compute_code_indent(), code.s, code.len)
905 >= opt.max_line_length - typical_varname_length)) 905 >= opt.max_line_length - typical_varname_length))
906 ps.force_nl = true; 906 ps.force_nl = true;
907 } 907 }
908} 908}
909 909
910/* move the whole line to the 'label' buffer */ 910/* move the whole line to the 'label' buffer */
911static void 911static void
912read_preprocessing_line(void) 912read_preprocessing_line(void)
913{ 913{
914 enum { 914 enum {
915 PLAIN, STR, CHR, COMM 915 PLAIN, STR, CHR, COMM
916 } state = PLAIN; 916 } state = PLAIN;
917 917
918 buf_add_char(&lab, '#'); 918 buf_add_char(&lab, '#');
919 919
920 while (inp_p[0] != '\n' || (state == COMM && !had_eof)) { 920 while (inp_p[0] != '\n' || (state == COMM && !had_eof)) {
921 buf_add_char(&lab, inp_next()); 921 buf_add_char(&lab, inp_next());
922 switch (lab.s[lab.len - 1]) { 922 switch (lab.s[lab.len - 1]) {
923 case '\\': 923 case '\\':
924 if (state != COMM) 924 if (state != COMM)
925 buf_add_char(&lab, inp_next()); 925 buf_add_char(&lab, inp_next());
926 break; 926 break;
927 case '/': 927 case '/':
928 if (inp_p[0] == '*' && state == PLAIN) { 928 if (inp_p[0] == '*' && state == PLAIN) {
929 state = COMM; 929 state = COMM;
930 buf_add_char(&lab, *inp_p++); 930 buf_add_char(&lab, *inp_p++);
931 } 931 }
932 break; 932 break;
933 case '"': 933 case '"':
934 if (state == STR) 934 if (state == STR)
935 state = PLAIN; 935 state = PLAIN;
936 else if (state == PLAIN) 936 else if (state == PLAIN)
937 state = STR; 937 state = STR;
938 break; 938 break;
939 case '\'': 939 case '\'':
940 if (state == CHR) 940 if (state == CHR)
941 state = PLAIN; 941 state = PLAIN;
942 else if (state == PLAIN) 942 else if (state == PLAIN)
943 state = CHR; 943 state = CHR;
944 break; 944 break;
945 case '*': 945 case '*':
946 if (inp_p[0] == '/' && state == COMM) { 946 if (inp_p[0] == '/' && state == COMM) {
947 state = PLAIN; 947 state = PLAIN;
948 buf_add_char(&lab, *inp_p++); 948 buf_add_char(&lab, *inp_p++);
949 } 949 }
950 break; 950 break;
951 } 951 }
952 } 952 }
953 953
954 while (lab.len > 0 && ch_isblank(lab.s[lab.len - 1])) 954 while (lab.len > 0 && ch_isblank(lab.s[lab.len - 1]))
955 lab.len--; 955 lab.len--;
956} 956}
957 957
958static void 958static void
959process_preprocessing(void) 959process_preprocessing(void)
960{ 960{
961 if (lab.len > 0 || code.len > 0 || com.len > 0) 961 if (lab.len > 0 || code.len > 0 || com.len > 0)
962 output_line(); 962 output_line();
963 963
964 read_preprocessing_line(); 964 read_preprocessing_line();
965 965
966 const char *dir = lab.s + 1, *line_end = lab.s + lab.len; 966 const char *dir = lab.s + 1, *line_end = lab.s + lab.len;
967 while (dir < line_end && ch_isblank(*dir)) 967 while (dir < line_end && ch_isblank(*dir))
968 dir++; 968 dir++;
969 size_t dir_len = 0; 969 size_t dir_len = 0;
970 while (dir + dir_len < line_end && ch_isalpha(dir[dir_len])) 970 while (dir + dir_len < line_end && ch_isalpha(dir[dir_len]))
971 dir_len++; 971 dir_len++;
972 972
973 if (dir_len >= 2 && memcmp(dir, "if", 2) == 0) { 973 if (dir_len >= 2 && memcmp(dir, "if", 2) == 0) {
974 if ((size_t)ifdef_level < array_length(state_stack)) 974 if ((size_t)ifdef_level < array_length(state_stack))
975 state_stack[ifdef_level++] = ps; 975 state_stack[ifdef_level++] = ps;
976 else 976 else
977 diag(1, "#if stack overflow"); 977 diag(1, "#if stack overflow");
978 out.line_kind = lk_if; 978 out.line_kind = lk_if;
979 979
980 } else if (dir_len >= 2 && memcmp(dir, "el", 2) == 0) { 980 } else if (dir_len >= 2 && memcmp(dir, "el", 2) == 0) {
981 if (ifdef_level <= 0) 981 if (ifdef_level <= 0)
982 diag(1, dir[2] == 'i' 982 diag(1, dir[2] == 'i'
983 ? "Unmatched #elif" : "Unmatched #else"); 983 ? "Unmatched #elif" : "Unmatched #else");
984 else 984 else
985 ps = state_stack[ifdef_level - 1]; 985 ps = state_stack[ifdef_level - 1];
986 986
987 } else if (dir_len == 5 && memcmp(dir, "endif", 5) == 0) { 987 } else if (dir_len == 5 && memcmp(dir, "endif", 5) == 0) {
988 if (ifdef_level <= 0) 988 if (ifdef_level <= 0)
989 diag(1, "Unmatched #endif"); 989 diag(1, "Unmatched #endif");
990 else 990 else
991 ifdef_level--; 991 ifdef_level--;
992 out.line_kind = lk_endif; 992 out.line_kind = lk_endif;
993 } 993 }
994} 994}
995 995
996static void 996static void
997process_lsym(lexer_symbol lsym) 997process_lsym(lexer_symbol lsym)
998{ 998{
999 switch (lsym) { 999 switch (lsym) {
1000 /* INDENT OFF */ 1000 /* INDENT OFF */
1001 case lsym_preprocessing: process_preprocessing(); break; 1001 case lsym_preprocessing: process_preprocessing(); break;
1002 case lsym_newline: process_newline(); break; 1002 case lsym_newline: process_newline(); break;
1003 case lsym_comment: process_comment(); break; 1003 case lsym_comment: process_comment(); break;
1004 case lsym_lparen: process_lparen(); break; 1004 case lsym_lparen: process_lparen(); break;
1005 case lsym_lbracket: process_lbracket(); break; 1005 case lsym_lbracket: process_lbracket(); break;
1006 case lsym_rparen: process_rparen(); break; 1006 case lsym_rparen: process_rparen(); break;
1007 case lsym_rbracket: process_rbracket(); break; 1007 case lsym_rbracket: process_rbracket(); break;
1008 case lsym_lbrace: process_lbrace(); break; 1008 case lsym_lbrace: process_lbrace(); break;
1009 case lsym_rbrace: process_rbrace(); break; 1009 case lsym_rbrace: process_rbrace(); break;
1010 case lsym_period: process_period(); break; 1010 case lsym_period: process_period(); break;
1011 case lsym_unary_op: process_unary_op(); break; 1011 case lsym_unary_op: process_unary_op(); break;
1012 case lsym_postfix_op: process_postfix_op(); break; 1012 case lsym_postfix_op: process_postfix_op(); break;
1013 case lsym_binary_op: goto copy_token; 1013 case lsym_binary_op: goto copy_token;
1014 case lsym_question: process_question(); goto copy_token; 1014 case lsym_question: process_question(); goto copy_token;
1015 case lsym_colon_question: process_colon_question(); goto copy_token; 1015 case lsym_colon_question: process_colon_question(); goto copy_token;
1016 case lsym_colon_label: process_colon_label(); break; 1016 case lsym_colon_label: process_colon_label(); break;
1017 case lsym_colon_other: process_colon_other(); break; 1017 case lsym_colon_other: process_colon_other(); break;
1018 case lsym_comma: process_comma(); break; 1018 case lsym_comma: process_comma(); break;
1019 case lsym_semicolon: process_semicolon(); break; 1019 case lsym_semicolon: process_semicolon(); break;
1020 case lsym_typedef: goto copy_token; 1020 case lsym_typedef: goto copy_token;
1021 case lsym_modifier: goto copy_token; 1021 case lsym_modifier: goto copy_token;
1022 case lsym_case: ps.seen_case = true; goto copy_token; 1022 case lsym_case: ps.seen_case = true; goto copy_token;
1023 case lsym_default: ps.seen_case = true; goto copy_token; 1023 case lsym_default: ps.seen_case = true; goto copy_token;
1024 case lsym_do: process_do(); goto copy_token; 1024 case lsym_do: process_do(); goto copy_token;
1025 case lsym_else: process_else(); goto copy_token; 1025 case lsym_else: process_else(); goto copy_token;
1026 case lsym_for: ps.spaced_expr_psym = psym_for_exprs; goto copy_token; 1026 case lsym_for: ps.spaced_expr_psym = psym_for_exprs; goto copy_token;
1027 case lsym_if: ps.spaced_expr_psym = psym_if_expr; goto copy_token; 1027 case lsym_if: ps.spaced_expr_psym = psym_if_expr; goto copy_token;
1028 case lsym_switch: ps.spaced_expr_psym = psym_switch_expr; goto copy_token; 1028 case lsym_switch: ps.spaced_expr_psym = psym_switch_expr; goto copy_token;
1029 case lsym_while: ps.spaced_expr_psym = psym_while_expr; goto copy_token; 1029 case lsym_while: ps.spaced_expr_psym = psym_while_expr; goto copy_token;
1030 /* INDENT ON */ 1030 /* INDENT ON */
1031 1031
1032 case lsym_tag: 1032 case lsym_tag:
1033 if (ps.nparen > 0) 1033 if (ps.nparen > 0)
1034 goto copy_token; 1034 goto copy_token;
1035 /* FALLTHROUGH */ 1035 /* FALLTHROUGH */
1036 case lsym_type_outside_parentheses: 1036 case lsym_type_outside_parentheses:
1037 process_type(); 1037 process_type();
1038 goto copy_token; 1038 goto copy_token;
1039 1039
1040 case lsym_type_in_parentheses: 1040 case lsym_type_in_parentheses:
1041 case lsym_sizeof: 1041 case lsym_sizeof:
1042 case lsym_offsetof: 1042 case lsym_offsetof:
1043 case lsym_word: 1043 case lsym_word:
1044 case lsym_funcname: 1044 case lsym_funcname:
1045 case lsym_return: 1045 case lsym_return:
1046 process_ident(lsym); 1046 process_ident(lsym);
1047copy_token: 1047copy_token:
1048 if (ps.want_blank) 1048 if (ps.want_blank)
1049 buf_add_char(&code, ' '); 1049 buf_add_char(&code, ' ');
1050 buf_add_buf(&code, &token); 1050 buf_add_buf(&code, &token);
1051 if (lsym != lsym_funcname) 1051 if (lsym != lsym_funcname)
1052 ps.want_blank = true; 1052 ps.want_blank = true;
1053 break; 1053 break;
1054 1054
1055 default: 1055 default:
1056 break; 1056 break;
1057 } 1057 }
1058} 1058}
1059 1059
1060static int 1060static int
1061indent(void) 1061indent(void)
1062{ 1062{
1063 debug_parser_state(); 1063 debug_parser_state();
1064 1064
1065 for (;;) { /* loop until we reach eof */ 1065 for (;;) { /* loop until we reach eof */
1066 lexer_symbol lsym = lexi(); 1066 lexer_symbol lsym = lexi();
1067 1067
1068 debug_blank_line(); 1068 debug_blank_line();
1069 debug_printf("line %d: %s", line_no, lsym_name[lsym]); 1069 debug_printf("line %d: %s", line_no, lsym_name[lsym]);
1070 debug_print_buf("token", &token); 1070 debug_print_buf("token", &token);
1071 debug_buffers(); 1071 debug_buffers();
1072 debug_blank_line(); 1072 debug_blank_line();
1073 1073
1074 if (lsym == lsym_eof) 1074 if (lsym == lsym_eof)
1075 return process_eof(); 1075 return process_eof();
1076 1076
1077 if (lsym == lsym_if && ps.prev_lsym == lsym_else 1077 if (lsym == lsym_if && ps.prev_lsym == lsym_else
1078 && opt.else_if_in_same_line) 1078 && opt.else_if_in_same_line)
1079 ps.force_nl = false; 1079 ps.force_nl = false;
1080 1080
1081 if (lsym == lsym_newline || lsym == lsym_preprocessing) 1081 if (lsym == lsym_newline || lsym == lsym_preprocessing)
1082 ps.force_nl = false; 1082 ps.force_nl = false;
1083 else if (lsym == lsym_comment) { 1083 else if (lsym == lsym_comment) {
1084 /* no special processing */ 1084 /* no special processing */
1085 } else { 1085 } else {
1086 maybe_break_line(lsym); 1086 maybe_break_line(lsym);
1087 ps.in_stmt_or_decl = true; 1087 ps.in_stmt_or_decl = true;
1088 if (com.len > 0) 1088 if (com.len > 0)
1089 move_com_to_code(lsym); 1089 move_com_to_code(lsym);
1090 update_ps_lbrace_kind(lsym); 1090 update_ps_lbrace_kind(lsym);
1091 } 1091 }
1092 1092
1093 process_lsym(lsym); 1093 process_lsym(lsym);
1094 1094
1095 debug_parser_state(); 1095 debug_parser_state();
1096 1096
1097 if (lsym != lsym_comment && lsym != lsym_newline && 1097 if (lsym != lsym_comment && lsym != lsym_newline &&
1098 lsym != lsym_preprocessing) 1098 lsym != lsym_preprocessing)
1099 ps.prev_lsym = lsym; 1099 ps.prev_lsym = lsym;
1100 } 1100 }
1101} 1101}
1102 1102
1103int 1103int
1104main(int argc, char **argv) 1104main(int argc, char **argv)
1105{ 1105{
1106 init_globals(); 1106 init_globals();
1107 load_profiles(argc, argv); 1107 load_profiles(argc, argv);
1108 parse_command_line(argc, argv); 1108 parse_command_line(argc, argv);
1109 set_initial_indentation(); 1109 set_initial_indentation();
1110 return indent(); 1110 return indent();
1111} 1111}

cvs diff -r1.205 -r1.206 src/usr.bin/indent/io.c (switch to unified diff)

--- src/usr.bin/indent/io.c 2023/06/09 06:36:57 1.205
+++ src/usr.bin/indent/io.c 2023/06/09 07:20:30 1.206
@@ -1,390 +1,390 @@ @@ -1,390 +1,390 @@
1/* $NetBSD: io.c,v 1.205 2023/06/09 06:36:57 rillig Exp $ */ 1/* $NetBSD: io.c,v 1.206 2023/06/09 07:20:30 rillig Exp $ */
2 2
3/*- 3/*-
4 * SPDX-License-Identifier: BSD-4-Clause 4 * SPDX-License-Identifier: BSD-4-Clause
5 * 5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc. 6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993 7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved. 8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved. 9 * All rights reserved.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software 19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement: 20 * must display the following acknowledgement:
21 * This product includes software developed by the University of 21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors. 22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors 23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software 24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission. 25 * without specific prior written permission.
26 * 26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE. 37 * SUCH DAMAGE.
38 */ 38 */
39 39
40#include <sys/cdefs.h> 40#include <sys/cdefs.h>
41__RCSID("$NetBSD: io.c,v 1.205 2023/06/09 06:36:57 rillig Exp $"); 41__RCSID("$NetBSD: io.c,v 1.206 2023/06/09 07:20:30 rillig Exp $");
42 42
43#include <stdio.h> 43#include <stdio.h>
44 44
45#include "indent.h" 45#include "indent.h"
46 46
47struct buffer inp; 47struct buffer inp;
48const char *inp_p; 48const char *inp_p;
49 49
50struct output_state out; 50struct output_state out;
51static int out_ind; /* width of the line that is being written */ 51static int out_ind; /* width of the line that is being written */
52static unsigned wrote_newlines = 2; /* 0 in the middle of a line, 1 after a 52static unsigned wrote_newlines = 2; /* 0 in the middle of a line, 1 after a
53 * single '\n', > 1 means there were (n 53 * single '\n', > 1 means there were (n
54 * - 1) blank lines above */ 54 * - 1) blank lines above */
55static int paren_indent; 55static int paren_indent;
56 56
57 57
58static void 58static void
59inp_read_next_line(FILE *f) 59inp_read_next_line(FILE *f)
60{ 60{
61 inp.len = 0; 61 inp.len = 0;
62 62
63 for (;;) { 63 for (;;) {
64 int ch = getc(f); 64 int ch = getc(f);
65 if (ch == EOF) { 65 if (ch == EOF) {
66 if (indent_enabled == indent_on) { 66 if (indent_enabled == indent_on) {
67 buf_add_char(&inp, ' '); 67 buf_add_char(&inp, ' ');
68 buf_add_char(&inp, '\n'); 68 buf_add_char(&inp, '\n');
69 } 69 }
70 had_eof = true; 70 had_eof = true;
71 break; 71 break;
72 } 72 }
73 73
74 if (ch != '\0') 74 if (ch != '\0')
75 buf_add_char(&inp, (char)ch); 75 buf_add_char(&inp, (char)ch);
76 if (ch == '\n') 76 if (ch == '\n')
77 break; 77 break;
78 } 78 }
79 inp_p = inp.s; 79 inp_p = inp.s;
80} 80}
81 81
82void 82void
83inp_read_line(void) 83inp_read_line(void)
84{ 84{
85 if (indent_enabled == indent_on) 85 if (indent_enabled == indent_on)
86 out.indent_off_text.len = 0; 86 out.indent_off_text.len = 0;
87 buf_add_chars(&out.indent_off_text, inp.s, inp.len); 87 buf_add_chars(&out.indent_off_text, inp.s, inp.len);
88 inp_read_next_line(input); 88 inp_read_next_line(input);
89} 89}
90 90
91void 91void
92inp_skip(void) 92inp_skip(void)
93{ 93{
94 inp_p++; 94 inp_p++;
95 if ((size_t)(inp_p - inp.s) >= inp.len) 95 if ((size_t)(inp_p - inp.s) >= inp.len)
96 inp_read_line(); 96 inp_read_line();
97} 97}
98 98
99char 99char
100inp_next(void) 100inp_next(void)
101{ 101{
102 char ch = inp_p[0]; 102 char ch = inp_p[0];
103 inp_skip(); 103 inp_skip();
104 return ch; 104 return ch;
105} 105}
106 106
107 107
108static void 108static void
109output_newline(void) 109output_newline(void)
110{ 110{
111 fputc('\n', output); 111 fputc('\n', output);
112 debug_println("output_newline"); 112 debug_println("output_newline");
113 wrote_newlines++; 113 wrote_newlines++;
114 out_ind = 0; 114 out_ind = 0;
115} 115}
116 116
117static void 117static void
118output_range(const char *s, size_t len) 118output_range(const char *s, size_t len)
119{ 119{
120 fwrite(s, 1, len, output); 120 fwrite(s, 1, len, output);
121 debug_vis_range("output_range \"", s, len, "\"\n"); 121 debug_vis_range("output_range \"", s, len, "\"\n");
122 for (size_t i = 0; i < len; i++) 122 for (size_t i = 0; i < len; i++)
123 wrote_newlines = s[i] == '\n' ? wrote_newlines + 1 : 0; 123 wrote_newlines = s[i] == '\n' ? wrote_newlines + 1 : 0;
124 out_ind = ind_add(out_ind, s, len); 124 out_ind = ind_add(out_ind, s, len);
125} 125}
126 126
127static void 127static void
128output_indent(int new_ind) 128output_indent(int new_ind)
129{ 129{
130 int ind = out_ind; 130 int ind = out_ind;
131 131
132 if (opt.use_tabs) { 132 if (opt.use_tabs) {
133 int n = new_ind / opt.tabsize - ind / opt.tabsize; 133 int n = new_ind / opt.tabsize - ind / opt.tabsize;
134 if (n > 0) { 134 if (n > 0) {
135 ind = ind - ind % opt.tabsize + n * opt.tabsize; 135 ind = ind - ind % opt.tabsize + n * opt.tabsize;
136 while (n-- > 0) 136 while (n-- > 0)
137 fputc('\t', output); 137 fputc('\t', output);
138 wrote_newlines = 0; 138 wrote_newlines = 0;
139 } 139 }
140 } 140 }
141 141
142 for (; ind < new_ind; ind++) { 142 for (; ind < new_ind; ind++) {
143 fputc(' ', output); 143 fputc(' ', output);
144 wrote_newlines = 0; 144 wrote_newlines = 0;
145 } 145 }
146 146
147 debug_println("output_indent %d", ind); 147 debug_println("output_indent %d", ind);
148 out_ind = ind; 148 out_ind = ind;
149} 149}
150 150
151static bool 151static bool
152want_blank_line(void) 152want_blank_line(void)
153{ 153{
154 debug_println("%s: %s -> %s", __func__, 154 debug_println("%s: %s -> %s", __func__,
155 line_kind_name[out.prev_line_kind], line_kind_name[out.line_kind]); 155 line_kind_name[out.prev_line_kind], line_kind_name[out.line_kind]);
156 156
157 if (ps.blank_line_after_decl && ps.declaration == decl_no) { 157 if (ps.blank_line_after_decl && ps.declaration == decl_no) {
158 ps.blank_line_after_decl = false; 158 ps.blank_line_after_decl = false;
159 return true; 159 return true;
160 } 160 }
161 if (opt.blanklines_around_conditional_compilation) { 161 if (opt.blanklines_around_conditional_compilation) {
162 if (out.prev_line_kind != lk_if && out.line_kind == lk_if) 162 if (out.prev_line_kind != lk_if && out.line_kind == lk_if)
163 return true; 163 return true;
164 if (out.prev_line_kind == lk_endif 164 if (out.prev_line_kind == lk_endif
165 && out.line_kind != lk_endif) 165 && out.line_kind != lk_endif)
166 return true; 166 return true;
167 } 167 }
168 if (opt.blanklines_after_procs && out.prev_line_kind == lk_func_end 168 if (opt.blanklines_after_procs && out.prev_line_kind == lk_func_end
169 && out.line_kind != lk_endif) 169 && out.line_kind != lk_endif)
170 return true; 170 return true;
171 if (opt.blanklines_before_block_comments 171 if (opt.blanklines_before_block_comments
172 && out.line_kind == lk_block_comment) 172 && out.line_kind == lk_block_comment)
173 return true; 173 return true;
174 return false; 174 return false;
175} 175}
176 176
177static bool 177static bool
178is_blank_line_optional(void) 178is_blank_line_optional(void)
179{ 179{
180 if (out.prev_line_kind == lk_stmt_head) 180 if (out.prev_line_kind == lk_stmt_head)
181 return wrote_newlines >= 1; 181 return wrote_newlines >= 1;
182 if (ps.psyms.top >= 2) 182 if (ps.psyms.top >= 2)
183 return wrote_newlines >= 2; 183 return wrote_newlines >= 2;
184 return wrote_newlines >= 3; 184 return wrote_newlines >= 3;
185} 185}
186 186
187static int 187static int
188compute_case_label_indent(void) 188compute_case_label_indent(void)
189{ 189{
190 int i = ps.psyms.top; 190 int i = ps.psyms.top;
191 while (i > 0 && ps.psyms.sym[i] != psym_switch_expr) 191 while (i > 0 && ps.psyms.sym[i] != psym_switch_expr)
192 i--; 192 i--;
193 float case_ind = (float)ps.psyms.ind_level[i] + opt.case_indent; 193 float case_ind = (float)ps.psyms.ind_level[i] + opt.case_indent;
194 return (int)(case_ind * (float)opt.indent_size); 194 return (int)(case_ind * (float)opt.indent_size);
195} 195}
196 196
197int 197int
198compute_label_indent(void) 198compute_label_indent(void)
199{ 199{
200 if (out.line_kind == lk_case_or_default) 200 if (out.line_kind == lk_case_or_default)
201 return compute_case_label_indent(); 201 return compute_case_label_indent();
202 if (lab.s[0] == '#') 202 if (lab.s[0] == '#')
203 return 0; 203 return 0;
204 return opt.indent_size * (ps.ind_level - 2); 204 return opt.indent_size * (ps.ind_level - 2);
205} 205}
206 206
207static void 207static void
208output_line_label(void) 208output_line_label(void)
209{ 209{
210 output_indent(compute_label_indent()); 210 output_indent(compute_label_indent());
211 output_range(lab.s, lab.len); 211 output_range(lab.s, lab.len);
212} 212}
213 213
214static int 214static int
215compute_code_indent_lineup(int base_ind) 215compute_code_indent_lineup(int base_ind)
216{ 216{
217 int ind = paren_indent; 217 int ind = paren_indent;
218 int overflow = ind_add(ind, code.s, code.len) - opt.max_line_length; 218 int overflow = ind_add(ind, code.s, code.len) - opt.max_line_length;
219 if (overflow < 0) 219 if (overflow < 0)
220 return ind; 220 return ind;
221 221
222 if (ind_add(base_ind, code.s, code.len) < opt.max_line_length) { 222 if (ind_add(base_ind, code.s, code.len) < opt.max_line_length) {
223 ind -= overflow + 2; 223 ind -= overflow + 2;
224 if (ind > base_ind) 224 if (ind > base_ind)
225 return ind; 225 return ind;
226 return base_ind; 226 return base_ind;
227 } 227 }
228 228
229 return ind; 229 return ind;
230} 230}
231 231
232int 232int
233compute_code_indent(void) 233compute_code_indent(void)
234{ 234{
235 int base_ind = ps.ind_level * opt.indent_size; 235 int base_ind = ps.ind_level * opt.indent_size;
236 236
237 if (ps.line_start_nparen == 0) { 237 if (ps.line_start_nparen == 0) {
238 if (ps.psyms.top >= 1 238 if (ps.psyms.top >= 1
239 && ps.psyms.sym[ps.psyms.top - 1] == psym_lbrace_enum) 239 && ps.psyms.sym[ps.psyms.top - 1] == psym_lbrace_enum)
240 return base_ind; 240 return base_ind;
241 if (ps.in_stmt_cont) 241 if (ps.in_stmt_cont)
242 return base_ind + opt.continuation_indent; 242 return base_ind + opt.continuation_indent;
243 return base_ind; 243 return base_ind;
244 } 244 }
245 245
246 if (opt.lineup_to_parens) { 246 if (opt.lineup_to_parens) {
247 if (opt.lineup_to_parens_always) 247 if (opt.lineup_to_parens_always)
248 return paren_indent; 248 return paren_indent;
249 return compute_code_indent_lineup(base_ind); 249 return compute_code_indent_lineup(base_ind);
250 } 250 }
251 251
252 if (ps.extra_expr_indent != eei_no) 252 if (ps.extra_expr_indent != eei_no)
253 return base_ind + 2 * opt.continuation_indent; 253 return base_ind + 2 * opt.continuation_indent;
254 254
255 return base_ind + opt.continuation_indent * ps.line_start_nparen; 255 return base_ind + opt.continuation_indent * ps.line_start_nparen;
256} 256}
257 257
258static void 258static void
259output_line_code(void) 259output_line_code(void)
260{ 260{
261 int target_ind = compute_code_indent(); 261 int target_ind = compute_code_indent();
262 for (int i = 0; i < ps.nparen; i++) { 262 for (int i = 0; i < ps.nparen; i++) {
263 int paren_ind = ps.paren[i].indent; 263 int paren_ind = ps.paren[i].indent;
264 if (paren_ind >= 0) { 264 if (paren_ind >= 0) {
265 ps.paren[i].indent = -1 - (paren_ind + target_ind); 265 ps.paren[i].indent = -1 - (paren_ind + target_ind);
266 debug_println( 266 debug_println(
267 "setting paren_indents[%d] from %d to %d " 267 "setting paren_indents[%d] from %d to %d "
268 "for column %d", 268 "for column %d",
269 i, paren_ind, ps.paren[i].indent, target_ind + 1); 269 i, paren_ind, ps.paren[i].indent, target_ind + 1);
270 } 270 }
271 } 271 }
272 272
273 if (lab.len > 0 && target_ind <= out_ind) 273 if (lab.len > 0 && target_ind <= out_ind)
274 output_range(" ", 1); 274 output_range(" ", 1);
275 output_indent(target_ind); 275 output_indent(target_ind);
276 output_range(code.s, code.len); 276 output_range(code.s, code.len);
277} 277}
278 278
279static void 279static void
280output_line_comment(void) 280output_line_comment(void)
281{ 281{
282 int target_ind = ps.com_ind + ps.comment_delta; 282 int target_ind = ps.com_ind + ps.comment_delta;
283 const char *p; 283 const char *p;
284 284
285 /* consider original indentation in case this is a box comment */ 285 /* consider original indentation in case this is a box comment */
286 for (p = com.s; *p == '\t'; p++) 286 for (p = com.s; *p == '\t'; p++)
287 target_ind += opt.tabsize; 287 target_ind += opt.tabsize;
288 288
289 for (; target_ind < 0; p++) { 289 for (; target_ind < 0; p++) {
290 if (*p == ' ') 290 if (*p == ' ')
291 target_ind++; 291 target_ind++;
292 else if (*p == '\t') 292 else if (*p == '\t')
293 target_ind = next_tab(target_ind); 293 target_ind = next_tab(target_ind);
294 else { 294 else {
295 target_ind = 0; 295 target_ind = 0;
296 break; 296 break;
297 } 297 }
298 } 298 }
299 299
300 if (out_ind > target_ind) 300 if (out_ind > target_ind)
301 output_newline(); 301 output_newline();
302 302
303 while (com.s + com.len > p && ch_isspace(com.s[com.len - 1])) 303 while (com.s + com.len > p && ch_isspace(com.s[com.len - 1]))
304 com.len--; 304 com.len--;
305 305
306 output_indent(target_ind); 306 output_indent(target_ind);
307 output_range(p, com.len - (size_t)(p - com.s)); 307 output_range(p, com.len - (size_t)(p - com.s));
308 308
309 ps.comment_delta = ps.n_comment_delta; 309 ps.comment_delta = ps.n_comment_delta;
310} 310}
311 311
312/* 312/*
313 * Write a line of formatted source to the output file. The line consists of 313 * Write a line of formatted source to the output file. The line consists of
314 * the label, the code and the comment. 314 * the label, the code and the comment.
315 */ 315 */
316void 316void
317output_line(void) 317output_line(void)
318{ 318{
319 debug_blank_line(); 319 debug_blank_line();
320 debug_printf("%s", __func__); 320 debug_printf("%s", __func__);
321 debug_buffers(); 321 debug_buffers();
322 322
323 ps.is_function_definition = false; 323 ps.is_function_definition = false;
324 324
325 if (indent_enabled == indent_on) { 325 if (indent_enabled == indent_on) {
326 if (lab.len == 0 && code.len == 0 && com.len == 0) 326 if (lab.len == 0 && code.len == 0 && com.len == 0)
327 out.line_kind = lk_blank; 327 out.line_kind = lk_blank;
328 328
329 if (want_blank_line() && wrote_newlines < 2 329 if (want_blank_line() && wrote_newlines < 2
330 && out.line_kind != lk_blank) 330 && out.line_kind != lk_blank)
331 output_newline(); 331 output_newline();
332 332
333 /* This kludge aligns function definitions correctly. */ 333 /* This kludge aligns function definitions correctly. */
334 if (ps.ind_level == 0) 334 if (ps.ind_level == 0)
335 ps.in_stmt_cont = false; 335 ps.in_stmt_cont = false;
336 336
337 if (opt.blank_line_after_decl && ps.declaration == decl_end 337 if (opt.blank_line_after_decl && ps.declaration == decl_end
338 && ps.psyms.top > 1) { 338 && ps.psyms.top > 1) {
339 ps.declaration = decl_no; 339 ps.declaration = decl_no;
340 ps.blank_line_after_decl = true; 340 ps.blank_line_after_decl = true;
341 } 341 }
342 342
343 if (opt.swallow_optional_blanklines 343 if (opt.swallow_optional_blanklines
344 && out.line_kind == lk_blank 344 && out.line_kind == lk_blank
345 && is_blank_line_optional()) 345 && is_blank_line_optional())
346 goto prepare_next_line; 346 goto prepare_next_line;
347 347
348 if (lab.len > 0) 348 if (lab.len > 0)
349 output_line_label(); 349 output_line_label();
350 if (code.len > 0) 350 if (code.len > 0)
351 output_line_code(); 351 output_line_code();
352 if (com.len > 0) 352 if (com.len > 0)
353 output_line_comment(); 353 output_line_comment();
354 354
355 output_newline(); 355 output_newline();
356 out.prev_line_kind = out.line_kind; 356 out.prev_line_kind = out.line_kind;
357 } 357 }
358 358
359 if (indent_enabled == indent_last_off_line) { 359 if (indent_enabled == indent_last_off_line) {
360 indent_enabled = indent_on; 360 indent_enabled = indent_on;
361 output_range(out.indent_off_text.s, out.indent_off_text.len); 361 output_range(out.indent_off_text.s, out.indent_off_text.len);
362 out.indent_off_text.len = 0; 362 out.indent_off_text.len = 0;
363 } 363 }
364 364
365prepare_next_line: 365prepare_next_line:
366 lab.len = 0; 366 lab.len = 0;
367 code.len = 0; 367 code.len = 0;
368 com.len = 0; 368 com.len = 0;
369 369
370 ps.decl_on_line = ps.in_decl; 370 ps.decl_on_line = ps.in_decl;
371 // XXX: don't reset in_stmt_cont here; see process_colon_question. 371 // XXX: don't reset in_stmt_cont here; see process_colon_question.
372 ps.in_stmt_cont = ps.in_stmt_or_decl 372 ps.in_stmt_cont = ps.in_stmt_or_decl
373 && !ps.in_decl && ps.block_init_level == 0; 373 && !ps.in_decl && ps.block_init_level == 0;
374 ps.decl_indent_done = false; 374 ps.decl_indent_done = false;
375 if (ps.extra_expr_indent == eei_last) 375 if (ps.extra_expr_indent == eei_last)
376 ps.extra_expr_indent = eei_no; 376 ps.extra_expr_indent = eei_no;
377 if (!(ps.psyms.sym[ps.psyms.top] == psym_if_expr_stmt_else 377 if (!(ps.psyms.sym[ps.psyms.top] == psym_if_expr_stmt_else
378 && ps.nparen > 0)) 378 && ps.nparen > 0))
379 ps.ind_level = ps.ind_level_follow; 379 ps.ind_level = ps.ind_level_follow;
380 ps.line_start_nparen = ps.nparen; 380 ps.line_start_nparen = ps.nparen;
381 ps.want_blank = false; 381 ps.want_blank = false;
382 382
383 if (ps.nparen > 0) { 383 if (ps.nparen > 0) {
384 /* TODO: explain what negative indentation means */ 384 /* TODO: explain what negative indentation means */
385 paren_indent = -1 - ps.paren[ps.nparen - 1].indent; 385 paren_indent = -1 - ps.paren[ps.nparen - 1].indent;
386 debug_println("paren_indent is now %d", paren_indent); 386 debug_println("paren_indent is now %d", paren_indent);
387 } 387 }
388 388
389 out.line_kind = lk_other; 389 out.line_kind = lk_other;
390} 390}

cvs diff -r1.217 -r1.218 src/usr.bin/indent/lexi.c (switch to unified diff)

--- src/usr.bin/indent/lexi.c 2023/06/08 21:18:54 1.217
+++ src/usr.bin/indent/lexi.c 2023/06/09 07:20:30 1.218
@@ -1,703 +1,703 @@ @@ -1,703 +1,703 @@
1/* $NetBSD: lexi.c,v 1.217 2023/06/08 21:18:54 rillig Exp $ */ 1/* $NetBSD: lexi.c,v 1.218 2023/06/09 07:20:30 rillig Exp $ */
2 2
3/*- 3/*-
4 * SPDX-License-Identifier: BSD-4-Clause 4 * SPDX-License-Identifier: BSD-4-Clause
5 * 5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc. 6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993 7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved. 8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved. 9 * All rights reserved.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software 19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement: 20 * must display the following acknowledgement:
21 * This product includes software developed by the University of 21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors. 22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors 23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software 24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission. 25 * without specific prior written permission.
26 * 26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE. 37 * SUCH DAMAGE.
38 */ 38 */
39 39
40#include <sys/cdefs.h> 40#include <sys/cdefs.h>
41__RCSID("$NetBSD: lexi.c,v 1.217 2023/06/08 21:18:54 rillig Exp $"); 41__RCSID("$NetBSD: lexi.c,v 1.218 2023/06/09 07:20:30 rillig Exp $");
42 42
43#include <stdlib.h> 43#include <stdlib.h>
44#include <string.h> 44#include <string.h>
45 45
46#include "indent.h" 46#include "indent.h"
47 47
48/* In lexi_alnum, this constant marks a type, independent of parentheses. */ 48/* In lexi_alnum, this constant marks a type, independent of parentheses. */
49#define lsym_type lsym_type_outside_parentheses 49#define lsym_type lsym_type_outside_parentheses
50 50
51/* must be sorted alphabetically, is used in binary search */ 51/* must be sorted alphabetically, is used in binary search */
52static const struct keyword { 52static const struct keyword {
53 const char name[12]; 53 const char name[12];
54 lexer_symbol lsym; 54 lexer_symbol lsym;
55} keywords[] = { 55} keywords[] = {
56 {"_Bool", lsym_type}, 56 {"_Bool", lsym_type},
57 {"_Complex", lsym_type}, 57 {"_Complex", lsym_type},
58 {"_Imaginary", lsym_type}, 58 {"_Imaginary", lsym_type},
59 {"auto", lsym_modifier}, 59 {"auto", lsym_modifier},
60 {"bool", lsym_type}, 60 {"bool", lsym_type},
61 {"break", lsym_word}, 61 {"break", lsym_word},
62 {"case", lsym_case}, 62 {"case", lsym_case},
63 {"char", lsym_type}, 63 {"char", lsym_type},
64 {"complex", lsym_type}, 64 {"complex", lsym_type},
65 {"const", lsym_modifier}, 65 {"const", lsym_modifier},
66 {"continue", lsym_word}, 66 {"continue", lsym_word},
67 {"default", lsym_default}, 67 {"default", lsym_default},
68 {"do", lsym_do}, 68 {"do", lsym_do},
69 {"double", lsym_type}, 69 {"double", lsym_type},
70 {"else", lsym_else}, 70 {"else", lsym_else},
71 {"enum", lsym_tag}, 71 {"enum", lsym_tag},
72 {"extern", lsym_modifier}, 72 {"extern", lsym_modifier},
73 {"float", lsym_type}, 73 {"float", lsym_type},
74 {"for", lsym_for}, 74 {"for", lsym_for},
75 {"goto", lsym_word}, 75 {"goto", lsym_word},
76 {"if", lsym_if}, 76 {"if", lsym_if},
77 {"imaginary", lsym_type}, 77 {"imaginary", lsym_type},
78 {"inline", lsym_modifier}, 78 {"inline", lsym_modifier},
79 {"int", lsym_type}, 79 {"int", lsym_type},
80 {"long", lsym_type}, 80 {"long", lsym_type},
81 {"offsetof", lsym_offsetof}, 81 {"offsetof", lsym_offsetof},
82 {"register", lsym_modifier}, 82 {"register", lsym_modifier},
83 {"restrict", lsym_word}, 83 {"restrict", lsym_word},
84 {"return", lsym_return}, 84 {"return", lsym_return},
85 {"short", lsym_type}, 85 {"short", lsym_type},
86 {"signed", lsym_type}, 86 {"signed", lsym_type},
87 {"sizeof", lsym_sizeof}, 87 {"sizeof", lsym_sizeof},
88 {"static", lsym_modifier}, 88 {"static", lsym_modifier},
89 {"struct", lsym_tag}, 89 {"struct", lsym_tag},
90 {"switch", lsym_switch}, 90 {"switch", lsym_switch},
91 {"typedef", lsym_typedef}, 91 {"typedef", lsym_typedef},
92 {"union", lsym_tag}, 92 {"union", lsym_tag},
93 {"unsigned", lsym_type}, 93 {"unsigned", lsym_type},
94 {"void", lsym_type}, 94 {"void", lsym_type},
95 {"volatile", lsym_modifier}, 95 {"volatile", lsym_modifier},
96 {"while", lsym_while} 96 {"while", lsym_while}
97}; 97};
98 98
99static struct { 99static struct {
100 const char **items; 100 const char **items;
101 unsigned int len; 101 unsigned int len;
102 unsigned int cap; 102 unsigned int cap;
103} typenames; 103} typenames;
104 104
105/*- 105/*-
106 * The transition table below was rewritten by hand from lx's output, given 106 * The transition table below was rewritten by hand from lx's output, given
107 * the following definitions. lx is Katherine Flavel's lexer generator. 107 * the following definitions. lx is Katherine Flavel's lexer generator.
108 * 108 *
109 * O = /[0-7]/; D = /[0-9]/; NZ = /[1-9]/; 109 * O = /[0-7]/; D = /[0-9]/; NZ = /[1-9]/;
110 * H = /[a-f0-9]/i; B = /[0-1]/; HP = /0x/i; 110 * H = /[a-f0-9]/i; B = /[0-1]/; HP = /0x/i;
111 * BP = /0b/i; E = /e[+\-]?/i D+; P = /p[+\-]?/i D+; 111 * BP = /0b/i; E = /e[+\-]?/i D+; P = /p[+\-]?/i D+;
112 * FS = /[fl]/i; IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?; 112 * FS = /[fl]/i; IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?;
113 * 113 *
114 * D+ E FS? -> $float; 114 * D+ E FS? -> $float;
115 * D* "." D+ E? FS? -> $float; 115 * D* "." D+ E? FS? -> $float;
116 * D+ "." E? FS? -> $float; HP H+ IS? -> $int; 116 * D+ "." E? FS? -> $float; HP H+ IS? -> $int;
117 * HP H+ P FS? -> $float; NZ D* IS? -> $int; 117 * HP H+ P FS? -> $float; NZ D* IS? -> $int;
118 * HP H* "." H+ P FS? -> $float; "0" O* IS? -> $int; 118 * HP H* "." H+ P FS? -> $float; "0" O* IS? -> $int;
119 * HP H+ "." P FS -> $float; BP B+ IS? -> $int; 119 * HP H+ "." P FS -> $float; BP B+ IS? -> $int;
120 */ 120 */
121/* INDENT OFF */ 121/* INDENT OFF */
122static const unsigned char lex_number_state[][26] = { 122static const unsigned char lex_number_state[][26] = {
123 /* examples: 123 /* examples:
124 00 124 00
125 s 0xx 125 s 0xx
126 t 00xaa 126 t 00xaa
127 a 11 101100xxa.. 127 a 11 101100xxa..
128 r 11ee0001101lbuuxx.a.pp 128 r 11ee0001101lbuuxx.a.pp
129 t.01.e+008bLuxll0Ll.aa.p+0 129 t.01.e+008bLuxll0Ll.aa.p+0
130 states: ABCDEFGHIJKLMNOPQRSTUVWXYZ */ 130 states: ABCDEFGHIJKLMNOPQRSTUVWXYZ */
131 [0] = "uuiifuufiuuiiuiiiiiuiuuuuu", /* (other) */ 131 [0] = "uuiifuufiuuiiuiiiiiuiuuuuu", /* (other) */
132 [1] = "CEIDEHHHIJQ U Q VUVVZZZ", /* 0 */ 132 [1] = "CEIDEHHHIJQ U Q VUVVZZZ", /* 0 */
133 [2] = "DEIDEHHHIJQ U Q VUVVZZZ", /* 1 */ 133 [2] = "DEIDEHHHIJQ U Q VUVVZZZ", /* 1 */
134 [3] = "DEIDEHHHIJ U VUVVZZZ", /* 2 3 4 5 6 7 */ 134 [3] = "DEIDEHHHIJ U VUVVZZZ", /* 2 3 4 5 6 7 */
135 [4] = "DEJDEHHHJJ U VUVVZZZ", /* 8 9 */ 135 [4] = "DEJDEHHHJJ U VUVVZZZ", /* 8 9 */
136 [5] = " U VUVV ", /* A a C c D d */ 136 [5] = " U VUVV ", /* A a C c D d */
137 [6] = " K U VUVV ", /* B b */ 137 [6] = " K U VUVV ", /* B b */
138 [7] = " FFF FF U VUVV ", /* E e */ 138 [7] = " FFF FF U VUVV ", /* E e */
139 [8] = " f f U VUVV f", /* F f */ 139 [8] = " f f U VUVV f", /* F f */
140 [9] = " LLf fL PR Li L f", /* L */ 140 [9] = " LLf fL PR Li L f", /* L */
141 [10] = " OOf fO S P O i O f", /* l */ 141 [10] = " OOf fO S P O i O f", /* l */
142 [11] = " FFX ", /* P p */ 142 [11] = " FFX ", /* P p */
143 [12] = " MM M i iiM M ", /* U u */ 143 [12] = " MM M i iiM M ", /* U u */
144 [13] = " N ", /* X x */ 144 [13] = " N ", /* X x */
145 [14] = " G Y ", /* + - */ 145 [14] = " G Y ", /* + - */
146 [15] = "B EE EE T W ", /* . */ 146 [15] = "B EE EE T W ", /* . */
147 /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ 147 /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
148}; 148};
149/* INDENT ON */ 149/* INDENT ON */
150 150
/*
 * Maps an input character to its row in lex_number_state.  Characters
 * without an entry map to 0, which lex_number treats as "not part of a
 * number".
 */
static const unsigned char lex_number_row[] = {
	/* digits */
	['0'] = 1,
	['1'] = 2,
	['2'] = 3, ['3'] = 3, ['4'] = 3, ['5'] = 3, ['6'] = 3, ['7'] = 3,
	['8'] = 4, ['9'] = 4,

	/* letters, upper and lower case share a row except for 'L' and 'l' */
	['A'] = 5, ['C'] = 5, ['D'] = 5,
	['a'] = 5, ['c'] = 5, ['d'] = 5,
	['B'] = 6, ['b'] = 6,
	['E'] = 7, ['e'] = 7,
	['F'] = 8, ['f'] = 8,
	['L'] = 9,
	['l'] = 10,
	['P'] = 11, ['p'] = 11,
	['U'] = 12, ['u'] = 12,
	['X'] = 13, ['x'] = 13,

	/* punctuation */
	['+'] = 14, ['-'] = 14,
	['.'] = 15,
};
168 168
169 169
170static void 170static void
171token_add_char(char ch) 171token_add_char(char ch)
172{ 172{
173 buf_add_char(&token, ch); 173 buf_add_char(&token, ch);
174} 174}
175 175
176static void 176static void
177lex_number(void) 177lex_number(void)
178{ 178{
179 for (unsigned char s = 'A'; s != 'f' && s != 'i' && s != 'u';) { 179 for (unsigned char s = 'A'; s != 'f' && s != 'i' && s != 'u';) {
180 unsigned char ch = (unsigned char)inp_p[0]; 180 unsigned char ch = (unsigned char)inp_p[0];
181 if (ch == '\\' && inp_p[1] == '\n') { 181 if (ch == '\\' && inp_p[1] == '\n') {
182 inp_p++; 182 inp_p++;
183 inp_skip(); 183 inp_skip();
184 line_no++; 184 line_no++;
185 continue; 185 continue;
186 } 186 }
187 if (ch >= array_length(lex_number_row) 187 if (ch >= array_length(lex_number_row)
188 || lex_number_row[ch] == 0) 188 || lex_number_row[ch] == 0)
189 break; 189 break;
190 190
191 unsigned char row = lex_number_row[ch]; 191 unsigned char row = lex_number_row[ch];
192 if (lex_number_state[row][s - 'A'] == ' ') { 192 if (lex_number_state[row][s - 'A'] == ' ') {
193 /*- 193 /*-
194 * lex_number_state[0][s - 'A'] now indicates the type: 194 * lex_number_state[0][s - 'A'] now indicates the type:
195 * f = floating, i = integer, u = unknown 195 * f = floating, i = integer, u = unknown
196 */ 196 */
197 return; 197 return;
198 } 198 }
199 199
200 s = lex_number_state[row][s - 'A']; 200 s = lex_number_state[row][s - 'A'];
201 token_add_char(inp_next()); 201 token_add_char(inp_next());
202 } 202 }
203} 203}
204 204
/* Whether 'ch' may begin an identifier: a letter, '_' or '$'. */
static bool
is_identifier_start(char ch)
{
	if (ch_isalpha(ch))
		return true;
	return ch == '_' || ch == '$';
}
210 210
/* Whether 'ch' may continue an identifier: alphanumeric, '_' or '$'. */
static bool
is_identifier_part(char ch)
{
	if (ch_isalnum(ch))
		return true;
	return ch == '_' || ch == '$';
}
216 216
217static void 217static void
218lex_word(void) 218lex_word(void)
219{ 219{
220 for (;;) { 220 for (;;) {
221 if (is_identifier_part(inp_p[0])) 221 if (is_identifier_part(inp_p[0]))
222 token_add_char(*inp_p++); 222 token_add_char(*inp_p++);
223 else if (inp_p[0] == '\\' && inp_p[1] == '\n') { 223 else if (inp_p[0] == '\\' && inp_p[1] == '\n') {
224 inp_p++; 224 inp_p++;
225 inp_skip(); 225 inp_skip();
226 line_no++; 226 line_no++;
227 } else 227 } else
228 return; 228 return;
229 } 229 }
230} 230}
231 231
232static void 232static void
233lex_char_or_string(void) 233lex_char_or_string(void)
234{ 234{
235 for (char delim = token.s[token.len - 1];;) { 235 for (char delim = token.s[token.len - 1];;) {
236 if (inp_p[0] == '\n') { 236 if (inp_p[0] == '\n') {
237 diag(1, "Unterminated literal"); 237 diag(1, "Unterminated literal");
238 return; 238 return;
239 } 239 }
240 240
241 token_add_char(*inp_p++); 241 token_add_char(*inp_p++);
242 if (token.s[token.len - 1] == delim) 242 if (token.s[token.len - 1] == delim)
243 return; 243 return;
244 244
245 if (token.s[token.len - 1] == '\\') { 245 if (token.s[token.len - 1] == '\\') {
246 if (inp_p[0] == '\n') 246 if (inp_p[0] == '\n')
247 ++line_no; 247 ++line_no;
248 token_add_char(inp_next()); 248 token_add_char(inp_next());
249 } 249 }
250 } 250 }
251} 251}
252 252
253/* Guess whether the current token is a declared type. */ 253/* Guess whether the current token is a declared type. */
254static bool 254static bool
255probably_typename(void) 255probably_typename(void)
256{ 256{
257 if (ps.prev_lsym == lsym_modifier) 257 if (ps.prev_lsym == lsym_modifier)
258 return true; 258 return true;
259 if (ps.block_init) 259 if (ps.block_init)
260 return false; 260 return false;
261 if (ps.in_stmt_or_decl) /* XXX: this condition looks incorrect */ 261 if (ps.in_stmt_or_decl) /* XXX: this condition looks incorrect */
262 return false; 262 return false;
263 if (inp_p[0] == '*' && inp_p[1] != '=') 263 if (inp_p[0] == '*' && inp_p[1] != '=')
264 goto maybe; 264 goto maybe;
265 /* XXX: is_identifier_start */ 265 /* XXX: is_identifier_start */
266 if (ch_isalpha(inp_p[0])) 266 if (ch_isalpha(inp_p[0]))
267 goto maybe; 267 goto maybe;
268 return false; 268 return false;
269maybe: 269maybe:
270 return ps.prev_lsym == lsym_semicolon || 270 return ps.prev_lsym == lsym_semicolon ||
271 ps.prev_lsym == lsym_lbrace || 271 ps.prev_lsym == lsym_lbrace ||
272 ps.prev_lsym == lsym_rbrace; 272 ps.prev_lsym == lsym_rbrace;
273} 273}
274 274
275static int 275static int
276bsearch_typenames(const char *key) 276bsearch_typenames(const char *key)
277{ 277{
278 const char **arr = typenames.items; 278 const char **arr = typenames.items;
279 int lo = 0; 279 int lo = 0;
280 int hi = (int)typenames.len - 1; 280 int hi = (int)typenames.len - 1;
281 281
282 while (lo <= hi) { 282 while (lo <= hi) {
283 int mid = (int)((unsigned)(lo + hi) >> 1); 283 int mid = (int)((unsigned)(lo + hi) >> 1);
284 int cmp = strcmp(arr[mid], key); 284 int cmp = strcmp(arr[mid], key);
285 if (cmp < 0) 285 if (cmp < 0)
286 lo = mid + 1; 286 lo = mid + 1;
287 else if (cmp > 0) 287 else if (cmp > 0)
288 hi = mid - 1; 288 hi = mid - 1;
289 else 289 else
290 return mid; 290 return mid;
291 } 291 }
292 return -(lo + 1); 292 return -(lo + 1);
293} 293}
294 294
295static bool 295static bool
296is_typename(void) 296is_typename(void)
297{ 297{
298 if (opt.auto_typedefs && 298 if (opt.auto_typedefs &&
299 token.len >= 2 && memcmp(token.s + token.len - 2, "_t", 2) == 0) 299 token.len >= 2 && memcmp(token.s + token.len - 2, "_t", 2) == 0)
300 return true; 300 return true;
301 301
302 return bsearch_typenames(token.s) >= 0; 302 return bsearch_typenames(token.s) >= 0;
303} 303}
304 304
305static int 305static int
306cmp_keyword_by_name(const void *key, const void *elem) 306cmp_keyword_by_name(const void *key, const void *elem)
307{ 307{
308 return strcmp(key, ((const struct keyword *)elem)->name); 308 return strcmp(key, ((const struct keyword *)elem)->name);
309} 309}
310 310
311/* 311/*
312 * Looking at something like 'function_name(...)' in a line, guess whether 312 * Looking at something like 'function_name(...)' in a line, guess whether
313 * this starts a function definition or a declaration. 313 * this starts a function definition or a declaration.
314 */ 314 */
315static bool 315static bool
316probably_looking_at_definition(void) 316probably_looking_at_definition(void)
317{ 317{
318 int paren_level = 0; 318 int paren_level = 0;
319 for (const char *p = inp_p; *p != '\n'; p++) { 319 for (const char *p = inp_p; *p != '\n'; p++) {
320 if (*p == '(') 320 if (*p == '(')
321 paren_level++; 321 paren_level++;
322 if (*p == ')' && --paren_level == 0) { 322 if (*p == ')' && --paren_level == 0) {
323 p++; 323 p++;
324 324
325 while (*p != '\n' 325 while (*p != '\n'
326 && (ch_isspace(*p) || is_identifier_part(*p))) 326 && (ch_isspace(*p) || is_identifier_part(*p)))
327 p++; /* '__dead' or '__unused' */ 327 p++; /* '__dead' or '__unused' */
328 328
329 if (*p == '\n') /* func(...) */ 329 if (*p == '\n') /* func(...) */
330 break; 330 break;
331 if (*p == ';') /* func(...); */ 331 if (*p == ';') /* func(...); */
332 return false; 332 return false;
333 if (*p == ',') /* double abs(), pi; */ 333 if (*p == ',') /* double abs(), pi; */
334 return false; 334 return false;
335 if (*p == '(') /* func(...) __attribute__((...)) */ 335 if (*p == '(') /* func(...) __attribute__((...)) */
336 paren_level++; /* func(...) __printflike(...) 336 paren_level++; /* func(...) __printflike(...)
337 */ 337 */
338 else 338 else
339 break; /* func(...) { ... */ 339 break; /* func(...) { ... */
340 } 340 }
341 } 341 }
342 342
343 /* To further reduce the cases where indent wrongly treats an 343 /* To further reduce the cases where indent wrongly treats an
344 * incomplete function declaration as a function definition, thus 344 * incomplete function declaration as a function definition, thus
345 * adding a newline before the function name, it may be worth looking 345 * adding a newline before the function name, it may be worth looking
346 * for parameter names, as these are often omitted in function 346 * for parameter names, as these are often omitted in function
347 * declarations and only included in function definitions. Or just 347 * declarations and only included in function definitions. Or just
348 * increase the lookahead to more than just the current line of input, 348 * increase the lookahead to more than just the current line of input,
349 * until the next '{'. */ 349 * until the next '{'. */
350 return true; 350 return true;
351} 351}
352 352
353/* Read an alphanumeric token into 'token', or return lsym_eof. */ 353/* Read an alphanumeric token into 'token', or return lsym_eof. */
354static lexer_symbol 354static lexer_symbol
355lexi_alnum(void) 355lexi_alnum(void)
356{ 356{
357 if (ch_isdigit(inp_p[0]) || 357 if (ch_isdigit(inp_p[0]) ||
358 (inp_p[0] == '.' && ch_isdigit(inp_p[1]))) { 358 (inp_p[0] == '.' && ch_isdigit(inp_p[1]))) {
359 lex_number(); 359 lex_number();
360 } else if (is_identifier_start(inp_p[0])) { 360 } else if (is_identifier_start(inp_p[0])) {
361 lex_word(); 361 lex_word();
362 362
363 if (token.len == 1 && token.s[0] == 'L' && 363 if (token.len == 1 && token.s[0] == 'L' &&
364 (inp_p[0] == '"' || inp_p[0] == '\'')) { 364 (inp_p[0] == '"' || inp_p[0] == '\'')) {
365 token_add_char(*inp_p++); 365 token_add_char(*inp_p++);
366 lex_char_or_string(); 366 lex_char_or_string();
367 ps.next_unary = false; 367 ps.next_unary = false;
368 return lsym_word; 368 return lsym_word;
369 } 369 }
370 } else 370 } else
371 return lsym_eof; /* just as a placeholder */ 371 return lsym_eof; /* just as a placeholder */
372 372
373 while (ch_isblank(inp_p[0])) 373 while (ch_isblank(inp_p[0]))
374 inp_p++; 374 inp_p++;
375 375
376 ps.next_unary = ps.prev_lsym == lsym_tag 376 ps.next_unary = ps.prev_lsym == lsym_tag
377 || ps.prev_lsym == lsym_typedef; 377 || ps.prev_lsym == lsym_typedef;
378 378
379 if (ps.prev_lsym == lsym_tag && ps.nparen == 0) 379 if (ps.prev_lsym == lsym_tag && ps.nparen == 0)
380 return lsym_type_outside_parentheses; 380 return lsym_type_outside_parentheses;
381 381
382 token_add_char('\0'); 382 token_add_char('\0');
383 token.len--; 383 token.len--;
384 const struct keyword *kw = bsearch(token.s, keywords, 384 const struct keyword *kw = bsearch(token.s, keywords,
385 array_length(keywords), sizeof(keywords[0]), cmp_keyword_by_name); 385 array_length(keywords), sizeof(keywords[0]), cmp_keyword_by_name);
386 lexer_symbol lsym = lsym_word; 386 lexer_symbol lsym = lsym_word;
387 if (kw != NULL) { 387 if (kw != NULL) {
388 if (kw->lsym == lsym_type) 388 if (kw->lsym == lsym_type)
389 lsym = lsym_type_in_parentheses; 389 lsym = lsym_type_in_parentheses;
390 ps.next_unary = true; 390 ps.next_unary = true;
391 if (kw->lsym == lsym_tag || kw->lsym == lsym_type) 391 if (kw->lsym == lsym_tag || kw->lsym == lsym_type)
392 goto found_typename; 392 goto found_typename;
393 return kw->lsym; 393 return kw->lsym;
394 } 394 }
395 395
396 if (is_typename()) { 396 if (is_typename()) {
397 lsym = lsym_type_in_parentheses; 397 lsym = lsym_type_in_parentheses;
398 ps.next_unary = true; 398 ps.next_unary = true;
399found_typename: 399found_typename:
400 if (ps.nparen > 0) { 400 if (ps.nparen > 0) {
401 /* inside parentheses: cast, param list, offsetof or 401 /* inside parentheses: cast, param list, offsetof or
402 * sizeof */ 402 * sizeof */
403 if (ps.paren[ps.nparen - 1].cast == cast_unknown) 403 if (ps.paren[ps.nparen - 1].cast == cast_unknown)
404 ps.paren[ps.nparen - 1].cast = cast_maybe; 404 ps.paren[ps.nparen - 1].cast = cast_maybe;
405 } 405 }
406 if (ps.prev_lsym != lsym_period 406 if (ps.prev_lsym != lsym_period
407 && ps.prev_lsym != lsym_unary_op) { 407 && ps.prev_lsym != lsym_unary_op) {
408 if (kw != NULL && kw->lsym == lsym_tag) 408 if (kw != NULL && kw->lsym == lsym_tag)
409 return lsym_tag; 409 return lsym_tag;
410 if (ps.nparen == 0) 410 if (ps.nparen == 0)
411 return lsym_type_outside_parentheses; 411 return lsym_type_outside_parentheses;
412 } 412 }
413 } 413 }
414 414
415 if (inp_p[0] == '(' && ps.psyms.top <= 1 && ps.ind_level == 0 && 415 if (inp_p[0] == '(' && ps.psyms.top <= 1 && ps.ind_level == 0 &&
416 !ps.in_func_def_params && !ps.block_init) { 416 !ps.in_func_def_params && !ps.block_init) {
417 417
418 if (ps.nparen == 0 && probably_looking_at_definition()) { 418 if (ps.nparen == 0 && probably_looking_at_definition()) {
419 ps.is_function_definition = true; 419 ps.is_function_definition = true;
420 if (ps.in_decl) 420 if (ps.in_decl)
421 ps.in_func_def_params = true; 421 ps.in_func_def_params = true;
422 return lsym_funcname; 422 return lsym_funcname;
423 } 423 }
424 424
425 } else if (ps.nparen == 0 && probably_typename()) { 425 } else if (ps.nparen == 0 && probably_typename()) {
426 ps.next_unary = true; 426 ps.next_unary = true;
427 return lsym_type_outside_parentheses; 427 return lsym_type_outside_parentheses;
428 } 428 }
429 429
430 return lsym; 430 return lsym;
431} 431}
432 432
433static bool 433static bool
434is_asterisk_unary(void) 434is_asterisk_unary(void)
435{ 435{
436 if (inp_p[strspn(inp_p, "* \t")] == ')') 436 if (inp_p[strspn(inp_p, "* \t")] == ')')
437 return true; 437 return true;
438 if (ps.next_unary || ps.in_func_def_params) 438 if (ps.next_unary || ps.in_func_def_params)
439 return true; 439 return true;
440 if (ps.prev_lsym == lsym_word || 440 if (ps.prev_lsym == lsym_word ||
441 ps.prev_lsym == lsym_rparen || 441 ps.prev_lsym == lsym_rparen ||
442 ps.prev_lsym == lsym_rbracket) 442 ps.prev_lsym == lsym_rbracket)
443 return false; 443 return false;
444 return ps.in_decl && ps.nparen > 0; 444 return ps.in_decl && ps.nparen > 0;
445} 445}
446 446
447static bool 447static bool
448probably_in_function_definition(void) 448probably_in_function_definition(void)
449{ 449{
450 for (const char *tp = inp_p; *tp != '\n';) { 450 for (const char *tp = inp_p; *tp != '\n';) {
451 if (ch_isspace(*tp)) 451 if (ch_isspace(*tp))
452 tp++; 452 tp++;
453 else if (is_identifier_start(*tp)) { 453 else if (is_identifier_start(*tp)) {
454 tp++; 454 tp++;
455 while (is_identifier_part(*tp)) 455 while (is_identifier_part(*tp))
456 tp++; 456 tp++;
457 } else 457 } else
458 return *tp == '('; 458 return *tp == '(';
459 } 459 }
460 return false; 460 return false;
461} 461}
462 462
463static void 463static void
464lex_asterisk_unary(void) 464lex_asterisk_unary(void)
465{ 465{
466 while (inp_p[0] == '*' || ch_isspace(inp_p[0])) { 466 while (inp_p[0] == '*' || ch_isspace(inp_p[0])) {
467 if (inp_p[0] == '*') 467 if (inp_p[0] == '*')
468 token_add_char('*'); 468 token_add_char('*');
469 inp_skip(); 469 inp_skip();
470 } 470 }
471 471
472 if (ps.in_decl && probably_in_function_definition()) 472 if (ps.in_decl && probably_in_function_definition())
473 ps.is_function_definition = true; 473 ps.is_function_definition = true;
474} 474}
475 475
/* Advance '*pp' past any leading blank characters. */
static void
skip_blank(const char **pp)
{
	const char *p = *pp;

	while (ch_isblank(*p))
		p++;
	*pp = p;
}
482 482
/*
 * If the text at '*pp' starts with 's', advance '*pp' past it and return
 * true; otherwise leave '*pp' unchanged and return false.
 */
static bool
skip_string(const char **pp, const char *s)
{
	const size_t n = strlen(s);

	if (strncmp(*pp, s, n) != 0)
		return false;

	*pp += n;
	return true;
}
493 493
494static void 494static void
495lex_indent_comment(void) 495lex_indent_comment(void)
496{ 496{
497 const char *p = inp.s; 497 const char *p = inp.s;
498 498
499 skip_blank(&p); 499 skip_blank(&p);
500 if (!skip_string(&p, "/*")) 500 if (!skip_string(&p, "/*"))
501 return; 501 return;
502 skip_blank(&p); 502 skip_blank(&p);
503 if (!skip_string(&p, "INDENT")) 503 if (!skip_string(&p, "INDENT"))
504 return; 504 return;
505 505
506 enum indent_enabled enabled; 506 enum indent_enabled enabled;
507 skip_blank(&p); 507 skip_blank(&p);
508 if (*p == '*' || skip_string(&p, "ON")) 508 if (*p == '*' || skip_string(&p, "ON"))
509 enabled = indent_last_off_line; 509 enabled = indent_last_off_line;
510 else if (skip_string(&p, "OFF")) 510 else if (skip_string(&p, "OFF"))
511 enabled = indent_off; 511 enabled = indent_off;
512 else 512 else
513 return; 513 return;
514 514
515 skip_blank(&p); 515 skip_blank(&p);
516 if (!skip_string(&p, "*/\n")) 516 if (!skip_string(&p, "*/\n"))
517 return; 517 return;
518 518
519 if (lab.len > 0 || code.len > 0 || com.len > 0) 519 if (lab.len > 0 || code.len > 0 || com.len > 0)
520 output_line(); 520 output_line();
521 521
522 indent_enabled = enabled; 522 indent_enabled = enabled;
523} 523}
524 524
525/* Reads the next token, placing it in the global variable "token". */ 525/* Reads the next token, placing it in the global variable "token". */
526lexer_symbol 526lexer_symbol
527lexi(void) 527lexi(void)
528{ 528{
529 token.len = 0; 529 token.len = 0;
530 ps.curr_col_1 = ps.next_col_1; 530 ps.curr_col_1 = ps.next_col_1;
531 ps.next_col_1 = false; 531 ps.next_col_1 = false;
532 532
533 for (;;) { 533 for (;;) {
534 if (ch_isblank(inp_p[0])) { 534 if (ch_isblank(inp_p[0])) {
535 ps.curr_col_1 = false; 535 ps.curr_col_1 = false;
536 inp_p++; 536 inp_p++;
537 } else if (inp_p[0] == '\\' && inp_p[1] == '\n') { 537 } else if (inp_p[0] == '\\' && inp_p[1] == '\n') {
538 inp_p++; 538 inp_p++;
539 inp_skip(); 539 inp_skip();
540 line_no++; 540 line_no++;
541 } else 541 } else
542 break; 542 break;
543 } 543 }
544 544
545 lexer_symbol alnum_lsym = lexi_alnum(); 545 lexer_symbol alnum_lsym = lexi_alnum();
546 if (alnum_lsym != lsym_eof) 546 if (alnum_lsym != lsym_eof)
547 return alnum_lsym; 547 return alnum_lsym;
548 548
549 /* Scan a non-alphanumeric token */ 549 /* Scan a non-alphanumeric token */
550 550
551 token_add_char(inp_next()); 551 token_add_char(inp_next());
552 552
553 lexer_symbol lsym; 553 lexer_symbol lsym;
554 bool next_unary; 554 bool next_unary;
555 555
556 switch (token.s[token.len - 1]) { 556 switch (token.s[token.len - 1]) {
557 557
558 /* INDENT OFF */ 558 /* INDENT OFF */
559 case '(': lsym = lsym_lparen; next_unary = true; break; 559 case '(': lsym = lsym_lparen; next_unary = true; break;
560 case '[': lsym = lsym_lbracket; next_unary = true; break; 560 case '[': lsym = lsym_lbracket; next_unary = true; break;
561 case ')': lsym = lsym_rparen; next_unary = false; break; 561 case ')': lsym = lsym_rparen; next_unary = false; break;
562 case ']': lsym = lsym_rbracket; next_unary = false; break; 562 case ']': lsym = lsym_rbracket; next_unary = false; break;
563 case '?': lsym = lsym_question; next_unary = true; break; 563 case '?': lsym = lsym_question; next_unary = true; break;
564 case ';': lsym = lsym_semicolon; next_unary = true; break; 564 case ';': lsym = lsym_semicolon; next_unary = true; break;
565 case '{': lsym = lsym_lbrace; next_unary = true; break; 565 case '{': lsym = lsym_lbrace; next_unary = true; break;
566 case '}': lsym = lsym_rbrace; next_unary = true; break; 566 case '}': lsym = lsym_rbrace; next_unary = true; break;
567 case ',': lsym = lsym_comma; next_unary = true; break; 567 case ',': lsym = lsym_comma; next_unary = true; break;
568 case '.': lsym = lsym_period; next_unary = false; break; 568 case '.': lsym = lsym_period; next_unary = false; break;
569 /* INDENT ON */ 569 /* INDENT ON */
570 570
571 case ':': 571 case ':':
572 lsym = ps.quest_level > 0 572 lsym = ps.quest_level > 0
573 ? (ps.quest_level--, lsym_colon_question) 573 ? (ps.quest_level--, lsym_colon_question)
574 : ps.init_or_struct 574 : ps.init_or_struct
575 ? lsym_colon_other 575 ? lsym_colon_other
576 : lsym_colon_label; 576 : lsym_colon_label;
577 next_unary = true; 577 next_unary = true;
578 break; 578 break;
579 579
580 case '\n': 580 case '\n':
581 /* if data has been exhausted, the '\n' is a dummy. */ 581 /* if data has been exhausted, the '\n' is a dummy. */
582 lsym = had_eof ? lsym_eof : lsym_newline; 582 lsym = had_eof ? lsym_eof : lsym_newline;
583 next_unary = ps.next_unary; 583 next_unary = ps.next_unary;
584 ps.next_col_1 = true; 584 ps.next_col_1 = true;
585 break; 585 break;
586 586
587 case '#': 587 case '#':
588 lsym = lsym_preprocessing; 588 lsym = lsym_preprocessing;
589 next_unary = ps.next_unary; 589 next_unary = ps.next_unary;
590 break; 590 break;
591 591
592 case '\'': 592 case '\'':
593 case '"': 593 case '"':
594 lex_char_or_string(); 594 lex_char_or_string();
595 lsym = lsym_word; 595 lsym = lsym_word;
596 next_unary = false; 596 next_unary = false;
597 break; 597 break;
598 598
599 case '-': 599 case '-':
600 case '+': 600 case '+':
601 lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; 601 lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
602 next_unary = true; 602 next_unary = true;
603 603
604 /* '++' or '--' */ 604 /* '++' or '--' */
605 if (inp_p[0] == token.s[token.len - 1]) { 605 if (inp_p[0] == token.s[token.len - 1]) {
606 token_add_char(*inp_p++); 606 token_add_char(*inp_p++);
607 if (ps.prev_lsym == lsym_word || 607 if (ps.prev_lsym == lsym_word ||
608 ps.prev_lsym == lsym_rparen || 608 ps.prev_lsym == lsym_rparen ||
609 ps.prev_lsym == lsym_rbracket) { 609 ps.prev_lsym == lsym_rbracket) {
610 lsym = ps.next_unary 610 lsym = ps.next_unary
611 ? lsym_unary_op : lsym_postfix_op; 611 ? lsym_unary_op : lsym_postfix_op;
612 next_unary = false; 612 next_unary = false;
613 } 613 }
614 614
615 } else if (inp_p[0] == '=') { /* '+=' or '-=' */ 615 } else if (inp_p[0] == '=') { /* '+=' or '-=' */
616 token_add_char(*inp_p++); 616 token_add_char(*inp_p++);
617 617
618 } else if (inp_p[0] == '>') { /* '->' */ 618 } else if (inp_p[0] == '>') { /* '->' */
619 token_add_char(*inp_p++); 619 token_add_char(*inp_p++);
620 lsym = lsym_unary_op; 620 lsym = lsym_unary_op;
621 next_unary = false; 621 next_unary = false;
622 ps.want_blank = false; 622 ps.want_blank = false;
623 } 623 }
624 break; 624 break;
625 625
626 case '=': 626 case '=':
627 if (ps.init_or_struct) 627 if (ps.init_or_struct)
628 ps.block_init = true; 628 ps.block_init = true;
629 if (inp_p[0] == '=') 629 if (inp_p[0] == '=')
630 token_add_char(*inp_p++); 630 token_add_char(*inp_p++);
631 lsym = lsym_binary_op; 631 lsym = lsym_binary_op;
632 next_unary = true; 632 next_unary = true;
633 break; 633 break;
634 634
635 case '>': 635 case '>':
636 case '<': 636 case '<':
637 case '!': /* ops like <, <<, <=, !=, etc */ 637 case '!': /* ops like <, <<, <=, !=, etc */
638 if (inp_p[0] == '>' || inp_p[0] == '<' || inp_p[0] == '=') 638 if (inp_p[0] == '>' || inp_p[0] == '<' || inp_p[0] == '=')
639 token_add_char(*inp_p++); 639 token_add_char(*inp_p++);
640 if (inp_p[0] == '=') 640 if (inp_p[0] == '=')
641 token_add_char(*inp_p++); 641 token_add_char(*inp_p++);
642 lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; 642 lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
643 next_unary = true; 643 next_unary = true;
644 break; 644 break;
645 645
646 case '*': 646 case '*':
647 if (inp_p[0] == '=') { 647 if (inp_p[0] == '=') {
648 token_add_char(*inp_p++); 648 token_add_char(*inp_p++);
649 lsym = lsym_binary_op; 649 lsym = lsym_binary_op;
650 } else if (is_asterisk_unary()) { 650 } else if (is_asterisk_unary()) {
651 lex_asterisk_unary(); 651 lex_asterisk_unary();
652 lsym = lsym_unary_op; 652 lsym = lsym_unary_op;
653 } else 653 } else
654 lsym = lsym_binary_op; 654 lsym = lsym_binary_op;
655 next_unary = true; 655 next_unary = true;
656 break; 656 break;
657 657
658 default: 658 default:
659 if (token.s[token.len - 1] == '/' 659 if (token.s[token.len - 1] == '/'
660 && (inp_p[0] == '*' || inp_p[0] == '/')) { 660 && (inp_p[0] == '*' || inp_p[0] == '/')) {
661 enum indent_enabled prev = indent_enabled; 661 enum indent_enabled prev = indent_enabled;
662 lex_indent_comment(); 662 lex_indent_comment();
663 if (prev == indent_on && indent_enabled == indent_off) 663 if (prev == indent_on && indent_enabled == indent_off)
664 out.indent_off_text.len = 0; 664 out.indent_off_text.len = 0;
665 token_add_char(*inp_p++); 665 token_add_char(*inp_p++);
666 lsym = lsym_comment; 666 lsym = lsym_comment;
667 next_unary = ps.next_unary; 667 next_unary = ps.next_unary;
668 break; 668 break;
669 } 669 }
670 670
671 /* things like '||', '&&', '<<=' */ 671 /* things like '||', '&&', '<<=' */
672 lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; 672 lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
673 if (inp_p[0] == token.s[token.len - 1]) 673 if (inp_p[0] == token.s[token.len - 1])
674 token_add_char(*inp_p++), lsym = lsym_binary_op; 674 token_add_char(*inp_p++), lsym = lsym_binary_op;
675 if (inp_p[0] == '=') 675 if (inp_p[0] == '=')
676 token_add_char(*inp_p++), lsym = lsym_binary_op; 676 token_add_char(*inp_p++), lsym = lsym_binary_op;
677 677
678 next_unary = true; 678 next_unary = true;
679 } 679 }
680 680
681 ps.next_unary = next_unary; 681 ps.next_unary = next_unary;
682 682
683 return lsym; 683 return lsym;
684} 684}
685 685
686void 686void
687register_typename(const char *name) 687register_typename(const char *name)
688{ 688{
689 if (typenames.len >= typenames.cap) { 689 if (typenames.len >= typenames.cap) {
690 typenames.cap = 16 + 2 * typenames.cap; 690 typenames.cap = 16 + 2 * typenames.cap;
691 typenames.items = nonnull(realloc(typenames.items, 691 typenames.items = nonnull(realloc(typenames.items,
692 sizeof(typenames.items[0]) * typenames.cap)); 692 sizeof(typenames.items[0]) * typenames.cap));
693 } 693 }
694 694
695 int pos = bsearch_typenames(name); 695 int pos = bsearch_typenames(name);
696 if (pos >= 0) 696 if (pos >= 0)
697 return; /* already in the list */ 697 return; /* already in the list */
698 698
699 pos = -(pos + 1); 699 pos = -(pos + 1);
700 memmove(typenames.items + pos + 1, typenames.items + pos, 700 memmove(typenames.items + pos + 1, typenames.items + pos,
701 sizeof(typenames.items[0]) * (typenames.len++ - (unsigned)pos)); 701 sizeof(typenames.items[0]) * (typenames.len++ - (unsigned)pos));
702 typenames.items[pos] = nonnull(strdup(name)); 702 typenames.items[pos] = nonnull(strdup(name));
703} 703}

cvs diff -r1.69 -r1.70 src/usr.bin/indent/parse.c (switch to unified diff)

--- src/usr.bin/indent/parse.c 2023/06/07 15:46:12 1.69
+++ src/usr.bin/indent/parse.c 2023/06/09 07:20:30 1.70
@@ -1,269 +1,269 @@ @@ -1,269 +1,269 @@
1/* $NetBSD: parse.c,v 1.69 2023/06/07 15:46:12 rillig Exp $ */ 1/* $NetBSD: parse.c,v 1.70 2023/06/09 07:20:30 rillig Exp $ */
2 2
3/*- 3/*-
4 * SPDX-License-Identifier: BSD-4-Clause 4 * SPDX-License-Identifier: BSD-4-Clause
5 * 5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc. 6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993 7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved. 8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved. 9 * All rights reserved.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software 19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement: 20 * must display the following acknowledgement:
21 * This product includes software developed by the University of 21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors. 22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors 23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software 24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission. 25 * without specific prior written permission.
26 * 26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE. 37 * SUCH DAMAGE.
38 */ 38 */
39 39
40#include <sys/cdefs.h> 40#include <sys/cdefs.h>
41__RCSID("$NetBSD: parse.c,v 1.69 2023/06/07 15:46:12 rillig Exp $"); 41__RCSID("$NetBSD: parse.c,v 1.70 2023/06/09 07:20:30 rillig Exp $");
42 42
43#include <err.h> 43#include <err.h>
44 44
45#include "indent.h" 45#include "indent.h"
46 46
47/* 47/*
48 * Try to combine the statement on the top of the parse stack with the symbol 48 * Try to combine the statement on the top of the parse stack with the symbol
49 * directly below it, replacing these two symbols with a single symbol. 49 * directly below it, replacing these two symbols with a single symbol.
50 */ 50 */
51static bool 51static bool
52psyms_reduce_stmt(struct psym_stack *psyms) 52psyms_reduce_stmt(struct psym_stack *psyms)
53{ 53{
54 switch (psyms->sym[psyms->top - 1]) { 54 switch (psyms->sym[psyms->top - 1]) {
55 55
56 case psym_stmt: 56 case psym_stmt:
57 case psym_stmt_list: 57 case psym_stmt_list:
58 psyms->sym[--psyms->top] = psym_stmt_list; 58 psyms->sym[--psyms->top] = psym_stmt_list;
59 return true; 59 return true;
60 60
61 case psym_do: 61 case psym_do:
62 psyms->sym[--psyms->top] = psym_do_stmt; 62 psyms->sym[--psyms->top] = psym_do_stmt;
63 ps.ind_level_follow = psyms->ind_level[psyms->top]; 63 ps.ind_level_follow = psyms->ind_level[psyms->top];
64 return true; 64 return true;
65 65
66 case psym_if_expr: 66 case psym_if_expr:
67 psyms->sym[--psyms->top] = psym_if_expr_stmt; 67 psyms->sym[--psyms->top] = psym_if_expr_stmt;
68 int i = psyms->top - 1; 68 int i = psyms->top - 1;
69 while (psyms->sym[i] != psym_stmt && 69 while (psyms->sym[i] != psym_stmt &&
70 psyms->sym[i] != psym_stmt_list && 70 psyms->sym[i] != psym_stmt_list &&
71 psyms->sym[i] != psym_lbrace_block) 71 psyms->sym[i] != psym_lbrace_block)
72 --i; 72 --i;
73 ps.ind_level_follow = psyms->ind_level[i]; 73 ps.ind_level_follow = psyms->ind_level[i];
74 /* For the time being, assume that there is no 'else' on this 74 /* For the time being, assume that there is no 'else' on this
75 * 'if', and set the indentation level accordingly. If an 75 * 'if', and set the indentation level accordingly. If an
76 * 'else' is scanned, it will be fixed up later. */ 76 * 'else' is scanned, it will be fixed up later. */
77 return true; 77 return true;
78 78
79 case psym_switch_expr: 79 case psym_switch_expr:
80 case psym_decl: 80 case psym_decl:
81 case psym_if_expr_stmt_else: 81 case psym_if_expr_stmt_else:
82 case psym_for_exprs: 82 case psym_for_exprs:
83 case psym_while_expr: 83 case psym_while_expr:
84 psyms->sym[--psyms->top] = psym_stmt; 84 psyms->sym[--psyms->top] = psym_stmt;
85 ps.ind_level_follow = psyms->ind_level[psyms->top]; 85 ps.ind_level_follow = psyms->ind_level[psyms->top];
86 return true; 86 return true;
87 87
88 default: 88 default:
89 return false; 89 return false;
90 } 90 }
91} 91}
92 92
93static int 93static int
94decl_level(void) 94decl_level(void)
95{ 95{
96 int level = 0; 96 int level = 0;
97 for (int i = ps.psyms.top - 1; i > 0; i--) 97 for (int i = ps.psyms.top - 1; i > 0; i--)
98 if (ps.psyms.sym[i] == psym_decl) 98 if (ps.psyms.sym[i] == psym_decl)
99 level++; 99 level++;
100 return level; 100 return level;
101} 101}
102 102
103static void 103static void
104ps_push(parser_symbol psym) 104ps_push(parser_symbol psym)
105{ 105{
106 ps.psyms.sym[++ps.psyms.top] = psym; 106 ps.psyms.sym[++ps.psyms.top] = psym;
107 ps.psyms.ind_level[ps.psyms.top] = ps.ind_level; 107 ps.psyms.ind_level[ps.psyms.top] = ps.ind_level;
108} 108}
109 109
110static void 110static void
111ps_push_follow(parser_symbol psym) 111ps_push_follow(parser_symbol psym)
112{ 112{
113 ps.psyms.sym[++ps.psyms.top] = psym; 113 ps.psyms.sym[++ps.psyms.top] = psym;
114 ps.psyms.ind_level[ps.psyms.top] = ps.ind_level_follow; 114 ps.psyms.ind_level[ps.psyms.top] = ps.ind_level_follow;
115} 115}
116 116
117/* 117/*
118 * Repeatedly try to reduce the top two symbols on the parse stack to a single 118 * Repeatedly try to reduce the top two symbols on the parse stack to a single
119 * symbol, until no more reductions are possible. 119 * symbol, until no more reductions are possible.
120 */ 120 */
121static void 121static void
122psyms_reduce(struct psym_stack *psyms) 122psyms_reduce(struct psym_stack *psyms)
123{ 123{
124again: 124again:
125 if (psyms->sym[psyms->top] == psym_stmt && psyms_reduce_stmt(psyms)) 125 if (psyms->sym[psyms->top] == psym_stmt && psyms_reduce_stmt(psyms))
126 goto again; 126 goto again;
127 if (psyms->sym[psyms->top] == psym_while_expr && 127 if (psyms->sym[psyms->top] == psym_while_expr &&
128 psyms->sym[psyms->top - 1] == psym_do_stmt) { 128 psyms->sym[psyms->top - 1] == psym_do_stmt) {
129 psyms->top -= 2; 129 psyms->top -= 2;
130 goto again; 130 goto again;
131 } 131 }
132} 132}
133 133
134static bool 134static bool
135is_lbrace(parser_symbol psym) 135is_lbrace(parser_symbol psym)
136{ 136{
137 return psym == psym_lbrace_block 137 return psym == psym_lbrace_block
138 || psym == psym_lbrace_struct 138 || psym == psym_lbrace_struct
139 || psym == psym_lbrace_union 139 || psym == psym_lbrace_union
140 || psym == psym_lbrace_enum; 140 || psym == psym_lbrace_enum;
141} 141}
142 142
143/* 143/*
144 * Shift the token onto the parser stack, or reduce it by combining it with 144 * Shift the token onto the parser stack, or reduce it by combining it with
145 * previous tokens. 145 * previous tokens.
146 */ 146 */
147void 147void
148parse(parser_symbol psym) 148parse(parser_symbol psym)
149{ 149{
150 debug_blank_line(); 150 debug_blank_line();
151 debug_println("parse token: %s", psym_name[psym]); 151 debug_println("parse token: %s", psym_name[psym]);
152 152
153 struct psym_stack *psyms = &ps.psyms; 153 struct psym_stack *psyms = &ps.psyms;
154 if (psym != psym_else) { 154 if (psym != psym_else) {
155 while (psyms->sym[psyms->top] == psym_if_expr_stmt) { 155 while (psyms->sym[psyms->top] == psym_if_expr_stmt) {
156 psyms->sym[psyms->top] = psym_stmt; 156 psyms->sym[psyms->top] = psym_stmt;
157 psyms_reduce(&ps.psyms); 157 psyms_reduce(&ps.psyms);
158 } 158 }
159 } 159 }
160 160
161 switch (psym) { 161 switch (psym) {
162 162
163 case psym_decl: 163 case psym_decl:
164 if (psyms->sym[psyms->top] == psym_decl) 164 if (psyms->sym[psyms->top] == psym_decl)
165 break; /* only put one declaration onto stack */ 165 break; /* only put one declaration onto stack */
166 166
167 ps.break_after_comma = true; 167 ps.break_after_comma = true;
168 ps_push_follow(psym_decl); 168 ps_push_follow(psym_decl);
169 169
170 if (opt.left_justify_decl) 170 if (opt.left_justify_decl)
171 ps.ind_level_follow = ps.ind_level = decl_level(); 171 ps.ind_level_follow = ps.ind_level = decl_level();
172 break; 172 break;
173 173
174 case psym_if_expr: 174 case psym_if_expr:
175 if (psyms->sym[psyms->top] == psym_if_expr_stmt_else 175 if (psyms->sym[psyms->top] == psym_if_expr_stmt_else
176 && opt.else_if_in_same_line) 176 && opt.else_if_in_same_line)
177 ps.ind_level_follow = psyms->ind_level[psyms->top--]; 177 ps.ind_level_follow = psyms->ind_level[psyms->top--];
178 /* FALLTHROUGH */ 178 /* FALLTHROUGH */
179 case psym_do: 179 case psym_do:
180 case psym_for_exprs: 180 case psym_for_exprs:
181 ps.ind_level = ps.ind_level_follow++; 181 ps.ind_level = ps.ind_level_follow++;
182 ps_push(psym); 182 ps_push(psym);
183 break; 183 break;
184 184
185 case psym_lbrace_block: 185 case psym_lbrace_block:
186 case psym_lbrace_struct: 186 case psym_lbrace_struct:
187 case psym_lbrace_union: 187 case psym_lbrace_union:
188 case psym_lbrace_enum: 188 case psym_lbrace_enum:
189 ps.break_after_comma = false; 189 ps.break_after_comma = false;
190 if (psyms->sym[psyms->top] == psym_stmt 190 if (psyms->sym[psyms->top] == psym_stmt
191 || psyms->sym[psyms->top] == psym_decl 191 || psyms->sym[psyms->top] == psym_decl
192 || psyms->sym[psyms->top] == psym_stmt_list) 192 || psyms->sym[psyms->top] == psym_stmt_list)
193 ++ps.ind_level_follow; /* it is a random, isolated 193 ++ps.ind_level_follow; /* it is a random, isolated
194 * stmt group or a declaration 194 * stmt group or a declaration
195 */ 195 */
196 else { 196 else {
197 if (code.len == 0) { 197 if (code.len == 0) {
198 /* it is a group as part of a while, for, etc. 198 /* it is a group as part of a while, for, etc.
199 */ 199 */
200 --ps.ind_level; 200 --ps.ind_level;
201 201
202 /* for a switch, brace should be two levels out 202 /* for a switch, brace should be two levels out
203 * from the code */ 203 * from the code */
204 if (psyms->sym[psyms->top] == psym_switch_expr 204 if (psyms->sym[psyms->top] == psym_switch_expr
205 && opt.case_indent >= 1.0F) 205 && opt.case_indent >= 1.0F)
206 --ps.ind_level; 206 --ps.ind_level;
207 } 207 }
208 } 208 }
209 209
210 ps_push(psym); 210 ps_push(psym);
211 ps_push_follow(psym_stmt); 211 ps_push_follow(psym_stmt);
212 break; 212 break;
213 213
214 case psym_while_expr: 214 case psym_while_expr:
215 if (psyms->sym[psyms->top] == psym_do_stmt) { 215 if (psyms->sym[psyms->top] == psym_do_stmt) {
216 ps.ind_level = 216 ps.ind_level =
217 ps.ind_level_follow = psyms->ind_level[psyms->top]; 217 ps.ind_level_follow = psyms->ind_level[psyms->top];
218 ps_push(psym_while_expr); 218 ps_push(psym_while_expr);
219 } else { 219 } else {
220 ps_push_follow(psym_while_expr); 220 ps_push_follow(psym_while_expr);
221 ++ps.ind_level_follow; 221 ++ps.ind_level_follow;
222 } 222 }
223 223
224 break; 224 break;
225 225
226 case psym_else: 226 case psym_else:
227 if (psyms->sym[psyms->top] != psym_if_expr_stmt) { 227 if (psyms->sym[psyms->top] != psym_if_expr_stmt) {
228 diag(1, "Unmatched 'else'"); 228 diag(1, "Unmatched 'else'");
229 break; 229 break;
230 } 230 }
231 ps.ind_level = psyms->ind_level[psyms->top]; 231 ps.ind_level = psyms->ind_level[psyms->top];
232 ps.ind_level_follow = ps.ind_level + 1; 232 ps.ind_level_follow = ps.ind_level + 1;
233 psyms->sym[psyms->top] = psym_if_expr_stmt_else; 233 psyms->sym[psyms->top] = psym_if_expr_stmt_else;
234 break; 234 break;
235 235
236 case psym_rbrace: 236 case psym_rbrace:
237 /* stack should have <lbrace> <stmt> or <lbrace> <stmt_list> */ 237 /* stack should have <lbrace> <stmt> or <lbrace> <stmt_list> */
238 if (!(psyms->top > 0 238 if (!(psyms->top > 0
239 && is_lbrace(psyms->sym[psyms->top - 1]))) { 239 && is_lbrace(psyms->sym[psyms->top - 1]))) {
240 diag(1, "Statement nesting error"); 240 diag(1, "Statement nesting error");
241 break; 241 break;
242 } 242 }
243 ps.ind_level = ps.ind_level_follow = 243 ps.ind_level = ps.ind_level_follow =
244 psyms->ind_level[--psyms->top]; 244 psyms->ind_level[--psyms->top];
245 psyms->sym[psyms->top] = psym_stmt; 245 psyms->sym[psyms->top] = psym_stmt;
246 break; 246 break;
247 247
248 case psym_switch_expr: 248 case psym_switch_expr:
249 ps_push_follow(psym_switch_expr); 249 ps_push_follow(psym_switch_expr);
250 ps.ind_level_follow += (int)opt.case_indent + 1; 250 ps.ind_level_follow += (int)opt.case_indent + 1;
251 break; 251 break;
252 252
253 case psym_stmt: 253 case psym_stmt:
254 ps.break_after_comma = false; 254 ps.break_after_comma = false;
255 ps_push(psym_stmt); 255 ps_push(psym_stmt);
256 break; 256 break;
257 257
258 default: 258 default:
259 diag(1, "Unknown code to parser"); 259 diag(1, "Unknown code to parser");
260 return; 260 return;
261 } 261 }
262 262
263 if (psyms->top >= STACKSIZE - 1) 263 if (psyms->top >= STACKSIZE - 1)
264 errx(1, "Parser stack overflow"); 264 errx(1, "Parser stack overflow");
265 265
266 debug_parse_stack("before reduction"); 266 debug_parse_stack("before reduction");
267 psyms_reduce(&ps.psyms); 267 psyms_reduce(&ps.psyms);
268 debug_parse_stack("after reduction"); 268 debug_parse_stack("after reduction");
269} 269}

cvs diff -r1.157 -r1.158 src/usr.bin/indent/pr_comment.c (switch to unified diff)

--- src/usr.bin/indent/pr_comment.c 2023/06/09 07:18:52 1.157
+++ src/usr.bin/indent/pr_comment.c 2023/06/09 07:20:30 1.158
@@ -1,357 +1,356 @@ @@ -1,357 +1,356 @@
1/* $NetBSD: pr_comment.c,v 1.157 2023/06/09 07:18:52 rillig Exp $ */ 1/* $NetBSD: pr_comment.c,v 1.158 2023/06/09 07:20:30 rillig Exp $ */
2 2
3/*- 3/*-
4 * SPDX-License-Identifier: BSD-4-Clause 4 * SPDX-License-Identifier: BSD-4-Clause
5 * 5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc. 6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993 7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved. 8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved. 9 * All rights reserved.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software 19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement: 20 * must display the following acknowledgement:
21 * This product includes software developed by the University of 21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors. 22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors 23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software 24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission. 25 * without specific prior written permission.
26 * 26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE. 37 * SUCH DAMAGE.
38 */ 38 */
39 39
40#include <sys/cdefs.h> 40#include <sys/cdefs.h>
41__RCSID("$NetBSD: pr_comment.c,v 1.157 2023/06/09 07:18:52 rillig Exp $"); 41__RCSID("$NetBSD: pr_comment.c,v 1.158 2023/06/09 07:20:30 rillig Exp $");
42 42
43#include <string.h> 43#include <string.h>
44 44
45#include "indent.h" 45#include "indent.h"
46 46
47static void 47static void
48com_add_char(char ch) 48com_add_char(char ch)
49{ 49{
50 buf_add_char(&com, ch); 50 buf_add_char(&com, ch);
51} 51}
52 52
53static void 53static void
54com_add_delim(void) 54com_add_delim(void)
55{ 55{
56 if (opt.star_comment_cont) 56 if (opt.star_comment_cont)
57 buf_add_chars(&com, " * ", 3); 57 buf_add_chars(&com, " * ", 3);
58} 58}
59 59
60static bool 60static bool
61fits_in_one_line(int com_ind, int max_line_length) 61fits_in_one_line(int com_ind, int max_line_length)
62{ 62{
63 for (const char *start = inp_p, *p = start; *p != '\n'; p++) { 63 for (const char *start = inp_p, *p = start; *p != '\n'; p++) {
64 if (p[0] == '*' && p[1] == '/') { 64 if (p[0] == '*' && p[1] == '/') {
65 while (p - inp_p >= 2 65 while (p - inp_p >= 2
66 && ch_isblank(p[-1]) 66 && ch_isblank(p[-1])
67 && ch_isblank(p[-2])) 67 && ch_isblank(p[-2]))
68 p--; 68 p--;
69 int len = ind_add(com_ind + 3, 69 int len = ind_add(com_ind + 3,
70 start, (size_t)(p - start)); 70 start, (size_t)(p - start));
71 len += p == start || ch_isblank(p[-1]) ? 2 : 3; 71 len += p == start || ch_isblank(p[-1]) ? 2 : 3;
72 return len <= max_line_length; 72 return len <= max_line_length;
73 } 73 }
74 } 74 }
75 return false; 75 return false;
76} 76}
77 77
78static void 78static void
79analyze_comment(bool *p_may_wrap, bool *p_delim, 79analyze_comment(bool *p_may_wrap, bool *p_delim,
80 int *p_ind, int *p_line_length) 80 int *p_ind, int *p_line_length)
81{ 81{
82 bool may_wrap = true; 82 bool may_wrap = true;
83 bool delim = false; 83 bool delim = false;
84 int ind; 84 int ind;
85 int line_length = opt.max_line_length; 85 int line_length = opt.max_line_length;
86 86
87 if (ps.curr_col_1 && !opt.format_col1_comments) { 87 if (ps.curr_col_1 && !opt.format_col1_comments) {
88 may_wrap = false; 88 may_wrap = false;
89 ind = 0; 89 ind = 0;
90 } else { 90 } else {
91 if (inp_p[0] == '-' || inp_p[0] == '*' || 91 if (inp_p[0] == '-' || inp_p[0] == '*' ||
92 token.s[token.len - 1] == '/' || 92 token.s[token.len - 1] == '/' ||
93 (inp_p[0] == '\n' && !opt.format_block_comments)) 93 (inp_p[0] == '\n' && !opt.format_block_comments))
94 may_wrap = false; 94 may_wrap = false;
95 if (code.len == 0 && inp_p[strspn(inp_p, "*")] == '\n') 95 if (code.len == 0 && inp_p[strspn(inp_p, "*")] == '\n')
96 out.line_kind = lk_block_comment; 96 out.line_kind = lk_block_comment;
97 97
98 if (com.len > 0) 98 if (com.len > 0)
99 output_line(); 99 output_line();
100 if (lab.len == 0 && code.len == 0) { 100 if (lab.len == 0 && code.len == 0) {
101 ind = (ps.ind_level - opt.unindent_displace) 101 ind = (ps.ind_level - opt.unindent_displace)
102 * opt.indent_size; 102 * opt.indent_size;
103 if (ind <= 0) 103 if (ind <= 0)
104 ind = opt.format_col1_comments ? 0 : 1; 104 ind = opt.format_col1_comments ? 0 : 1;
105 line_length = opt.block_comment_max_line_length; 105 line_length = opt.block_comment_max_line_length;
106 if (may_wrap && inp_p[0] == '\n') 106 if (may_wrap && inp_p[0] == '\n')
107 delim = true; 107 delim = true;
108 if (may_wrap && opt.comment_delimiter_on_blankline) 108 if (may_wrap && opt.comment_delimiter_on_blankline)
109 delim = true; 109 delim = true;
110 } else { 110 } else {
111 int target_ind = code.len > 0 111 int target_ind = code.len > 0
112 ? ind_add(compute_code_indent(), code.s, code.len) 112 ? ind_add(compute_code_indent(), code.s, code.len)
113 : ind_add(compute_label_indent(), lab.s, lab.len); 113 : ind_add(compute_label_indent(), lab.s, lab.len);
114 114
115 ind = ps.decl_on_line || ps.ind_level == 0 115 ind = ps.decl_on_line || ps.ind_level == 0
116 ? opt.decl_comment_column - 1 116 ? opt.decl_comment_column - 1
117 : opt.comment_column - 1; 117 : opt.comment_column - 1;
118 if (ind <= target_ind) 118 if (ind <= target_ind)
119 ind = next_tab(target_ind); 119 ind = next_tab(target_ind);
120 if (ind + 25 > line_length) 120 if (ind + 25 > line_length)
121 line_length = ind + 25; 121 line_length = ind + 25;
122 } 122 }
123 } 123 }
124 124
125 ps.com_ind = ind; 125 ps.com_ind = ind;
126 126
127 if (!may_wrap) { 127 if (!may_wrap) {
128 /* Find out how much indentation there was originally, because 128 /* Find out how much indentation there was originally, because
129 * that much will have to be ignored by output_line. */ 129 * that much will have to be ignored by output_line. */
130 size_t len = (size_t)(inp_p - 2 - inp.s); 130 size_t len = (size_t)(inp_p - 2 - inp.s);
131 ps.n_comment_delta = -ind_add(0, inp.s, len); 131 ps.n_comment_delta = -ind_add(0, inp.s, len);
132 } else { 132 } else {
133 ps.n_comment_delta = 0; 133 ps.n_comment_delta = 0;
134 if (!(inp_p[0] == '\t' && !ch_isblank(inp_p[1]))) 134 if (!(inp_p[0] == '\t' && !ch_isblank(inp_p[1])))
135 while (ch_isblank(inp_p[0])) 135 while (ch_isblank(inp_p[0]))
136 inp_p++; 136 inp_p++;
137 } 137 }
138 138
139 *p_may_wrap = may_wrap; 139 *p_may_wrap = may_wrap;
140 *p_delim = delim; 140 *p_delim = delim;
141 *p_ind = ind; 141 *p_ind = ind;
142 *p_line_length = line_length; 142 *p_line_length = line_length;
143} 143}
144 144
145static void 145static void
146copy_comment_start(bool may_wrap, bool *delim, int ind, int line_length) 146copy_comment_start(bool may_wrap, bool *delim, int ind, int line_length)
147{ 147{
148 ps.comment_delta = 0; 148 ps.comment_delta = 0;
149 com_add_char('/'); 149 com_add_char('/');
150 com_add_char(token.s[token.len - 1]); /* either '*' or '/' */ 150 com_add_char(token.s[token.len - 1]); /* either '*' or '/' */
151 151
152 if (may_wrap) { 152 if (may_wrap) {
153 if (!ch_isblank(inp_p[0])) 153 if (!ch_isblank(inp_p[0]))
154 com_add_char(' '); 154 com_add_char(' ');
155 155
156 if (*delim && fits_in_one_line(ind, line_length)) 156 if (*delim && fits_in_one_line(ind, line_length))
157 *delim = false; 157 *delim = false;
158 if (*delim) { 158 if (*delim) {
159 output_line(); 159 output_line();
160 com_add_delim(); 160 com_add_delim();
161 } 161 }
162 } 162 }
163} 163}
164 164
165static void 165static void
166copy_comment_wrap_text(int line_length, ssize_t *last_blank) 166copy_comment_wrap_text(int line_length, ssize_t *last_blank)
167{ 167{
168 int now_len = ind_add(ps.com_ind, com.s, com.len); 168 int now_len = ind_add(ps.com_ind, com.s, com.len);
169 for (;;) { 169 for (;;) {
170 char ch = inp_next(); 170 char ch = inp_next();
171 if (ch_isblank(ch)) 171 if (ch_isblank(ch))
172 *last_blank = (ssize_t)com.len; 172 *last_blank = (ssize_t)com.len;
173 com_add_char(ch); 173 com_add_char(ch);
174 now_len++; 174 now_len++;
175 if (memchr("*\n\r\b\t", inp_p[0], 6) != NULL) 175 if (memchr("*\n\r\b\t", inp_p[0], 6) != NULL)
176 break; 176 break;
177 if (now_len >= line_length && *last_blank != -1) 177 if (now_len >= line_length && *last_blank != -1)
178 break; 178 break;
179 } 179 }
180 180
181 ps.next_col_1 = false; 181 ps.next_col_1 = false;
182 182
183 if (now_len <= line_length) 183 if (now_len <= line_length)
184 return; 184 return;
185 if (ch_isspace(com.s[com.len - 1])) 185 if (ch_isspace(com.s[com.len - 1]))
186 return; 186 return;
187 187
188 if (*last_blank == -1) { 188 if (*last_blank == -1) {
189 /* only a single word in this line */ 189 /* only a single word in this line */
190 output_line(); 190 output_line();
191 com_add_delim(); 191 com_add_delim();
192 return; 192 return;
193 } 193 }
194 194
195 const char *last_word_s = com.s + *last_blank + 1; 195 const char *last_word_s = com.s + *last_blank + 1;
196 size_t last_word_len = com.len - (size_t)(*last_blank + 1); 196 size_t last_word_len = com.len - (size_t)(*last_blank + 1);
197 com.len = (size_t)*last_blank; 197 com.len = (size_t)*last_blank;
198 output_line(); 198 output_line();
199 com_add_delim(); 199 com_add_delim();
200 200
201 /* Assume that output_line and com_add_delim don't 201 /* Assume that output_line and com_add_delim don't invalidate the
202 * invalidate the "unused" part of the buffer beyond 202 * "unused" part of the buffer beyond com.s + com.len. */
203 * com.s + com.len. */ 
204 memmove(com.s + com.len, last_word_s, last_word_len); 203 memmove(com.s + com.len, last_word_s, last_word_len);
205 com.len += last_word_len; 204 com.len += last_word_len;
206 *last_blank = -1; 205 *last_blank = -1;
207} 206}
208 207
209static bool 208static bool
210copy_comment_wrap_newline(ssize_t *last_blank) 209copy_comment_wrap_newline(ssize_t *last_blank)
211{ 210{
212 *last_blank = -1; 211 *last_blank = -1;
213 if (ps.next_col_1) { 212 if (ps.next_col_1) {
214 if (com.len == 0) 213 if (com.len == 0)
215 com_add_char(' '); /* force empty output line */ 214 com_add_char(' '); /* force empty output line */
216 if (com.len > 3) { 215 if (com.len > 3) {
217 output_line(); 216 output_line();
218 com_add_delim(); 217 com_add_delim();
219 } 218 }
220 output_line(); 219 output_line();
221 com_add_delim(); 220 com_add_delim();
222 } else { 221 } else {
223 ps.next_col_1 = true; 222 ps.next_col_1 = true;
224 if (!(com.len > 0 && ch_isblank(com.s[com.len - 1]))) 223 if (!(com.len > 0 && ch_isblank(com.s[com.len - 1])))
225 com_add_char(' '); 224 com_add_char(' ');
226 *last_blank = (int)com.len - 1; 225 *last_blank = (int)com.len - 1;
227 } 226 }
228 ++line_no; 227 ++line_no;
229 228
230 /* flush any blanks and/or tabs at start of next line */ 229 /* flush any blanks and/or tabs at start of next line */
231 inp_skip(); /* '\n' */ 230 inp_skip(); /* '\n' */
232 while (ch_isblank(inp_p[0])) 231 while (ch_isblank(inp_p[0]))
233 inp_p++; 232 inp_p++;
234 if (inp_p[0] == '*' && inp_p[1] == '/') 233 if (inp_p[0] == '*' && inp_p[1] == '/')
235 return false; 234 return false;
236 if (inp_p[0] == '*') { 235 if (inp_p[0] == '*') {
237 inp_p++; 236 inp_p++;
238 while (ch_isblank(inp_p[0])) 237 while (ch_isblank(inp_p[0]))
239 inp_p++; 238 inp_p++;
240 } 239 }
241 240
242 return true; 241 return true;
243} 242}
244 243
245static void 244static void
246copy_comment_wrap_finish(int line_length, bool delim) 245copy_comment_wrap_finish(int line_length, bool delim)
247{ 246{
248 if (delim) { 247 if (delim) {
249 if (com.len > 3) 248 if (com.len > 3)
250 output_line(); 249 output_line();
251 else 250 else
252 com.len = 0; 251 com.len = 0;
253 com_add_char(' '); 252 com_add_char(' ');
254 } else { 253 } else {
255 size_t len = com.len; 254 size_t len = com.len;
256 while (ch_isblank(com.s[len - 1])) 255 while (ch_isblank(com.s[len - 1]))
257 len--; 256 len--;
258 int end_ind = ind_add(ps.com_ind, com.s, len); 257 int end_ind = ind_add(ps.com_ind, com.s, len);
259 if (end_ind + 3 > line_length) 258 if (end_ind + 3 > line_length)
260 output_line(); 259 output_line();
261 } 260 }
262 261
263 while (com.len >= 2 262 while (com.len >= 2
264 && ch_isblank(com.s[com.len - 1]) 263 && ch_isblank(com.s[com.len - 1])
265 && ch_isblank(com.s[com.len - 2])) 264 && ch_isblank(com.s[com.len - 2]))
266 com.len--; 265 com.len--;
267 266
268 inp_p += 2; 267 inp_p += 2;
269 if (com.len > 0 && ch_isblank(com.s[com.len - 1])) 268 if (com.len > 0 && ch_isblank(com.s[com.len - 1]))
270 buf_add_chars(&com, "*/", 2); 269 buf_add_chars(&com, "*/", 2);
271 else 270 else
272 buf_add_chars(&com, " */", 3); 271 buf_add_chars(&com, " */", 3);
273} 272}
274 273
275/* 274/*
276 * Copy characters from 'inp' to 'com'. Try to keep comments from going over 275 * Copy characters from 'inp' to 'com'. Try to keep comments from going over
277 * the maximum line length. To do that, remember where the last blank, tab, or 276 * the maximum line length. To do that, remember where the last blank, tab, or
278 * newline was. When a line is filled, print up to the last blank and continue 277 * newline was. When a line is filled, print up to the last blank and continue
279 * copying. 278 * copying.
280 */ 279 */
281static void 280static void
282copy_comment_wrap(int line_length, bool delim) 281copy_comment_wrap(int line_length, bool delim)
283{ 282{
284 ssize_t last_blank = -1; /* index of the last blank in 'com' */ 283 ssize_t last_blank = -1; /* index of the last blank in 'com' */
285 284
286 for (;;) { 285 for (;;) {
287 if (inp_p[0] == '\n') { 286 if (inp_p[0] == '\n') {
288 if (had_eof) 287 if (had_eof)
289 goto unterminated_comment; 288 goto unterminated_comment;
290 if (!copy_comment_wrap_newline(&last_blank)) 289 if (!copy_comment_wrap_newline(&last_blank))
291 goto end_of_comment; 290 goto end_of_comment;
292 } else if (inp_p[0] == '*' && inp_p[1] == '/') 291 } else if (inp_p[0] == '*' && inp_p[1] == '/')
293 goto end_of_comment; 292 goto end_of_comment;
294 else 293 else
295 copy_comment_wrap_text(line_length, &last_blank); 294 copy_comment_wrap_text(line_length, &last_blank);
296 } 295 }
297 296
298end_of_comment: 297end_of_comment:
299 copy_comment_wrap_finish(line_length, delim); 298 copy_comment_wrap_finish(line_length, delim);
300 return; 299 return;
301 300
302unterminated_comment: 301unterminated_comment:
303 diag(1, "Unterminated comment"); 302 diag(1, "Unterminated comment");
304 output_line(); 303 output_line();
305} 304}
306 305
307static void 306static void
308copy_comment_nowrap(void) 307copy_comment_nowrap(void)
309{ 308{
310 char kind = token.s[token.len - 1]; 309 char kind = token.s[token.len - 1];
311 310
312 for (;;) { 311 for (;;) {
313 if (inp_p[0] == '\n') { 312 if (inp_p[0] == '\n') {
314 if (kind == '/') 313 if (kind == '/')
315 return; 314 return;
316 315
317 if (had_eof) { 316 if (had_eof) {
318 diag(1, "Unterminated comment"); 317 diag(1, "Unterminated comment");
319 output_line(); 318 output_line();
320 return; 319 return;
321 } 320 }
322 321
323 if (com.len == 0) 322 if (com.len == 0)
324 com_add_char(' '); /* force output of an 323 com_add_char(' '); /* force output of an
325 * empty line */ 324 * empty line */
326 output_line(); 325 output_line();
327 ++line_no; 326 ++line_no;
328 inp_skip(); 327 inp_skip();
329 continue; 328 continue;
330 } 329 }
331 330
332 com_add_char(*inp_p++); 331 com_add_char(*inp_p++);
333 if (com.len >= 2 332 if (com.len >= 2
334 && com.s[com.len - 2] == '*' 333 && com.s[com.len - 2] == '*'
335 && com.s[com.len - 1] == '/' 334 && com.s[com.len - 1] == '/'
336 && kind == '*') 335 && kind == '*')
337 return; 336 return;
338 } 337 }
339} 338}
340 339
341/* 340/*
342 * Scan, reformat and output a single comment, which is either a block comment 341 * Scan, reformat and output a single comment, which is either a block comment
343 * starting with '/' '*' or an end-of-line comment starting with '//'. 342 * starting with '/' '*' or an end-of-line comment starting with '//'.
344 */ 343 */
345void 344void
346process_comment(void) 345process_comment(void)
347{ 346{
348 bool may_wrap, delim; 347 bool may_wrap, delim;
349 int ind, line_length; 348 int ind, line_length;
350 349
351 analyze_comment(&may_wrap, &delim, &ind, &line_length); 350 analyze_comment(&may_wrap, &delim, &ind, &line_length);
352 copy_comment_start(may_wrap, &delim, ind, line_length); 351 copy_comment_start(may_wrap, &delim, ind, line_length);
353 if (may_wrap) 352 if (may_wrap)
354 copy_comment_wrap(line_length, delim); 353 copy_comment_wrap(line_length, delim);
355 else 354 else
356 copy_comment_nowrap(); 355 copy_comment_nowrap();
357} 356}