Sat Jun 10 12:59:31 2023 UTC
indent: in debug mode, null-terminate buffers


(rillig)
diff -r1.352 -r1.353 src/usr.bin/indent/indent.c
diff -r1.185 -r1.186 src/usr.bin/indent/indent.h
diff -r1.214 -r1.215 src/usr.bin/indent/io.c
diff -r1.222 -r1.223 src/usr.bin/indent/lexi.c
diff -r1.159 -r1.160 src/usr.bin/indent/pr_comment.c

cvs diff -r1.352 -r1.353 src/usr.bin/indent/indent.c (switch to unified diff)

--- src/usr.bin/indent/indent.c 2023/06/10 08:17:04 1.352
+++ src/usr.bin/indent/indent.c 2023/06/10 12:59:31 1.353
@@ -1,1070 +1,1083 @@ @@ -1,1070 +1,1083 @@
1/* $NetBSD: indent.c,v 1.352 2023/06/10 08:17:04 rillig Exp $ */ 1/* $NetBSD: indent.c,v 1.353 2023/06/10 12:59:31 rillig Exp $ */
2 2
3/*- 3/*-
4 * SPDX-License-Identifier: BSD-4-Clause 4 * SPDX-License-Identifier: BSD-4-Clause
5 * 5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc. 6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 7 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
8 * Copyright (c) 1980, 1993 8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved. 9 * The Regents of the University of California. All rights reserved.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software 19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement: 20 * must display the following acknowledgement:
21 * This product includes software developed by the University of 21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors. 22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors 23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software 24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission. 25 * without specific prior written permission.
26 * 26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE. 37 * SUCH DAMAGE.
38 */ 38 */
39 39
40#include <sys/cdefs.h> 40#include <sys/cdefs.h>
41__RCSID("$NetBSD: indent.c,v 1.352 2023/06/10 08:17:04 rillig Exp $"); 41__RCSID("$NetBSD: indent.c,v 1.353 2023/06/10 12:59:31 rillig Exp $");
42 42
43#include <sys/param.h> 43#include <sys/param.h>
44#include <err.h> 44#include <err.h>
45#include <fcntl.h> 45#include <fcntl.h>
46#include <stdarg.h> 46#include <stdarg.h>
47#include <stdio.h> 47#include <stdio.h>
48#include <stdlib.h> 48#include <stdlib.h>
49#include <string.h> 49#include <string.h>
50#include <unistd.h> 50#include <unistd.h>
51 51
52#include "indent.h" 52#include "indent.h"
53 53
54struct options opt = { 54struct options opt = {
55 .brace_same_line = true, 55 .brace_same_line = true,
56 .comment_delimiter_on_blankline = true, 56 .comment_delimiter_on_blankline = true,
57 .cuddle_else = true, 57 .cuddle_else = true,
58 .comment_column = 33, 58 .comment_column = 33,
59 .decl_indent = 16, 59 .decl_indent = 16,
60 .else_if_in_same_line = true, 60 .else_if_in_same_line = true,
61 .function_brace_split = true, 61 .function_brace_split = true,
62 .format_col1_comments = true, 62 .format_col1_comments = true,
63 .format_block_comments = true, 63 .format_block_comments = true,
64 .indent_parameters = true, 64 .indent_parameters = true,
65 .indent_size = 8, 65 .indent_size = 8,
66 .local_decl_indent = -1, 66 .local_decl_indent = -1,
67 .lineup_to_parens = true, 67 .lineup_to_parens = true,
68 .procnames_start_line = true, 68 .procnames_start_line = true,
69 .star_comment_cont = true, 69 .star_comment_cont = true,
70 .tabsize = 8, 70 .tabsize = 8,
71 .max_line_length = 78, 71 .max_line_length = 78,
72 .use_tabs = true, 72 .use_tabs = true,
73}; 73};
74 74
75struct parser_state ps; 75struct parser_state ps;
76 76
77struct buffer token; 77struct buffer token;
78 78
79struct buffer lab; 79struct buffer lab;
80struct buffer code; 80struct buffer code;
81struct buffer com; 81struct buffer com;
82 82
83bool found_err; 83bool found_err;
84bool had_eof; 84bool had_eof;
85int line_no = 1; 85int line_no = 1;
86 86
87static int ifdef_level; 87static int ifdef_level;
88static struct parser_state state_stack[5]; 88static struct parser_state state_stack[5];
89 89
90FILE *input; 90FILE *input;
91FILE *output; 91FILE *output;
92 92
93static const char *in_name = "Standard Input"; 93static const char *in_name = "Standard Input";
94static const char *backup_suffix = ".BAK"; 94static const char *backup_suffix = ".BAK";
95static char bakfile[MAXPATHLEN] = ""; 95static char bakfile[MAXPATHLEN] = "";
96 96
97 97
98void * 98void *
99nonnull(void *p) 99nonnull(void *p)
100{ 100{
101 if (p == NULL) 101 if (p == NULL)
102 err(EXIT_FAILURE, NULL); 102 err(EXIT_FAILURE, NULL);
103 return p; 103 return p;
104} 104}
105 105
106static void 106static void
107buf_expand(struct buffer *buf, size_t add_size) 107buf_expand(struct buffer *buf, size_t add_size)
108{ 108{
109 buf->cap = buf->cap + add_size + 400; 109 buf->cap = buf->cap + add_size + 400;
110 buf->s = nonnull(realloc(buf->s, buf->cap)); 110 buf->s = nonnull(realloc(buf->s, buf->cap));
111} 111}
112 112
 113#ifdef debug
 114void
 115buf_terminate(struct buffer *buf)
 116{
 117 if (buf->len == buf->cap)
 118 buf_expand(buf, 1);
 119 buf->s[buf->len] = '\0';
 120}
 121#endif
 122
113void 123void
114buf_add_char(struct buffer *buf, char ch) 124buf_add_char(struct buffer *buf, char ch)
115{ 125{
116 if (buf->len == buf->cap) 126 if (buf->len == buf->cap)
117 buf_expand(buf, 1); 127 buf_expand(buf, 1);
118 buf->s[buf->len++] = ch; 128 buf->s[buf->len++] = ch;
 129 buf_terminate(buf);
119} 130}
120 131
121void 132void
122buf_add_chars(struct buffer *buf, const char *s, size_t len) 133buf_add_chars(struct buffer *buf, const char *s, size_t len)
123{ 134{
124 if (len == 0) 135 if (len == 0)
125 return; 136 return;
126 if (len > buf->cap - buf->len) 137 if (len > buf->cap - buf->len)
127 buf_expand(buf, len); 138 buf_expand(buf, len);
128 memcpy(buf->s + buf->len, s, len); 139 memcpy(buf->s + buf->len, s, len);
129 buf->len += len; 140 buf->len += len;
 141 buf_terminate(buf);
130} 142}
131 143
132static void 144static void
133buf_add_buf(struct buffer *buf, const struct buffer *add) 145buf_add_buf(struct buffer *buf, const struct buffer *add)
134{ 146{
135 buf_add_chars(buf, add->s, add->len); 147 buf_add_chars(buf, add->s, add->len);
136} 148}
137 149
138void 150void
139diag(int level, const char *msg, ...) 151diag(int level, const char *msg, ...)
140{ 152{
141 va_list ap; 153 va_list ap;
142 154
143 if (level != 0) 155 if (level != 0)
144 found_err = true; 156 found_err = true;
145 157
146 va_start(ap, msg); 158 va_start(ap, msg);
147 fprintf(stderr, "%s: %s:%d: ", 159 fprintf(stderr, "%s: %s:%d: ",
148 level == 0 ? "warning" : "error", in_name, line_no); 160 level == 0 ? "warning" : "error", in_name, line_no);
149 vfprintf(stderr, msg, ap); 161 vfprintf(stderr, msg, ap);
150 fprintf(stderr, "\n"); 162 fprintf(stderr, "\n");
151 va_end(ap); 163 va_end(ap);
152} 164}
153 165
154/* 166/*
155 * Compute the indentation from starting at 'ind' and adding the text starting 167 * Compute the indentation from starting at 'ind' and adding the text starting
156 * at 's'. 168 * at 's'.
157 */ 169 */
158int 170int
159ind_add(int ind, const char *s, size_t len) 171ind_add(int ind, const char *s, size_t len)
160{ 172{
161 for (const char *p = s; len > 0; p++, len--) { 173 for (const char *p = s; len > 0; p++, len--) {
162 if (*p == '\n') 174 if (*p == '\n')
163 ind = 0; 175 ind = 0;
164 else if (*p == '\t') 176 else if (*p == '\t')
165 ind = next_tab(ind); 177 ind = next_tab(ind);
166 else if (*p == '\b') 178 else if (*p == '\b')
167 --ind; 179 --ind;
168 else 180 else
169 ++ind; 181 ++ind;
170 } 182 }
171 return ind; 183 return ind;
172} 184}
173 185
174static void 186static void
175init_globals(void) 187init_globals(void)
176{ 188{
177 ps.psyms.sym[0] = psym_stmt_list; 189 ps.psyms.sym[0] = psym_stmt_list;
178 ps.prev_lsym = lsym_semicolon; 190 ps.prev_lsym = lsym_semicolon;
179 ps.next_col_1 = true; 191 ps.next_col_1 = true;
180 ps.lbrace_kind = psym_lbrace_block; 192 ps.lbrace_kind = psym_lbrace_block;
181 193
182 const char *suffix = getenv("SIMPLE_BACKUP_SUFFIX"); 194 const char *suffix = getenv("SIMPLE_BACKUP_SUFFIX");
183 if (suffix != NULL) 195 if (suffix != NULL)
184 backup_suffix = suffix; 196 backup_suffix = suffix;
185} 197}
186 198
187static void 199static void
188load_profiles(int argc, char **argv) 200load_profiles(int argc, char **argv)
189{ 201{
190 const char *profile_name = NULL; 202 const char *profile_name = NULL;
191 203
192 for (int i = 1; i < argc; ++i) { 204 for (int i = 1; i < argc; ++i) {
193 const char *arg = argv[i]; 205 const char *arg = argv[i];
194 206
195 if (strcmp(arg, "-npro") == 0) 207 if (strcmp(arg, "-npro") == 0)
196 return; 208 return;
197 if (arg[0] == '-' && arg[1] == 'P' && arg[2] != '\0') 209 if (arg[0] == '-' && arg[1] == 'P' && arg[2] != '\0')
198 profile_name = arg + 2; 210 profile_name = arg + 2;
199 } 211 }
200 212
201 load_profile_files(profile_name); 213 load_profile_files(profile_name);
202} 214}
203 215
204/* 216/*
205 * Copy the input file to the backup file, then make the backup file the input 217 * Copy the input file to the backup file, then make the backup file the input
206 * and the original input file the output. 218 * and the original input file the output.
207 */ 219 */
208static void 220static void
209bakcopy(void) 221bakcopy(void)
210{ 222{
211 ssize_t n; 223 ssize_t n;
212 int bak_fd; 224 int bak_fd;
213 char buff[8 * 1024]; 225 char buff[8 * 1024];
214 226
215 const char *last_slash = strrchr(in_name, '/'); 227 const char *last_slash = strrchr(in_name, '/');
216 snprintf(bakfile, sizeof(bakfile), "%s%s", 228 snprintf(bakfile, sizeof(bakfile), "%s%s",
217 last_slash != NULL ? last_slash + 1 : in_name, backup_suffix); 229 last_slash != NULL ? last_slash + 1 : in_name, backup_suffix);
218 230
219 /* copy in_name to backup file */ 231 /* copy in_name to backup file */
220 bak_fd = creat(bakfile, 0600); 232 bak_fd = creat(bakfile, 0600);
221 if (bak_fd < 0) 233 if (bak_fd < 0)
222 err(1, "%s", bakfile); 234 err(1, "%s", bakfile);
223 235
224 while ((n = read(fileno(input), buff, sizeof(buff))) > 0) 236 while ((n = read(fileno(input), buff, sizeof(buff))) > 0)
225 if (write(bak_fd, buff, (size_t)n) != n) 237 if (write(bak_fd, buff, (size_t)n) != n)
226 err(1, "%s", bakfile); 238 err(1, "%s", bakfile);
227 if (n < 0) 239 if (n < 0)
228 err(1, "%s", in_name); 240 err(1, "%s", in_name);
229 241
230 close(bak_fd); 242 close(bak_fd);
231 (void)fclose(input); 243 (void)fclose(input);
232 244
233 /* re-open backup file as the input file */ 245 /* re-open backup file as the input file */
234 input = fopen(bakfile, "r"); 246 input = fopen(bakfile, "r");
235 if (input == NULL) 247 if (input == NULL)
236 err(1, "%s", bakfile); 248 err(1, "%s", bakfile);
237 /* now the original input file will be the output */ 249 /* now the original input file will be the output */
238 output = fopen(in_name, "w"); 250 output = fopen(in_name, "w");
239 if (output == NULL) { 251 if (output == NULL) {
240 unlink(bakfile); 252 unlink(bakfile);
241 err(1, "%s", in_name); 253 err(1, "%s", in_name);
242 } 254 }
243} 255}
244 256
245static void 257static void
246parse_command_line(int argc, char **argv) 258parse_command_line(int argc, char **argv)
247{ 259{
248 for (int i = 1; i < argc; ++i) { 260 for (int i = 1; i < argc; ++i) {
249 const char *arg = argv[i]; 261 const char *arg = argv[i];
250 262
251 if (arg[0] == '-') { 263 if (arg[0] == '-') {
252 set_option(arg, "Command line"); 264 set_option(arg, "Command line");
253 265
254 } else if (input == NULL) { 266 } else if (input == NULL) {
255 in_name = arg; 267 in_name = arg;
256 if ((input = fopen(in_name, "r")) == NULL) 268 if ((input = fopen(in_name, "r")) == NULL)
257 err(1, "%s", in_name); 269 err(1, "%s", in_name);
258 270
259 } else if (output == NULL) { 271 } else if (output == NULL) {
260 if (strcmp(arg, in_name) == 0) 272 if (strcmp(arg, in_name) == 0)
261 errx(1, "input and output files " 273 errx(1, "input and output files "
262 "must be different"); 274 "must be different");
263 if ((output = fopen(arg, "w")) == NULL) 275 if ((output = fopen(arg, "w")) == NULL)
264 err(1, "%s", arg); 276 err(1, "%s", arg);
265 277
266 } else 278 } else
267 errx(1, "too many arguments: %s", arg); 279 errx(1, "too many arguments: %s", arg);
268 } 280 }
269 281
270 if (input == NULL) { 282 if (input == NULL) {
271 input = stdin; 283 input = stdin;
272 output = stdout; 284 output = stdout;
273 } else if (output == NULL) 285 } else if (output == NULL)
274 bakcopy(); 286 bakcopy();
275 287
276 if (opt.comment_column <= 1) 288 if (opt.comment_column <= 1)
277 opt.comment_column = 2; /* don't put normal comments in column 289 opt.comment_column = 2; /* don't put normal comments in column
278 * 1, see opt.format_col1_comments */ 290 * 1, see opt.format_col1_comments */
279 if (opt.block_comment_max_line_length <= 0) 291 if (opt.block_comment_max_line_length <= 0)
280 opt.block_comment_max_line_length = opt.max_line_length; 292 opt.block_comment_max_line_length = opt.max_line_length;
281 if (opt.local_decl_indent < 0) 293 if (opt.local_decl_indent < 0)
282 opt.local_decl_indent = opt.decl_indent; 294 opt.local_decl_indent = opt.decl_indent;
283 if (opt.decl_comment_column <= 0) 295 if (opt.decl_comment_column <= 0)
284 opt.decl_comment_column = opt.left_justify_decl 296 opt.decl_comment_column = opt.left_justify_decl
285 ? (opt.comment_column <= 10 ? 2 : opt.comment_column - 8) 297 ? (opt.comment_column <= 10 ? 2 : opt.comment_column - 8)
286 : opt.comment_column; 298 : opt.comment_column;
287 if (opt.continuation_indent == 0) 299 if (opt.continuation_indent == 0)
288 opt.continuation_indent = opt.indent_size; 300 opt.continuation_indent = opt.indent_size;
289} 301}
290 302
291static void 303static void
292set_initial_indentation(void) 304set_initial_indentation(void)
293{ 305{
294 inp_read_line(); 306 inp_read_line();
295 307
296 int ind = 0; 308 int ind = 0;
297 for (const char *p = inp_p;; p++) { 309 for (const char *p = inp_p;; p++) {
298 if (*p == ' ') 310 if (*p == ' ')
299 ind++; 311 ind++;
300 else if (*p == '\t') 312 else if (*p == '\t')
301 ind = next_tab(ind); 313 ind = next_tab(ind);
302 else 314 else
303 break; 315 break;
304 } 316 }
305 317
306 ps.ind_level = ps.ind_level_follow = ind / opt.indent_size; 318 ps.ind_level = ps.ind_level_follow = ind / opt.indent_size;
307} 319}
308 320
309static void 321static void
310maybe_break_line(lexer_symbol lsym) 322maybe_break_line(lexer_symbol lsym)
311{ 323{
312 if (!ps.force_nl) 324 if (!ps.force_nl)
313 return; 325 return;
314 if (lsym == lsym_semicolon) 326 if (lsym == lsym_semicolon)
315 return; 327 return;
316 if (lsym == lsym_lbrace && opt.brace_same_line 328 if (lsym == lsym_lbrace && opt.brace_same_line
317 && ps.prev_lsym != lsym_lbrace) 329 && ps.prev_lsym != lsym_lbrace)
318 return; 330 return;
319 331
320 output_line(); 332 output_line();
321 ps.force_nl = false; 333 ps.force_nl = false;
322} 334}
323 335
324static void 336static void
325move_com_to_code(lexer_symbol lsym) 337move_com_to_code(lexer_symbol lsym)
326{ 338{
327 if (ps.want_blank) 339 if (ps.want_blank)
328 buf_add_char(&code, ' '); 340 buf_add_char(&code, ' ');
329 buf_add_buf(&code, &com); 341 buf_add_buf(&code, &com);
330 com.len = 0; 342 buf_clear(&com);
331 ps.want_blank = lsym != lsym_rparen && lsym != lsym_rbracket; 343 ps.want_blank = lsym != lsym_rparen && lsym != lsym_rbracket;
332} 344}
333 345
334static void 346static void
335update_ps_lbrace_kind(lexer_symbol lsym) 347update_ps_lbrace_kind(lexer_symbol lsym)
336{ 348{
337 if (lsym == lsym_tag) { 349 if (lsym == lsym_tag) {
338 ps.lbrace_kind = token.s[0] == 's' ? psym_lbrace_struct : 350 ps.lbrace_kind = token.s[0] == 's' ? psym_lbrace_struct :
339 token.s[0] == 'u' ? psym_lbrace_union : 351 token.s[0] == 'u' ? psym_lbrace_union :
340 psym_lbrace_enum; 352 psym_lbrace_enum;
341 } else if (lsym != lsym_type_outside_parentheses 353 } else if (lsym != lsym_type_outside_parentheses
342 && lsym != lsym_word 354 && lsym != lsym_word
343 && lsym != lsym_lbrace) 355 && lsym != lsym_lbrace)
344 ps.lbrace_kind = psym_lbrace_block; 356 ps.lbrace_kind = psym_lbrace_block;
345} 357}
346 358
347static void 359static void
348indent_declarator(int decl_ind, bool tabs_to_var) 360indent_declarator(int decl_ind, bool tabs_to_var)
349{ 361{
350 int base = ps.ind_level * opt.indent_size; 362 int base = ps.ind_level * opt.indent_size;
351 int ind = base + (int)code.len; 363 int ind = base + (int)code.len;
352 int target = base + decl_ind; 364 int target = base + decl_ind;
353 size_t orig_code_len = code.len; 365 size_t orig_code_len = code.len;
354 366
355 if (tabs_to_var) 367 if (tabs_to_var)
356 for (int next; (next = next_tab(ind)) <= target; ind = next) 368 for (int next; (next = next_tab(ind)) <= target; ind = next)
357 buf_add_char(&code, '\t'); 369 buf_add_char(&code, '\t');
358 370
359 for (; ind < target; ind++) 371 for (; ind < target; ind++)
360 buf_add_char(&code, ' '); 372 buf_add_char(&code, ' ');
361 373
362 if (code.len == orig_code_len && ps.want_blank) { 374 if (code.len == orig_code_len && ps.want_blank) {
363 buf_add_char(&code, ' '); 375 buf_add_char(&code, ' ');
364 ps.want_blank = false; 376 ps.want_blank = false;
365 } 377 }
366 ps.decl_indent_done = true; 378 ps.decl_indent_done = true;
367} 379}
368 380
369static bool 381static bool
370is_function_pointer_declaration(void) 382is_function_pointer_declaration(void)
371{ 383{
372 return ps.in_decl 384 return ps.in_decl
373 && !ps.in_init 385 && !ps.in_init
374 && !ps.decl_indent_done 386 && !ps.decl_indent_done
375 && !ps.line_has_func_def 387 && !ps.line_has_func_def
376 && ps.line_start_nparen == 0; 388 && ps.line_start_nparen == 0;
377} 389}
378 390
379static int 391static int
380process_eof(void) 392process_eof(void)
381{ 393{
382 output_finish(); 394 output_finish();
383 395
384 if (ps.psyms.top > 1) /* check for balanced braces */ 396 if (ps.psyms.top > 1) /* check for balanced braces */
385 diag(1, "Stuff missing from end of file"); 397 diag(1, "Stuff missing from end of file");
386 398
387 return found_err ? EXIT_FAILURE : EXIT_SUCCESS; 399 return found_err ? EXIT_FAILURE : EXIT_SUCCESS;
388} 400}
389 401
390/* move the whole line to the 'label' buffer */ 402/* move the whole line to the 'label' buffer */
391static void 403static void
392read_preprocessing_line(void) 404read_preprocessing_line(void)
393{ 405{
394 enum { 406 enum {
395 PLAIN, STR, CHR, COMM 407 PLAIN, STR, CHR, COMM
396 } state = PLAIN; 408 } state = PLAIN;
397 409
398 buf_add_char(&lab, '#'); 410 buf_add_char(&lab, '#');
399 411
400 while (inp_p[0] != '\n' || (state == COMM && !had_eof)) { 412 while (inp_p[0] != '\n' || (state == COMM && !had_eof)) {
401 buf_add_char(&lab, inp_next()); 413 buf_add_char(&lab, inp_next());
402 switch (lab.s[lab.len - 1]) { 414 switch (lab.s[lab.len - 1]) {
403 case '\\': 415 case '\\':
404 if (state != COMM) 416 if (state != COMM)
405 buf_add_char(&lab, inp_next()); 417 buf_add_char(&lab, inp_next());
406 break; 418 break;
407 case '/': 419 case '/':
408 if (inp_p[0] == '*' && state == PLAIN) { 420 if (inp_p[0] == '*' && state == PLAIN) {
409 state = COMM; 421 state = COMM;
410 buf_add_char(&lab, *inp_p++); 422 buf_add_char(&lab, *inp_p++);
411 } 423 }
412 break; 424 break;
413 case '"': 425 case '"':
414 if (state == STR) 426 if (state == STR)
415 state = PLAIN; 427 state = PLAIN;
416 else if (state == PLAIN) 428 else if (state == PLAIN)
417 state = STR; 429 state = STR;
418 break; 430 break;
419 case '\'': 431 case '\'':
420 if (state == CHR) 432 if (state == CHR)
421 state = PLAIN; 433 state = PLAIN;
422 else if (state == PLAIN) 434 else if (state == PLAIN)
423 state = CHR; 435 state = CHR;
424 break; 436 break;
425 case '*': 437 case '*':
426 if (inp_p[0] == '/' && state == COMM) { 438 if (inp_p[0] == '/' && state == COMM) {
427 state = PLAIN; 439 state = PLAIN;
428 buf_add_char(&lab, *inp_p++); 440 buf_add_char(&lab, *inp_p++);
429 } 441 }
430 break; 442 break;
431 } 443 }
432 } 444 }
433 445
434 while (lab.len > 0 && ch_isblank(lab.s[lab.len - 1])) 446 while (lab.len > 0 && ch_isblank(lab.s[lab.len - 1]))
435 lab.len--; 447 lab.len--;
 448 buf_terminate(&lab);
436} 449}
437 450
438static void 451static void
439process_preprocessing(void) 452process_preprocessing(void)
440{ 453{
441 if (lab.len > 0 || code.len > 0 || com.len > 0) 454 if (lab.len > 0 || code.len > 0 || com.len > 0)
442 output_line(); 455 output_line();
443 456
444 read_preprocessing_line(); 457 read_preprocessing_line();
445 458
446 const char *dir = lab.s + 1, *line_end = lab.s + lab.len; 459 const char *dir = lab.s + 1, *line_end = lab.s + lab.len;
447 while (dir < line_end && ch_isblank(*dir)) 460 while (dir < line_end && ch_isblank(*dir))
448 dir++; 461 dir++;
449 size_t dir_len = 0; 462 size_t dir_len = 0;
450 while (dir + dir_len < line_end && ch_isalpha(dir[dir_len])) 463 while (dir + dir_len < line_end && ch_isalpha(dir[dir_len]))
451 dir_len++; 464 dir_len++;
452 465
453 if (dir_len >= 2 && memcmp(dir, "if", 2) == 0) { 466 if (dir_len >= 2 && memcmp(dir, "if", 2) == 0) {
454 if ((size_t)ifdef_level < array_length(state_stack)) 467 if ((size_t)ifdef_level < array_length(state_stack))
455 state_stack[ifdef_level++] = ps; 468 state_stack[ifdef_level++] = ps;
456 else 469 else
457 diag(1, "#if stack overflow"); 470 diag(1, "#if stack overflow");
458 out.line_kind = lk_if; 471 out.line_kind = lk_if;
459 472
460 } else if (dir_len >= 2 && memcmp(dir, "el", 2) == 0) { 473 } else if (dir_len >= 2 && memcmp(dir, "el", 2) == 0) {
461 if (ifdef_level <= 0) 474 if (ifdef_level <= 0)
462 diag(1, dir[2] == 'i' 475 diag(1, dir[2] == 'i'
463 ? "Unmatched #elif" : "Unmatched #else"); 476 ? "Unmatched #elif" : "Unmatched #else");
464 else 477 else
465 ps = state_stack[ifdef_level - 1]; 478 ps = state_stack[ifdef_level - 1];
466 479
467 } else if (dir_len == 5 && memcmp(dir, "endif", 5) == 0) { 480 } else if (dir_len == 5 && memcmp(dir, "endif", 5) == 0) {
468 if (ifdef_level <= 0) 481 if (ifdef_level <= 0)
469 diag(1, "Unmatched #endif"); 482 diag(1, "Unmatched #endif");
470 else 483 else
471 ifdef_level--; 484 ifdef_level--;
472 out.line_kind = lk_endif; 485 out.line_kind = lk_endif;
473 } 486 }
474} 487}
475 488
476static void 489static void
477process_newline(void) 490process_newline(void)
478{ 491{
479 if (ps.prev_lsym == lsym_comma 492 if (ps.prev_lsym == lsym_comma
480 && ps.nparen == 0 && !ps.in_init 493 && ps.nparen == 0 && !ps.in_init
481 && !opt.break_after_comma && ps.break_after_comma 494 && !opt.break_after_comma && ps.break_after_comma
482 && lab.len == 0 /* for preprocessing lines */ 495 && lab.len == 0 /* for preprocessing lines */
483 && com.len == 0) 496 && com.len == 0)
484 goto stay_in_line; 497 goto stay_in_line;
485 if (ps.psyms.sym[ps.psyms.top] == psym_switch_expr 498 if (ps.psyms.sym[ps.psyms.top] == psym_switch_expr
486 && opt.brace_same_line) { 499 && opt.brace_same_line) {
487 ps.force_nl = true; 500 ps.force_nl = true;
488 goto stay_in_line; 501 goto stay_in_line;
489 } 502 }
490 503
491 output_line(); 504 output_line();
492 505
493stay_in_line: 506stay_in_line:
494 ++line_no; 507 ++line_no;
495} 508}
496 509
497static bool 510static bool
498want_blank_before_lparen(void) 511want_blank_before_lparen(void)
499{ 512{
500 if (!ps.want_blank) 513 if (!ps.want_blank)
501 return false; 514 return false;
502 if (opt.proc_calls_space) 515 if (opt.proc_calls_space)
503 return true; 516 return true;
504 if (ps.prev_lsym == lsym_rparen || ps.prev_lsym == lsym_rbracket) 517 if (ps.prev_lsym == lsym_rparen || ps.prev_lsym == lsym_rbracket)
505 return false; 518 return false;
506 if (ps.prev_lsym == lsym_offsetof) 519 if (ps.prev_lsym == lsym_offsetof)
507 return false; 520 return false;
508 if (ps.prev_lsym == lsym_sizeof) 521 if (ps.prev_lsym == lsym_sizeof)
509 return opt.blank_after_sizeof; 522 return opt.blank_after_sizeof;
510 if (ps.prev_lsym == lsym_word || ps.prev_lsym == lsym_funcname) 523 if (ps.prev_lsym == lsym_word || ps.prev_lsym == lsym_funcname)
511 return false; 524 return false;
512 return true; 525 return true;
513} 526}
514 527
515static void 528static void
516process_lparen(void) 529process_lparen(void)
517{ 530{
518 if (++ps.nparen == array_length(ps.paren)) { 531 if (++ps.nparen == array_length(ps.paren)) {
519 diag(0, "Reached internal limit of %zu unclosed parentheses", 532 diag(0, "Reached internal limit of %zu unclosed parentheses",
520 array_length(ps.paren)); 533 array_length(ps.paren));
521 ps.nparen--; 534 ps.nparen--;
522 } 535 }
523 536
524 if (is_function_pointer_declaration()) 537 if (is_function_pointer_declaration())
525 indent_declarator(ps.decl_ind, ps.tabs_to_var); 538 indent_declarator(ps.decl_ind, ps.tabs_to_var);
526 else if (want_blank_before_lparen()) 539 else if (want_blank_before_lparen())
527 buf_add_char(&code, ' '); 540 buf_add_char(&code, ' ');
528 ps.want_blank = false; 541 ps.want_blank = false;
529 buf_add_char(&code, token.s[0]); 542 buf_add_char(&code, token.s[0]);
530 543
531 if (opt.extra_expr_indent && ps.spaced_expr_psym != psym_0) 544 if (opt.extra_expr_indent && ps.spaced_expr_psym != psym_0)
532 ps.extra_expr_indent = eei_maybe; 545 ps.extra_expr_indent = eei_maybe;
533 546
534 if (ps.in_var_decl && ps.psyms.top <= 2 && !ps.in_init) { 547 if (ps.in_var_decl && ps.psyms.top <= 2 && !ps.in_init) {
535 parse(psym_stmt); /* prepare for function definition */ 548 parse(psym_stmt); /* prepare for function definition */
536 ps.in_var_decl = false; 549 ps.in_var_decl = false;
537 } 550 }
538 551
539 int indent = ind_add(0, code.s, code.len); 552 int indent = ind_add(0, code.s, code.len);
540 553
541 enum paren_level_cast cast = cast_unknown; 554 enum paren_level_cast cast = cast_unknown;
542 if (ps.prev_lsym == lsym_offsetof 555 if (ps.prev_lsym == lsym_offsetof
543 || ps.prev_lsym == lsym_sizeof 556 || ps.prev_lsym == lsym_sizeof
544 || ps.prev_lsym == lsym_for 557 || ps.prev_lsym == lsym_for
545 || ps.prev_lsym == lsym_if 558 || ps.prev_lsym == lsym_if
546 || ps.prev_lsym == lsym_switch 559 || ps.prev_lsym == lsym_switch
547 || ps.prev_lsym == lsym_while 560 || ps.prev_lsym == lsym_while
548 || ps.line_has_func_def) 561 || ps.line_has_func_def)
549 cast = cast_no; 562 cast = cast_no;
550 563
551 ps.paren[ps.nparen - 1].indent = indent; 564 ps.paren[ps.nparen - 1].indent = indent;
552 ps.paren[ps.nparen - 1].cast = cast; 565 ps.paren[ps.nparen - 1].cast = cast;
553 debug_println("paren_indents[%d] is now %s%d", 566 debug_println("paren_indents[%d] is now %s%d",
554 ps.nparen - 1, paren_level_cast_name[cast], indent); 567 ps.nparen - 1, paren_level_cast_name[cast], indent);
555} 568}
556 569
557static void 570static void
558process_rparen(void) 571process_rparen(void)
559{ 572{
560 if (ps.nparen == 0) { 573 if (ps.nparen == 0) {
561 diag(0, "Extra '%c'", *token.s); 574 diag(0, "Extra '%c'", *token.s);
562 goto unbalanced; 575 goto unbalanced;
563 } 576 }
564 577
565 enum paren_level_cast cast = ps.paren[--ps.nparen].cast; 578 enum paren_level_cast cast = ps.paren[--ps.nparen].cast;
566 if (ps.in_func_def_params || (ps.line_has_decl && !ps.in_init)) 579 if (ps.in_func_def_params || (ps.line_has_decl && !ps.in_init))
567 cast = cast_no; 580 cast = cast_no;
568 581
569 ps.prev_paren_was_cast = cast == cast_maybe; 582 ps.prev_paren_was_cast = cast == cast_maybe;
570 if (cast == cast_maybe) { 583 if (cast == cast_maybe) {
571 ps.next_unary = true; 584 ps.next_unary = true;
572 ps.want_blank = opt.space_after_cast; 585 ps.want_blank = opt.space_after_cast;
573 } else 586 } else
574 ps.want_blank = true; 587 ps.want_blank = true;
575 588
576 if (code.len == 0) 589 if (code.len == 0)
577 ps.line_start_nparen = ps.nparen; 590 ps.line_start_nparen = ps.nparen;
578 591
579unbalanced: 592unbalanced:
580 buf_add_char(&code, token.s[0]); 593 buf_add_char(&code, token.s[0]);
581 594
582 if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) { 595 if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) {
583 if (ps.extra_expr_indent == eei_maybe) 596 if (ps.extra_expr_indent == eei_maybe)
584 ps.extra_expr_indent = eei_last; 597 ps.extra_expr_indent = eei_last;
585 ps.force_nl = true; 598 ps.force_nl = true;
586 ps.next_unary = true; 599 ps.next_unary = true;
587 ps.in_stmt_or_decl = false; 600 ps.in_stmt_or_decl = false;
588 parse(ps.spaced_expr_psym); 601 parse(ps.spaced_expr_psym);
589 ps.spaced_expr_psym = psym_0; 602 ps.spaced_expr_psym = psym_0;
590 ps.want_blank = true; 603 ps.want_blank = true;
591 out.line_kind = lk_stmt_head; 604 out.line_kind = lk_stmt_head;
592 } 605 }
593} 606}
594 607
595static void 608static void
596process_lbracket(void) 609process_lbracket(void)
597{ 610{
598 if (++ps.nparen == array_length(ps.paren)) { 611 if (++ps.nparen == array_length(ps.paren)) {
599 diag(0, "Reached internal limit of %zu unclosed parentheses", 612 diag(0, "Reached internal limit of %zu unclosed parentheses",
600 array_length(ps.paren)); 613 array_length(ps.paren));
601 ps.nparen--; 614 ps.nparen--;
602 } 615 }
603 616
604 if (code.len > 0 617 if (code.len > 0
605 && (ps.prev_lsym == lsym_comma || ps.prev_lsym == lsym_binary_op)) 618 && (ps.prev_lsym == lsym_comma || ps.prev_lsym == lsym_binary_op))
606 buf_add_char(&code, ' '); 619 buf_add_char(&code, ' ');
607 ps.want_blank = false; 620 ps.want_blank = false;
608 buf_add_char(&code, token.s[0]); 621 buf_add_char(&code, token.s[0]);
609 622
610 int indent = ind_add(0, code.s, code.len); 623 int indent = ind_add(0, code.s, code.len);
611 624
612 ps.paren[ps.nparen - 1].indent = indent; 625 ps.paren[ps.nparen - 1].indent = indent;
613 ps.paren[ps.nparen - 1].cast = cast_no; 626 ps.paren[ps.nparen - 1].cast = cast_no;
614 debug_println("paren_indents[%d] is now %d", ps.nparen - 1, indent); 627 debug_println("paren_indents[%d] is now %d", ps.nparen - 1, indent);
615} 628}
616 629
617static void 630static void
618process_rbracket(void) 631process_rbracket(void)
619{ 632{
620 if (ps.nparen == 0) { 633 if (ps.nparen == 0) {
621 diag(0, "Extra '%c'", *token.s); 634 diag(0, "Extra '%c'", *token.s);
622 goto unbalanced; 635 goto unbalanced;
623 } 636 }
624 --ps.nparen; 637 --ps.nparen;
625 638
626 ps.want_blank = true; 639 ps.want_blank = true;
627 if (code.len == 0) 640 if (code.len == 0)
628 ps.line_start_nparen = ps.nparen; 641 ps.line_start_nparen = ps.nparen;
629 642
630unbalanced: 643unbalanced:
631 buf_add_char(&code, token.s[0]); 644 buf_add_char(&code, token.s[0]);
632} 645}
633 646
634static void 647static void
635process_lbrace(void) 648process_lbrace(void)
636{ 649{
637 if (ps.prev_lsym == lsym_rparen && ps.prev_paren_was_cast) { 650 if (ps.prev_lsym == lsym_rparen && ps.prev_paren_was_cast) {
638 ps.in_var_decl = true; // XXX: not really 651 ps.in_var_decl = true; // XXX: not really
639 ps.in_init = true; 652 ps.in_init = true;
640 } 653 }
641 654
642 if (out.line_kind == lk_stmt_head) 655 if (out.line_kind == lk_stmt_head)
643 out.line_kind = lk_other; 656 out.line_kind = lk_other;
644 657
645 ps.in_stmt_or_decl = false; /* don't indent the {} */ 658 ps.in_stmt_or_decl = false; /* don't indent the {} */
646 659
647 if (!ps.in_init) 660 if (!ps.in_init)
648 ps.force_nl = true; 661 ps.force_nl = true;
649 else 662 else
650 ps.init_level++; 663 ps.init_level++;
651 664
652 if (code.len > 0 && !ps.in_init) { 665 if (code.len > 0 && !ps.in_init) {
653 if (!opt.brace_same_line || 666 if (!opt.brace_same_line ||
654 (code.len > 0 && code.s[code.len - 1] == '}')) 667 (code.len > 0 && code.s[code.len - 1] == '}'))
655 output_line(); 668 output_line();
656 else if (ps.in_func_def_params && !ps.in_var_decl) { 669 else if (ps.in_func_def_params && !ps.in_var_decl) {
657 ps.ind_level_follow = 0; 670 ps.ind_level_follow = 0;
658 if (opt.function_brace_split) 671 if (opt.function_brace_split)
659 output_line(); 672 output_line();
660 else 673 else
661 ps.want_blank = true; 674 ps.want_blank = true;
662 } 675 }
663 } 676 }
664 677
665 if (ps.nparen > 0 && ps.init_level == 0) { 678 if (ps.nparen > 0 && ps.init_level == 0) {
666 diag(1, "Unbalanced parentheses"); 679 diag(1, "Unbalanced parentheses");
667 ps.nparen = 0; 680 ps.nparen = 0;
668 if (ps.spaced_expr_psym != psym_0) { 681 if (ps.spaced_expr_psym != psym_0) {
669 parse(ps.spaced_expr_psym); 682 parse(ps.spaced_expr_psym);
670 ps.spaced_expr_psym = psym_0; 683 ps.spaced_expr_psym = psym_0;
671 ps.ind_level = ps.ind_level_follow; 684 ps.ind_level = ps.ind_level_follow;
672 } 685 }
673 } 686 }
674 687
675 if (code.len == 0) 688 if (code.len == 0)
676 ps.in_stmt_cont = false; /* don't indent the '{' itself 689 ps.in_stmt_cont = false; /* don't indent the '{' itself
677 */ 690 */
678 if (ps.in_decl && ps.in_var_decl) { 691 if (ps.in_decl && ps.in_var_decl) {
679 ps.di_stack[ps.decl_level] = ps.decl_ind; 692 ps.di_stack[ps.decl_level] = ps.decl_ind;
680 if (++ps.decl_level == (int)array_length(ps.di_stack)) { 693 if (++ps.decl_level == (int)array_length(ps.di_stack)) {
681 diag(0, "Reached internal limit of %zu struct levels", 694 diag(0, "Reached internal limit of %zu struct levels",
682 array_length(ps.di_stack)); 695 array_length(ps.di_stack));
683 ps.decl_level--; 696 ps.decl_level--;
684 } 697 }
685 } else { 698 } else {
686 ps.line_has_decl = false; /* we can't be in the middle of 699 ps.line_has_decl = false; /* we can't be in the middle of
687 * a declaration, so don't do 700 * a declaration, so don't do
688 * special indentation of 701 * special indentation of
689 * comments */ 702 * comments */
690 ps.in_func_def_params = false; 703 ps.in_func_def_params = false;
691 ps.in_decl = false; 704 ps.in_decl = false;
692 } 705 }
693 706
694 ps.decl_ind = 0; 707 ps.decl_ind = 0;
695 parse(ps.lbrace_kind); 708 parse(ps.lbrace_kind);
696 if (ps.want_blank) 709 if (ps.want_blank)
697 buf_add_char(&code, ' '); 710 buf_add_char(&code, ' ');
698 ps.want_blank = false; 711 ps.want_blank = false;
699 buf_add_char(&code, '{'); 712 buf_add_char(&code, '{');
700 ps.declaration = decl_no; 713 ps.declaration = decl_no;
701} 714}
702 715
703static void 716static void
704process_rbrace(void) 717process_rbrace(void)
705{ 718{
706 if (ps.nparen > 0 && ps.init_level == 0) { 719 if (ps.nparen > 0 && ps.init_level == 0) {
707 diag(1, "Unbalanced parentheses"); 720 diag(1, "Unbalanced parentheses");
708 ps.nparen = 0; 721 ps.nparen = 0;
709 ps.spaced_expr_psym = psym_0; 722 ps.spaced_expr_psym = psym_0;
710 } 723 }
711 724
712 ps.declaration = decl_no; 725 ps.declaration = decl_no;
713 if (ps.init_level > 0) 726 if (ps.init_level > 0)
714 ps.init_level--; 727 ps.init_level--;
715 728
716 if (code.len > 0 && !ps.in_init) 729 if (code.len > 0 && !ps.in_init)
717 output_line(); 730 output_line();
718 731
719 buf_add_char(&code, '}'); 732 buf_add_char(&code, '}');
720 ps.want_blank = true; 733 ps.want_blank = true;
721 ps.in_stmt_or_decl = false; // XXX: Initializers don't end a stmt 734 ps.in_stmt_or_decl = false; // XXX: Initializers don't end a stmt
722 ps.in_stmt_cont = false; 735 ps.in_stmt_cont = false;
723 736
724 if (ps.decl_level > 0) { /* multi-level structure declaration */ 737 if (ps.decl_level > 0) { /* multi-level structure declaration */
725 ps.decl_ind = ps.di_stack[--ps.decl_level]; 738 ps.decl_ind = ps.di_stack[--ps.decl_level];
726 if (ps.decl_level == 0 && !ps.in_func_def_params) { 739 if (ps.decl_level == 0 && !ps.in_func_def_params) {
727 ps.declaration = decl_begin; 740 ps.declaration = decl_begin;
728 ps.decl_ind = ps.ind_level == 0 741 ps.decl_ind = ps.ind_level == 0
729 ? opt.decl_indent : opt.local_decl_indent; 742 ? opt.decl_indent : opt.local_decl_indent;
730 } 743 }
731 ps.in_decl = true; 744 ps.in_decl = true;
732 } 745 }
733 746
734 if (ps.psyms.top == 2) 747 if (ps.psyms.top == 2)
735 out.line_kind = lk_func_end; 748 out.line_kind = lk_func_end;
736 749
737 parse(psym_rbrace); 750 parse(psym_rbrace);
738 751
739 if (!ps.in_var_decl 752 if (!ps.in_var_decl
740 && ps.psyms.sym[ps.psyms.top] != psym_do_stmt 753 && ps.psyms.sym[ps.psyms.top] != psym_do_stmt
741 && ps.psyms.sym[ps.psyms.top] != psym_if_expr_stmt) 754 && ps.psyms.sym[ps.psyms.top] != psym_if_expr_stmt)
742 ps.force_nl = true; 755 ps.force_nl = true;
743} 756}
744 757
745static void 758static void
746process_period(void) 759process_period(void)
747{ 760{
748 if (code.len > 0 && code.s[code.len - 1] == ',') 761 if (code.len > 0 && code.s[code.len - 1] == ',')
749 buf_add_char(&code, ' '); 762 buf_add_char(&code, ' ');
750 buf_add_char(&code, '.'); 763 buf_add_char(&code, '.');
751 ps.want_blank = false; 764 ps.want_blank = false;
752} 765}
753 766
754static void 767static void
755process_unary_op(void) 768process_unary_op(void)
756{ 769{
757 if (is_function_pointer_declaration()) { 770 if (is_function_pointer_declaration()) {
758 int ind = ps.decl_ind - (int)token.len; 771 int ind = ps.decl_ind - (int)token.len;
759 indent_declarator(ind, ps.tabs_to_var); 772 indent_declarator(ind, ps.tabs_to_var);
760 ps.want_blank = false; 773 ps.want_blank = false;
761 } else if ((token.s[0] == '+' || token.s[0] == '-') 774 } else if ((token.s[0] == '+' || token.s[0] == '-')
762 && code.len > 0 && code.s[code.len - 1] == token.s[0]) 775 && code.len > 0 && code.s[code.len - 1] == token.s[0])
763 ps.want_blank = true; 776 ps.want_blank = true;
764 777
765 if (ps.want_blank) 778 if (ps.want_blank)
766 buf_add_char(&code, ' '); 779 buf_add_char(&code, ' ');
767 buf_add_buf(&code, &token); 780 buf_add_buf(&code, &token);
768 ps.want_blank = false; 781 ps.want_blank = false;
769} 782}
770 783
771static void 784static void
772process_postfix_op(void) 785process_postfix_op(void)
773{ 786{
774 buf_add_buf(&code, &token); 787 buf_add_buf(&code, &token);
775 ps.want_blank = true; 788 ps.want_blank = true;
776} 789}
777 790
778static void 791static void
779process_comma(void) 792process_comma(void)
780{ 793{
781 ps.want_blank = code.len > 0; /* only put blank after comma if comma 794 ps.want_blank = code.len > 0; /* only put blank after comma if comma
782 * does not start the line */ 795 * does not start the line */
783 796
784 if (ps.in_decl && !ps.line_has_func_def && !ps.in_init && 797 if (ps.in_decl && !ps.line_has_func_def && !ps.in_init &&
785 !ps.decl_indent_done && ps.line_start_nparen == 0) { 798 !ps.decl_indent_done && ps.line_start_nparen == 0) {
786 /* indent leading commas and not the actual identifiers */ 799 /* indent leading commas and not the actual identifiers */
787 indent_declarator(ps.decl_ind - 1, ps.tabs_to_var); 800 indent_declarator(ps.decl_ind - 1, ps.tabs_to_var);
788 } 801 }
789 802
790 buf_add_char(&code, ','); 803 buf_add_char(&code, ',');
791 804
792 if (ps.nparen == 0) { 805 if (ps.nparen == 0) {
793 if (ps.init_level == 0) 806 if (ps.init_level == 0)
794 ps.in_init = false; 807 ps.in_init = false;
795 int typical_varname_length = 8; 808 int typical_varname_length = 8;
796 if (ps.break_after_comma && (opt.break_after_comma || 809 if (ps.break_after_comma && (opt.break_after_comma ||
797 ind_add(compute_code_indent(), code.s, code.len) 810 ind_add(compute_code_indent(), code.s, code.len)
798 >= opt.max_line_length - typical_varname_length)) 811 >= opt.max_line_length - typical_varname_length))
799 ps.force_nl = true; 812 ps.force_nl = true;
800 } 813 }
801} 814}
802 815
803static void 816static void
804process_colon_label(void) 817process_colon_label(void)
805{ 818{
806 buf_add_buf(&lab, &code); 819 buf_add_buf(&lab, &code);
807 buf_add_char(&lab, ':'); 820 buf_add_char(&lab, ':');
808 code.len = 0; 821 buf_clear(&code);
809 822
810 if (ps.seen_case) 823 if (ps.seen_case)
811 out.line_kind = lk_case_or_default; 824 out.line_kind = lk_case_or_default;
812 ps.in_stmt_or_decl = false; 825 ps.in_stmt_or_decl = false;
813 ps.force_nl = ps.seen_case; 826 ps.force_nl = ps.seen_case;
814 ps.seen_case = false; 827 ps.seen_case = false;
815 ps.want_blank = false; 828 ps.want_blank = false;
816} 829}
817 830
818static void 831static void
819process_colon_other(void) 832process_colon_other(void)
820{ 833{
821 buf_add_char(&code, ':'); 834 buf_add_char(&code, ':');
822 ps.want_blank = ps.decl_level == 0; 835 ps.want_blank = ps.decl_level == 0;
823} 836}
824 837
825static void 838static void
826process_semicolon(void) 839process_semicolon(void)
827{ 840{
828 if (out.line_kind == lk_stmt_head) 841 if (out.line_kind == lk_stmt_head)
829 out.line_kind = lk_other; 842 out.line_kind = lk_other;
830 if (ps.decl_level == 0) 843 if (ps.decl_level == 0)
831 ps.in_var_decl = false; 844 ps.in_var_decl = false;
832 ps.seen_case = false; /* only needs to be reset on error */ 845 ps.seen_case = false; /* only needs to be reset on error */
833 ps.quest_level = 0; /* only needs to be reset on error */ 846 ps.quest_level = 0; /* only needs to be reset on error */
834 if (ps.prev_lsym == lsym_rparen) 847 if (ps.prev_lsym == lsym_rparen)
835 ps.in_func_def_params = false; 848 ps.in_func_def_params = false;
836 ps.in_init = false; 849 ps.in_init = false;
837 ps.init_level = 0; 850 ps.init_level = 0;
838 ps.declaration = ps.declaration == decl_begin ? decl_end : decl_no; 851 ps.declaration = ps.declaration == decl_begin ? decl_end : decl_no;
839 852
840 if (ps.in_decl && code.len == 0 && !ps.in_init && 853 if (ps.in_decl && code.len == 0 && !ps.in_init &&
841 !ps.decl_indent_done && ps.line_start_nparen == 0) { 854 !ps.decl_indent_done && ps.line_start_nparen == 0) {
842 /* indent stray semicolons in declarations */ 855 /* indent stray semicolons in declarations */
843 indent_declarator(ps.decl_ind - 1, ps.tabs_to_var); 856 indent_declarator(ps.decl_ind - 1, ps.tabs_to_var);
844 } 857 }
845 858
846 ps.in_decl = ps.decl_level > 0; /* if we were in a first level 859 ps.in_decl = ps.decl_level > 0; /* if we were in a first level
847 * structure declaration before, we 860 * structure declaration before, we
848 * aren't anymore */ 861 * aren't anymore */
849 862
850 if (ps.nparen > 0 && ps.spaced_expr_psym != psym_for_exprs) { 863 if (ps.nparen > 0 && ps.spaced_expr_psym != psym_for_exprs) {
851 /* There were unbalanced parentheses in the statement. It is a 864 /* There were unbalanced parentheses in the statement. It is a
852 * bit complicated, because the semicolon might be in a for 865 * bit complicated, because the semicolon might be in a for
853 * statement. */ 866 * statement. */
854 diag(1, "Unbalanced parentheses"); 867 diag(1, "Unbalanced parentheses");
855 ps.nparen = 0; 868 ps.nparen = 0;
856 if (ps.spaced_expr_psym != psym_0) { 869 if (ps.spaced_expr_psym != psym_0) {
857 parse(ps.spaced_expr_psym); 870 parse(ps.spaced_expr_psym);
858 ps.spaced_expr_psym = psym_0; 871 ps.spaced_expr_psym = psym_0;
859 } 872 }
860 } 873 }
861 buf_add_char(&code, ';'); 874 buf_add_char(&code, ';');
862 ps.want_blank = true; 875 ps.want_blank = true;
863 ps.in_stmt_or_decl = ps.nparen > 0; 876 ps.in_stmt_or_decl = ps.nparen > 0;
864 ps.decl_ind = 0; 877 ps.decl_ind = 0;
865 878
866 if (ps.spaced_expr_psym == psym_0) { 879 if (ps.spaced_expr_psym == psym_0) {
867 parse(psym_stmt); 880 parse(psym_stmt);
868 ps.force_nl = true; 881 ps.force_nl = true;
869 } 882 }
870} 883}
871 884
872static void 885static void
873process_type_outside_parentheses(void) 886process_type_outside_parentheses(void)
874{ 887{
875 parse(psym_decl); /* let the parser worry about indentation */ 888 parse(psym_decl); /* let the parser worry about indentation */
876 889
877 if (ps.prev_lsym == lsym_rparen && ps.psyms.top <= 1 && code.len > 0) 890 if (ps.prev_lsym == lsym_rparen && ps.psyms.top <= 1 && code.len > 0)
878 output_line(); 891 output_line();
879 892
880 if (ps.in_func_def_params && opt.indent_parameters && 893 if (ps.in_func_def_params && opt.indent_parameters &&
881 ps.decl_level == 0) { 894 ps.decl_level == 0) {
882 ps.ind_level = ps.ind_level_follow = 1; 895 ps.ind_level = ps.ind_level_follow = 1;
883 ps.in_stmt_cont = false; 896 ps.in_stmt_cont = false;
884 } 897 }
885 898
886 ps.in_var_decl = /* maybe */ true; 899 ps.in_var_decl = /* maybe */ true;
887 ps.in_decl = ps.line_has_decl = ps.prev_lsym != lsym_typedef; 900 ps.in_decl = ps.line_has_decl = ps.prev_lsym != lsym_typedef;
888 if (ps.decl_level <= 0) 901 if (ps.decl_level <= 0)
889 ps.declaration = decl_begin; 902 ps.declaration = decl_begin;
890 903
891 int len = (int)token.len + 1; 904 int len = (int)token.len + 1;
892 int ind = ps.ind_level == 0 || ps.decl_level > 0 905 int ind = ps.ind_level == 0 || ps.decl_level > 0
893 ? opt.decl_indent /* global variable or local member */ 906 ? opt.decl_indent /* global variable or local member */
894 : opt.local_decl_indent; /* local variable */ 907 : opt.local_decl_indent; /* local variable */
895 ps.decl_ind = ind > 0 ? ind : len; 908 ps.decl_ind = ind > 0 ? ind : len;
896 ps.tabs_to_var = opt.use_tabs && ind > 0; 909 ps.tabs_to_var = opt.use_tabs && ind > 0;
897} 910}
898 911
899static void 912static void
900process_word(lexer_symbol lsym) 913process_word(lexer_symbol lsym)
901{ 914{
902 if (ps.in_decl) { 915 if (ps.in_decl) {
903 if (lsym == lsym_funcname) { 916 if (lsym == lsym_funcname) {
904 ps.in_decl = false; 917 ps.in_decl = false;
905 if (opt.procnames_start_line && code.len > 0) 918 if (opt.procnames_start_line && code.len > 0)
906 output_line(); 919 output_line();
907 else if (ps.want_blank) 920 else if (ps.want_blank)
908 buf_add_char(&code, ' '); 921 buf_add_char(&code, ' ');
909 ps.want_blank = false; 922 ps.want_blank = false;
910 923
911 } else if (!ps.in_init && !ps.decl_indent_done && 924 } else if (!ps.in_init && !ps.decl_indent_done &&
912 ps.line_start_nparen == 0) { 925 ps.line_start_nparen == 0) {
913 if (opt.decl_indent == 0 926 if (opt.decl_indent == 0
914 && code.len > 0 && code.s[code.len - 1] == '}') 927 && code.len > 0 && code.s[code.len - 1] == '}')
915 ps.decl_ind = ind_add(0, code.s, code.len) + 1; 928 ps.decl_ind = ind_add(0, code.s, code.len) + 1;
916 indent_declarator(ps.decl_ind, ps.tabs_to_var); 929 indent_declarator(ps.decl_ind, ps.tabs_to_var);
917 ps.want_blank = false; 930 ps.want_blank = false;
918 } 931 }
919 932
920 } else if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) { 933 } else if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) {
921 ps.force_nl = true; 934 ps.force_nl = true;
922 ps.in_stmt_or_decl = false; 935 ps.in_stmt_or_decl = false;
923 ps.next_unary = true; 936 ps.next_unary = true;
924 parse(ps.spaced_expr_psym); 937 parse(ps.spaced_expr_psym);
925 ps.spaced_expr_psym = psym_0; 938 ps.spaced_expr_psym = psym_0;
926 } 939 }
927} 940}
928 941
929static void 942static void
930process_do(void) 943process_do(void)
931{ 944{
932 ps.in_stmt_or_decl = false; 945 ps.in_stmt_or_decl = false;
933 ps.in_decl = false; 946 ps.in_decl = false;
934 947
935 if (code.len > 0) 948 if (code.len > 0)
936 output_line(); 949 output_line();
937 950
938 ps.force_nl = true; 951 ps.force_nl = true;
939 parse(psym_do); 952 parse(psym_do);
940} 953}
941 954
942static void 955static void
943process_else(void) 956process_else(void)
944{ 957{
945 ps.in_stmt_or_decl = false; 958 ps.in_stmt_or_decl = false;
946 959
947 if (code.len > 0 960 if (code.len > 0
948 && !(opt.cuddle_else && code.s[code.len - 1] == '}')) 961 && !(opt.cuddle_else && code.s[code.len - 1] == '}'))
949 output_line(); 962 output_line();
950 963
951 ps.force_nl = true; 964 ps.force_nl = true;
952 parse(psym_else); 965 parse(psym_else);
953} 966}
954 967
955static void 968static void
956process_lsym(lexer_symbol lsym) 969process_lsym(lexer_symbol lsym)
957{ 970{
958 switch (lsym) { 971 switch (lsym) {
959 /* INDENT OFF */ 972 /* INDENT OFF */
960 case lsym_preprocessing: process_preprocessing(); break; 973 case lsym_preprocessing: process_preprocessing(); break;
961 case lsym_newline: process_newline(); break; 974 case lsym_newline: process_newline(); break;
962 case lsym_comment: process_comment(); break; 975 case lsym_comment: process_comment(); break;
963 case lsym_lparen: process_lparen(); break; 976 case lsym_lparen: process_lparen(); break;
964 case lsym_lbracket: process_lbracket(); break; 977 case lsym_lbracket: process_lbracket(); break;
965 case lsym_rparen: process_rparen(); break; 978 case lsym_rparen: process_rparen(); break;
966 case lsym_rbracket: process_rbracket(); break; 979 case lsym_rbracket: process_rbracket(); break;
967 case lsym_lbrace: process_lbrace(); break; 980 case lsym_lbrace: process_lbrace(); break;
968 case lsym_rbrace: process_rbrace(); break; 981 case lsym_rbrace: process_rbrace(); break;
969 case lsym_period: process_period(); break; 982 case lsym_period: process_period(); break;
970 case lsym_unary_op: process_unary_op(); break; 983 case lsym_unary_op: process_unary_op(); break;
971 case lsym_postfix_op: process_postfix_op(); break; 984 case lsym_postfix_op: process_postfix_op(); break;
972 case lsym_binary_op: goto copy_token; 985 case lsym_binary_op: goto copy_token;
973 case lsym_question: ps.quest_level++; goto copy_token; 986 case lsym_question: ps.quest_level++; goto copy_token;
974 case lsym_colon_question: goto copy_token; 987 case lsym_colon_question: goto copy_token;
975 case lsym_colon_label: process_colon_label(); break; 988 case lsym_colon_label: process_colon_label(); break;
976 case lsym_colon_other: process_colon_other(); break; 989 case lsym_colon_other: process_colon_other(); break;
977 case lsym_comma: process_comma(); break; 990 case lsym_comma: process_comma(); break;
978 case lsym_semicolon: process_semicolon(); break; 991 case lsym_semicolon: process_semicolon(); break;
979 case lsym_typedef: goto copy_token; 992 case lsym_typedef: goto copy_token;
980 case lsym_modifier: goto copy_token; 993 case lsym_modifier: goto copy_token;
981 case lsym_case: ps.seen_case = true; goto copy_token; 994 case lsym_case: ps.seen_case = true; goto copy_token;
982 case lsym_default: ps.seen_case = true; goto copy_token; 995 case lsym_default: ps.seen_case = true; goto copy_token;
983 case lsym_do: process_do(); goto copy_token; 996 case lsym_do: process_do(); goto copy_token;
984 case lsym_else: process_else(); goto copy_token; 997 case lsym_else: process_else(); goto copy_token;
985 case lsym_for: ps.spaced_expr_psym = psym_for_exprs; goto copy_token; 998 case lsym_for: ps.spaced_expr_psym = psym_for_exprs; goto copy_token;
986 case lsym_if: ps.spaced_expr_psym = psym_if_expr; goto copy_token; 999 case lsym_if: ps.spaced_expr_psym = psym_if_expr; goto copy_token;
987 case lsym_switch: ps.spaced_expr_psym = psym_switch_expr; goto copy_token; 1000 case lsym_switch: ps.spaced_expr_psym = psym_switch_expr; goto copy_token;
988 case lsym_while: ps.spaced_expr_psym = psym_while_expr; goto copy_token; 1001 case lsym_while: ps.spaced_expr_psym = psym_while_expr; goto copy_token;
989 /* INDENT ON */ 1002 /* INDENT ON */
990 1003
991 case lsym_tag: 1004 case lsym_tag:
992 if (ps.nparen > 0) 1005 if (ps.nparen > 0)
993 goto copy_token; 1006 goto copy_token;
994 /* FALLTHROUGH */ 1007 /* FALLTHROUGH */
995 case lsym_type_outside_parentheses: 1008 case lsym_type_outside_parentheses:
996 process_type_outside_parentheses(); 1009 process_type_outside_parentheses();
997 goto copy_token; 1010 goto copy_token;
998 1011
999 case lsym_type_in_parentheses: 1012 case lsym_type_in_parentheses:
1000 case lsym_sizeof: 1013 case lsym_sizeof:
1001 case lsym_offsetof: 1014 case lsym_offsetof:
1002 case lsym_word: 1015 case lsym_word:
1003 case lsym_funcname: 1016 case lsym_funcname:
1004 case lsym_return: 1017 case lsym_return:
1005 process_word(lsym); 1018 process_word(lsym);
1006copy_token: 1019copy_token:
1007 if (ps.want_blank) 1020 if (ps.want_blank)
1008 buf_add_char(&code, ' '); 1021 buf_add_char(&code, ' ');
1009 buf_add_buf(&code, &token); 1022 buf_add_buf(&code, &token);
1010 if (lsym != lsym_funcname) 1023 if (lsym != lsym_funcname)
1011 ps.want_blank = true; 1024 ps.want_blank = true;
1012 break; 1025 break;
1013 1026
1014 default: 1027 default:
1015 break; 1028 break;
1016 } 1029 }
1017} 1030}
1018 1031
1019static int 1032static int
1020indent(void) 1033indent(void)
1021{ 1034{
1022 debug_parser_state(); 1035 debug_parser_state();
1023 1036
1024 for (;;) { /* loop until we reach eof */ 1037 for (;;) { /* loop until we reach eof */
1025 lexer_symbol lsym = lexi(); 1038 lexer_symbol lsym = lexi();
1026 1039
1027 debug_blank_line(); 1040 debug_blank_line();
1028 debug_printf("line %d: %s", line_no, lsym_name[lsym]); 1041 debug_printf("line %d: %s", line_no, lsym_name[lsym]);
1029 debug_print_buf("token", &token); 1042 debug_print_buf("token", &token);
1030 debug_buffers(); 1043 debug_buffers();
1031 debug_blank_line(); 1044 debug_blank_line();
1032 1045
1033 if (lsym == lsym_eof) 1046 if (lsym == lsym_eof)
1034 return process_eof(); 1047 return process_eof();
1035 1048
1036 if (lsym == lsym_if && ps.prev_lsym == lsym_else 1049 if (lsym == lsym_if && ps.prev_lsym == lsym_else
1037 && opt.else_if_in_same_line) 1050 && opt.else_if_in_same_line)
1038 ps.force_nl = false; 1051 ps.force_nl = false;
1039 1052
1040 if (lsym == lsym_newline || lsym == lsym_preprocessing) 1053 if (lsym == lsym_newline || lsym == lsym_preprocessing)
1041 ps.force_nl = false; 1054 ps.force_nl = false;
1042 else if (lsym == lsym_comment) { 1055 else if (lsym == lsym_comment) {
1043 /* no special processing */ 1056 /* no special processing */
1044 } else { 1057 } else {
1045 maybe_break_line(lsym); 1058 maybe_break_line(lsym);
1046 ps.in_stmt_or_decl = true; 1059 ps.in_stmt_or_decl = true;
1047 if (com.len > 0) 1060 if (com.len > 0)
1048 move_com_to_code(lsym); 1061 move_com_to_code(lsym);
1049 update_ps_lbrace_kind(lsym); 1062 update_ps_lbrace_kind(lsym);
1050 } 1063 }
1051 1064
1052 process_lsym(lsym); 1065 process_lsym(lsym);
1053 1066
1054 debug_parser_state(); 1067 debug_parser_state();
1055 1068
1056 if (lsym != lsym_comment && lsym != lsym_newline && 1069 if (lsym != lsym_comment && lsym != lsym_newline &&
1057 lsym != lsym_preprocessing) 1070 lsym != lsym_preprocessing)
1058 ps.prev_lsym = lsym; 1071 ps.prev_lsym = lsym;
1059 } 1072 }
1060} 1073}
1061 1074
1062int 1075int
1063main(int argc, char **argv) 1076main(int argc, char **argv)
1064{ 1077{
1065 init_globals(); 1078 init_globals();
1066 load_profiles(argc, argv); 1079 load_profiles(argc, argv);
1067 parse_command_line(argc, argv); 1080 parse_command_line(argc, argv);
1068 set_initial_indentation(); 1081 set_initial_indentation();
1069 return indent(); 1082 return indent();
1070} 1083}

cvs diff -r1.185 -r1.186 src/usr.bin/indent/indent.h (switch to unified diff)

--- src/usr.bin/indent/indent.h 2023/06/10 07:42:41 1.185
+++ src/usr.bin/indent/indent.h 2023/06/10 12:59:31 1.186
@@ -1,524 +1,537 @@ @@ -1,524 +1,537 @@
1/* $NetBSD: indent.h,v 1.185 2023/06/10 07:42:41 rillig Exp $ */ 1/* $NetBSD: indent.h,v 1.186 2023/06/10 12:59:31 rillig Exp $ */
2 2
3/*- 3/*-
4 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 4 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
5 * 5 *
6 * Copyright (c) 2001 Jens Schweikhardt 6 * Copyright (c) 2001 Jens Schweikhardt
7 * All rights reserved. 7 * All rights reserved.
8 * 8 *
9 * Redistribution and use in source and binary forms, with or without 9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions 10 * modification, are permitted provided that the following conditions
11 * are met: 11 * are met:
12 * 1. Redistributions of source code must retain the above copyright 12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer. 13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright 14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the 15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution. 16 * documentation and/or other materials provided with the distribution.
17 * 17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE. 28 * SUCH DAMAGE.
29 */ 29 */
30/*- 30/*-
31 * SPDX-License-Identifier: BSD-4-Clause 31 * SPDX-License-Identifier: BSD-4-Clause
32 * 32 *
33 * Copyright (c) 1985 Sun Microsystems, Inc. 33 * Copyright (c) 1985 Sun Microsystems, Inc.
34 * Copyright (c) 1980, 1993 34 * Copyright (c) 1980, 1993
35 * The Regents of the University of California. All rights reserved. 35 * The Regents of the University of California. All rights reserved.
36 * All rights reserved. 36 * All rights reserved.
37 * 37 *
38 * Redistribution and use in source and binary forms, with or without 38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions 39 * modification, are permitted provided that the following conditions
40 * are met: 40 * are met:
41 * 1. Redistributions of source code must retain the above copyright 41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer. 42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright 43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the 44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution. 45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software 46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement: 47 * must display the following acknowledgement:
48 * This product includes software developed by the University of 48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors. 49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors 50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software 51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission. 52 * without specific prior written permission.
53 * 53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE. 64 * SUCH DAMAGE.
65 */ 65 */
66 66
67#include <ctype.h> 67#include <ctype.h>
68#include <stdbool.h> 68#include <stdbool.h>
69#include <stdio.h> 69#include <stdio.h>
70 70
71typedef enum lexer_symbol { 71typedef enum lexer_symbol {
72 lsym_eof, 72 lsym_eof,
73 lsym_preprocessing, /* the initial '#' of a preprocessing line */ 73 lsym_preprocessing, /* the initial '#' of a preprocessing line */
74 lsym_newline, 74 lsym_newline,
75 lsym_comment, /* the initial '/ *' or '//' of a comment */ 75 lsym_comment, /* the initial '/ *' or '//' of a comment */
76 76
77 lsym_lparen, 77 lsym_lparen,
78 lsym_rparen, 78 lsym_rparen,
79 lsym_lbracket, 79 lsym_lbracket,
80 lsym_rbracket, 80 lsym_rbracket,
81 lsym_lbrace, 81 lsym_lbrace,
82 lsym_rbrace, 82 lsym_rbrace,
83 83
84 lsym_period, 84 lsym_period,
85 lsym_unary_op, /* e.g. '*', '&', '-' or leading '++' */ 85 lsym_unary_op, /* e.g. '*', '&', '-' or leading '++' */
86 lsym_sizeof, 86 lsym_sizeof,
87 lsym_offsetof, 87 lsym_offsetof,
88 lsym_postfix_op, /* trailing '++' or '--' */ 88 lsym_postfix_op, /* trailing '++' or '--' */
89 lsym_binary_op, /* e.g. '*', '&', '<<', '&&' or '/=' */ 89 lsym_binary_op, /* e.g. '*', '&', '<<', '&&' or '/=' */
90 lsym_question, /* the '?' from a '?:' expression */ 90 lsym_question, /* the '?' from a '?:' expression */
91 lsym_colon_question, /* the ':' from a '?:' expression */ 91 lsym_colon_question, /* the ':' from a '?:' expression */
92 lsym_comma, 92 lsym_comma,
93 93
94 lsym_typedef, 94 lsym_typedef,
95 lsym_modifier, /* modifiers for types, functions, variables */ 95 lsym_modifier, /* modifiers for types, functions, variables */
96 lsym_tag, /* 'struct', 'union' or 'enum' */ 96 lsym_tag, /* 'struct', 'union' or 'enum' */
97 lsym_type_outside_parentheses, 97 lsym_type_outside_parentheses,
98 lsym_type_in_parentheses, 98 lsym_type_in_parentheses,
99 lsym_word, /* identifier, constant or string */ 99 lsym_word, /* identifier, constant or string */
100 lsym_funcname, /* name of a function being defined */ 100 lsym_funcname, /* name of a function being defined */
101 lsym_colon_label, /* the ':' after a label */ 101 lsym_colon_label, /* the ':' after a label */
102 lsym_colon_other, /* bit-fields, generic-association (C11), 102 lsym_colon_other, /* bit-fields, generic-association (C11),
103 * enum-type-specifier (C23), 103 * enum-type-specifier (C23),
104 * attribute-prefixed-token (C23), 104 * attribute-prefixed-token (C23),
105 * pp-prefixed-parameter (C23 6.10) */ 105 * pp-prefixed-parameter (C23 6.10) */
106 lsym_semicolon, 106 lsym_semicolon,
107 107
108 lsym_case, 108 lsym_case,
109 lsym_default, 109 lsym_default,
110 lsym_do, 110 lsym_do,
111 lsym_else, 111 lsym_else,
112 lsym_for, 112 lsym_for,
113 lsym_if, 113 lsym_if,
114 lsym_switch, 114 lsym_switch,
115 lsym_while, 115 lsym_while,
116 lsym_return, 116 lsym_return,
117} lexer_symbol; 117} lexer_symbol;
118 118
119/* 119/*
120 * Structure of the source code, in terms of declarations, statements and 120 * Structure of the source code, in terms of declarations, statements and
121 * braces; used to determine the indentation level of these parts. 121 * braces; used to determine the indentation level of these parts.
122 */ 122 */
123typedef enum parser_symbol { 123typedef enum parser_symbol {
124 psym_0, /* a placeholder; not stored on the stack */ 124 psym_0, /* a placeholder; not stored on the stack */
125 psym_lbrace_block, /* '{' for a block of code */ 125 psym_lbrace_block, /* '{' for a block of code */
126 psym_lbrace_struct, /* '{' in 'struct ... { ... }' */ 126 psym_lbrace_struct, /* '{' in 'struct ... { ... }' */
127 psym_lbrace_union, /* '{' in 'union ... { ... }' */ 127 psym_lbrace_union, /* '{' in 'union ... { ... }' */
128 psym_lbrace_enum, /* '{' in 'enum ... { ... }' */ 128 psym_lbrace_enum, /* '{' in 'enum ... { ... }' */
129 psym_rbrace, /* not stored on the stack */ 129 psym_rbrace, /* not stored on the stack */
130 psym_decl, 130 psym_decl,
131 psym_stmt, 131 psym_stmt,
132 psym_stmt_list, 132 psym_stmt_list,
133 psym_for_exprs, /* 'for' '(' ... ')' */ 133 psym_for_exprs, /* 'for' '(' ... ')' */
134 psym_if_expr, /* 'if' '(' expr ')' */ 134 psym_if_expr, /* 'if' '(' expr ')' */
135 psym_if_expr_stmt, /* 'if' '(' expr ')' stmt */ 135 psym_if_expr_stmt, /* 'if' '(' expr ')' stmt */
136 psym_if_expr_stmt_else, /* 'if' '(' expr ')' stmt 'else' */ 136 psym_if_expr_stmt_else, /* 'if' '(' expr ')' stmt 'else' */
137 psym_else, /* 'else'; not stored on the stack */ 137 psym_else, /* 'else'; not stored on the stack */
138 psym_switch_expr, /* 'switch' '(' expr ')' */ 138 psym_switch_expr, /* 'switch' '(' expr ')' */
139 psym_do, /* 'do' */ 139 psym_do, /* 'do' */
140 psym_do_stmt, /* 'do' stmt */ 140 psym_do_stmt, /* 'do' stmt */
141 psym_while_expr, /* 'while' '(' expr ')' */ 141 psym_while_expr, /* 'while' '(' expr ')' */
142} parser_symbol; 142} parser_symbol;
143 143
144/* A range of characters, not null-terminated. */ 144/* A range of characters, only null-terminated in debug mode. */
145struct buffer { 145struct buffer {
146 char *s; 146 char *s;
147 size_t len; 147 size_t len;
148 size_t cap; 148 size_t cap;
149}; 149};
150 150
151extern FILE *input; 151extern FILE *input;
152extern FILE *output; 152extern FILE *output;
153 153
154/* 154/*
155 * The current line from the input file, used by the lexer to generate tokens. 155 * The current line from the input file, used by the lexer to generate tokens.
156 * To read from the line, start at inp_p and continue up to and including the 156 * To read from the line, start at inp_p and continue up to and including the
157 * next '\n'. To read beyond the '\n', call inp_skip or inp_next, which will 157 * next '\n'. To read beyond the '\n', call inp_skip or inp_next, which will
158 * make the next line available, invalidating any pointers into the previous 158 * make the next line available, invalidating any pointers into the previous
159 * line. 159 * line.
160 */ 160 */
161extern struct buffer inp; 161extern struct buffer inp;
162extern const char *inp_p; 162extern const char *inp_p;
163 163
164extern struct buffer token; /* the current token to be processed, is 164extern struct buffer token; /* the current token to be processed, is
165 * typically copied to the buffer 'code', or in 165 * typically copied to the buffer 'code', or in
166 * some cases to 'lab'. */ 166 * some cases to 'lab'. */
167 167
168extern struct buffer lab; /* the label or preprocessor directive */ 168extern struct buffer lab; /* the label or preprocessor directive */
169extern struct buffer code; /* the main part of the current line of code, 169extern struct buffer code; /* the main part of the current line of code,
170 * containing declarations or statements */ 170 * containing declarations or statements */
171extern struct buffer com; /* the trailing comment of the line, or the 171extern struct buffer com; /* the trailing comment of the line, or the
172 * start or end of a multi-line comment, or 172 * start or end of a multi-line comment, or
173 * while in process_comment, a single line of a 173 * while in process_comment, a single line of a
174 * multi-line comment */ 174 * multi-line comment */
175 175
176extern struct options { 176extern struct options {
177 bool blanklines_around_conditional_compilation; 177 bool blanklines_around_conditional_compilation;
178 bool blank_line_after_decl_at_top; /* this is vaguely similar to 178 bool blank_line_after_decl_at_top; /* this is vaguely similar to
179 * blank_line_after_decl except 179 * blank_line_after_decl except
180 * that it only applies to the 180 * that it only applies to the
181 * first set of declarations in 181 * first set of declarations in
182 * a procedure (just after the 182 * a procedure (just after the
183 * first '{') and it causes a 183 * first '{') and it causes a
184 * blank line to be generated 184 * blank line to be generated
185 * even if there are no 185 * even if there are no
186 * declarations */ 186 * declarations */
187 bool blank_line_after_decl; 187 bool blank_line_after_decl;
188 bool blanklines_after_procs; 188 bool blanklines_after_procs;
189 bool blanklines_before_block_comments; 189 bool blanklines_before_block_comments;
190 bool break_after_comma; /* whether to add a line break after each 190 bool break_after_comma; /* whether to add a line break after each
191 * declarator */ 191 * declarator */
192 bool brace_same_line; /* whether brace should be on same line as if, 192 bool brace_same_line; /* whether brace should be on same line as if,
193 * while, etc */ 193 * while, etc */
194 bool blank_after_sizeof; /* whether a blank should always be 194 bool blank_after_sizeof; /* whether a blank should always be
195 * inserted after sizeof */ 195 * inserted after sizeof */
196 bool comment_delimiter_on_blankline; 196 bool comment_delimiter_on_blankline;
197 int decl_comment_column; /* the column in which comments after 197 int decl_comment_column; /* the column in which comments after
198 * declarations should be put */ 198 * declarations should be put */
199 bool cuddle_else; /* whether 'else' should cuddle up to '}' */ 199 bool cuddle_else; /* whether 'else' should cuddle up to '}' */
200 int continuation_indent; /* the indentation between the edge of 200 int continuation_indent; /* the indentation between the edge of
201 * code and continuation lines */ 201 * code and continuation lines */
202 float case_indent; /* The distance (measured in indentation 202 float case_indent; /* The distance (measured in indentation
203 * levels) to indent case labels from the 203 * levels) to indent case labels from the
204 * switch statement */ 204 * switch statement */
205 int comment_column; /* the column in which comments to the right of 205 int comment_column; /* the column in which comments to the right of
206 * code should start */ 206 * code should start */
207 int decl_indent; /* indentation of identifier in declaration */ 207 int decl_indent; /* indentation of identifier in declaration */
208 bool left_justify_decl; 208 bool left_justify_decl;
209 int unindent_displace; /* comments not to the right of code will be 209 int unindent_displace; /* comments not to the right of code will be
210 * placed this many indentation levels to the 210 * placed this many indentation levels to the
211 * left of code */ 211 * left of code */
212 bool extra_expr_indent; /* whether continuation lines from the 212 bool extra_expr_indent; /* whether continuation lines from the
213 * expression part of "if (e)", "while (e)", 213 * expression part of "if (e)", "while (e)",
214 * "for (e; e; e)" should be indented an extra 214 * "for (e; e; e)" should be indented an extra
215 * tab stop so that they are not confused with 215 * tab stop so that they are not confused with
216 * the code that follows */ 216 * the code that follows */
217 bool else_if_in_same_line; 217 bool else_if_in_same_line;
218 bool function_brace_split; /* split function declaration and brace 218 bool function_brace_split; /* split function declaration and brace
219 * onto separate lines */ 219 * onto separate lines */
220 bool format_col1_comments; /* If comments which start in column 1 220 bool format_col1_comments; /* If comments which start in column 1
221 * are to be reformatted (just like 221 * are to be reformatted (just like
222 * comments that begin in later 222 * comments that begin in later
223 * columns) */ 223 * columns) */
224 bool format_block_comments; /* whether comments beginning with '/ * 224 bool format_block_comments; /* whether comments beginning with '/ *
225 * \n' are to be reformatted */ 225 * \n' are to be reformatted */
226 bool indent_parameters; 226 bool indent_parameters;
227 int indent_size; /* the size of one indentation level */ 227 int indent_size; /* the size of one indentation level */
228 int block_comment_max_line_length; 228 int block_comment_max_line_length;
229 int local_decl_indent; /* like decl_indent but for locals */ 229 int local_decl_indent; /* like decl_indent but for locals */
230 bool lineup_to_parens_always; /* whether to not(?) attempt to keep 230 bool lineup_to_parens_always; /* whether to not(?) attempt to keep
231 * lined-up code within the margin */ 231 * lined-up code within the margin */
232 bool lineup_to_parens; /* whether continued code within parens will be 232 bool lineup_to_parens; /* whether continued code within parens will be
233 * lined up to the open paren */ 233 * lined up to the open paren */
234 bool proc_calls_space; /* whether function calls look like: foo (bar) 234 bool proc_calls_space; /* whether function calls look like: foo (bar)
235 * rather than foo(bar) */ 235 * rather than foo(bar) */
236 bool procnames_start_line; /* whether the names of functions being 236 bool procnames_start_line; /* whether the names of functions being
237 * defined get placed in column 1 (i.e. 237 * defined get placed in column 1 (i.e.
238 * a newline is placed between the type 238 * a newline is placed between the type
239 * of the function and its name) */ 239 * of the function and its name) */
240 bool space_after_cast; /* "b = (int) a" vs. "b = (int)a" */ 240 bool space_after_cast; /* "b = (int) a" vs. "b = (int)a" */
241 bool star_comment_cont; /* whether comment continuation lines should 241 bool star_comment_cont; /* whether comment continuation lines should
242 * have stars at the beginning of each line */ 242 * have stars at the beginning of each line */
243 bool swallow_optional_blanklines; 243 bool swallow_optional_blanklines;
244 bool auto_typedefs; /* whether to recognize identifiers ending in 244 bool auto_typedefs; /* whether to recognize identifiers ending in
245 * "_t" like typedefs */ 245 * "_t" like typedefs */
246 int tabsize; /* the size of a tab */ 246 int tabsize; /* the size of a tab */
247 int max_line_length; 247 int max_line_length;
248 bool use_tabs; /* set true to use tabs for spacing, false uses 248 bool use_tabs; /* set true to use tabs for spacing, false uses
249 * all spaces */ 249 * all spaces */
250 bool verbose; /* print configuration to stderr */ 250 bool verbose; /* print configuration to stderr */
251} opt; 251} opt;
252 252
253extern bool found_err; 253extern bool found_err;
254extern bool had_eof; /* whether input is exhausted */ 254extern bool had_eof; /* whether input is exhausted */
255extern int line_no; /* the current line number. */ 255extern int line_no; /* the current line number. */
256extern enum indent_enabled { 256extern enum indent_enabled {
257 indent_on, 257 indent_on,
258 indent_off, 258 indent_off,
259 indent_last_off_line, 259 indent_last_off_line,
260} indent_enabled; 260} indent_enabled;
261 261
262#define STACKSIZE 256 262#define STACKSIZE 256
263 263
264/* Properties of each level of parentheses or brackets. */ 264/* Properties of each level of parentheses or brackets. */
265typedef struct paren_level_props { 265typedef struct paren_level_props {
266 int indent; /* indentation of the operand/argument, 266 int indent; /* indentation of the operand/argument,
267 * relative to the enclosing statement; if 267 * relative to the enclosing statement; if
268 * negative, reflected at -1 */ 268 * negative, reflected at -1 */
269 enum paren_level_cast { 269 enum paren_level_cast {
270 cast_unknown, 270 cast_unknown,
271 cast_maybe, 271 cast_maybe,
272 cast_no, 272 cast_no,
273 } cast; /* whether the parentheses form a type cast */ 273 } cast; /* whether the parentheses form a type cast */
274} paren_level_props; 274} paren_level_props;
275 275
276struct psym_stack { 276struct psym_stack {
277 int top; /* pointer to top of stack */ 277 int top; /* pointer to top of stack */
278 parser_symbol sym[STACKSIZE]; 278 parser_symbol sym[STACKSIZE];
279 int ind_level[STACKSIZE]; 279 int ind_level[STACKSIZE];
280}; 280};
281 281
282/* 282/*
283 * The parser state determines the layout of the formatted text. 283 * The parser state determines the layout of the formatted text.
284 * 284 *
285 * At each '#if', the parser state is copied so that the corresponding '#else' 285 * At each '#if', the parser state is copied so that the corresponding '#else'
286 * lines start in the same state. 286 * lines start in the same state.
287 * 287 *
288 * In a function body, the number of block braces determines the indentation 288 * In a function body, the number of block braces determines the indentation
289 * of statements and declarations. 289 * of statements and declarations.
290 * 290 *
291 * In a statement, the number of parentheses or brackets determines the 291 * In a statement, the number of parentheses or brackets determines the
292 * indentation of follow-up lines. 292 * indentation of follow-up lines.
293 * 293 *
294 * In an expression, the token types determine whether to put spaces around them. 294 * In an expression, the token types determine whether to put spaces around them.
295 * 295 *
296 * In a source file, the types of line determine the vertical spacing, such as 296 * In a source file, the types of line determine the vertical spacing, such as
297 * around preprocessing directives or function bodies, or above block 297 * around preprocessing directives or function bodies, or above block
298 * comments. 298 * comments.
299 */ 299 */
300extern struct parser_state { 300extern struct parser_state {
301 lexer_symbol prev_lsym; /* the previous token, but never comment, 301 lexer_symbol prev_lsym; /* the previous token, but never comment,
302 * newline or preprocessing line */ 302 * newline or preprocessing line */
303 303
304 /* Token classification */ 304 /* Token classification */
305 305
306 bool in_stmt_or_decl; /* whether in a statement or a struct 306 bool in_stmt_or_decl; /* whether in a statement or a struct
307 * declaration or a plain declaration */ 307 * declaration or a plain declaration */
308 bool in_decl; /* XXX: double-check the exact meaning */ 308 bool in_decl; /* XXX: double-check the exact meaning */
309 bool in_var_decl; /* starts at a type name or a '){' from a 309 bool in_var_decl; /* starts at a type name or a '){' from a
310 * compound literal; ends at the '(' from a 310 * compound literal; ends at the '(' from a
311 * function definition or a ';' outside '{}'; 311 * function definition or a ';' outside '{}';
312 * when active, '{}' form struct or union 312 * when active, '{}' form struct or union
313 * declarations, ':' marks a bit-field, and '=' 313 * declarations, ':' marks a bit-field, and '='
314 * starts an initializer */ 314 * starts an initializer */
315 bool in_init; /* whether inside an initializer */ 315 bool in_init; /* whether inside an initializer */
316 int init_level; /* the number of '{}' in an initializer */ 316 int init_level; /* the number of '{}' in an initializer */
317 bool line_has_func_def; /* starts either at the 'name(' from a function 317 bool line_has_func_def; /* starts either at the 'name(' from a function
318 * definition if it occurs at the beginning of 318 * definition if it occurs at the beginning of
319 * a line, or at the first '*' from inside a 319 * a line, or at the first '*' from inside a
320 * declaration when the line starts with words 320 * declaration when the line starts with words
321 * followed by a '(' */ 321 * followed by a '(' */
322 bool in_func_def_params; /* for old-style functions */ 322 bool in_func_def_params; /* for old-style functions */
323 bool line_has_decl; /* whether this line of code has part of a 323 bool line_has_decl; /* whether this line of code has part of a
324 * declaration on it; used for indenting 324 * declaration on it; used for indenting
325 * comments */ 325 * comments */
326 parser_symbol lbrace_kind; /* the kind of brace to be pushed to 326 parser_symbol lbrace_kind; /* the kind of brace to be pushed to
327 * the parser symbol stack next */ 327 * the parser symbol stack next */
328 parser_symbol spaced_expr_psym; /* the parser symbol to be shifted 328 parser_symbol spaced_expr_psym; /* the parser symbol to be shifted
329 * after the parenthesized expression 329 * after the parenthesized expression
330 * from a 'for', 'if', 'switch' or 330 * from a 'for', 'if', 'switch' or
331 * 'while'; or psym_0 */ 331 * 'while'; or psym_0 */
332 bool seen_case; /* whether there was a 'case' or 'default', to 332 bool seen_case; /* whether there was a 'case' or 'default', to
333 * properly space the following ':' */ 333 * properly space the following ':' */
334 bool prev_paren_was_cast; 334 bool prev_paren_was_cast;
335 int quest_level; /* when this is positive, we have seen a '?' 335 int quest_level; /* when this is positive, we have seen a '?'
336 * without the matching ':' in a '?:' 336 * without the matching ':' in a '?:'
337 * expression */ 337 * expression */
338 338
339 /* Indentation of statements and declarations */ 339 /* Indentation of statements and declarations */
340 340
341 int ind_level; /* the indentation level for the line that is 341 int ind_level; /* the indentation level for the line that is
342 * currently prepared for output */ 342 * currently prepared for output */
343 int ind_level_follow; /* the level to which ind_level should be set 343 int ind_level_follow; /* the level to which ind_level should be set
344 * after the current line is printed */ 344 * after the current line is printed */
345 bool in_stmt_cont; /* whether the current line should have an 345 bool in_stmt_cont; /* whether the current line should have an
346 * extra indentation level because we are in 346 * extra indentation level because we are in
347 * the middle of a statement */ 347 * the middle of a statement */
348 int decl_level; /* current nesting level for a structure 348 int decl_level; /* current nesting level for a structure
349 * declaration or an initializer */ 349 * declaration or an initializer */
350 int di_stack[20]; /* a stack of structure indentation levels */ 350 int di_stack[20]; /* a stack of structure indentation levels */
351 bool decl_indent_done; /* whether the indentation for a declaration 351 bool decl_indent_done; /* whether the indentation for a declaration
352 * has been added to the code buffer. */ 352 * has been added to the code buffer. */
353 int decl_ind; /* current indentation for declarations */ 353 int decl_ind; /* current indentation for declarations */
354 bool tabs_to_var; /* true if using tabs to indent to var name */ 354 bool tabs_to_var; /* true if using tabs to indent to var name */
355 355
356 enum { 356 enum {
357 eei_no, 357 eei_no,
358 eei_maybe, 358 eei_maybe,
359 eei_last 359 eei_last
360 } extra_expr_indent; 360 } extra_expr_indent;
361 361
362 struct psym_stack psyms; 362 struct psym_stack psyms;
363 363
364 /* Spacing inside a statement or declaration */ 364 /* Spacing inside a statement or declaration */
365 365
366 bool next_unary; /* whether the following operator should be 366 bool next_unary; /* whether the following operator should be
367 * unary; is used in declarations for '*', as 367 * unary; is used in declarations for '*', as
368 * well as in expressions */ 368 * well as in expressions */
369 bool want_blank; /* whether the following token should be 369 bool want_blank; /* whether the following token should be
370 * prefixed by a blank. (Said prefixing is 370 * prefixed by a blank. (Said prefixing is
371 * ignored in some cases.) */ 371 * ignored in some cases.) */
372 int line_start_nparen; /* the number of parentheses or brackets that 372 int line_start_nparen; /* the number of parentheses or brackets that
373 * were open at the beginning of the current 373 * were open at the beginning of the current
374 * line; used to indent within statements, 374 * line; used to indent within statements,
375 * initializers and declarations */ 375 * initializers and declarations */
376 int nparen; /* the number of parentheses or brackets that 376 int nparen; /* the number of parentheses or brackets that
377 * are currently open; used to indent the 377 * are currently open; used to indent the
378 * remaining lines of the statement, 378 * remaining lines of the statement,
379 * initializer or declaration */ 379 * initializer or declaration */
380 paren_level_props paren[20]; 380 paren_level_props paren[20];
381 381
382 /* Horizontal spacing for comments */ 382 /* Horizontal spacing for comments */
383 383
384 int comment_delta; /* used to set up indentation for all lines of 384 int comment_delta; /* used to set up indentation for all lines of
385 * a boxed comment after the first one */ 385 * a boxed comment after the first one */
386 int n_comment_delta; /* remembers how many columns there were before 386 int n_comment_delta; /* remembers how many columns there were before
387 * the start of a box comment so that 387 * the start of a box comment so that
388 * forthcoming lines of the comment are 388 * forthcoming lines of the comment are
389 * indented properly */ 389 * indented properly */
390 int com_ind; /* indentation of the current comment */ 390 int com_ind; /* indentation of the current comment */
391 391
392 /* Vertical spacing */ 392 /* Vertical spacing */
393 393
394 bool break_after_comma; /* whether to add a newline after the next 394 bool break_after_comma; /* whether to add a newline after the next
395 * comma; used in declarations but not in 395 * comma; used in declarations but not in
396 * initializer lists */ 396 * initializer lists */
397 bool force_nl; /* whether the next token is forced to go to a 397 bool force_nl; /* whether the next token is forced to go to a
398 * new line; used after 'if (expr)' and in 398 * new line; used after 'if (expr)' and in
399 * similar situations; tokens like '{' may 399 * similar situations; tokens like '{' may
400 * ignore this */ 400 * ignore this */
401 401
402 enum declaration { 402 enum declaration {
403 decl_no, /* no declaration anywhere nearby */ 403 decl_no, /* no declaration anywhere nearby */
404 decl_begin, /* collecting tokens of a declaration */ 404 decl_begin, /* collecting tokens of a declaration */
405 decl_end, /* finished a declaration */ 405 decl_end, /* finished a declaration */
406 } declaration; 406 } declaration;
407 bool blank_line_after_decl; 407 bool blank_line_after_decl;
408 408
409 /* Comments */ 409 /* Comments */
410 410
411 bool curr_col_1; /* whether the current token started in column 411 bool curr_col_1; /* whether the current token started in column
412 * 1 of the original input */ 412 * 1 of the original input */
413 bool next_col_1; 413 bool next_col_1;
414} ps; 414} ps;
415 415
416extern struct output_state { 416extern struct output_state {
417 enum line_kind { 417 enum line_kind {
418 lk_other, 418 lk_other,
419 lk_blank, 419 lk_blank,
420 lk_if, /* #if, #ifdef, #ifndef */ 420 lk_if, /* #if, #ifdef, #ifndef */
421 lk_endif, /* #endif */ 421 lk_endif, /* #endif */
422 lk_stmt_head, /* the ')' of an incomplete statement such as 422 lk_stmt_head, /* the ')' of an incomplete statement such as
423 * 'if (expr)' or 'for (expr; expr; expr)' */ 423 * 'if (expr)' or 'for (expr; expr; expr)' */
424 lk_func_end, /* the last '}' of a function body */ 424 lk_func_end, /* the last '}' of a function body */
425 lk_block_comment, 425 lk_block_comment,
426 lk_case_or_default, 426 lk_case_or_default,
427 } line_kind; /* kind of the line that is being prepared for 427 } line_kind; /* kind of the line that is being prepared for
428 * output; is reset to lk_other each time after 428 * output; is reset to lk_other each time after
429 * trying to send a line to the output, even if 429 * trying to send a line to the output, even if
430 * that line was a suppressed blank line; used 430 * that line was a suppressed blank line; used
431 * for inserting or removing blank lines */ 431 * for inserting or removing blank lines */
432 enum line_kind prev_line_kind; /* the kind of line that was actually 432 enum line_kind prev_line_kind; /* the kind of line that was actually
433 * sent to the output */ 433 * sent to the output */
434 434
435 struct buffer indent_off_text; /* text from between 'INDENT OFF' and 435 struct buffer indent_off_text; /* text from between 'INDENT OFF' and
436 * 'INDENT ON', both inclusive */ 436 * 'INDENT ON', both inclusive */
437} out; 437} out;
438 438
439 439
440#define array_length(array) (sizeof(array) / sizeof((array)[0])) 440#define array_length(array) (sizeof(array) / sizeof((array)[0]))
441 441
442#ifdef debug 442#ifdef debug
443void debug_printf(const char *, ...) __printflike(1, 2); 443void debug_printf(const char *, ...) __printflike(1, 2);
444void debug_println(const char *, ...) __printflike(1, 2); 444void debug_println(const char *, ...) __printflike(1, 2);
445void debug_blank_line(void); 445void debug_blank_line(void);
446void debug_vis_range(const char *, const char *, size_t, const char *); 446void debug_vis_range(const char *, const char *, size_t, const char *);
447void debug_parser_state(void); 447void debug_parser_state(void);
448void debug_parse_stack(const char *); 448void debug_parse_stack(const char *);
449void debug_print_buf(const char *, const struct buffer *); 449void debug_print_buf(const char *, const struct buffer *);
450void debug_buffers(void); 450void debug_buffers(void);
451extern const char *const lsym_name[]; 451extern const char *const lsym_name[];
452extern const char *const psym_name[]; 452extern const char *const psym_name[];
453extern const char *const paren_level_cast_name[]; 453extern const char *const paren_level_cast_name[];
454extern const char *const line_kind_name[]; 454extern const char *const line_kind_name[];
455#else 455#else
456#define debug_noop() do { } while (false) 456#define debug_noop() do { } while (false)
457#define debug_printf(fmt, ...) debug_noop() 457#define debug_printf(fmt, ...) debug_noop()
458#define debug_println(fmt, ...) debug_noop() 458#define debug_println(fmt, ...) debug_noop()
459#define debug_blank_line() debug_noop() 459#define debug_blank_line() debug_noop()
460#define debug_vis_range(prefix, s, e, suffix) debug_noop() 460#define debug_vis_range(prefix, s, e, suffix) debug_noop()
461#define debug_parser_state() debug_noop() 461#define debug_parser_state() debug_noop()
462#define debug_parse_stack(situation) debug_noop() 462#define debug_parse_stack(situation) debug_noop()
463#define debug_print_buf(name, buf) debug_noop() 463#define debug_print_buf(name, buf) debug_noop()
464#define debug_buffers() debug_noop() 464#define debug_buffers() debug_noop()
465#endif 465#endif
466 466
467void register_typename(const char *); 467void register_typename(const char *);
468int compute_code_indent(void); 468int compute_code_indent(void);
469int compute_label_indent(void); 469int compute_label_indent(void);
470int ind_add(int, const char *, size_t); 470int ind_add(int, const char *, size_t);
471 471
472void inp_skip(void); 472void inp_skip(void);
473char inp_next(void); 473char inp_next(void);
474void output_finish(void); 474void output_finish(void);
475 475
476lexer_symbol lexi(void); 476lexer_symbol lexi(void);
477void diag(int, const char *, ...) __printflike(2, 3); 477void diag(int, const char *, ...) __printflike(2, 3);
478void output_line(void); 478void output_line(void);
479void inp_read_line(void); 479void inp_read_line(void);
480void parse(parser_symbol); 480void parse(parser_symbol);
481void process_comment(void); 481void process_comment(void);
482void set_option(const char *, const char *); 482void set_option(const char *, const char *);
483void load_profile_files(const char *); 483void load_profile_files(const char *);
484 484
485void *nonnull(void *); 485void *nonnull(void *);
486 486
487void buf_add_char(struct buffer *, char); 487void buf_add_char(struct buffer *, char);
488void buf_add_chars(struct buffer *, const char *, size_t); 488void buf_add_chars(struct buffer *, const char *, size_t);
489 489
490static inline bool 490static inline bool
491ch_isalnum(char ch) 491ch_isalnum(char ch)
492{ 492{
493 return isalnum((unsigned char)ch) != 0; 493 return isalnum((unsigned char)ch) != 0;
494} 494}
495 495
496static inline bool 496static inline bool
497ch_isalpha(char ch) 497ch_isalpha(char ch)
498{ 498{
499 return isalpha((unsigned char)ch) != 0; 499 return isalpha((unsigned char)ch) != 0;
500} 500}
501 501
502static inline bool 502static inline bool
503ch_isblank(char ch) 503ch_isblank(char ch)
504{ 504{
505 return ch == ' ' || ch == '\t'; 505 return ch == ' ' || ch == '\t';
506} 506}
507 507
508static inline bool 508static inline bool
509ch_isdigit(char ch) 509ch_isdigit(char ch)
510{ 510{
511 return '0' <= ch && ch <= '9'; 511 return '0' <= ch && ch <= '9';
512} 512}
513 513
514static inline bool 514static inline bool
515ch_isspace(char ch) 515ch_isspace(char ch)
516{ 516{
517 return isspace((unsigned char)ch) != 0; 517 return isspace((unsigned char)ch) != 0;
518} 518}
519 519
520static inline int 520static inline int
521next_tab(int ind) 521next_tab(int ind)
522{ 522{
523 return ind - ind % opt.tabsize + opt.tabsize; 523 return ind - ind % opt.tabsize + opt.tabsize;
524} 524}
 525
 526#ifdef debug
 527void buf_terminate(struct buffer *);
 528#else
 529#define buf_terminate(buf) debug_noop()
 530#endif
 531
 532static inline void
 533buf_clear(struct buffer *buf)
 534{
 535 buf->len = 0;
 536 buf_terminate(buf);
 537}

cvs diff -r1.214 -r1.215 src/usr.bin/indent/io.c (switch to unified diff)

--- src/usr.bin/indent/io.c 2023/06/10 11:01:58 1.214
+++ src/usr.bin/indent/io.c 2023/06/10 12:59:31 1.215
@@ -1,419 +1,421 @@ @@ -1,419 +1,421 @@
1/* $NetBSD: io.c,v 1.214 2023/06/10 11:01:58 rillig Exp $ */ 1/* $NetBSD: io.c,v 1.215 2023/06/10 12:59:31 rillig Exp $ */
2 2
3/*- 3/*-
4 * SPDX-License-Identifier: BSD-4-Clause 4 * SPDX-License-Identifier: BSD-4-Clause
5 * 5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc. 6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993 7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved. 8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved. 9 * All rights reserved.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software 19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement: 20 * must display the following acknowledgement:
21 * This product includes software developed by the University of 21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors. 22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors 23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software 24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission. 25 * without specific prior written permission.
26 * 26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE. 37 * SUCH DAMAGE.
38 */ 38 */
39 39
40#include <sys/cdefs.h> 40#include <sys/cdefs.h>
41__RCSID("$NetBSD: io.c,v 1.214 2023/06/10 11:01:58 rillig Exp $"); 41__RCSID("$NetBSD: io.c,v 1.215 2023/06/10 12:59:31 rillig Exp $");
42 42
43#include <stdio.h> 43#include <stdio.h>
44 44
45#include "indent.h" 45#include "indent.h"
46 46
47struct buffer inp; 47struct buffer inp;
48const char *inp_p; 48const char *inp_p;
49 49
50struct output_state out; 50struct output_state out;
51enum indent_enabled indent_enabled; 51enum indent_enabled indent_enabled;
52static int out_ind; /* width of the line that is being written */ 52static int out_ind; /* width of the line that is being written */
53static unsigned newlines = 2; /* the total of written and buffered newlines; 53static unsigned newlines = 2; /* the total of written and buffered newlines;
54 * 0 in the middle of a line, 1 after a single 54 * 0 in the middle of a line, 1 after a single
55 * finished line, anything > 1 are trailing 55 * finished line, anything > 1 are trailing
56 * blank lines */ 56 * blank lines */
57static unsigned buffered_newlines; /* not yet written */ 57static unsigned buffered_newlines; /* not yet written */
58static int paren_indent; 58static int paren_indent;
59 59
60 60
61static void 61static void
62inp_read_next_line(FILE *f) 62inp_read_next_line(FILE *f)
63{ 63{
64 inp.len = 0; 64 buf_clear(&inp);
65 65
66 for (;;) { 66 for (;;) {
67 int ch = getc(f); 67 int ch = getc(f);
68 if (ch == EOF) { 68 if (ch == EOF) {
69 if (indent_enabled == indent_on) { 69 if (indent_enabled == indent_on) {
70 buf_add_char(&inp, ' '); 70 buf_add_char(&inp, ' ');
71 buf_add_char(&inp, '\n'); 71 buf_add_char(&inp, '\n');
72 } 72 }
73 had_eof = true; 73 had_eof = true;
74 break; 74 break;
75 } 75 }
76 76
77 if (ch != '\0') 77 if (ch != '\0')
78 buf_add_char(&inp, (char)ch); 78 buf_add_char(&inp, (char)ch);
79 if (ch == '\n') 79 if (ch == '\n')
80 break; 80 break;
81 } 81 }
 82 buf_terminate(&inp);
82 inp_p = inp.s; 83 inp_p = inp.s;
83} 84}
84 85
85void 86void
86inp_read_line(void) 87inp_read_line(void)
87{ 88{
88 if (indent_enabled == indent_on) 89 if (indent_enabled == indent_on)
89 out.indent_off_text.len = 0; 90 buf_clear(&out.indent_off_text);
90 buf_add_chars(&out.indent_off_text, inp.s, inp.len); 91 buf_add_chars(&out.indent_off_text, inp.s, inp.len);
91 inp_read_next_line(input); 92 inp_read_next_line(input);
92} 93}
93 94
94void 95void
95inp_skip(void) 96inp_skip(void)
96{ 97{
97 inp_p++; 98 inp_p++;
98 if ((size_t)(inp_p - inp.s) >= inp.len) 99 if ((size_t)(inp_p - inp.s) >= inp.len)
99 inp_read_line(); 100 inp_read_line();
100} 101}
101 102
102char 103char
103inp_next(void) 104inp_next(void)
104{ 105{
105 char ch = inp_p[0]; 106 char ch = inp_p[0];
106 inp_skip(); 107 inp_skip();
107 return ch; 108 return ch;
108} 109}
109 110
110 111
111static void 112static void
112buffer_newline(void) 113buffer_newline(void)
113{ 114{
114 buffered_newlines++; 115 buffered_newlines++;
115 newlines++; 116 newlines++;
116 out_ind = 0; 117 out_ind = 0;
117} 118}
118 119
119static void 120static void
120write_buffered_newlines(void) 121write_buffered_newlines(void)
121{ 122{
122 for (; buffered_newlines > 0; buffered_newlines--) { 123 for (; buffered_newlines > 0; buffered_newlines--) {
123 fputc('\n', output); 124 fputc('\n', output);
124 debug_println("write_newline"); 125 debug_println("write_newline");
125 } 126 }
126} 127}
127 128
128static void 129static void
129write_range(const char *s, size_t len) 130write_range(const char *s, size_t len)
130{ 131{
131 write_buffered_newlines(); 132 write_buffered_newlines();
132 fwrite(s, 1, len, output); 133 fwrite(s, 1, len, output);
133 debug_vis_range("write_range \"", s, len, "\"\n"); 134 debug_vis_range("write_range \"", s, len, "\"\n");
134 for (size_t i = 0; i < len; i++) 135 for (size_t i = 0; i < len; i++)
135 newlines = s[i] == '\n' ? newlines + 1 : 0; 136 newlines = s[i] == '\n' ? newlines + 1 : 0;
136 out_ind = ind_add(out_ind, s, len); 137 out_ind = ind_add(out_ind, s, len);
137} 138}
138 139
139static void 140static void
140write_indent(int new_ind) 141write_indent(int new_ind)
141{ 142{
142 write_buffered_newlines(); 143 write_buffered_newlines();
143 144
144 int ind = out_ind; 145 int ind = out_ind;
145 146
146 if (opt.use_tabs) { 147 if (opt.use_tabs) {
147 int n = new_ind / opt.tabsize - ind / opt.tabsize; 148 int n = new_ind / opt.tabsize - ind / opt.tabsize;
148 if (n > 0) { 149 if (n > 0) {
149 ind = ind - ind % opt.tabsize + n * opt.tabsize; 150 ind = ind - ind % opt.tabsize + n * opt.tabsize;
150 while (n-- > 0) 151 while (n-- > 0)
151 fputc('\t', output); 152 fputc('\t', output);
152 newlines = 0; 153 newlines = 0;
153 } 154 }
154 } 155 }
155 156
156 for (; ind < new_ind; ind++) { 157 for (; ind < new_ind; ind++) {
157 fputc(' ', output); 158 fputc(' ', output);
158 newlines = 0; 159 newlines = 0;
159 } 160 }
160 161
161 debug_println("write_indent %d", ind); 162 debug_println("write_indent %d", ind);
162 out_ind = ind; 163 out_ind = ind;
163} 164}
164 165
165static bool 166static bool
166want_blank_line(void) 167want_blank_line(void)
167{ 168{
168 debug_println("%s: %s -> %s", __func__, 169 debug_println("%s: %s -> %s", __func__,
169 line_kind_name[out.prev_line_kind], line_kind_name[out.line_kind]); 170 line_kind_name[out.prev_line_kind], line_kind_name[out.line_kind]);
170 171
171 if (ps.blank_line_after_decl && ps.declaration == decl_no) { 172 if (ps.blank_line_after_decl && ps.declaration == decl_no) {
172 ps.blank_line_after_decl = false; 173 ps.blank_line_after_decl = false;
173 return true; 174 return true;
174 } 175 }
175 if (opt.blanklines_around_conditional_compilation) { 176 if (opt.blanklines_around_conditional_compilation) {
176 if (out.prev_line_kind != lk_if && out.line_kind == lk_if) 177 if (out.prev_line_kind != lk_if && out.line_kind == lk_if)
177 return true; 178 return true;
178 if (out.prev_line_kind == lk_endif 179 if (out.prev_line_kind == lk_endif
179 && out.line_kind != lk_endif) 180 && out.line_kind != lk_endif)
180 return true; 181 return true;
181 } 182 }
182 if (opt.blanklines_after_procs && out.prev_line_kind == lk_func_end 183 if (opt.blanklines_after_procs && out.prev_line_kind == lk_func_end
183 && out.line_kind != lk_endif) 184 && out.line_kind != lk_endif)
184 return true; 185 return true;
185 if (opt.blanklines_before_block_comments 186 if (opt.blanklines_before_block_comments
186 && out.line_kind == lk_block_comment) 187 && out.line_kind == lk_block_comment)
187 return true; 188 return true;
188 return false; 189 return false;
189} 190}
190 191
191static bool 192static bool
192is_blank_line_optional(void) 193is_blank_line_optional(void)
193{ 194{
194 if (out.prev_line_kind == lk_stmt_head) 195 if (out.prev_line_kind == lk_stmt_head)
195 return newlines >= 1; 196 return newlines >= 1;
196 if (ps.psyms.top >= 2) 197 if (ps.psyms.top >= 2)
197 return newlines >= 2; 198 return newlines >= 2;
198 return newlines >= 3; 199 return newlines >= 3;
199} 200}
200 201
201static int 202static int
202compute_case_label_indent(void) 203compute_case_label_indent(void)
203{ 204{
204 int i = ps.psyms.top; 205 int i = ps.psyms.top;
205 while (i > 0 && ps.psyms.sym[i] != psym_switch_expr) 206 while (i > 0 && ps.psyms.sym[i] != psym_switch_expr)
206 i--; 207 i--;
207 float case_ind = (float)ps.psyms.ind_level[i] + opt.case_indent; 208 float case_ind = (float)ps.psyms.ind_level[i] + opt.case_indent;
208 return (int)(case_ind * (float)opt.indent_size); 209 return (int)(case_ind * (float)opt.indent_size);
209} 210}
210 211
211int 212int
212compute_label_indent(void) 213compute_label_indent(void)
213{ 214{
214 if (out.line_kind == lk_case_or_default) 215 if (out.line_kind == lk_case_or_default)
215 return compute_case_label_indent(); 216 return compute_case_label_indent();
216 if (lab.s[0] == '#') 217 if (lab.s[0] == '#')
217 return 0; 218 return 0;
218 return opt.indent_size * (ps.ind_level - 2); 219 return opt.indent_size * (ps.ind_level - 2);
219} 220}
220 221
221static void 222static void
222output_line_label(void) 223output_line_label(void)
223{ 224{
224 write_indent(compute_label_indent()); 225 write_indent(compute_label_indent());
225 write_range(lab.s, lab.len); 226 write_range(lab.s, lab.len);
226} 227}
227 228
228static int 229static int
229compute_code_indent_lineup(int base_ind) 230compute_code_indent_lineup(int base_ind)
230{ 231{
231 int ind = paren_indent; 232 int ind = paren_indent;
232 int overflow = ind_add(ind, code.s, code.len) - opt.max_line_length; 233 int overflow = ind_add(ind, code.s, code.len) - opt.max_line_length;
233 if (overflow >= 0 234 if (overflow >= 0
234 && ind_add(base_ind, code.s, code.len) < opt.max_line_length) { 235 && ind_add(base_ind, code.s, code.len) < opt.max_line_length) {
235 ind -= overflow + 2; 236 ind -= overflow + 2;
236 if (ind < base_ind) 237 if (ind < base_ind)
237 ind = base_ind; 238 ind = base_ind;
238 } 239 }
239 240
240 if (ps.extra_expr_indent != eei_no 241 if (ps.extra_expr_indent != eei_no
241 && ind == base_ind + opt.indent_size) 242 && ind == base_ind + opt.indent_size)
242 ind += opt.continuation_indent; 243 ind += opt.continuation_indent;
243 return ind; 244 return ind;
244} 245}
245 246
246int 247int
247compute_code_indent(void) 248compute_code_indent(void)
248{ 249{
249 int base_ind = ps.ind_level * opt.indent_size; 250 int base_ind = ps.ind_level * opt.indent_size;
250 251
251 if (ps.line_start_nparen == 0) { 252 if (ps.line_start_nparen == 0) {
252 if (ps.psyms.top >= 1 253 if (ps.psyms.top >= 1
253 && ps.psyms.sym[ps.psyms.top - 1] == psym_lbrace_enum) 254 && ps.psyms.sym[ps.psyms.top - 1] == psym_lbrace_enum)
254 return base_ind; 255 return base_ind;
255 if (ps.in_stmt_cont) 256 if (ps.in_stmt_cont)
256 return base_ind + opt.continuation_indent; 257 return base_ind + opt.continuation_indent;
257 return base_ind; 258 return base_ind;
258 } 259 }
259 260
260 if (opt.lineup_to_parens) { 261 if (opt.lineup_to_parens) {
261 if (opt.lineup_to_parens_always) 262 if (opt.lineup_to_parens_always)
262 return paren_indent; 263 return paren_indent;
263 return compute_code_indent_lineup(base_ind); 264 return compute_code_indent_lineup(base_ind);
264 } 265 }
265 266
266 int rel_ind = opt.continuation_indent * ps.line_start_nparen; 267 int rel_ind = opt.continuation_indent * ps.line_start_nparen;
267 if (ps.extra_expr_indent != eei_no && rel_ind == opt.indent_size) 268 if (ps.extra_expr_indent != eei_no && rel_ind == opt.indent_size)
268 rel_ind += opt.continuation_indent; 269 rel_ind += opt.continuation_indent;
269 return base_ind + rel_ind; 270 return base_ind + rel_ind;
270} 271}
271 272
272static void 273static void
273output_line_code(void) 274output_line_code(void)
274{ 275{
275 int target_ind = compute_code_indent(); 276 int target_ind = compute_code_indent();
276 for (int i = 0; i < ps.nparen; i++) { 277 for (int i = 0; i < ps.nparen; i++) {
277 int paren_ind = ps.paren[i].indent; 278 int paren_ind = ps.paren[i].indent;
278 if (paren_ind >= 0) { 279 if (paren_ind >= 0) {
279 ps.paren[i].indent = -1 - (paren_ind + target_ind); 280 ps.paren[i].indent = -1 - (paren_ind + target_ind);
280 debug_println( 281 debug_println(
281 "setting paren_indents[%d] from %d to %d " 282 "setting paren_indents[%d] from %d to %d "
282 "for column %d", 283 "for column %d",
283 i, paren_ind, ps.paren[i].indent, target_ind + 1); 284 i, paren_ind, ps.paren[i].indent, target_ind + 1);
284 } 285 }
285 } 286 }
286 287
287 if (lab.len > 0 && target_ind <= out_ind) 288 if (lab.len > 0 && target_ind <= out_ind)
288 write_range(" ", 1); 289 write_range(" ", 1);
289 write_indent(target_ind); 290 write_indent(target_ind);
290 write_range(code.s, code.len); 291 write_range(code.s, code.len);
291} 292}
292 293
293static void 294static void
294output_line_comment(void) 295output_line_comment(void)
295{ 296{
296 int target_ind = ps.com_ind + ps.comment_delta; 297 int target_ind = ps.com_ind + ps.comment_delta;
297 const char *p; 298 const char *p;
298 299
299 /* consider original indentation in case this is a box comment */ 300 /* consider original indentation in case this is a box comment */
300 for (p = com.s; *p == '\t'; p++) 301 for (p = com.s; *p == '\t'; p++)
301 target_ind += opt.tabsize; 302 target_ind += opt.tabsize;
302 303
303 for (; target_ind < 0; p++) { 304 for (; target_ind < 0; p++) {
304 if (*p == ' ') 305 if (*p == ' ')
305 target_ind++; 306 target_ind++;
306 else if (*p == '\t') 307 else if (*p == '\t')
307 target_ind = next_tab(target_ind); 308 target_ind = next_tab(target_ind);
308 else { 309 else {
309 target_ind = 0; 310 target_ind = 0;
310 break; 311 break;
311 } 312 }
312 } 313 }
313 314
314 if (out_ind > target_ind) 315 if (out_ind > target_ind)
315 buffer_newline(); 316 buffer_newline();
316 317
317 while (com.s + com.len > p && ch_isspace(com.s[com.len - 1])) 318 while (com.s + com.len > p && ch_isspace(com.s[com.len - 1]))
318 com.len--; 319 com.len--;
 320 buf_terminate(&com);
319 321
320 write_indent(target_ind); 322 write_indent(target_ind);
321 write_range(p, com.len - (size_t)(p - com.s)); 323 write_range(p, com.len - (size_t)(p - com.s));
322 324
323 ps.comment_delta = ps.n_comment_delta; 325 ps.comment_delta = ps.n_comment_delta;
324} 326}
325 327
326static void 328static void
327output_line_indented(void) 329output_line_indented(void)
328{ 330{
329 if (lab.len == 0 && code.len == 0 && com.len == 0) 331 if (lab.len == 0 && code.len == 0 && com.len == 0)
330 out.line_kind = lk_blank; 332 out.line_kind = lk_blank;
331 333
332 if (want_blank_line() && newlines < 2 334 if (want_blank_line() && newlines < 2
333 && out.line_kind != lk_blank) 335 && out.line_kind != lk_blank)
334 buffer_newline(); 336 buffer_newline();
335 337
336 /* This kludge aligns function definitions correctly. */ 338 /* This kludge aligns function definitions correctly. */
337 if (ps.ind_level == 0) 339 if (ps.ind_level == 0)
338 ps.in_stmt_cont = false; 340 ps.in_stmt_cont = false;
339 341
340 if (opt.blank_line_after_decl && ps.declaration == decl_end 342 if (opt.blank_line_after_decl && ps.declaration == decl_end
341 && ps.psyms.top > 1) { 343 && ps.psyms.top > 1) {
342 ps.declaration = decl_no; 344 ps.declaration = decl_no;
343 ps.blank_line_after_decl = true; 345 ps.blank_line_after_decl = true;
344 } 346 }
345 347
346 if (opt.swallow_optional_blanklines 348 if (opt.swallow_optional_blanklines
347 && out.line_kind == lk_blank 349 && out.line_kind == lk_blank
348 && is_blank_line_optional()) 350 && is_blank_line_optional())
349 return; 351 return;
350 352
351 if (lab.len > 0) 353 if (lab.len > 0)
352 output_line_label(); 354 output_line_label();
353 if (code.len > 0) 355 if (code.len > 0)
354 output_line_code(); 356 output_line_code();
355 if (com.len > 0) 357 if (com.len > 0)
356 output_line_comment(); 358 output_line_comment();
357 buffer_newline(); 359 buffer_newline();
358 if (out.line_kind != lk_blank) 360 if (out.line_kind != lk_blank)
359 write_buffered_newlines(); 361 write_buffered_newlines();
360 362
361 out.prev_line_kind = out.line_kind; 363 out.prev_line_kind = out.line_kind;
362} 364}
363 365
364/* 366/*
365 * Write a line of formatted source to the output file. The line consists of 367 * Write a line of formatted source to the output file. The line consists of
366 * the label, the code and the comment. 368 * the label, the code and the comment.
367 */ 369 */
368void 370void
369output_line(void) 371output_line(void)
370{ 372{
371 debug_blank_line(); 373 debug_blank_line();
372 debug_printf("%s", __func__); 374 debug_printf("%s", __func__);
373 debug_buffers(); 375 debug_buffers();
374 376
375 if (indent_enabled == indent_on) 377 if (indent_enabled == indent_on)
376 output_line_indented(); 378 output_line_indented();
377 else if (indent_enabled == indent_last_off_line) { 379 else if (indent_enabled == indent_last_off_line) {
378 indent_enabled = indent_on; 380 indent_enabled = indent_on;
379 write_range(out.indent_off_text.s, out.indent_off_text.len); 381 write_range(out.indent_off_text.s, out.indent_off_text.len);
380 out.indent_off_text.len = 0; 382 buf_clear(&out.indent_off_text);
381 } 383 }
382 384
383 lab.len = 0; 385 buf_clear(&lab);
384 code.len = 0; 386 buf_clear(&code);
385 com.len = 0; 387 buf_clear(&com);
386 388
387 ps.line_has_decl = ps.in_decl; 389 ps.line_has_decl = ps.in_decl;
388 ps.line_has_func_def = false; 390 ps.line_has_func_def = false;
389 ps.in_stmt_cont = ps.in_stmt_or_decl 391 ps.in_stmt_cont = ps.in_stmt_or_decl
390 && (!ps.in_decl || ps.in_init) 392 && (!ps.in_decl || ps.in_init)
391 && ps.init_level == 0; 393 && ps.init_level == 0;
392 ps.decl_indent_done = false; 394 ps.decl_indent_done = false;
393 if (ps.extra_expr_indent == eei_last) 395 if (ps.extra_expr_indent == eei_last)
394 ps.extra_expr_indent = eei_no; 396 ps.extra_expr_indent = eei_no;
395 if (!(ps.psyms.sym[ps.psyms.top] == psym_if_expr_stmt_else 397 if (!(ps.psyms.sym[ps.psyms.top] == psym_if_expr_stmt_else
396 && ps.nparen > 0)) 398 && ps.nparen > 0))
397 ps.ind_level = ps.ind_level_follow; 399 ps.ind_level = ps.ind_level_follow;
398 ps.line_start_nparen = ps.nparen; 400 ps.line_start_nparen = ps.nparen;
399 ps.want_blank = false; 401 ps.want_blank = false;
400 402
401 if (ps.nparen > 0) { 403 if (ps.nparen > 0) {
402 /* TODO: explain what negative indentation means */ 404 /* TODO: explain what negative indentation means */
403 paren_indent = -1 - ps.paren[ps.nparen - 1].indent; 405 paren_indent = -1 - ps.paren[ps.nparen - 1].indent;
404 debug_println("paren_indent is now %d", paren_indent); 406 debug_println("paren_indent is now %d", paren_indent);
405 } 407 }
406 408
407 out.line_kind = lk_other; 409 out.line_kind = lk_other;
408} 410}
409 411
410void 412void
411output_finish(void) 413output_finish(void)
412{ 414{
413 output_line(); 415 output_line();
414 if (indent_enabled != indent_on) { 416 if (indent_enabled != indent_on) {
415 indent_enabled = indent_last_off_line; 417 indent_enabled = indent_last_off_line;
416 output_line(); 418 output_line();
417 } 419 }
418 fflush(output); 420 fflush(output);
419} 421}

cvs diff -r1.222 -r1.223 src/usr.bin/indent/lexi.c (switch to unified diff)

--- src/usr.bin/indent/lexi.c 2023/06/10 07:42:41 1.222
+++ src/usr.bin/indent/lexi.c 2023/06/10 12:59:31 1.223
@@ -1,706 +1,706 @@ @@ -1,706 +1,706 @@
1/* $NetBSD: lexi.c,v 1.222 2023/06/10 07:42:41 rillig Exp $ */ 1/* $NetBSD: lexi.c,v 1.223 2023/06/10 12:59:31 rillig Exp $ */
2 2
3/*- 3/*-
4 * SPDX-License-Identifier: BSD-4-Clause 4 * SPDX-License-Identifier: BSD-4-Clause
5 * 5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc. 6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993 7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved. 8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved. 9 * All rights reserved.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software 19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement: 20 * must display the following acknowledgement:
21 * This product includes software developed by the University of 21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors. 22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors 23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software 24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission. 25 * without specific prior written permission.
26 * 26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE. 37 * SUCH DAMAGE.
38 */ 38 */
39 39
40#include <sys/cdefs.h> 40#include <sys/cdefs.h>
41__RCSID("$NetBSD: lexi.c,v 1.222 2023/06/10 07:42:41 rillig Exp $"); 41__RCSID("$NetBSD: lexi.c,v 1.223 2023/06/10 12:59:31 rillig Exp $");
42 42
43#include <stdlib.h> 43#include <stdlib.h>
44#include <string.h> 44#include <string.h>
45 45
46#include "indent.h" 46#include "indent.h"
47 47
48/* In lexi_alnum, this constant marks a type, independent of parentheses. */ 48/* In lexi_alnum, this constant marks a type, independent of parentheses. */
49#define lsym_type lsym_type_outside_parentheses 49#define lsym_type lsym_type_outside_parentheses
50 50
51/* must be sorted alphabetically, is used in binary search */ 51/* must be sorted alphabetically, is used in binary search */
52static const struct keyword { 52static const struct keyword {
53 const char name[12]; 53 const char name[12];
54 lexer_symbol lsym; 54 lexer_symbol lsym;
55} keywords[] = { 55} keywords[] = {
56 {"_Bool", lsym_type}, 56 {"_Bool", lsym_type},
57 {"_Complex", lsym_type}, 57 {"_Complex", lsym_type},
58 {"_Imaginary", lsym_type}, 58 {"_Imaginary", lsym_type},
59 {"auto", lsym_modifier}, 59 {"auto", lsym_modifier},
60 {"bool", lsym_type}, 60 {"bool", lsym_type},
61 {"break", lsym_word}, 61 {"break", lsym_word},
62 {"case", lsym_case}, 62 {"case", lsym_case},
63 {"char", lsym_type}, 63 {"char", lsym_type},
64 {"complex", lsym_type}, 64 {"complex", lsym_type},
65 {"const", lsym_modifier}, 65 {"const", lsym_modifier},
66 {"continue", lsym_word}, 66 {"continue", lsym_word},
67 {"default", lsym_default}, 67 {"default", lsym_default},
68 {"do", lsym_do}, 68 {"do", lsym_do},
69 {"double", lsym_type}, 69 {"double", lsym_type},
70 {"else", lsym_else}, 70 {"else", lsym_else},
71 {"enum", lsym_tag}, 71 {"enum", lsym_tag},
72 {"extern", lsym_modifier}, 72 {"extern", lsym_modifier},
73 {"float", lsym_type}, 73 {"float", lsym_type},
74 {"for", lsym_for}, 74 {"for", lsym_for},
75 {"goto", lsym_word}, 75 {"goto", lsym_word},
76 {"if", lsym_if}, 76 {"if", lsym_if},
77 {"imaginary", lsym_type}, 77 {"imaginary", lsym_type},
78 {"inline", lsym_modifier}, 78 {"inline", lsym_modifier},
79 {"int", lsym_type}, 79 {"int", lsym_type},
80 {"long", lsym_type}, 80 {"long", lsym_type},
81 {"offsetof", lsym_offsetof}, 81 {"offsetof", lsym_offsetof},
82 {"register", lsym_modifier}, 82 {"register", lsym_modifier},
83 {"restrict", lsym_word}, 83 {"restrict", lsym_word},
84 {"return", lsym_return}, 84 {"return", lsym_return},
85 {"short", lsym_type}, 85 {"short", lsym_type},
86 {"signed", lsym_type}, 86 {"signed", lsym_type},
87 {"sizeof", lsym_sizeof}, 87 {"sizeof", lsym_sizeof},
88 {"static", lsym_modifier}, 88 {"static", lsym_modifier},
89 {"struct", lsym_tag}, 89 {"struct", lsym_tag},
90 {"switch", lsym_switch}, 90 {"switch", lsym_switch},
91 {"typedef", lsym_typedef}, 91 {"typedef", lsym_typedef},
92 {"union", lsym_tag}, 92 {"union", lsym_tag},
93 {"unsigned", lsym_type}, 93 {"unsigned", lsym_type},
94 {"void", lsym_type}, 94 {"void", lsym_type},
95 {"volatile", lsym_modifier}, 95 {"volatile", lsym_modifier},
96 {"while", lsym_while} 96 {"while", lsym_while}
97}; 97};
98 98
99static struct { 99static struct {
100 const char **items; 100 const char **items;
101 unsigned int len; 101 unsigned int len;
102 unsigned int cap; 102 unsigned int cap;
103} typenames; 103} typenames;
104 104
105/*- 105/*-
106 * The transition table below was rewritten by hand from lx's output, given 106 * The transition table below was rewritten by hand from lx's output, given
107 * the following definitions. lx is Katherine Flavel's lexer generator. 107 * the following definitions. lx is Katherine Flavel's lexer generator.
108 * 108 *
109 * O = /[0-7]/; D = /[0-9]/; NZ = /[1-9]/; 109 * O = /[0-7]/; D = /[0-9]/; NZ = /[1-9]/;
110 * H = /[a-f0-9]/i; B = /[0-1]/; HP = /0x/i; 110 * H = /[a-f0-9]/i; B = /[0-1]/; HP = /0x/i;
111 * BP = /0b/i; E = /e[+\-]?/i D+; P = /p[+\-]?/i D+; 111 * BP = /0b/i; E = /e[+\-]?/i D+; P = /p[+\-]?/i D+;
112 * FS = /[fl]/i; IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?; 112 * FS = /[fl]/i; IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?;
113 * 113 *
114 * D+ E FS? -> $float; 114 * D+ E FS? -> $float;
115 * D* "." D+ E? FS? -> $float; 115 * D* "." D+ E? FS? -> $float;
116 * D+ "." E? FS? -> $float; HP H+ IS? -> $int; 116 * D+ "." E? FS? -> $float; HP H+ IS? -> $int;
117 * HP H+ P FS? -> $float; NZ D* IS? -> $int; 117 * HP H+ P FS? -> $float; NZ D* IS? -> $int;
118 * HP H* "." H+ P FS? -> $float; "0" O* IS? -> $int; 118 * HP H* "." H+ P FS? -> $float; "0" O* IS? -> $int;
119 * HP H+ "." P FS -> $float; BP B+ IS? -> $int; 119 * HP H+ "." P FS -> $float; BP B+ IS? -> $int;
120 */ 120 */
121/* INDENT OFF */ 121/* INDENT OFF */
122static const unsigned char lex_number_state[][26] = { 122static const unsigned char lex_number_state[][26] = {
123 /* examples: 123 /* examples:
124 00 124 00
125 s 0xx 125 s 0xx
126 t 00xaa 126 t 00xaa
127 a 11 101100xxa.. 127 a 11 101100xxa..
128 r 11ee0001101lbuuxx.a.pp 128 r 11ee0001101lbuuxx.a.pp
129 t.01.e+008bLuxll0Ll.aa.p+0 129 t.01.e+008bLuxll0Ll.aa.p+0
130 states: ABCDEFGHIJKLMNOPQRSTUVWXYZ */ 130 states: ABCDEFGHIJKLMNOPQRSTUVWXYZ */
131 [0] = "uuiifuufiuuiiuiiiiiuiuuuuu", /* (other) */ 131 [0] = "uuiifuufiuuiiuiiiiiuiuuuuu", /* (other) */
132 [1] = "CEIDEHHHIJQ U Q VUVVZZZ", /* 0 */ 132 [1] = "CEIDEHHHIJQ U Q VUVVZZZ", /* 0 */
133 [2] = "DEIDEHHHIJQ U Q VUVVZZZ", /* 1 */ 133 [2] = "DEIDEHHHIJQ U Q VUVVZZZ", /* 1 */
134 [3] = "DEIDEHHHIJ U VUVVZZZ", /* 2 3 4 5 6 7 */ 134 [3] = "DEIDEHHHIJ U VUVVZZZ", /* 2 3 4 5 6 7 */
135 [4] = "DEJDEHHHJJ U VUVVZZZ", /* 8 9 */ 135 [4] = "DEJDEHHHJJ U VUVVZZZ", /* 8 9 */
136 [5] = " U VUVV ", /* A a C c D d */ 136 [5] = " U VUVV ", /* A a C c D d */
137 [6] = " K U VUVV ", /* B b */ 137 [6] = " K U VUVV ", /* B b */
138 [7] = " FFF FF U VUVV ", /* E e */ 138 [7] = " FFF FF U VUVV ", /* E e */
139 [8] = " f f U VUVV f", /* F f */ 139 [8] = " f f U VUVV f", /* F f */
140 [9] = " LLf fL PR Li L f", /* L */ 140 [9] = " LLf fL PR Li L f", /* L */
141 [10] = " OOf fO S P O i O f", /* l */ 141 [10] = " OOf fO S P O i O f", /* l */
142 [11] = " FFX ", /* P p */ 142 [11] = " FFX ", /* P p */
143 [12] = " MM M i iiM M ", /* U u */ 143 [12] = " MM M i iiM M ", /* U u */
144 [13] = " N ", /* X x */ 144 [13] = " N ", /* X x */
145 [14] = " G Y ", /* + - */ 145 [14] = " G Y ", /* + - */
146 [15] = "B EE EE T W ", /* . */ 146 [15] = "B EE EE T W ", /* . */
147 /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ 147 /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
148}; 148};
149/* INDENT ON */ 149/* INDENT ON */
150 150
151static const unsigned char lex_number_row[] = { 151static const unsigned char lex_number_row[] = {
152 ['0'] = 1, 152 ['0'] = 1,
153 ['1'] = 2, 153 ['1'] = 2,
154 ['2'] = 3, ['3'] = 3, ['4'] = 3, ['5'] = 3, ['6'] = 3, ['7'] = 3, 154 ['2'] = 3, ['3'] = 3, ['4'] = 3, ['5'] = 3, ['6'] = 3, ['7'] = 3,
155 ['8'] = 4, ['9'] = 4, 155 ['8'] = 4, ['9'] = 4,
156 ['A'] = 5, ['a'] = 5, ['C'] = 5, ['c'] = 5, ['D'] = 5, ['d'] = 5, 156 ['A'] = 5, ['a'] = 5, ['C'] = 5, ['c'] = 5, ['D'] = 5, ['d'] = 5,
157 ['B'] = 6, ['b'] = 6, 157 ['B'] = 6, ['b'] = 6,
158 ['E'] = 7, ['e'] = 7, 158 ['E'] = 7, ['e'] = 7,
159 ['F'] = 8, ['f'] = 8, 159 ['F'] = 8, ['f'] = 8,
160 ['L'] = 9, 160 ['L'] = 9,
161 ['l'] = 10, 161 ['l'] = 10,
162 ['P'] = 11, ['p'] = 11, 162 ['P'] = 11, ['p'] = 11,
163 ['U'] = 12, ['u'] = 12, 163 ['U'] = 12, ['u'] = 12,
164 ['X'] = 13, ['x'] = 13, 164 ['X'] = 13, ['x'] = 13,
165 ['+'] = 14, ['-'] = 14, 165 ['+'] = 14, ['-'] = 14,
166 ['.'] = 15, 166 ['.'] = 15,
167}; 167};
168 168
169 169
170static void 170static void
171token_add_char(char ch) 171token_add_char(char ch)
172{ 172{
173 buf_add_char(&token, ch); 173 buf_add_char(&token, ch);
174} 174}
175 175
176static void 176static void
177lex_number(void) 177lex_number(void)
178{ 178{
179 for (unsigned char s = 'A'; s != 'f' && s != 'i' && s != 'u';) { 179 for (unsigned char s = 'A'; s != 'f' && s != 'i' && s != 'u';) {
180 unsigned char ch = (unsigned char)inp_p[0]; 180 unsigned char ch = (unsigned char)inp_p[0];
181 if (ch == '\\' && inp_p[1] == '\n') { 181 if (ch == '\\' && inp_p[1] == '\n') {
182 inp_p++; 182 inp_p++;
183 inp_skip(); 183 inp_skip();
184 line_no++; 184 line_no++;
185 continue; 185 continue;
186 } 186 }
187 if (ch >= array_length(lex_number_row) 187 if (ch >= array_length(lex_number_row)
188 || lex_number_row[ch] == 0) 188 || lex_number_row[ch] == 0)
189 break; 189 break;
190 190
191 unsigned char row = lex_number_row[ch]; 191 unsigned char row = lex_number_row[ch];
192 if (lex_number_state[row][s - 'A'] == ' ') { 192 if (lex_number_state[row][s - 'A'] == ' ') {
193 /*- 193 /*-
194 * lex_number_state[0][s - 'A'] now indicates the type: 194 * lex_number_state[0][s - 'A'] now indicates the type:
195 * f = floating, i = integer, u = unknown 195 * f = floating, i = integer, u = unknown
196 */ 196 */
197 return; 197 return;
198 } 198 }
199 199
200 s = lex_number_state[row][s - 'A']; 200 s = lex_number_state[row][s - 'A'];
201 token_add_char(inp_next()); 201 token_add_char(inp_next());
202 } 202 }
203} 203}
204 204
205static bool 205static bool
206is_identifier_start(char ch) 206is_identifier_start(char ch)
207{ 207{
208 return ch_isalpha(ch) || ch == '_' || ch == '$'; 208 return ch_isalpha(ch) || ch == '_' || ch == '$';
209} 209}
210 210
211static bool 211static bool
212is_identifier_part(char ch) 212is_identifier_part(char ch)
213{ 213{
214 return ch_isalnum(ch) || ch == '_' || ch == '$'; 214 return ch_isalnum(ch) || ch == '_' || ch == '$';
215} 215}
216 216
217static void 217static void
218lex_word(void) 218lex_word(void)
219{ 219{
220 for (;;) { 220 for (;;) {
221 if (is_identifier_part(inp_p[0])) 221 if (is_identifier_part(inp_p[0]))
222 token_add_char(*inp_p++); 222 token_add_char(*inp_p++);
223 else if (inp_p[0] == '\\' && inp_p[1] == '\n') { 223 else if (inp_p[0] == '\\' && inp_p[1] == '\n') {
224 inp_p++; 224 inp_p++;
225 inp_skip(); 225 inp_skip();
226 line_no++; 226 line_no++;
227 } else 227 } else
228 return; 228 return;
229 } 229 }
230} 230}
231 231
232static void 232static void
233lex_char_or_string(void) 233lex_char_or_string(void)
234{ 234{
235 for (char delim = token.s[token.len - 1];;) { 235 for (char delim = token.s[token.len - 1];;) {
236 if (inp_p[0] == '\n') { 236 if (inp_p[0] == '\n') {
237 diag(1, "Unterminated literal"); 237 diag(1, "Unterminated literal");
238 return; 238 return;
239 } 239 }
240 240
241 token_add_char(*inp_p++); 241 token_add_char(*inp_p++);
242 if (token.s[token.len - 1] == delim) 242 if (token.s[token.len - 1] == delim)
243 return; 243 return;
244 244
245 if (token.s[token.len - 1] == '\\') { 245 if (token.s[token.len - 1] == '\\') {
246 if (inp_p[0] == '\n') 246 if (inp_p[0] == '\n')
247 ++line_no; 247 ++line_no;
248 token_add_char(inp_next()); 248 token_add_char(inp_next());
249 } 249 }
250 } 250 }
251} 251}
252 252
253/* Guess whether the current token is a declared type. */ 253/* Guess whether the current token is a declared type. */
254static bool 254static bool
255probably_typename(void) 255probably_typename(void)
256{ 256{
257 if (ps.prev_lsym == lsym_modifier) 257 if (ps.prev_lsym == lsym_modifier)
258 return true; 258 return true;
259 if (ps.in_init) 259 if (ps.in_init)
260 return false; 260 return false;
261 if (ps.in_stmt_or_decl) /* XXX: this condition looks incorrect */ 261 if (ps.in_stmt_or_decl) /* XXX: this condition looks incorrect */
262 return false; 262 return false;
263 if (ps.prev_lsym == lsym_semicolon 263 if (ps.prev_lsym == lsym_semicolon
264 || ps.prev_lsym == lsym_lbrace 264 || ps.prev_lsym == lsym_lbrace
265 || ps.prev_lsym == lsym_rbrace) { 265 || ps.prev_lsym == lsym_rbrace) {
266 if (inp_p[0] == '*' && inp_p[1] != '=') 266 if (inp_p[0] == '*' && inp_p[1] != '=')
267 return true; 267 return true;
268 /* XXX: is_identifier_start */ 268 /* XXX: is_identifier_start */
269 if (ch_isalpha(inp_p[0])) 269 if (ch_isalpha(inp_p[0]))
270 return true; 270 return true;
271 } 271 }
272 return false; 272 return false;
273} 273}
274 274
275static int 275static int
276bsearch_typenames(const char *key) 276bsearch_typenames(const char *key)
277{ 277{
278 const char **arr = typenames.items; 278 const char **arr = typenames.items;
279 int lo = 0; 279 int lo = 0;
280 int hi = (int)typenames.len - 1; 280 int hi = (int)typenames.len - 1;
281 281
282 while (lo <= hi) { 282 while (lo <= hi) {
283 int mid = (int)((unsigned)(lo + hi) >> 1); 283 int mid = (int)((unsigned)(lo + hi) >> 1);
284 int cmp = strcmp(arr[mid], key); 284 int cmp = strcmp(arr[mid], key);
285 if (cmp < 0) 285 if (cmp < 0)
286 lo = mid + 1; 286 lo = mid + 1;
287 else if (cmp > 0) 287 else if (cmp > 0)
288 hi = mid - 1; 288 hi = mid - 1;
289 else 289 else
290 return mid; 290 return mid;
291 } 291 }
292 return -(lo + 1); 292 return -(lo + 1);
293} 293}
294 294
295static bool 295static bool
296is_typename(void) 296is_typename(void)
297{ 297{
298 if (opt.auto_typedefs && 298 if (opt.auto_typedefs &&
299 token.len >= 2 && memcmp(token.s + token.len - 2, "_t", 2) == 0) 299 token.len >= 2 && memcmp(token.s + token.len - 2, "_t", 2) == 0)
300 return true; 300 return true;
301 301
302 return bsearch_typenames(token.s) >= 0; 302 return bsearch_typenames(token.s) >= 0;
303} 303}
304 304
305static int 305static int
306cmp_keyword_by_name(const void *key, const void *elem) 306cmp_keyword_by_name(const void *key, const void *elem)
307{ 307{
308 return strcmp(key, ((const struct keyword *)elem)->name); 308 return strcmp(key, ((const struct keyword *)elem)->name);
309} 309}
310 310
311/* 311/*
312 * Looking at something like 'function_name(...)' in a line, guess whether 312 * Looking at something like 'function_name(...)' in a line, guess whether
313 * this starts a function definition or a declaration. 313 * this starts a function definition or a declaration.
314 */ 314 */
315static bool 315static bool
316probably_looking_at_definition(void) 316probably_looking_at_definition(void)
317{ 317{
318 int paren_level = 0; 318 int paren_level = 0;
319 for (const char *p = inp_p; *p != '\n'; p++) { 319 for (const char *p = inp_p; *p != '\n'; p++) {
320 if (*p == '(') 320 if (*p == '(')
321 paren_level++; 321 paren_level++;
322 if (*p == ')' && --paren_level == 0) { 322 if (*p == ')' && --paren_level == 0) {
323 p++; 323 p++;
324 324
325 while (*p != '\n' 325 while (*p != '\n'
326 && (ch_isspace(*p) || is_identifier_part(*p))) 326 && (ch_isspace(*p) || is_identifier_part(*p)))
327 p++; /* '__dead' or '__unused' */ 327 p++; /* '__dead' or '__unused' */
328 328
329 if (*p == '\n') /* func(...) */ 329 if (*p == '\n') /* func(...) */
330 break; 330 break;
331 if (*p == ';') /* func(...); */ 331 if (*p == ';') /* func(...); */
332 return false; 332 return false;
333 if (*p == ',') /* double abs(), pi; */ 333 if (*p == ',') /* double abs(), pi; */
334 return false; 334 return false;
335 if (*p == '(') /* func(...) __attribute__((...)) */ 335 if (*p == '(') /* func(...) __attribute__((...)) */
336 paren_level++; /* func(...) __printflike(...) 336 paren_level++; /* func(...) __printflike(...)
337 */ 337 */
338 else 338 else
339 break; /* func(...) { ... */ 339 break; /* func(...) { ... */
340 } 340 }
341 341
342 if (paren_level == 1 && p[0] == '*' && p[1] == ',') 342 if (paren_level == 1 && p[0] == '*' && p[1] == ',')
343 return false; 343 return false;
344 } 344 }
345 345
346 /* To further reduce the cases where indent wrongly treats an 346 /* To further reduce the cases where indent wrongly treats an
347 * incomplete function declaration as a function definition, thus 347 * incomplete function declaration as a function definition, thus
348 * adding a newline before the function name, it may be worth looking 348 * adding a newline before the function name, it may be worth looking
349 * for parameter names, as these are often omitted in function 349 * for parameter names, as these are often omitted in function
350 * declarations and only included in function definitions. Or just 350 * declarations and only included in function definitions. Or just
351 * increase the lookahead to more than just the current line of input, 351 * increase the lookahead to more than just the current line of input,
352 * until the next '{'. */ 352 * until the next '{'. */
353 return true; 353 return true;
354} 354}
355 355
356/* Read an alphanumeric token into 'token', or return lsym_eof. */ 356/* Read an alphanumeric token into 'token', or return lsym_eof. */
357static lexer_symbol 357static lexer_symbol
358lexi_alnum(void) 358lexi_alnum(void)
359{ 359{
360 if (ch_isdigit(inp_p[0]) || 360 if (ch_isdigit(inp_p[0]) ||
361 (inp_p[0] == '.' && ch_isdigit(inp_p[1]))) { 361 (inp_p[0] == '.' && ch_isdigit(inp_p[1]))) {
362 lex_number(); 362 lex_number();
363 } else if (is_identifier_start(inp_p[0])) { 363 } else if (is_identifier_start(inp_p[0])) {
364 lex_word(); 364 lex_word();
365 365
366 if (token.len == 1 && token.s[0] == 'L' && 366 if (token.len == 1 && token.s[0] == 'L' &&
367 (inp_p[0] == '"' || inp_p[0] == '\'')) { 367 (inp_p[0] == '"' || inp_p[0] == '\'')) {
368 token_add_char(*inp_p++); 368 token_add_char(*inp_p++);
369 lex_char_or_string(); 369 lex_char_or_string();
370 ps.next_unary = false; 370 ps.next_unary = false;
371 return lsym_word; 371 return lsym_word;
372 } 372 }
373 } else 373 } else
374 return lsym_eof; /* just as a placeholder */ 374 return lsym_eof; /* just as a placeholder */
375 375
376 while (ch_isblank(inp_p[0])) 376 while (ch_isblank(inp_p[0]))
377 inp_p++; 377 inp_p++;
378 378
379 ps.next_unary = ps.prev_lsym == lsym_tag 379 ps.next_unary = ps.prev_lsym == lsym_tag
380 || ps.prev_lsym == lsym_typedef; 380 || ps.prev_lsym == lsym_typedef;
381 381
382 if (ps.prev_lsym == lsym_tag && ps.nparen == 0) 382 if (ps.prev_lsym == lsym_tag && ps.nparen == 0)
383 return lsym_type_outside_parentheses; 383 return lsym_type_outside_parentheses;
384 384
385 token_add_char('\0'); 385 token_add_char('\0');
386 token.len--; 386 token.len--;
387 const struct keyword *kw = bsearch(token.s, keywords, 387 const struct keyword *kw = bsearch(token.s, keywords,
388 array_length(keywords), sizeof(keywords[0]), cmp_keyword_by_name); 388 array_length(keywords), sizeof(keywords[0]), cmp_keyword_by_name);
389 lexer_symbol lsym = lsym_word; 389 lexer_symbol lsym = lsym_word;
390 if (kw != NULL) { 390 if (kw != NULL) {
391 if (kw->lsym == lsym_type) 391 if (kw->lsym == lsym_type)
392 lsym = lsym_type_in_parentheses; 392 lsym = lsym_type_in_parentheses;
393 ps.next_unary = true; 393 ps.next_unary = true;
394 if (kw->lsym == lsym_tag || kw->lsym == lsym_type) 394 if (kw->lsym == lsym_tag || kw->lsym == lsym_type)
395 goto found_typename; 395 goto found_typename;
396 return kw->lsym; 396 return kw->lsym;
397 } 397 }
398 398
399 if (is_typename()) { 399 if (is_typename()) {
400 lsym = lsym_type_in_parentheses; 400 lsym = lsym_type_in_parentheses;
401 ps.next_unary = true; 401 ps.next_unary = true;
402found_typename: 402found_typename:
403 if (ps.nparen > 0) { 403 if (ps.nparen > 0) {
404 /* inside parentheses: cast, param list, offsetof or 404 /* inside parentheses: cast, param list, offsetof or
405 * sizeof */ 405 * sizeof */
406 if (ps.paren[ps.nparen - 1].cast == cast_unknown) 406 if (ps.paren[ps.nparen - 1].cast == cast_unknown)
407 ps.paren[ps.nparen - 1].cast = cast_maybe; 407 ps.paren[ps.nparen - 1].cast = cast_maybe;
408 } 408 }
409 if (ps.prev_lsym != lsym_period 409 if (ps.prev_lsym != lsym_period
410 && ps.prev_lsym != lsym_unary_op) { 410 && ps.prev_lsym != lsym_unary_op) {
411 if (kw != NULL && kw->lsym == lsym_tag) 411 if (kw != NULL && kw->lsym == lsym_tag)
412 return lsym_tag; 412 return lsym_tag;
413 if (ps.nparen == 0) 413 if (ps.nparen == 0)
414 return lsym_type_outside_parentheses; 414 return lsym_type_outside_parentheses;
415 } 415 }
416 } 416 }
417 417
418 if (inp_p[0] == '(' && ps.psyms.top <= 1 && ps.ind_level == 0 && 418 if (inp_p[0] == '(' && ps.psyms.top <= 1 && ps.ind_level == 0 &&
419 !ps.in_func_def_params && !ps.in_init) { 419 !ps.in_func_def_params && !ps.in_init) {
420 420
421 if (ps.nparen == 0 && probably_looking_at_definition()) { 421 if (ps.nparen == 0 && probably_looking_at_definition()) {
422 ps.line_has_func_def = true; 422 ps.line_has_func_def = true;
423 if (ps.in_decl) 423 if (ps.in_decl)
424 ps.in_func_def_params = true; 424 ps.in_func_def_params = true;
425 return lsym_funcname; 425 return lsym_funcname;
426 } 426 }
427 427
428 } else if (ps.nparen == 0 && probably_typename()) { 428 } else if (ps.nparen == 0 && probably_typename()) {
429 ps.next_unary = true; 429 ps.next_unary = true;
430 return lsym_type_outside_parentheses; 430 return lsym_type_outside_parentheses;
431 } 431 }
432 432
433 return lsym; 433 return lsym;
434} 434}
435 435
436static bool 436static bool
437is_asterisk_unary(void) 437is_asterisk_unary(void)
438{ 438{
439 if (inp_p[strspn(inp_p, "* \t")] == ')') 439 if (inp_p[strspn(inp_p, "* \t")] == ')')
440 return true; 440 return true;
441 if (ps.next_unary || ps.in_func_def_params) 441 if (ps.next_unary || ps.in_func_def_params)
442 return true; 442 return true;
443 if (ps.prev_lsym == lsym_word || 443 if (ps.prev_lsym == lsym_word ||
444 ps.prev_lsym == lsym_rparen || 444 ps.prev_lsym == lsym_rparen ||
445 ps.prev_lsym == lsym_rbracket) 445 ps.prev_lsym == lsym_rbracket)
446 return false; 446 return false;
447 return ps.in_decl && ps.nparen > 0; 447 return ps.in_decl && ps.nparen > 0;
448} 448}
449 449
450static bool 450static bool
451probably_in_function_definition(void) 451probably_in_function_definition(void)
452{ 452{
453 for (const char *tp = inp_p; *tp != '\n';) { 453 for (const char *tp = inp_p; *tp != '\n';) {
454 if (ch_isspace(*tp)) 454 if (ch_isspace(*tp))
455 tp++; 455 tp++;
456 else if (is_identifier_start(*tp)) { 456 else if (is_identifier_start(*tp)) {
457 tp++; 457 tp++;
458 while (is_identifier_part(*tp)) 458 while (is_identifier_part(*tp))
459 tp++; 459 tp++;
460 } else 460 } else
461 return *tp == '('; 461 return *tp == '(';
462 } 462 }
463 return false; 463 return false;
464} 464}
465 465
466static void 466static void
467lex_asterisk_unary(void) 467lex_asterisk_unary(void)
468{ 468{
469 while (inp_p[0] == '*' || ch_isspace(inp_p[0])) { 469 while (inp_p[0] == '*' || ch_isspace(inp_p[0])) {
470 if (inp_p[0] == '*') 470 if (inp_p[0] == '*')
471 token_add_char('*'); 471 token_add_char('*');
472 inp_skip(); 472 inp_skip();
473 } 473 }
474 474
475 if (ps.in_decl && probably_in_function_definition()) 475 if (ps.in_decl && probably_in_function_definition())
476 ps.line_has_func_def = true; 476 ps.line_has_func_def = true;
477} 477}
478 478
479static void 479static void
480skip_blank(const char **pp) 480skip_blank(const char **pp)
481{ 481{
482 while (ch_isblank(**pp)) 482 while (ch_isblank(**pp))
483 (*pp)++; 483 (*pp)++;
484} 484}
485 485
486static bool 486static bool
487skip_string(const char **pp, const char *s) 487skip_string(const char **pp, const char *s)
488{ 488{
489 size_t len = strlen(s); 489 size_t len = strlen(s);
490 if (strncmp(*pp, s, len) == 0) { 490 if (strncmp(*pp, s, len) == 0) {
491 *pp += len; 491 *pp += len;
492 return true; 492 return true;
493 } 493 }
494 return false; 494 return false;
495} 495}
496 496
497static void 497static void
498lex_indent_comment(void) 498lex_indent_comment(void)
499{ 499{
500 const char *p = inp.s; 500 const char *p = inp.s;
501 501
502 skip_blank(&p); 502 skip_blank(&p);
503 if (!skip_string(&p, "/*")) 503 if (!skip_string(&p, "/*"))
504 return; 504 return;
505 skip_blank(&p); 505 skip_blank(&p);
506 if (!skip_string(&p, "INDENT")) 506 if (!skip_string(&p, "INDENT"))
507 return; 507 return;
508 508
509 enum indent_enabled enabled; 509 enum indent_enabled enabled;
510 skip_blank(&p); 510 skip_blank(&p);
511 if (*p == '*' || skip_string(&p, "ON")) 511 if (*p == '*' || skip_string(&p, "ON"))
512 enabled = indent_last_off_line; 512 enabled = indent_last_off_line;
513 else if (skip_string(&p, "OFF")) 513 else if (skip_string(&p, "OFF"))
514 enabled = indent_off; 514 enabled = indent_off;
515 else 515 else
516 return; 516 return;
517 517
518 skip_blank(&p); 518 skip_blank(&p);
519 if (!skip_string(&p, "*/\n")) 519 if (!skip_string(&p, "*/\n"))
520 return; 520 return;
521 521
522 if (lab.len > 0 || code.len > 0 || com.len > 0) 522 if (lab.len > 0 || code.len > 0 || com.len > 0)
523 output_line(); 523 output_line();
524 524
525 indent_enabled = enabled; 525 indent_enabled = enabled;
526} 526}
527 527
528/* Reads the next token, placing it in the global variable "token". */ 528/* Reads the next token, placing it in the global variable "token". */
529lexer_symbol 529lexer_symbol
530lexi(void) 530lexi(void)
531{ 531{
532 token.len = 0; 532 buf_clear(&token);
533 ps.curr_col_1 = ps.next_col_1; 533 ps.curr_col_1 = ps.next_col_1;
534 ps.next_col_1 = false; 534 ps.next_col_1 = false;
535 535
536 for (;;) { 536 for (;;) {
537 if (ch_isblank(inp_p[0])) { 537 if (ch_isblank(inp_p[0])) {
538 ps.curr_col_1 = false; 538 ps.curr_col_1 = false;
539 inp_p++; 539 inp_p++;
540 } else if (inp_p[0] == '\\' && inp_p[1] == '\n') { 540 } else if (inp_p[0] == '\\' && inp_p[1] == '\n') {
541 inp_p++; 541 inp_p++;
542 inp_skip(); 542 inp_skip();
543 line_no++; 543 line_no++;
544 } else 544 } else
545 break; 545 break;
546 } 546 }
547 547
548 lexer_symbol alnum_lsym = lexi_alnum(); 548 lexer_symbol alnum_lsym = lexi_alnum();
549 if (alnum_lsym != lsym_eof) 549 if (alnum_lsym != lsym_eof)
550 return alnum_lsym; 550 return alnum_lsym;
551 551
552 /* Scan a non-alphanumeric token */ 552 /* Scan a non-alphanumeric token */
553 553
554 token_add_char(inp_next()); 554 token_add_char(inp_next());
555 555
556 lexer_symbol lsym; 556 lexer_symbol lsym;
557 bool next_unary; 557 bool next_unary;
558 558
559 switch (token.s[token.len - 1]) { 559 switch (token.s[token.len - 1]) {
560 560
561 case '#': 561 case '#':
562 lsym = lsym_preprocessing; 562 lsym = lsym_preprocessing;
563 next_unary = ps.next_unary; 563 next_unary = ps.next_unary;
564 break; 564 break;
565 565
566 case '\n': 566 case '\n':
567 /* if data has been exhausted, the '\n' is a dummy. */ 567 /* if data has been exhausted, the '\n' is a dummy. */
568 lsym = had_eof ? lsym_eof : lsym_newline; 568 lsym = had_eof ? lsym_eof : lsym_newline;
569 next_unary = ps.next_unary; 569 next_unary = ps.next_unary;
570 ps.next_col_1 = true; 570 ps.next_col_1 = true;
571 break; 571 break;
572 572
573 /* INDENT OFF */ 573 /* INDENT OFF */
574 case '(': lsym = lsym_lparen; next_unary = true; break; 574 case '(': lsym = lsym_lparen; next_unary = true; break;
575 case ')': lsym = lsym_rparen; next_unary = false; break; 575 case ')': lsym = lsym_rparen; next_unary = false; break;
576 case '[': lsym = lsym_lbracket; next_unary = true; break; 576 case '[': lsym = lsym_lbracket; next_unary = true; break;
577 case ']': lsym = lsym_rbracket; next_unary = false; break; 577 case ']': lsym = lsym_rbracket; next_unary = false; break;
578 case '{': lsym = lsym_lbrace; next_unary = true; break; 578 case '{': lsym = lsym_lbrace; next_unary = true; break;
579 case '}': lsym = lsym_rbrace; next_unary = true; break; 579 case '}': lsym = lsym_rbrace; next_unary = true; break;
580 case '.': lsym = lsym_period; next_unary = false; break; 580 case '.': lsym = lsym_period; next_unary = false; break;
581 case '?': lsym = lsym_question; next_unary = true; break; 581 case '?': lsym = lsym_question; next_unary = true; break;
582 case ',': lsym = lsym_comma; next_unary = true; break; 582 case ',': lsym = lsym_comma; next_unary = true; break;
583 case ';': lsym = lsym_semicolon; next_unary = true; break; 583 case ';': lsym = lsym_semicolon; next_unary = true; break;
584 /* INDENT ON */ 584 /* INDENT ON */
585 585
586 case '-': 586 case '-':
587 case '+': 587 case '+':
588 lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; 588 lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
589 next_unary = true; 589 next_unary = true;
590 590
591 /* '++' or '--' */ 591 /* '++' or '--' */
592 if (inp_p[0] == token.s[token.len - 1]) { 592 if (inp_p[0] == token.s[token.len - 1]) {
593 token_add_char(*inp_p++); 593 token_add_char(*inp_p++);
594 if (ps.prev_lsym == lsym_word || 594 if (ps.prev_lsym == lsym_word ||
595 ps.prev_lsym == lsym_rparen || 595 ps.prev_lsym == lsym_rparen ||
596 ps.prev_lsym == lsym_rbracket) { 596 ps.prev_lsym == lsym_rbracket) {
597 lsym = ps.next_unary 597 lsym = ps.next_unary
598 ? lsym_unary_op : lsym_postfix_op; 598 ? lsym_unary_op : lsym_postfix_op;
599 next_unary = false; 599 next_unary = false;
600 } 600 }
601 601
602 } else if (inp_p[0] == '=') { /* '+=' or '-=' */ 602 } else if (inp_p[0] == '=') { /* '+=' or '-=' */
603 token_add_char(*inp_p++); 603 token_add_char(*inp_p++);
604 604
605 } else if (inp_p[0] == '>') { /* '->' */ 605 } else if (inp_p[0] == '>') { /* '->' */
606 token_add_char(*inp_p++); 606 token_add_char(*inp_p++);
607 lsym = lsym_unary_op; 607 lsym = lsym_unary_op;
608 next_unary = false; 608 next_unary = false;
609 ps.want_blank = false; 609 ps.want_blank = false;
610 } 610 }
611 break; 611 break;
612 612
613 case ':': 613 case ':':
614 lsym = ps.quest_level > 0 614 lsym = ps.quest_level > 0
615 ? (ps.quest_level--, lsym_colon_question) 615 ? (ps.quest_level--, lsym_colon_question)
616 : ps.in_var_decl 616 : ps.in_var_decl
617 ? lsym_colon_other 617 ? lsym_colon_other
618 : lsym_colon_label; 618 : lsym_colon_label;
619 next_unary = true; 619 next_unary = true;
620 break; 620 break;
621 621
622 case '*': 622 case '*':
623 if (inp_p[0] == '=') { 623 if (inp_p[0] == '=') {
624 token_add_char(*inp_p++); 624 token_add_char(*inp_p++);
625 lsym = lsym_binary_op; 625 lsym = lsym_binary_op;
626 } else if (is_asterisk_unary()) { 626 } else if (is_asterisk_unary()) {
627 lex_asterisk_unary(); 627 lex_asterisk_unary();
628 lsym = lsym_unary_op; 628 lsym = lsym_unary_op;
629 } else 629 } else
630 lsym = lsym_binary_op; 630 lsym = lsym_binary_op;
631 next_unary = true; 631 next_unary = true;
632 break; 632 break;
633 633
634 case '=': 634 case '=':
635 if (ps.in_var_decl) 635 if (ps.in_var_decl)
636 ps.in_init = true; 636 ps.in_init = true;
637 if (inp_p[0] == '=') 637 if (inp_p[0] == '=')
638 token_add_char(*inp_p++); 638 token_add_char(*inp_p++);
639 lsym = lsym_binary_op; 639 lsym = lsym_binary_op;
640 next_unary = true; 640 next_unary = true;
641 break; 641 break;
642 642
643 case '>': 643 case '>':
644 case '<': 644 case '<':
645 case '!': /* ops like <, <<, <=, !=, etc */ 645 case '!': /* ops like <, <<, <=, !=, etc */
646 if (inp_p[0] == '>' || inp_p[0] == '<' || inp_p[0] == '=') 646 if (inp_p[0] == '>' || inp_p[0] == '<' || inp_p[0] == '=')
647 token_add_char(*inp_p++); 647 token_add_char(*inp_p++);
648 if (inp_p[0] == '=') 648 if (inp_p[0] == '=')
649 token_add_char(*inp_p++); 649 token_add_char(*inp_p++);
650 lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; 650 lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
651 next_unary = true; 651 next_unary = true;
652 break; 652 break;
653 653
654 case '\'': 654 case '\'':
655 case '"': 655 case '"':
656 lex_char_or_string(); 656 lex_char_or_string();
657 lsym = lsym_word; 657 lsym = lsym_word;
658 next_unary = false; 658 next_unary = false;
659 break; 659 break;
660 660
661 default: 661 default:
662 if (token.s[token.len - 1] == '/' 662 if (token.s[token.len - 1] == '/'
663 && (inp_p[0] == '*' || inp_p[0] == '/')) { 663 && (inp_p[0] == '*' || inp_p[0] == '/')) {
664 enum indent_enabled prev = indent_enabled; 664 enum indent_enabled prev = indent_enabled;
665 lex_indent_comment(); 665 lex_indent_comment();
666 if (prev == indent_on && indent_enabled == indent_off) 666 if (prev == indent_on && indent_enabled == indent_off)
667 out.indent_off_text.len = 0; 667 buf_clear(&out.indent_off_text);
668 token_add_char(*inp_p++); 668 token_add_char(*inp_p++);
669 lsym = lsym_comment; 669 lsym = lsym_comment;
670 next_unary = ps.next_unary; 670 next_unary = ps.next_unary;
671 break; 671 break;
672 } 672 }
673 673
674 /* things like '||', '&&', '<<=' */ 674 /* things like '||', '&&', '<<=' */
675 lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; 675 lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
676 if (inp_p[0] == token.s[token.len - 1]) 676 if (inp_p[0] == token.s[token.len - 1])
677 token_add_char(*inp_p++), lsym = lsym_binary_op; 677 token_add_char(*inp_p++), lsym = lsym_binary_op;
678 if (inp_p[0] == '=') 678 if (inp_p[0] == '=')
679 token_add_char(*inp_p++), lsym = lsym_binary_op; 679 token_add_char(*inp_p++), lsym = lsym_binary_op;
680 680
681 next_unary = true; 681 next_unary = true;
682 } 682 }
683 683
684 ps.next_unary = next_unary; 684 ps.next_unary = next_unary;
685 685
686 return lsym; 686 return lsym;
687} 687}
688 688
689void 689void
690register_typename(const char *name) 690register_typename(const char *name)
691{ 691{
692 if (typenames.len >= typenames.cap) { 692 if (typenames.len >= typenames.cap) {
693 typenames.cap = 16 + 2 * typenames.cap; 693 typenames.cap = 16 + 2 * typenames.cap;
694 typenames.items = nonnull(realloc(typenames.items, 694 typenames.items = nonnull(realloc(typenames.items,
695 sizeof(typenames.items[0]) * typenames.cap)); 695 sizeof(typenames.items[0]) * typenames.cap));
696 } 696 }
697 697
698 int pos = bsearch_typenames(name); 698 int pos = bsearch_typenames(name);
699 if (pos >= 0) 699 if (pos >= 0)
700 return; /* already in the list */ 700 return; /* already in the list */
701 701
702 pos = -(pos + 1); 702 pos = -(pos + 1);
703 memmove(typenames.items + pos + 1, typenames.items + pos, 703 memmove(typenames.items + pos + 1, typenames.items + pos,
704 sizeof(typenames.items[0]) * (typenames.len++ - (unsigned)pos)); 704 sizeof(typenames.items[0]) * (typenames.len++ - (unsigned)pos));
705 typenames.items[pos] = nonnull(strdup(name)); 705 typenames.items[pos] = nonnull(strdup(name));
706} 706}

cvs diff -r1.159 -r1.160 src/usr.bin/indent/pr_comment.c (switch to unified diff)

--- src/usr.bin/indent/pr_comment.c 2023/06/10 06:38:21 1.159
+++ src/usr.bin/indent/pr_comment.c 2023/06/10 12:59:31 1.160
@@ -1,356 +1,357 @@ @@ -1,356 +1,357 @@
1/* $NetBSD: pr_comment.c,v 1.159 2023/06/10 06:38:21 rillig Exp $ */ 1/* $NetBSD: pr_comment.c,v 1.160 2023/06/10 12:59:31 rillig Exp $ */
2 2
3/*- 3/*-
4 * SPDX-License-Identifier: BSD-4-Clause 4 * SPDX-License-Identifier: BSD-4-Clause
5 * 5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc. 6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993 7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved. 8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved. 9 * All rights reserved.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software 19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement: 20 * must display the following acknowledgement:
21 * This product includes software developed by the University of 21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors. 22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors 23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software 24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission. 25 * without specific prior written permission.
26 * 26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE. 37 * SUCH DAMAGE.
38 */ 38 */
39 39
40#include <sys/cdefs.h> 40#include <sys/cdefs.h>
41__RCSID("$NetBSD: pr_comment.c,v 1.159 2023/06/10 06:38:21 rillig Exp $"); 41__RCSID("$NetBSD: pr_comment.c,v 1.160 2023/06/10 12:59:31 rillig Exp $");
42 42
43#include <string.h> 43#include <string.h>
44 44
45#include "indent.h" 45#include "indent.h"
46 46
47static void 47static void
48com_add_char(char ch) 48com_add_char(char ch)
49{ 49{
50 buf_add_char(&com, ch); 50 buf_add_char(&com, ch);
51} 51}
52 52
53static void 53static void
54com_add_delim(void) 54com_add_delim(void)
55{ 55{
56 if (opt.star_comment_cont) 56 if (opt.star_comment_cont)
57 buf_add_chars(&com, " * ", 3); 57 buf_add_chars(&com, " * ", 3);
58} 58}
59 59
60static bool 60static bool
61fits_in_one_line(int com_ind, int max_line_length) 61fits_in_one_line(int com_ind, int max_line_length)
62{ 62{
63 for (const char *start = inp_p, *p = start; *p != '\n'; p++) { 63 for (const char *start = inp_p, *p = start; *p != '\n'; p++) {
64 if (p[0] == '*' && p[1] == '/') { 64 if (p[0] == '*' && p[1] == '/') {
65 while (p - inp_p >= 2 65 while (p - inp_p >= 2
66 && ch_isblank(p[-1]) 66 && ch_isblank(p[-1])
67 && ch_isblank(p[-2])) 67 && ch_isblank(p[-2]))
68 p--; 68 p--;
69 int len = ind_add(com_ind + 3, 69 int len = ind_add(com_ind + 3,
70 start, (size_t)(p - start)); 70 start, (size_t)(p - start));
71 len += p == start || ch_isblank(p[-1]) ? 2 : 3; 71 len += p == start || ch_isblank(p[-1]) ? 2 : 3;
72 return len <= max_line_length; 72 return len <= max_line_length;
73 } 73 }
74 } 74 }
75 return false; 75 return false;
76} 76}
77 77
78static void 78static void
79analyze_comment(bool *p_may_wrap, bool *p_delim, 79analyze_comment(bool *p_may_wrap, bool *p_delim,
80 int *p_ind, int *p_line_length) 80 int *p_ind, int *p_line_length)
81{ 81{
82 bool may_wrap = true; 82 bool may_wrap = true;
83 bool delim = false; 83 bool delim = false;
84 int ind; 84 int ind;
85 int line_length = opt.max_line_length; 85 int line_length = opt.max_line_length;
86 86
87 if (ps.curr_col_1 && !opt.format_col1_comments) { 87 if (ps.curr_col_1 && !opt.format_col1_comments) {
88 may_wrap = false; 88 may_wrap = false;
89 ind = 0; 89 ind = 0;
90 } else { 90 } else {
91 if (inp_p[0] == '-' || inp_p[0] == '*' || 91 if (inp_p[0] == '-' || inp_p[0] == '*' ||
92 token.s[token.len - 1] == '/' || 92 token.s[token.len - 1] == '/' ||
93 (inp_p[0] == '\n' && !opt.format_block_comments)) 93 (inp_p[0] == '\n' && !opt.format_block_comments))
94 may_wrap = false; 94 may_wrap = false;
95 if (code.len == 0 && inp_p[strspn(inp_p, "*")] == '\n') 95 if (code.len == 0 && inp_p[strspn(inp_p, "*")] == '\n')
96 out.line_kind = lk_block_comment; 96 out.line_kind = lk_block_comment;
97 97
98 if (com.len > 0) 98 if (com.len > 0)
99 output_line(); 99 output_line();
100 if (lab.len == 0 && code.len == 0) { 100 if (lab.len == 0 && code.len == 0) {
101 ind = (ps.ind_level - opt.unindent_displace) 101 ind = (ps.ind_level - opt.unindent_displace)
102 * opt.indent_size; 102 * opt.indent_size;
103 if (ind <= 0) 103 if (ind <= 0)
104 ind = opt.format_col1_comments ? 0 : 1; 104 ind = opt.format_col1_comments ? 0 : 1;
105 line_length = opt.block_comment_max_line_length; 105 line_length = opt.block_comment_max_line_length;
106 if (may_wrap && inp_p[0] == '\n') 106 if (may_wrap && inp_p[0] == '\n')
107 delim = true; 107 delim = true;
108 if (may_wrap && opt.comment_delimiter_on_blankline) 108 if (may_wrap && opt.comment_delimiter_on_blankline)
109 delim = true; 109 delim = true;
110 } else { 110 } else {
111 int target_ind = code.len > 0 111 int target_ind = code.len > 0
112 ? ind_add(compute_code_indent(), code.s, code.len) 112 ? ind_add(compute_code_indent(), code.s, code.len)
113 : ind_add(compute_label_indent(), lab.s, lab.len); 113 : ind_add(compute_label_indent(), lab.s, lab.len);
114 114
115 ind = ps.line_has_decl || ps.ind_level == 0 115 ind = ps.line_has_decl || ps.ind_level == 0
116 ? opt.decl_comment_column - 1 116 ? opt.decl_comment_column - 1
117 : opt.comment_column - 1; 117 : opt.comment_column - 1;
118 if (ind <= target_ind) 118 if (ind <= target_ind)
119 ind = next_tab(target_ind); 119 ind = next_tab(target_ind);
120 if (ind + 25 > line_length) 120 if (ind + 25 > line_length)
121 line_length = ind + 25; 121 line_length = ind + 25;
122 } 122 }
123 } 123 }
124 124
125 ps.com_ind = ind; 125 ps.com_ind = ind;
126 126
127 if (!may_wrap) { 127 if (!may_wrap) {
128 /* Find out how much indentation there was originally, because 128 /* Find out how much indentation there was originally, because
129 * that much will have to be ignored by output_line. */ 129 * that much will have to be ignored by output_line. */
130 size_t len = (size_t)(inp_p - 2 - inp.s); 130 size_t len = (size_t)(inp_p - 2 - inp.s);
131 ps.n_comment_delta = -ind_add(0, inp.s, len); 131 ps.n_comment_delta = -ind_add(0, inp.s, len);
132 } else { 132 } else {
133 ps.n_comment_delta = 0; 133 ps.n_comment_delta = 0;
134 if (!(inp_p[0] == '\t' && !ch_isblank(inp_p[1]))) 134 if (!(inp_p[0] == '\t' && !ch_isblank(inp_p[1])))
135 while (ch_isblank(inp_p[0])) 135 while (ch_isblank(inp_p[0]))
136 inp_p++; 136 inp_p++;
137 } 137 }
138 138
139 *p_may_wrap = may_wrap; 139 *p_may_wrap = may_wrap;
140 *p_delim = delim; 140 *p_delim = delim;
141 *p_ind = ind; 141 *p_ind = ind;
142 *p_line_length = line_length; 142 *p_line_length = line_length;
143} 143}
144 144
145static void 145static void
146copy_comment_start(bool may_wrap, bool *delim, int ind, int line_length) 146copy_comment_start(bool may_wrap, bool *delim, int ind, int line_length)
147{ 147{
148 ps.comment_delta = 0; 148 ps.comment_delta = 0;
149 com_add_char('/'); 149 com_add_char('/');
150 com_add_char(token.s[token.len - 1]); /* either '*' or '/' */ 150 com_add_char(token.s[token.len - 1]); /* either '*' or '/' */
151 151
152 if (may_wrap) { 152 if (may_wrap) {
153 if (!ch_isblank(inp_p[0])) 153 if (!ch_isblank(inp_p[0]))
154 com_add_char(' '); 154 com_add_char(' ');
155 155
156 if (*delim && fits_in_one_line(ind, line_length)) 156 if (*delim && fits_in_one_line(ind, line_length))
157 *delim = false; 157 *delim = false;
158 if (*delim) { 158 if (*delim) {
159 output_line(); 159 output_line();
160 com_add_delim(); 160 com_add_delim();
161 } 161 }
162 } 162 }
163} 163}
164 164
165static void 165static void
166copy_comment_wrap_text(int line_length, ssize_t *last_blank) 166copy_comment_wrap_text(int line_length, ssize_t *last_blank)
167{ 167{
168 int now_len = ind_add(ps.com_ind, com.s, com.len); 168 int now_len = ind_add(ps.com_ind, com.s, com.len);
169 for (;;) { 169 for (;;) {
170 char ch = inp_next(); 170 char ch = inp_next();
171 if (ch_isblank(ch)) 171 if (ch_isblank(ch))
172 *last_blank = (ssize_t)com.len; 172 *last_blank = (ssize_t)com.len;
173 com_add_char(ch); 173 com_add_char(ch);
174 now_len++; 174 now_len++;
175 if (memchr("*\n\r\b\t", inp_p[0], 6) != NULL) 175 if (memchr("*\n\r\b\t", inp_p[0], 6) != NULL)
176 break; 176 break;
177 if (now_len >= line_length && *last_blank != -1) 177 if (now_len >= line_length && *last_blank != -1)
178 break; 178 break;
179 } 179 }
180 180
181 ps.next_col_1 = false; 181 ps.next_col_1 = false;
182 182
183 if (now_len <= line_length) 183 if (now_len <= line_length)
184 return; 184 return;
185 if (ch_isspace(com.s[com.len - 1])) 185 if (ch_isspace(com.s[com.len - 1]))
186 return; 186 return;
187 187
188 if (*last_blank == -1) { 188 if (*last_blank == -1) {
189 /* only a single word in this line */ 189 /* only a single word in this line */
190 output_line(); 190 output_line();
191 com_add_delim(); 191 com_add_delim();
192 return; 192 return;
193 } 193 }
194 194
195 const char *last_word_s = com.s + *last_blank + 1; 195 const char *last_word_s = com.s + *last_blank + 1;
196 size_t last_word_len = com.len - (size_t)(*last_blank + 1); 196 size_t last_word_len = com.len - (size_t)(*last_blank + 1);
197 com.len = (size_t)*last_blank; 197 com.len = (size_t)*last_blank;
198 output_line(); 198 output_line();
199 com_add_delim(); 199 com_add_delim();
200 200
201 /* Assume that output_line and com_add_delim don't invalidate the 201 /* Assume that output_line and com_add_delim don't invalidate the
202 * "unused" part of the buffer beyond com.s + com.len. */ 202 * "unused" part of the buffer beyond com.s + com.len. */
203 memmove(com.s + com.len, last_word_s, last_word_len); 203 memmove(com.s + com.len, last_word_s, last_word_len);
204 com.len += last_word_len; 204 com.len += last_word_len;
205 *last_blank = -1; 205 *last_blank = -1;
206} 206}
207 207
208static bool 208static bool
209copy_comment_wrap_newline(ssize_t *last_blank) 209copy_comment_wrap_newline(ssize_t *last_blank)
210{ 210{
211 *last_blank = -1; 211 *last_blank = -1;
212 if (ps.next_col_1) { 212 if (ps.next_col_1) {
213 if (com.len == 0) 213 if (com.len == 0)
214 com_add_char(' '); /* force empty output line */ 214 com_add_char(' '); /* force empty output line */
215 if (com.len > 3) { 215 if (com.len > 3) {
216 output_line(); 216 output_line();
217 com_add_delim(); 217 com_add_delim();
218 } 218 }
219 output_line(); 219 output_line();
220 com_add_delim(); 220 com_add_delim();
221 } else { 221 } else {
222 ps.next_col_1 = true; 222 ps.next_col_1 = true;
223 if (!(com.len > 0 && ch_isblank(com.s[com.len - 1]))) 223 if (!(com.len > 0 && ch_isblank(com.s[com.len - 1])))
224 com_add_char(' '); 224 com_add_char(' ');
225 *last_blank = (int)com.len - 1; 225 *last_blank = (int)com.len - 1;
226 } 226 }
227 ++line_no; 227 ++line_no;
228 228
229 /* flush any blanks and/or tabs at start of next line */ 229 /* flush any blanks and/or tabs at start of next line */
230 inp_skip(); /* '\n' */ 230 inp_skip(); /* '\n' */
231 while (ch_isblank(inp_p[0])) 231 while (ch_isblank(inp_p[0]))
232 inp_p++; 232 inp_p++;
233 if (inp_p[0] == '*' && inp_p[1] == '/') 233 if (inp_p[0] == '*' && inp_p[1] == '/')
234 return false; 234 return false;
235 if (inp_p[0] == '*') { 235 if (inp_p[0] == '*') {
236 inp_p++; 236 inp_p++;
237 while (ch_isblank(inp_p[0])) 237 while (ch_isblank(inp_p[0]))
238 inp_p++; 238 inp_p++;
239 } 239 }
240 240
241 return true; 241 return true;
242} 242}
243 243
244static void 244static void
245copy_comment_wrap_finish(int line_length, bool delim) 245copy_comment_wrap_finish(int line_length, bool delim)
246{ 246{
247 if (delim) { 247 if (delim) {
248 if (com.len > 3) 248 if (com.len > 3)
249 output_line(); 249 output_line();
250 else 250 else
251 com.len = 0; 251 buf_clear(&com);
252 com_add_char(' '); 252 com_add_char(' ');
253 } else { 253 } else {
254 size_t len = com.len; 254 size_t len = com.len;
255 while (ch_isblank(com.s[len - 1])) 255 while (ch_isblank(com.s[len - 1]))
256 len--; 256 len--;
257 int end_ind = ind_add(ps.com_ind, com.s, len); 257 int end_ind = ind_add(ps.com_ind, com.s, len);
258 if (end_ind + 3 > line_length) 258 if (end_ind + 3 > line_length)
259 output_line(); 259 output_line();
260 } 260 }
261 261
262 while (com.len >= 2 262 while (com.len >= 2
263 && ch_isblank(com.s[com.len - 1]) 263 && ch_isblank(com.s[com.len - 1])
264 && ch_isblank(com.s[com.len - 2])) 264 && ch_isblank(com.s[com.len - 2]))
265 com.len--; 265 com.len--;
 266 buf_terminate(&com);
266 267
267 inp_p += 2; 268 inp_p += 2;
268 if (com.len > 0 && ch_isblank(com.s[com.len - 1])) 269 if (com.len > 0 && ch_isblank(com.s[com.len - 1]))
269 buf_add_chars(&com, "*/", 2); 270 buf_add_chars(&com, "*/", 2);
270 else 271 else
271 buf_add_chars(&com, " */", 3); 272 buf_add_chars(&com, " */", 3);
272} 273}
273 274
274/* 275/*
275 * Copy characters from 'inp' to 'com'. Try to keep comments from going over 276 * Copy characters from 'inp' to 'com'. Try to keep comments from going over
276 * the maximum line length. To do that, remember where the last blank, tab, or 277 * the maximum line length. To do that, remember where the last blank, tab, or
277 * newline was. When a line is filled, print up to the last blank and continue 278 * newline was. When a line is filled, print up to the last blank and continue
278 * copying. 279 * copying.
279 */ 280 */
280static void 281static void
281copy_comment_wrap(int line_length, bool delim) 282copy_comment_wrap(int line_length, bool delim)
282{ 283{
283 ssize_t last_blank = -1; /* index of the last blank in 'com' */ 284 ssize_t last_blank = -1; /* index of the last blank in 'com' */
284 285
285 for (;;) { 286 for (;;) {
286 if (inp_p[0] == '\n') { 287 if (inp_p[0] == '\n') {
287 if (had_eof) 288 if (had_eof)
288 goto unterminated_comment; 289 goto unterminated_comment;
289 if (!copy_comment_wrap_newline(&last_blank)) 290 if (!copy_comment_wrap_newline(&last_blank))
290 goto end_of_comment; 291 goto end_of_comment;
291 } else if (inp_p[0] == '*' && inp_p[1] == '/') 292 } else if (inp_p[0] == '*' && inp_p[1] == '/')
292 goto end_of_comment; 293 goto end_of_comment;
293 else 294 else
294 copy_comment_wrap_text(line_length, &last_blank); 295 copy_comment_wrap_text(line_length, &last_blank);
295 } 296 }
296 297
297end_of_comment: 298end_of_comment:
298 copy_comment_wrap_finish(line_length, delim); 299 copy_comment_wrap_finish(line_length, delim);
299 return; 300 return;
300 301
301unterminated_comment: 302unterminated_comment:
302 diag(1, "Unterminated comment"); 303 diag(1, "Unterminated comment");
303 output_line(); 304 output_line();
304} 305}
305 306
306static void 307static void
307copy_comment_nowrap(void) 308copy_comment_nowrap(void)
308{ 309{
309 char kind = token.s[token.len - 1]; 310 char kind = token.s[token.len - 1];
310 311
311 for (;;) { 312 for (;;) {
312 if (inp_p[0] == '\n') { 313 if (inp_p[0] == '\n') {
313 if (kind == '/') 314 if (kind == '/')
314 return; 315 return;
315 316
316 if (had_eof) { 317 if (had_eof) {
317 diag(1, "Unterminated comment"); 318 diag(1, "Unterminated comment");
318 output_line(); 319 output_line();
319 return; 320 return;
320 } 321 }
321 322
322 if (com.len == 0) 323 if (com.len == 0)
323 com_add_char(' '); /* force output of an 324 com_add_char(' '); /* force output of an
324 * empty line */ 325 * empty line */
325 output_line(); 326 output_line();
326 ++line_no; 327 ++line_no;
327 inp_skip(); 328 inp_skip();
328 continue; 329 continue;
329 } 330 }
330 331
331 com_add_char(*inp_p++); 332 com_add_char(*inp_p++);
332 if (com.len >= 2 333 if (com.len >= 2
333 && com.s[com.len - 2] == '*' 334 && com.s[com.len - 2] == '*'
334 && com.s[com.len - 1] == '/' 335 && com.s[com.len - 1] == '/'
335 && kind == '*') 336 && kind == '*')
336 return; 337 return;
337 } 338 }
338} 339}
339 340
340/* 341/*
341 * Scan, reformat and output a single comment, which is either a block comment 342 * Scan, reformat and output a single comment, which is either a block comment
342 * starting with '/' '*' or an end-of-line comment starting with '//'. 343 * starting with '/' '*' or an end-of-line comment starting with '//'.
343 */ 344 */
344void 345void
345process_comment(void) 346process_comment(void)
346{ 347{
347 bool may_wrap, delim; 348 bool may_wrap, delim;
348 int ind, line_length; 349 int ind, line_length;
349 350
350 analyze_comment(&may_wrap, &delim, &ind, &line_length); 351 analyze_comment(&may_wrap, &delim, &ind, &line_length);
351 copy_comment_start(may_wrap, &delim, ind, line_length); 352 copy_comment_start(may_wrap, &delim, ind, line_length);
352 if (may_wrap) 353 if (may_wrap)
353 copy_comment_wrap(line_length, delim); 354 copy_comment_wrap(line_length, delim);
354 else 355 else
355 copy_comment_nowrap(); 356 copy_comment_nowrap();
356} 357}