Sat Mar 13 09:21:57 2021 UTC ()
indent: add debug logging for actually writing to the output file

Together with the results of the tokenizer and the four buffers for token,
label, code and comment, the debug log now provides a good high-level
view of how the indentation happens and where to look for the many
remaining bugs.


(rillig)
diff -r1.47 -r1.48 src/usr.bin/indent/indent.c
diff -r1.8 -r1.9 src/usr.bin/indent/indent.h
diff -r1.35 -r1.36 src/usr.bin/indent/io.c
diff -r1.38 -r1.39 src/usr.bin/indent/lexi.c

cvs diff -r1.47 -r1.48 src/usr.bin/indent/indent.c (expand / switch to context diff)
--- src/usr.bin/indent/indent.c 2021/03/13 00:26:56 1.47
+++ src/usr.bin/indent/indent.c 2021/03/13 09:21:57 1.48
@@ -1,4 +1,4 @@
-/*	$NetBSD: indent.c,v 1.47 2021/03/13 00:26:56 rillig Exp $	*/
+/*	$NetBSD: indent.c,v 1.48 2021/03/13 09:21:57 rillig Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -46,7 +46,7 @@
 #include <sys/cdefs.h>
 #ifndef lint
 #if defined(__NetBSD__)
-__RCSID("$NetBSD: indent.c,v 1.47 2021/03/13 00:26:56 rillig Exp $");
+__RCSID("$NetBSD: indent.c,v 1.48 2021/03/13 09:21:57 rillig Exp $");
 #elif defined(__FreeBSD__)
 __FBSDID("$FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $");
 #endif
@@ -1389,3 +1389,46 @@
 	ps.want_blank = false;
     }
 }
+
+#ifdef debug
+void
+debug_printf(const char *fmt, ...)
+{
+    FILE *f = output == stdout ? stderr : stdout;
+    va_list ap;
+
+    va_start(ap, fmt);
+    vfprintf(f, fmt, ap);
+    va_end(ap);
+}
+
+void
+debug_println(const char *fmt, ...)
+{
+    FILE *f = output == stdout ? stderr : stdout;
+    va_list ap;
+
+    va_start(ap, fmt);
+    vfprintf(f, fmt, ap);
+    va_end(ap);
+    fprintf(f, "\n");
+}
+
+void
+debug_vis_range(const char *prefix, const char *s, const char *e,
+		const char *suffix)
+{
+    debug_printf("%s", prefix);
+    for (const char *p = s; p < e; p++) {
+	if (isprint((unsigned char)*p) && *p != '\\' && *p != '"')
+	    debug_printf("%c", *p);
+	else if (*p == '\n')
+	    debug_printf("\\n");
+	else if (*p == '\t')
+	    debug_printf("\\t");
+	else
+	    debug_printf("\\x%02x", *p);
+    }
+    debug_printf("%s", suffix);
+}
+#endif

cvs diff -r1.8 -r1.9 src/usr.bin/indent/indent.h (expand / switch to context diff)
--- src/usr.bin/indent/indent.h 2021/03/13 00:26:56 1.8
+++ src/usr.bin/indent/indent.h 2021/03/13 09:21:57 1.9
@@ -1,4 +1,4 @@
-/*	$NetBSD: indent.h,v 1.8 2021/03/13 00:26:56 rillig Exp $	*/
+/*	$NetBSD: indent.h,v 1.9 2021/03/13 09:21:57 rillig Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
@@ -30,7 +30,7 @@
 
 #if 0
 #if defined(__NetBSD__)
-__RCSID("$NetBSD: indent.h,v 1.8 2021/03/13 00:26:56 rillig Exp $");
+__RCSID("$NetBSD: indent.h,v 1.9 2021/03/13 09:21:57 rillig Exp $");
 #elif defined(__FreeBSD__)
 __FBSDID("$FreeBSD: head/usr.bin/indent/indent.h 336333 2018-07-16 05:46:50Z pstef $");
 #endif
@@ -51,7 +51,14 @@
 int	count_spaces_until(int, const char *, const char *);
 void	init_constant_tt(void);
 #ifdef debug
+void	debug_vis_range(const char *, const char *, const char *, const char *);
+void	debug_printf(const char *, ...) __printflike(1, 2);
+void	debug_println(const char *, ...) __printflike(1, 2);
 const char *token_type_name(token_type);
+#else
+#define debug_printf(fmt, ...) do { } while (false)
+#define debug_println(fmt, ...) do { } while (false)
+#define debug_vis_range(prefix, s, e, suffix) do { } while (false)
 #endif
 token_type lexi(struct parser_state *);
 void	diag(int, const char *, ...) __printflike(2, 3);

cvs diff -r1.35 -r1.36 src/usr.bin/indent/io.c (expand / switch to context diff)
--- src/usr.bin/indent/io.c 2021/03/13 09:06:12 1.35
+++ src/usr.bin/indent/io.c 2021/03/13 09:21:57 1.36
@@ -1,4 +1,4 @@
-/*	$NetBSD: io.c,v 1.35 2021/03/13 09:06:12 rillig Exp $	*/
+/*	$NetBSD: io.c,v 1.36 2021/03/13 09:21:57 rillig Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -46,7 +46,7 @@
 #include <sys/cdefs.h>
 #ifndef lint
 #if defined(__NetBSD__)
-__RCSID("$NetBSD: io.c,v 1.35 2021/03/13 09:06:12 rillig Exp $");
+__RCSID("$NetBSD: io.c,v 1.36 2021/03/13 09:21:57 rillig Exp $");
 #elif defined(__FreeBSD__)
 __FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $");
 #endif
@@ -68,12 +68,14 @@
 output_char(char ch)
 {
     fputc(ch, output);
+    debug_vis_range("output_char '", &ch, &ch + 1, "'\n");
 }
 
 static void
 output_range(const char *s, const char *e)
 {
     fwrite(s, 1, (size_t)(e - s), output);
+    debug_vis_range("output_range \"", s, e, "\"\n");
 }
 
 static inline void
@@ -93,14 +95,15 @@
 	if (n > 0)
 	    ind -= ind % tabsize;
 	for (int i = 0; i < n; i++) {
-	    output_char('\t');
+	    fputc('\t', output);
 	    ind += tabsize;
 	}
     }
 
     for (; ind < new_ind; ind++)
-        output_char(' ');
+        fputc(' ', output);
 
+    debug_println("output_indent %d", ind);
     return ind;
 }
 
@@ -196,9 +199,19 @@
 	    {
 		int i;
 
-		for (i = 0; i < ps.p_l_follow; i++)
-		    if (ps.paren_indents[i] >= 0)
-			ps.paren_indents[i] = -(ps.paren_indents[i] + target_col);
+		for (i = 0; i < ps.p_l_follow; i++) {
+		    if (ps.paren_indents[i] >= 0) {
+			int ind = ps.paren_indents[i];
+			/*
+			 * XXX: this mix of 'indent' and 'column' smells like
+			 * an off-by-one error.
+			 */
+			ps.paren_indents[i] = -(ind + target_col);
+			debug_println(
+			    "setting pi[%d] from %d to %d for column %d",
+			    i, ind, ps.paren_indents[i], target_col);
+		    }
+		}
 	    }
 	    cur_col = 1 + output_indent(cur_col - 1, target_col - 1);
 	    output_range(s_code, e_code);
@@ -265,8 +278,11 @@
     *(e_com = s_com = combuf + 1) = '\0';
     ps.ind_level = ps.i_l_follow;
     ps.paren_level = ps.p_l_follow;
-    if (ps.paren_level > 0)
+    if (ps.paren_level > 0) {
+        /* TODO: explain what negative indentation means */
 	paren_indent = -ps.paren_indents[ps.paren_level - 1];
+	debug_println("paren_indent is now %d", paren_indent);
+    }
     not_first_line = 1;
 }
 

cvs diff -r1.38 -r1.39 src/usr.bin/indent/lexi.c (expand / switch to context diff)
--- src/usr.bin/indent/lexi.c 2021/03/12 23:10:18 1.38
+++ src/usr.bin/indent/lexi.c 2021/03/13 09:21:57 1.39
@@ -1,4 +1,4 @@
-/*	$NetBSD: lexi.c,v 1.38 2021/03/12 23:10:18 rillig Exp $	*/
+/*	$NetBSD: lexi.c,v 1.39 2021/03/13 09:21:57 rillig Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -46,7 +46,7 @@
 #include <sys/cdefs.h>
 #ifndef lint
 #if defined(__NetBSD__)
-__RCSID("$NetBSD: lexi.c,v 1.38 2021/03/12 23:10:18 rillig Exp $");
+__RCSID("$NetBSD: lexi.c,v 1.39 2021/03/13 09:21:57 rillig Exp $");
 #elif defined(__FreeBSD__)
 __FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 337862 2018-08-15 18:19:45Z pstef $");
 #endif
@@ -261,32 +261,22 @@
 static void
 print_buf(const char *name, const char *s, const char *e)
 {
-    if (s == e)
-	return;
-
-    printf(" %s \"", name);
-    for (const char *p = s; p < e; p++) {
-	if (isprint((unsigned char)*p) && *p != '\\' && *p != '"')
-	    printf("%c", *p);
-	else if (*p == '\n')
-	    printf("\\n");
-	else if (*p == '\t')
-	    printf("\\t");
-	else
-	    printf("\\x%02x", *p);
+    if (s < e) {
+	debug_printf(" %s ", name);
+	debug_vis_range("\"", s, e, "\"");
     }
-    printf("\"");
 }
 
 static token_type
 lexi_end(token_type code)
 {
-    printf("in line %d, lexi returns '%s'", line_no, token_type_name(code));
+    debug_printf("in line %d, lexi returns '%s'",
+	line_no, token_type_name(code));
     print_buf("token", s_token, e_token);
     print_buf("label", s_lab, e_lab);
     print_buf("code", s_code, e_code);
     print_buf("comment", s_com, e_com);
-    printf("\n");
+    debug_printf("\n");
 
     return code;
 }