Thu Nov 25 18:36:30 2021 UTC
indent: improve heuristic for spaces around '*' in declarations


(rillig)
diff -r1.28 -r1.29 src/tests/usr.bin/indent/fmt_decl.c
diff -r1.2 -r1.3 src/tests/usr.bin/indent/lsym_sizeof.c
diff -r1.162 -r1.163 src/usr.bin/indent/lexi.c

cvs diff -r1.28 -r1.29 src/tests/usr.bin/indent/fmt_decl.c (expand / switch to unified diff)

--- src/tests/usr.bin/indent/fmt_decl.c 2021/11/25 18:20:21 1.28
+++ src/tests/usr.bin/indent/fmt_decl.c 2021/11/25 18:36:30 1.29
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: fmt_decl.c,v 1.28 2021/11/25 18:20:21 rillig Exp $ */ 1/* $NetBSD: fmt_decl.c,v 1.29 2021/11/25 18:36:30 rillig Exp $ */
2/* $FreeBSD: head/usr.bin/indent/tests/declarations.0 334478 2018-06-01 09:41:15Z pstef $ */ 2/* $FreeBSD: head/usr.bin/indent/tests/declarations.0 334478 2018-06-01 09:41:15Z pstef $ */
3 3
4/* 4/*
5 * Tests for declarations of global variables, external functions, and local 5 * Tests for declarations of global variables, external functions, and local
6 * variables. 6 * variables.
7 * 7 *
8 * See also: 8 * See also:
9 * opt_di.c 9 * opt_di.c
10 */ 10 */
11 11
12/* See FreeBSD r303570 */ 12/* See FreeBSD r303570 */
13 13
14/* 14/*
@@ -580,27 +580,37 @@ int *aaaaaaaaaaaaaaaaaaaaaaaaaaaa @@ -580,27 +580,37 @@ int *aaaaaaaaaaaaaaaaaaaaaaaaaaaa
580 */ 580 */
581#indent input 581#indent input
582void buffer_add(buffer *, char); 582void buffer_add(buffer *, char);
583void buffer_add(buffer *buf, char ch); 583void buffer_add(buffer *buf, char ch);
584 584
585void 585void
586buffer_add(buffer *buf, char ch) 586buffer_add(buffer *buf, char ch)
587{ 587{
588 *buf->e++ = ch; 588 *buf->e++ = ch;
589} 589}
590#indent end 590#indent end
591 591
592/* Before lexi.c 1.156 from 2021-11-25, indent generated 'buffer * buf'. */ 592/* Before lexi.c 1.156 from 2021-11-25, indent generated 'buffer * buf'. */
593#indent run-equals-input 593#indent run
 594void buffer_add(buffer *, char);
 595/* $ FIXME: space after '*' */
 596void buffer_add(buffer * buf, char ch);
 597
 598void
 599buffer_add(buffer *buf, char ch)
 600{
 601 *buf->e++ = ch;
 602}
 603#indent end
594 604
595 605
596/* 606/*
597 * Indent gets easily confused by type names it does not know about. 607 * Indent gets easily confused by type names it does not know about.
598 */ 608 */
599#indent input 609#indent input
600static Token 610static Token
601ToToken(bool cond) 611ToToken(bool cond)
602{ 612{
603} 613}
604#indent end 614#indent end
605 615
606#indent run-equals-input -TToken 616#indent run-equals-input -TToken
@@ -783,43 +793,38 @@ number *var = a * b; @@ -783,43 +793,38 @@ number *var = a * b;
783 793
784void 794void
785function(void) 795function(void)
786{ 796{
787 number *var = a * b; 797 number *var = a * b;
788} 798}
789#indent end 799#indent end
790 800
791#indent run-equals-input -di0 801#indent run-equals-input -di0
792 802
793 803
794/* 804/*
795 * In declarations, most occurrences of '*' are pointer type derivations. 805 * In declarations, most occurrences of '*' are pointer type derivations.
796 * There are a few exceptions though. 806 * There are a few exceptions though. Some of these are hard to detect
797 * 807 * without knowing which identifiers are type names.
798 * Broken since lexi.c 1.156 from 2021-11-25. 
799 */ 808 */
800#indent input 809#indent input
801char str[expr * expr]; 810char str[expr * expr];
802char str[expr**ptr]; 811char str[expr**ptr];
803char str[*ptr**ptr]; 812char str[*ptr**ptr];
804char str[sizeof(expr * expr)]; 813char str[sizeof(expr * expr)];
805char str[sizeof(int) * expr]; 814char str[sizeof(int) * expr];
806char str[sizeof(*ptr)]; 815char str[sizeof(*ptr)];
807char str[sizeof(type**)]; 816char str[sizeof(type**)];
808char str[sizeof(**ptr)]; 817char str[sizeof(**ptr)];
809#indent end 818#indent end
810 819
811#indent run -di0 820#indent run -di0
812/* $ FIXME: The '*' must be a binary operator. */ 821char str[expr * expr];
813char str[expr *expr]; 822char str[expr * *ptr];
814/* $ FIXME: The first '*' must be a binary operator. */ 823char str[*ptr * *ptr];
815char str[expr **ptr]; 824char str[sizeof(expr * expr)];
816/* $ FIXME: The second '*' must be a binary operator. */ 825char str[sizeof(int) * expr];
817char str[*ptr **ptr]; 
818/* $ FIXME: The '*' must be a binary operator. */ 
819char str[sizeof(expr *expr)]; 
820/* $ FIXME: The '*' must be a binary operator. */ 
821char str[sizeof(int) *expr]; 
822char str[sizeof(*ptr)]; 826char str[sizeof(*ptr)];
823char str[sizeof(type **)]; 827/* $ FIXME: should be 'type **' */
 828char str[sizeof(type * *)];
824char str[sizeof(**ptr)]; 829char str[sizeof(**ptr)];
825#indent end 830#indent end

cvs diff -r1.2 -r1.3 src/tests/usr.bin/indent/lsym_sizeof.c (expand / switch to unified diff)

--- src/tests/usr.bin/indent/lsym_sizeof.c 2021/11/25 18:10:23 1.2
+++ src/tests/usr.bin/indent/lsym_sizeof.c 2021/11/25 18:36:30 1.3
@@ -1,33 +1,28 @@ @@ -1,33 +1,28 @@
1/* $NetBSD: lsym_sizeof.c,v 1.2 2021/11/25 18:10:23 rillig Exp $ */ 1/* $NetBSD: lsym_sizeof.c,v 1.3 2021/11/25 18:36:30 rillig Exp $ */
2/* $FreeBSD$ */ 2/* $FreeBSD$ */
3 3
4/* 4/*
5 * Tests for the token lsym_sizeof, which represents the keyword 'sizeof' for 5 * Tests for the token lsym_sizeof, which represents the keyword 'sizeof' for
6 * determining the memory size of an object or a type. 6 * determining the memory size of an object or a type.
7 * 7 *
8 * See also: 8 * See also:
9 * opt_bs.c "blank after sizeof" 9 * opt_bs.c "blank after sizeof"
10 * C11 6.5.3.4 "The 'sizeof' and '_Alignof' operators" 10 * C11 6.5.3.4 "The 'sizeof' and '_Alignof' operators"
11 */ 11 */
12 12
13#indent input 13#indent input
14// TODO: add input 14// TODO: add input
15#indent end 15#indent end
16 16
17#indent run-equals-input 17#indent run-equals-input
18 18
19 19
20/* 20/*
21 * After 'sizeof', a type name in parentheses does not start a cast 21 * After 'sizeof', a type name in parentheses does not start a cast
22 * expression. 22 * expression.
23 * 
24 * Broken since lexi.c 1.156 from 2021-11-25. 
25 */ 23 */
26#indent input 24#indent input
27char str[sizeof(int) * CHAR_BIT + 1]; 25char str[sizeof(int) * CHAR_BIT + 1];
28#indent end 26#indent end
29 27
30/* FIXME: The '*' must be a binary operator here. */ 28#indent run-equals-input -di0
31#indent run -di0 
32char str[sizeof(int) *CHAR_BIT + 1]; 
33#indent end 

cvs diff -r1.162 -r1.163 src/usr.bin/indent/lexi.c (expand / switch to unified diff)

--- src/usr.bin/indent/lexi.c 2021/11/25 17:50:00 1.162
+++ src/usr.bin/indent/lexi.c 2021/11/25 18:36:30 1.163
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: lexi.c,v 1.162 2021/11/25 17:50:00 rillig Exp $ */ 1/* $NetBSD: lexi.c,v 1.163 2021/11/25 18:36:30 rillig Exp $ */
2 2
3/*- 3/*-
4 * SPDX-License-Identifier: BSD-4-Clause 4 * SPDX-License-Identifier: BSD-4-Clause
5 * 5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc. 6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993 7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved. 8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved. 9 * All rights reserved.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
@@ -33,27 +33,27 @@ @@ -33,27 +33,27 @@
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE. 37 * SUCH DAMAGE.
38 */ 38 */
39 39
40#if 0 40#if 0
41static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 41static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
42#endif 42#endif
43 43
44#include <sys/cdefs.h> 44#include <sys/cdefs.h>
45#if defined(__NetBSD__) 45#if defined(__NetBSD__)
46__RCSID("$NetBSD: lexi.c,v 1.162 2021/11/25 17:50:00 rillig Exp $"); 46__RCSID("$NetBSD: lexi.c,v 1.163 2021/11/25 18:36:30 rillig Exp $");
47#elif defined(__FreeBSD__) 47#elif defined(__FreeBSD__)
48__FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 337862 2018-08-15 18:19:45Z pstef $"); 48__FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 337862 2018-08-15 18:19:45Z pstef $");
49#endif 49#endif
50 50
51#include <stdlib.h> 51#include <stdlib.h>
52#include <string.h> 52#include <string.h>
53 53
54#include "indent.h" 54#include "indent.h"
55 55
56/* 56/*
57 * While inside lexi_alnum, this constant just marks a type, independently of 57 * While inside lexi_alnum, this constant just marks a type, independently of
58 * the parentheses level. 58 * the parentheses level.
59 */ 59 */
@@ -542,26 +542,37 @@ found_typename: @@ -542,26 +542,37 @@ found_typename:
542 if (ps.in_decl) 542 if (ps.in_decl)
543 ps.in_parameter_declaration = true; 543 ps.in_parameter_declaration = true;
544 return lsym_funcname; 544 return lsym_funcname;
545 } 545 }
546 546
547 } else if (ps.p_l_follow == 0 && probably_typename()) { 547 } else if (ps.p_l_follow == 0 && probably_typename()) {
548 ps.next_unary = true; 548 ps.next_unary = true;
549 return lsym_type_outside_parentheses; 549 return lsym_type_outside_parentheses;
550 } 550 }
551 551
552 return is_type ? lsym_type_in_parentheses : lsym_word; 552 return is_type ? lsym_type_in_parentheses : lsym_word;
553} 553}
554 554
 555static bool
 556is_asterisk_unary(void)
 557{
 558 if (ps.next_unary || ps.in_parameter_declaration)
 559 return true;
 560 if (ps.prev_token == lsym_word ||
 561 ps.prev_token == lsym_rparen_or_rbracket)
 562 return false;
 563 return ps.in_decl && ps.p_l_follow > 0;
 564}
 565
555static void 566static void
556lex_asterisk_unary(void) 567lex_asterisk_unary(void)
557{ 568{
558 while (inp_peek() == '*' || ch_isspace(inp_peek())) { 569 while (inp_peek() == '*' || ch_isspace(inp_peek())) {
559 if (inp_peek() == '*') 570 if (inp_peek() == '*')
560 token_add_char('*'); 571 token_add_char('*');
561 inp_skip(); 572 inp_skip();
562 } 573 }
563 574
564 if (ps.in_decl) { 575 if (ps.in_decl) {
565 const char *tp = inp_p(), *e = inp_line_end(); 576 const char *tp = inp_p(), *e = inp_line_end();
566 577
567 while (tp < e) { 578 while (tp < e) {
@@ -684,28 +695,27 @@ lexi(void) @@ -684,28 +695,27 @@ lexi(void)
684 695
685 case '>': 696 case '>':
686 case '<': 697 case '<':
687 case '!': /* ops like <, <<, <=, !=, etc */ 698 case '!': /* ops like <, <<, <=, !=, etc */
688 if (inp_peek() == '>' || inp_peek() == '<' || inp_peek() == '=') 699 if (inp_peek() == '>' || inp_peek() == '<' || inp_peek() == '=')
689 *token.e++ = inp_next(); 700 *token.e++ = inp_next();
690 if (inp_peek() == '=') 701 if (inp_peek() == '=')
691 *token.e++ = inp_next(); 702 *token.e++ = inp_next();
692 lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; 703 lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
693 next_unary = true; 704 next_unary = true;
694 break; 705 break;
695 706
696 case '*': 707 case '*':
697 if (ps.next_unary || ps.in_parameter_declaration || 708 if (is_asterisk_unary()) {
698 (ps.in_decl && ps.p_l_follow > 0)) { 
699 lex_asterisk_unary(); 709 lex_asterisk_unary();
700 lsym = lsym_unary_op; 710 lsym = lsym_unary_op;
701 next_unary = true; 711 next_unary = true;
702 } else { 712 } else {
703 if (inp_peek() == '=') 713 if (inp_peek() == '=')
704 *token.e++ = inp_next(); 714 *token.e++ = inp_next();
705 lsym = lsym_binary_op; 715 lsym = lsym_binary_op;
706 next_unary = true; 716 next_unary = true;
707 } 717 }
708 break; 718 break;
709 719
710 default: 720 default:
711 if (token.e[-1] == '/' && (inp_peek() == '*' || inp_peek() == '/')) { 721 if (token.e[-1] == '/' && (inp_peek() == '*' || inp_peek() == '/')) {