Sun Jul 19 22:04:28 2020 UTC ()
make(1): make ampersand in ${VAR:from=to&} an ordinary character

In SysV substitutions, the wildcard is expressed with % rather than &.  The
& is not mentioned in the manual page, and having a second wildcard that
stands for the whole word is such an obscure feature that not even pkgsrc
uses it.  Until now, the easiest ways to discover this feature were to read
the source code of make(1) or to run a fuzzer and accidentally stumble upon
this edge case.


(rillig)
diff -r1.275 -r1.276 src/usr.bin/make/var.c
diff -r1.5 -r1.6 src/usr.bin/make/unit-tests/sysv.exp
diff -r1.6 -r1.7 src/usr.bin/make/unit-tests/sysv.mk

cvs diff -r1.275 -r1.276 src/usr.bin/make/var.c (expand / switch to unified diff)

--- src/usr.bin/make/var.c 2020/07/19 21:30:49 1.275
+++ src/usr.bin/make/var.c 2020/07/19 22:04:27 1.276
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: var.c,v 1.275 2020/07/19 21:30:49 rillig Exp $ */ 1/* $NetBSD: var.c,v 1.276 2020/07/19 22:04:27 rillig Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 1988, 1989, 1990, 1993 4 * Copyright (c) 1988, 1989, 1990, 1993
5 * The Regents of the University of California. All rights reserved. 5 * The Regents of the University of California. All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to Berkeley by 7 * This code is derived from software contributed to Berkeley by
8 * Adam de Boor. 8 * Adam de Boor.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
@@ -59,34 +59,34 @@ @@ -59,34 +59,34 @@
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE. 68 * SUCH DAMAGE.
69 */ 69 */
70 70
71#ifndef MAKE_NATIVE 71#ifndef MAKE_NATIVE
72static char rcsid[] = "$NetBSD: var.c,v 1.275 2020/07/19 21:30:49 rillig Exp $"; 72static char rcsid[] = "$NetBSD: var.c,v 1.276 2020/07/19 22:04:27 rillig Exp $";
73#else 73#else
74#include <sys/cdefs.h> 74#include <sys/cdefs.h>
75#ifndef lint 75#ifndef lint
76#if 0 76#if 0
77static char sccsid[] = "@(#)var.c 8.3 (Berkeley) 3/19/94"; 77static char sccsid[] = "@(#)var.c 8.3 (Berkeley) 3/19/94";
78#else 78#else
79__RCSID("$NetBSD: var.c,v 1.275 2020/07/19 21:30:49 rillig Exp $"); 79__RCSID("$NetBSD: var.c,v 1.276 2020/07/19 22:04:27 rillig Exp $");
80#endif 80#endif
81#endif /* not lint */ 81#endif /* not lint */
82#endif 82#endif
83 83
84/*- 84/*-
85 * var.c -- 85 * var.c --
86 * Variable-handling functions 86 * Variable-handling functions
87 * 87 *
88 * Interface: 88 * Interface:
89 * Var_Set Set the value of a variable in the given 89 * Var_Set Set the value of a variable in the given
90 * context. The variable is created if it doesn't 90 * context. The variable is created if it doesn't
91 * yet exist. The value and variable name need not 91 * yet exist. The value and variable name need not
92 * be preserved. 92 * be preserved.
@@ -1298,42 +1298,47 @@ Str_SYSVSubst(Buffer *buf, const char *p @@ -1298,42 +1298,47 @@ Str_SYSVSubst(Buffer *buf, const char *p
1298 /* skip the % */ 1298 /* skip the % */
1299 pat = m + 1; 1299 pat = m + 1;
1300 } 1300 }
1301 if (m != NULL || !lhsHasPercent) { 1301 if (m != NULL || !lhsHasPercent) {
1302 /* Copy the pattern */ 1302 /* Copy the pattern */
1303 Buf_AddBytes(buf, len, src); 1303 Buf_AddBytes(buf, len, src);
1304 } 1304 }
1305 1305
1306 /* append the rest */ 1306 /* append the rest */
1307 Buf_AddBytes(buf, strlen(pat), pat); 1307 Buf_AddBytes(buf, strlen(pat), pat);
1308} 1308}
1309 1309
1310 1310
 1311typedef struct {
 1312 const char *lhs;
 1313 const char *rhs;
 1314} VarSYSVSubstArgs;
 1315
1311/* Callback function for VarModify to implement the :%.from=%.to modifier. */ 1316/* Callback function for VarModify to implement the :%.from=%.to modifier. */
1312static Boolean 1317static Boolean
1313VarSYSVMatch(GNode *ctx, Var_Parse_State *vpstate, 1318VarSYSVSubst(GNode *ctx, Var_Parse_State *vpstate,
1314 const char *word, Boolean addSpace, Buffer *buf, 1319 const char *word, Boolean addSpace, Buffer *buf,
1315 void *data) 1320 void *data)
1316{ 1321{
1317 size_t len; 1322 const VarSYSVSubstArgs *args = data;
1318 const char *ptr; 
1319 Boolean hasPercent; 
1320 VarPattern *pat = data; 
1321 1323
1322 if (addSpace && vpstate->varSpace != '\0') 1324 if (addSpace && vpstate->varSpace != '\0')
1323 Buf_AddByte(buf, vpstate->varSpace); 1325 Buf_AddByte(buf, vpstate->varSpace);
1324 1326
1325 if ((ptr = Str_SYSVMatch(word, pat->lhs, &len, &hasPercent)) != NULL) { 1327 size_t len;
1326 char *varexp = Var_Subst(NULL, pat->rhs, ctx, VARE_WANTRES); 1328 Boolean hasPercent;
 1329 const char *ptr = Str_SYSVMatch(word, args->lhs, &len, &hasPercent);
 1330 if (ptr != NULL) {
 1331 char *varexp = Var_Subst(NULL, args->rhs, ctx, VARE_WANTRES);
1327 Str_SYSVSubst(buf, varexp, ptr, len, hasPercent); 1332 Str_SYSVSubst(buf, varexp, ptr, len, hasPercent);
1328 free(varexp); 1333 free(varexp);
1329 } else { 1334 } else {
1330 Buf_AddBytes(buf, strlen(word), word); 1335 Buf_AddBytes(buf, strlen(word), word);
1331 } 1336 }
1332 1337
1333 return TRUE; 1338 return TRUE;
1334} 1339}
1335#endif 1340#endif
1336 1341
1337/* Callback function for VarModify to implement the :N modifier. 1342/* Callback function for VarModify to implement the :N modifier.
1338 * Place the word in the buffer if it doesn't match the given pattern. */ 1343 * Place the word in the buffer if it doesn't match the given pattern. */
1339static Boolean 1344static Boolean
@@ -3157,89 +3162,81 @@ ApplyModifier_Remember(ApplyModifiersSta @@ -3157,89 +3162,81 @@ ApplyModifier_Remember(ApplyModifiersSta
3157 } else { 3162 } else {
3158 Var_Set("_", st->nstr, st->ctxt); 3163 Var_Set("_", st->nstr, st->ctxt);
3159 } 3164 }
3160 st->newStr = st->nstr; 3165 st->newStr = st->nstr;
3161 st->termc = *st->cp; 3166 st->termc = *st->cp;
3162 return TRUE; 3167 return TRUE;
3163} 3168}
3164 3169
3165#ifdef SYSVVARSUB 3170#ifdef SYSVVARSUB
3166/* :from=to */ 3171/* :from=to */
3167static int 3172static int
3168ApplyModifier_SysV(ApplyModifiersState *st) 3173ApplyModifier_SysV(ApplyModifiersState *st)
3169{ 3174{
3170 /* 3175 Boolean eqFound = FALSE;
3171 * This can either be a bogus modifier or a System-V 
3172 * substitution command. 
3173 */ 
3174 VarPattern pattern; 
3175 /* FIXME: SysV modifiers have nothing to do with :S or :C pattern matching */ 
3176 Boolean eqFound = FALSE; 
3177 
3178 pattern.pflags = 0; 
3179 3176
3180 /* 3177 /*
3181 * First we make a pass through the string trying 3178 * First we make a pass through the string trying
3182 * to verify it is a SYSV-make-style translation: 3179 * to verify it is a SYSV-make-style translation:
3183 * it must be: <string1>=<string2>) 3180 * it must be: <string1>=<string2>)
3184 */ 3181 */
3185 st->cp = st->tstr; 3182 st->cp = st->tstr;
3186 int nest = 1; 3183 int nest = 1;
3187 while (*st->cp != '\0' && nest > 0) { 3184 while (*st->cp != '\0' && nest > 0) {
3188 if (*st->cp == '=') { 3185 if (*st->cp == '=') {
3189 eqFound = TRUE; 3186 eqFound = TRUE;
3190 /* continue looking for st->endc */ 3187 /* continue looking for st->endc */
3191 } else if (*st->cp == st->endc) 3188 } else if (*st->cp == st->endc)
3192 nest--; 3189 nest--;
3193 else if (*st->cp == st->startc) 3190 else if (*st->cp == st->startc)
3194 nest++; 3191 nest++;
3195 if (nest > 0) 3192 if (nest > 0)
3196 st->cp++; 3193 st->cp++;
3197 } 3194 }
3198 if (*st->cp != st->endc || !eqFound) 3195 if (*st->cp != st->endc || !eqFound)
3199 return 0; 3196 return 0;
3200 3197
3201 st->delim = '='; 3198 st->delim = '=';
3202 st->cp = st->tstr; 3199 st->cp = st->tstr;
 3200 VarPatternFlags pflags = 0;
3203 /* FIXME: There's no point in having a single $ at the end of a 3201 /* FIXME: There's no point in having a single $ at the end of a
3204 * SysV substitution since that will not be interpreted as an 3202 * SysV substitution since that will not be interpreted as an
3205 * anchor anyway. */ 3203 * anchor anyway. */
3206 pattern.lhs = ParseModifierPart( 3204 char *lhs = ParseModifierPart(st->ctxt, &st->cp, st->delim, st->eflags,
3207 st->ctxt, &st->cp, st->delim, st->eflags, 3205 &pflags, NULL, NULL);
3208 &pattern.pflags, &pattern.leftLen, NULL); 3206 if (lhs == NULL)
3209 if (pattern.lhs == NULL) 
3210 return 'c'; 3207 return 'c';
3211 3208
3212 st->delim = st->endc; 3209 st->delim = st->endc;
3213 pattern.rhs = ParseModifierPart( 3210 char *rhs = ParseModifierPart(st->ctxt, &st->cp, st->delim, st->eflags,
3214 st->ctxt, &st->cp, st->delim, st->eflags, 3211 NULL, NULL, NULL);
3215 NULL, &pattern.rightLen, &pattern); 3212 if (rhs == NULL)
3216 if (pattern.rhs == NULL) 
3217 return 'c'; 3213 return 'c';
3218 3214
3219 /* 3215 /*
3220 * SYSV modifications happen through the whole 3216 * SYSV modifications happen through the whole
3221 * string. Note the pattern is anchored at the end. 3217 * string. Note the pattern is anchored at the end.
3222 */ 3218 */
3223 st->termc = *--st->cp; 3219 st->termc = *--st->cp;
3224 st->delim = '\0'; 3220 st->delim = '\0';
3225 if (pattern.leftLen == 0 && *st->nstr == '\0') { 3221 if (lhs[0] == '\0' && *st->nstr == '\0') {
3226 st->newStr = st->nstr; /* special case */ 3222 st->newStr = st->nstr; /* special case */
3227 } else { 3223 } else {
3228 st->newStr = VarModify( 3224 VarSYSVSubstArgs args = { lhs, rhs };
3229 st->ctxt, &st->parsestate, st->nstr, VarSYSVMatch, &pattern); 3225 st->newStr = VarModify(st->ctxt, &st->parsestate, st->nstr,
 3226 VarSYSVSubst, &args);
3230 } 3227 }
3231 free(UNCONST(pattern.lhs)); 3228 free(lhs);
3232 free(UNCONST(pattern.rhs)); 3229 free(rhs);
3233 return '='; 3230 return '=';
3234} 3231}
3235#endif 3232#endif
3236 3233
3237/* 3234/*
3238 * Now we need to apply any modifiers the user wants applied. 3235 * Now we need to apply any modifiers the user wants applied.
3239 * These are: 3236 * These are:
3240 * :M<pattern> words which match the given <pattern>. 3237 * :M<pattern> words which match the given <pattern>.
3241 * <pattern> is of the standard file 3238 * <pattern> is of the standard file
3242 * wildcarding form. 3239 * wildcarding form.
3243 * :N<pattern> words which do not match the given <pattern>. 3240 * :N<pattern> words which do not match the given <pattern>.
3244 * :S<d><pat1><d><pat2><d>[1gW] 3241 * :S<d><pat1><d><pat2><d>[1gW]
3245 * Substitute <pat2> for <pat1> in the value 3242 * Substitute <pat2> for <pat1> in the value

cvs diff -r1.5 -r1.6 src/usr.bin/make/unit-tests/Attic/sysv.exp (expand / switch to unified diff)

--- src/usr.bin/make/unit-tests/Attic/sysv.exp 2020/07/19 14:23:02 1.5
+++ src/usr.bin/make/unit-tests/Attic/sysv.exp 2020/07/19 22:04:27 1.6
@@ -4,15 +4,15 @@ fun @@ -4,15 +4,15 @@ fun
4fun 4fun
5fun 5fun
6In the Sun 6In the Sun
7acme 7acme
8aam.d 8aam.d
9sam.c 9sam.c
10a%.c 10a%.c
11asam.c.c 11asam.c.c
12asam.c 12asam.c
13a.c.c 13a.c.c
14 14
15ax:Q b c d eb 15ax:Q b c d eb
16bcd.e 16bcd.e
17a.bcd.e 17&
18exit status 0 18exit status 0

cvs diff -r1.6 -r1.7 src/usr.bin/make/unit-tests/Attic/sysv.mk (expand / switch to unified diff)

--- src/usr.bin/make/unit-tests/Attic/sysv.mk 2020/07/19 14:23:02 1.6
+++ src/usr.bin/make/unit-tests/Attic/sysv.mk 2020/07/19 22:04:27 1.7
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1# $Id: sysv.mk,v 1.6 2020/07/19 14:23:02 rillig Exp $ 1# $Id: sysv.mk,v 1.7 2020/07/19 22:04:27 rillig Exp $
2 2
3all: foo fun sam bla words ampersand 3all: foo fun sam bla words ampersand
4 4
5FOO ?= 5FOO ?=
6FOOBAR = ${FOO:=bar} 6FOOBAR = ${FOO:=bar}
7 7
8_this := ${.PARSEDIR}/${.PARSEFILE} 8_this := ${.PARSEDIR}/${.PARSEFILE}
9 9
10B = /b 10B = /b
11S = / 11S = /
12FUN = ${B}${S}fun 12FUN = ${B}${S}fun
13SUN = the Sun 13SUN = the Sun
14 14
@@ -37,20 +37,20 @@ sam: @@ -37,20 +37,20 @@ sam:
37 @echo ${SAM:%.c=a%.c} 37 @echo ${SAM:%.c=a%.c}
38 @echo ${SAM:sam%=a%.c} 38 @echo ${SAM:sam%=a%.c}
39 39
40BLA= 40BLA=
41 41
42bla: 42bla:
43 @echo $(BLA:%=foo/%x) 43 @echo $(BLA:%=foo/%x)
44 44
45# The :Q looks like a modifier but isn't. 45# The :Q looks like a modifier but isn't.
46# It is part of the replacement string. 46# It is part of the replacement string.
47words: 47words:
48 @echo a${a b c d e:L:%a=x:Q}b 48 @echo a${a b c d e:L:%a=x:Q}b
49 49
50# As of 2020-07-19, an ampersand can be used in the replacement part 50# Before 2020-07-19, an ampersand could be used in the replacement part
51# of a SysV substitution modifier. This can either be an intentional 51# of a SysV substitution modifier. This was probably a copy-and-paste
52# feature or an implementation mistake, as it is not mentioned in the 52# mistake since the SysV modifier code looked a lot like the code for the
53# manual page. 53# :S and :C modifiers. The ampersand is not mentioned in the manual page.
54ampersand: 54ampersand:
55 @echo ${:U${a.bcd.e:L:a.%=%}:Q} 55 @echo ${:U${a.bcd.e:L:a.%=%}:Q}
56 @echo ${:U${a.bcd.e:L:a.%=&}:Q} 56 @echo ${:U${a.bcd.e:L:a.%=&}:Q}