| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | /* $NetBSD: str.c,v 1.102 2024/01/05 23:22:06 rillig Exp $ */ | | 1 | /* $NetBSD: str.c,v 1.103 2024/04/14 15:21:20 rillig Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Copyright (c) 1988, 1989, 1990, 1993 | | 4 | * Copyright (c) 1988, 1989, 1990, 1993 |
5 | * The Regents of the University of California. All rights reserved. | | 5 | * The Regents of the University of California. All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to Berkeley by | | 7 | * This code is derived from software contributed to Berkeley by |
8 | * Adam de Boor. | | 8 | * Adam de Boor. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
| @@ -61,27 +61,27 @@ | | | @@ -61,27 +61,27 @@ |
61 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 61 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
62 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 62 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
63 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 63 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
64 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 64 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
65 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 65 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
66 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 66 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
67 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 67 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
68 | * SUCH DAMAGE. | | 68 | * SUCH DAMAGE. |
69 | */ | | 69 | */ |
70 | | | 70 | |
71 | #include "make.h" | | 71 | #include "make.h" |
72 | | | 72 | |
73 | /* "@(#)str.c 5.8 (Berkeley) 6/1/90" */ | | 73 | /* "@(#)str.c 5.8 (Berkeley) 6/1/90" */ |
74 | MAKE_RCSID("$NetBSD: str.c,v 1.102 2024/01/05 23:22:06 rillig Exp $"); | | 74 | MAKE_RCSID("$NetBSD: str.c,v 1.103 2024/04/14 15:21:20 rillig Exp $"); |
75 | | | 75 | |
76 | | | 76 | |
77 | static HashTable interned_strings; | | 77 | static HashTable interned_strings; |
78 | | | 78 | |
79 | | | 79 | |
80 | /* Return the concatenation of s1 and s2, freshly allocated. */ | | 80 | /* Return the concatenation of s1 and s2, freshly allocated. */ |
81 | char * | | 81 | char * |
82 | str_concat2(const char *s1, const char *s2) | | 82 | str_concat2(const char *s1, const char *s2) |
83 | { | | 83 | { |
84 | size_t len1 = strlen(s1); | | 84 | size_t len1 = strlen(s1); |
85 | size_t len2 = strlen(s2); | | 85 | size_t len2 = strlen(s2); |
86 | char *result = bmake_malloc(len1 + len2 + 1); | | 86 | char *result = bmake_malloc(len1 + len2 + 1); |
87 | memcpy(result, s1, len1); | | 87 | memcpy(result, s1, len1); |
| @@ -287,46 +287,26 @@ Str_Words(const char *str, bool expand) | | | @@ -287,46 +287,26 @@ Str_Words(const char *str, bool expand) |
287 | return words; | | 287 | return words; |
288 | } | | 288 | } |
289 | | | 289 | |
290 | words.words = bmake_malloc((swords.len + 1) * sizeof(words.words[0])); | | 290 | words.words = bmake_malloc((swords.len + 1) * sizeof(words.words[0])); |
291 | words.len = swords.len; | | 291 | words.len = swords.len; |
292 | words.freeIt = swords.freeIt; | | 292 | words.freeIt = swords.freeIt; |
293 | for (i = 0; i < swords.len + 1; i++) | | 293 | for (i = 0; i < swords.len + 1; i++) |
294 | words.words[i] = UNCONST(swords.words[i].start); | | 294 | words.words[i] = UNCONST(swords.words[i].start); |
295 | free(swords.words); | | 295 | free(swords.words); |
296 | return words; | | 296 | return words; |
297 | } | | 297 | } |
298 | | | 298 | |
299 | /* | | 299 | /* |
300 | * XXX: In the extreme edge case that one of the characters is from the basic | | | |
301 | * execution character set and the other isn't, the result of the comparison | | | |
302 | * differs depending on whether plain char is signed or unsigned. | | | |
303 | * | | | |
304 | * An example is the character range from \xE4 to 'a', where \xE4 may come | | | |
305 | * from U+00E4 'Latin small letter A with diaeresis'. | | | |
306 | * | | | |
307 | * If char is signed, \xE4 evaluates to -28, the first half of the condition | | | |
308 | * becomes -28 <= '0' && '0' <= 'a', which evaluates to true. | | | |
309 | * | | | |
310 | * If char is unsigned, \xE4 evaluates to 228, the second half of the | | | |
311 | * condition becomes 'a' <= '0' && '0' <= 228, which evaluates to false. | | | |
312 | */ | | | |
313 | static bool | | | |
314 | in_range(char e1, char c, char e2) | | | |
315 | { | | | |
316 | return (e1 <= c && c <= e2) || (e2 <= c && c <= e1); | | | |
317 | } | | | |
318 | | | | |
319 | /* | | | |
320 | * Test if a string matches a pattern like "*.[ch]". The pattern matching | | 300 | * Test if a string matches a pattern like "*.[ch]". The pattern matching |
321 | * characters are '*', '?' and '[]', as in fnmatch(3). | | 301 | * characters are '*', '?' and '[]', as in fnmatch(3). |
322 | * | | 302 | * |
323 | * See varmod-match.mk for examples and edge cases. | | 303 | * See varmod-match.mk for examples and edge cases. |
324 | */ | | 304 | */ |
325 | StrMatchResult | | 305 | StrMatchResult |
326 | Str_Match(const char *str, const char *pat) | | 306 | Str_Match(const char *str, const char *pat) |
327 | { | | 307 | { |
328 | StrMatchResult res = { NULL, false }; | | 308 | StrMatchResult res = { NULL, false }; |
329 | bool asterisk = false; | | 309 | bool asterisk = false; |
330 | const char *fixed_str = str; | | 310 | const char *fixed_str = str; |
331 | const char *fixed_pat = pat; | | 311 | const char *fixed_pat = pat; |
332 | | | 312 | |
| @@ -350,27 +330,31 @@ match_fixed_length: | | | @@ -350,27 +330,31 @@ match_fixed_length: |
350 | if (*pat == ']' || *pat == '\0') { | | 330 | if (*pat == ']' || *pat == '\0') { |
351 | if (neg) | | 331 | if (neg) |
352 | goto end_of_char_list; | | 332 | goto end_of_char_list; |
353 | goto no_match; | | 333 | goto no_match; |
354 | } | | 334 | } |
355 | if (*pat == *str) | | 335 | if (*pat == *str) |
356 | goto end_of_char_list; | | 336 | goto end_of_char_list; |
357 | if (pat[1] == '-' && pat[2] == '\0') { | | 337 | if (pat[1] == '-' && pat[2] == '\0') { |
358 | res.error = "Unfinished character range"; | | 338 | res.error = "Unfinished character range"; |
359 | res.matched = neg; | | 339 | res.matched = neg; |
360 | return res; | | 340 | return res; |
361 | } | | 341 | } |
362 | if (pat[1] == '-') { | | 342 | if (pat[1] == '-') { |
363 | if (in_range(pat[0], *str, pat[2])) | | 343 | unsigned char e1 = (unsigned char)pat[0]; |
| | | 344 | unsigned char c = (unsigned char)*str; |
| | | 345 | unsigned char e2 = (unsigned char)pat[2]; |
| | | 346 | if ((e1 <= c && c <= e2) |
| | | 347 | || (e2 <= c && c <= e1)) |
364 | goto end_of_char_list; | | 348 | goto end_of_char_list; |
365 | pat += 2; | | 349 | pat += 2; |
366 | } | | 350 | } |
367 | pat++; | | 351 | pat++; |
368 | goto next_char_in_list; | | 352 | goto next_char_in_list; |
369 | | | 353 | |
370 | end_of_char_list: | | 354 | end_of_char_list: |
371 | if (neg && *pat != ']' && *pat != '\0') | | 355 | if (neg && *pat != ']' && *pat != '\0') |
372 | goto no_match; | | 356 | goto no_match; |
373 | while (*pat != ']' && *pat != '\0') | | 357 | while (*pat != ']' && *pat != '\0') |
374 | pat++; | | 358 | pat++; |
375 | if (*pat == '\0') | | 359 | if (*pat == '\0') |
376 | pat--; | | 360 | pat--; |