| @@ -1,487 +1,489 @@ | | | @@ -1,487 +1,489 @@ |
1 | /* $NetBSD: strfile.c,v 1.41 2020/07/21 03:05:40 nia Exp $ */ | | 1 | /* $NetBSD: strfile.c,v 1.42 2020/07/26 15:14:09 nia Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 1989, 1993 | | 4 | * Copyright (c) 1989, 1993 |
5 | * The Regents of the University of California. All rights reserved. | | 5 | * The Regents of the University of California. All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to Berkeley by | | 7 | * This code is derived from software contributed to Berkeley by |
8 | * Ken Arnold. | | 8 | * Ken Arnold. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright | | 15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the | | 16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. | | 17 | * documentation and/or other materials provided with the distribution. |
18 | * 3. Neither the name of the University nor the names of its contributors | | 18 | * 3. Neither the name of the University nor the names of its contributors |
19 | * may be used to endorse or promote products derived from this software | | 19 | * may be used to endorse or promote products derived from this software |
20 | * without specific prior written permission. | | 20 | * without specific prior written permission. |
21 | * | | 21 | * |
22 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | | 22 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
23 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 23 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
24 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 24 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
25 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 25 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
27 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 27 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
28 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 28 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
29 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 29 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
31 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 31 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
32 | * SUCH DAMAGE. | | 32 | * SUCH DAMAGE. |
33 | */ | | 33 | */ |
34 | | | 34 | |
35 | #if HAVE_NBTOOL_CONFIG_H | | 35 | #if HAVE_NBTOOL_CONFIG_H |
36 | #include "nbtool_config.h" | | 36 | #include "nbtool_config.h" |
37 | #endif | | 37 | #endif |
38 | | | 38 | |
39 | #ifdef __NetBSD__ | | 39 | #ifdef __NetBSD__ |
40 | #include <sys/cdefs.h> | | 40 | #include <sys/cdefs.h> |
41 | #ifndef lint | | 41 | #ifndef lint |
42 | __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ | | 42 | __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ |
43 | The Regents of the University of California. All rights reserved."); | | 43 | The Regents of the University of California. All rights reserved."); |
44 | #endif /* not lint */ | | 44 | #endif /* not lint */ |
45 | | | 45 | |
46 | #ifndef lint | | 46 | #ifndef lint |
47 | #if 0 | | 47 | #if 0 |
48 | static char sccsid[] = "@(#)strfile.c 8.1 (Berkeley) 5/31/93"; | | 48 | static char sccsid[] = "@(#)strfile.c 8.1 (Berkeley) 5/31/93"; |
49 | #else | | 49 | #else |
50 | __RCSID("$NetBSD: strfile.c,v 1.41 2020/07/21 03:05:40 nia Exp $"); | | 50 | __RCSID("$NetBSD: strfile.c,v 1.42 2020/07/26 15:14:09 nia Exp $"); |
51 | #endif | | 51 | #endif |
52 | #endif /* not lint */ | | 52 | #endif /* not lint */ |
53 | #endif /* __NetBSD__ */ | | 53 | #endif /* __NetBSD__ */ |
54 | | | 54 | |
55 | #include <sys/types.h> | | 55 | #include <sys/types.h> |
56 | #include <sys/param.h> | | 56 | #include <sys/param.h> |
57 | #include <ctype.h> | | 57 | #include <ctype.h> |
58 | #include <stdarg.h> | | 58 | #include <stdarg.h> |
59 | #include <stdio.h> | | 59 | #include <stdio.h> |
60 | #include <stdlib.h> | | 60 | #include <stdlib.h> |
61 | #include <string.h> | | 61 | #include <string.h> |
62 | #include <time.h> | | 62 | #include <time.h> |
63 | #include <unistd.h> | | 63 | #include <unistd.h> |
64 | #include <inttypes.h> | | 64 | #include <inttypes.h> |
65 | #include <err.h> | | 65 | #include <err.h> |
66 | | | 66 | |
67 | #include "strfile.h" | | 67 | #include "strfile.h" |
68 | | | 68 | |
69 | #ifndef MAXPATHLEN | | 69 | #ifndef MAXPATHLEN |
70 | #define MAXPATHLEN 1024 | | 70 | #define MAXPATHLEN 1024 |
71 | #endif /* MAXPATHLEN */ | | 71 | #endif /* MAXPATHLEN */ |
72 | | | 72 | |
/*
 *	This program takes a file composed of strings separated by
 * lines consisting of a single delimiting character (default
 * character is '%') and creates another file which consists of a table
 * describing the file (structure from "strfile.h") followed by a table
 * of seek pointers to the start of each string.  Usage:
 *
 *	% strfile [-iorsx] [-c char] sourcefile [datafile]
 *
 *	c - Change delimiting character from '%' to 'char'
 *	s - Silent.  Give no summary of data processed at the end of
 *	    the run.
 *	o - order the strings in alphabetic order
 *	i - if ordering, ignore case
 *	r - randomize the order of the strings
 *	x - set rotated bit
 *
 *		Ken Arnold	Sept. 7, 1978 --
 *
 *	Added ordering options.
 */
95 | | | 95 | |
/* True when seek pointers are kept in memory (ordering or randomizing). */
# define	STORING_PTRS	(Oflag || Rflag)
/* Allocation granularity, in elements, used by ALLOC. */
# define	CHUNKSIZE	512

/*
 * ALLOC(ptr, sz):
 *	Make sure the array `ptr' has backing storage.  A NULL `ptr' gets
 *	an initial CHUNKSIZE elements; otherwise the array is regrown to
 *	sz + CHUNKSIZE elements whenever sz + 1 is a multiple of CHUNKSIZE.
 *	Exits through err() when the allocation fails.  `ptr' is evaluated
 *	several times and assigned to, so it must be a plain lvalue.
 */
# define	ALLOC(ptr,sz)	do { \
			if (ptr == NULL) \
				ptr = malloc(CHUNKSIZE * sizeof *ptr); \
			else if (((sz) + 1) % CHUNKSIZE == 0) \
				ptr = realloc(ptr, ((sz) + CHUNKSIZE) * sizeof *ptr); \
			if (ptr == NULL) \
				err(1, "out of space"); \
		} while (0)
107 | | | 107 | |
/*
 * Sort key for one string: main() fills it in when ordering is requested
 * and cmp_str() compares two of them.
 */
typedef struct {
	char first;	/* first alphanumeric char found on the string's first line */
	off_t pos;	/* seek pointer of the string in the input file */
} STR;
112 | | | 112 | |
static char *Infile = NULL;		/* input file name */
static char Outfile[MAXPATHLEN] = "";	/* output file name */
static char Delimch = '%';		/* delimiting character */

static int Sflag = 0;		/* silent run flag */
static int Oflag = 0;		/* ordering flag */
static int Iflag = 0;		/* ignore case flag */
static int Rflag = 0;		/* randomize order flag */
static int Xflag = 0;		/* set rotated bit */
static long Num_pts = 0;	/* number of pointers/strings */

/* seek pointers collected by add_offset() while STORING_PTRS is true */
static off_t *Seekpts;

static FILE *Sort_1, *Sort_2;	/* pointers for sorting */

static STRFILE Tbl;		/* statistics table */

static STR *Firstch;		/* first chars of each string */
131 | | | 131 | |
132 | | | 132 | |
/* Forward declarations of the file-local helpers defined further down. */
static uint32_t h2nl(uint32_t h);
static void getargs(int argc, char **argv);
static void usage(const char *, ...) __dead __printflike(1, 2);
static void add_offset(FILE *fp, off_t off);
static void do_order(void);
static int cmp_str(const void *vp1, const void *vp2);
static void randomize(void);
static void fwrite_be_offt(off_t off, FILE *f);
141 | | | 141 | |
142 | | | 142 | |
143 | /* | | 143 | /* |
144 | * main: | | 144 | * main: |
145 | * Drive the sucker. There are two main modes -- either we store | | 145 | * Drive the sucker. There are two main modes -- either we store |
146 | * the seek pointers, if the table is to be sorted or randomized, | | 146 | * the seek pointers, if the table is to be sorted or randomized, |
147 | * or we write the pointer directly to the file, if we are to stay | | 147 | * or we write the pointer directly to the file, if we are to stay |
148 | * in file order. If the former, we allocate and re-allocate in | | 148 | * in file order. If the former, we allocate and re-allocate in |
149 | * CHUNKSIZE blocks; if the latter, we just write each pointer, | | 149 | * CHUNKSIZE blocks; if the latter, we just write each pointer, |
150 | * and then seek back to the beginning to write in the table. | | 150 | * and then seek back to the beginning to write in the table. |
151 | */ | | 151 | */ |
152 | int | | 152 | int |
153 | main(int ac, char **av) | | 153 | main(int ac, char **av) |
154 | { | | 154 | { |
155 | char *sp, dc; | | 155 | char *sp, dc; |
156 | FILE *inf, *outf; | | 156 | FILE *inf, *outf; |
157 | off_t last_off, length, pos; | | 157 | off_t last_off, length, pos; |
158 | int first; | | 158 | int first; |
159 | char *nsp; | | 159 | char *nsp; |
160 | STR *fp; | | 160 | STR *fp; |
161 | static char string[257]; | | 161 | static char string[257]; |
162 | long i; | | 162 | long i; |
163 | | | 163 | |
164 | /* sanity test */ | | 164 | /* sanity test */ |
165 | if (sizeof(uint32_t) != 4) | | 165 | if (sizeof(uint32_t) != 4) |
166 | errx(1, "sizeof(uint32_t) != 4"); | | 166 | errx(1, "sizeof(uint32_t) != 4"); |
167 | | | 167 | |
168 | getargs(ac, av); /* evalute arguments */ | | 168 | getargs(ac, av); /* evalute arguments */ |
169 | dc = Delimch; | | 169 | dc = Delimch; |
170 | if ((inf = fopen(Infile, "r")) == NULL) | | 170 | if ((inf = fopen(Infile, "r")) == NULL) |
171 | err(1, "open `%s'", Infile); | | 171 | err(1, "open `%s'", Infile); |
172 | | | 172 | |
173 | if ((outf = fopen(Outfile, "w")) == NULL) | | 173 | if ((outf = fopen(Outfile, "w")) == NULL) |
174 | err(1, "open `%s'", Outfile); | | 174 | err(1, "open `%s'", Outfile); |
175 | if (!STORING_PTRS) | | 175 | if (!STORING_PTRS) |
176 | (void) fseek(outf, sizeof Tbl, SEEK_SET); | | 176 | (void) fseek(outf, sizeof Tbl, SEEK_SET); |
177 | | | 177 | |
178 | /* | | 178 | /* |
179 | * Write the strings onto the file | | 179 | * Write the strings onto the file |
180 | */ | | 180 | */ |
181 | | | 181 | |
182 | Tbl.str_longlen = 0; | | 182 | Tbl.str_longlen = 0; |
183 | Tbl.str_shortlen = (unsigned int) 0x7fffffff; | | 183 | Tbl.str_shortlen = (unsigned int) 0x7fffffff; |
184 | Tbl.str_delim = dc; | | 184 | Tbl.str_delim = dc; |
185 | Tbl.str_version = VERSION; | | 185 | Tbl.str_version = VERSION; |
186 | first = Oflag; | | 186 | first = Oflag; |
187 | add_offset(outf, ftell(inf)); | | 187 | add_offset(outf, ftell(inf)); |
188 | last_off = 0; | | 188 | last_off = 0; |
189 | do { | | 189 | do { |
190 | sp = fgets(string, 256, inf); | | 190 | sp = fgets(string, 256, inf); |
191 | if (sp == NULL || (sp[0] == dc && sp[1] == '\n')) { | | 191 | if (sp == NULL || (sp[0] == dc && sp[1] == '\n')) { |
192 | pos = ftell(inf); | | 192 | pos = ftell(inf); |
193 | length = pos - last_off - (sp ? strlen(sp) : 0); | | 193 | length = pos - last_off - (sp ? strlen(sp) : 0); |
194 | last_off = pos; | | 194 | last_off = pos; |
195 | if (!length) | | 195 | if (!length) |
196 | continue; | | 196 | continue; |
197 | add_offset(outf, pos); | | 197 | add_offset(outf, pos); |
198 | if ((off_t)Tbl.str_longlen < length) | | 198 | if ((off_t)Tbl.str_longlen < length) |
199 | Tbl.str_longlen = length; | | 199 | Tbl.str_longlen = length; |
200 | if ((off_t)Tbl.str_shortlen > length) | | 200 | if ((off_t)Tbl.str_shortlen > length) |
201 | Tbl.str_shortlen = length; | | 201 | Tbl.str_shortlen = length; |
202 | first = Oflag; | | 202 | first = Oflag; |
203 | } | | 203 | } |
204 | else if (first) { | | 204 | else if (first) { |
205 | for (nsp = sp; !isalnum((unsigned char)*nsp); nsp++) | | 205 | for (nsp = sp; !isalnum((unsigned char)*nsp); nsp++) |
206 | continue; | | 206 | continue; |
207 | ALLOC(Firstch, Num_pts); | | 207 | ALLOC(Firstch, Num_pts); |
208 | fp = &Firstch[Num_pts - 1]; | | 208 | fp = &Firstch[Num_pts - 1]; |
209 | if (Iflag && isupper((unsigned char)*nsp)) | | 209 | if (Iflag && isupper((unsigned char)*nsp)) |
210 | fp->first = tolower((unsigned char)*nsp); | | 210 | fp->first = tolower((unsigned char)*nsp); |
211 | else | | 211 | else |
212 | fp->first = *nsp; | | 212 | fp->first = *nsp; |
213 | fp->pos = Seekpts[Num_pts - 1]; | | 213 | fp->pos = Seekpts[Num_pts - 1]; |
214 | first = 0; | | 214 | first = 0; |
215 | } | | 215 | } |
216 | } while (sp != NULL); | | 216 | } while (sp != NULL); |
217 | | | 217 | |
218 | /* | | 218 | /* |
219 | * write the tables in | | 219 | * write the tables in |
220 | */ | | 220 | */ |
221 | | | 221 | |
222 | (void) fclose(inf); | | 222 | (void) fclose(inf); |
223 | | | 223 | |
224 | if (Oflag) | | 224 | if (Oflag) |
225 | do_order(); | | 225 | do_order(); |
226 | else if (Rflag) | | 226 | else if (Rflag) |
227 | randomize(); | | 227 | randomize(); |
228 | | | 228 | |
229 | if (Xflag) | | 229 | if (Xflag) |
230 | Tbl.str_flags |= STR_ROTATED; | | 230 | Tbl.str_flags |= STR_ROTATED; |
231 | | | 231 | |
232 | if (!Sflag) { | | 232 | if (!Sflag) { |
233 | printf("\"%s\" created\n", Outfile); | | 233 | printf("\"%s\" created\n", Outfile); |
234 | if (Num_pts == 2) | | 234 | if (Num_pts == 2) |
235 | puts("There was 1 string"); | | 235 | puts("There was 1 string"); |
236 | else | | 236 | else |
237 | printf("There were %d strings\n", (int)(Num_pts - 1)); | | 237 | printf("There were %d strings\n", (int)(Num_pts - 1)); |
238 | printf("Longest string: %lu byte%s\n", (unsigned long)Tbl.str_longlen, | | 238 | printf("Longest string: %lu byte%s\n", (unsigned long)Tbl.str_longlen, |
239 | Tbl.str_longlen == 1 ? "" : "s"); | | 239 | Tbl.str_longlen == 1 ? "" : "s"); |
240 | printf("Shortest string: %lu byte%s\n", (unsigned long)Tbl.str_shortlen, | | 240 | printf("Shortest string: %lu byte%s\n", (unsigned long)Tbl.str_shortlen, |
241 | Tbl.str_shortlen == 1 ? "" : "s"); | | 241 | Tbl.str_shortlen == 1 ? "" : "s"); |
242 | } | | 242 | } |
243 | | | 243 | |
244 | (void) fseek(outf, (off_t) 0, SEEK_SET); | | 244 | (void) fseek(outf, (off_t) 0, SEEK_SET); |
245 | Tbl.str_version = h2nl(Tbl.str_version); | | 245 | Tbl.str_version = h2nl(Tbl.str_version); |
246 | Tbl.str_numstr = h2nl(Num_pts - 1); | | 246 | Tbl.str_numstr = h2nl(Num_pts - 1); |
247 | Tbl.str_longlen = h2nl(Tbl.str_longlen); | | 247 | Tbl.str_longlen = h2nl(Tbl.str_longlen); |
248 | Tbl.str_shortlen = h2nl(Tbl.str_shortlen); | | 248 | Tbl.str_shortlen = h2nl(Tbl.str_shortlen); |
249 | Tbl.str_flags = h2nl(Tbl.str_flags); | | 249 | Tbl.str_flags = h2nl(Tbl.str_flags); |
250 | (void) fwrite((char *) &Tbl, sizeof Tbl, 1, outf); | | 250 | (void) fwrite((char *) &Tbl, sizeof Tbl, 1, outf); |
251 | if (STORING_PTRS) { | | 251 | if (STORING_PTRS) { |
252 | for (i = 0; i < Num_pts; i++) | | 252 | for (i = 0; i < Num_pts; i++) |
253 | fwrite_be_offt(Seekpts[i], outf); | | 253 | fwrite_be_offt(Seekpts[i], outf); |
254 | } | | 254 | } |
255 | fflush(outf); | | 255 | fflush(outf); |
256 | if (ferror(outf)) | | 256 | if (ferror(outf)) |
257 | err(1, "fwrite %s", Outfile); | | 257 | err(1, "fwrite %s", Outfile); |
258 | (void) fclose(outf); | | 258 | (void) fclose(outf); |
259 | exit(0); | | 259 | exit(0); |
260 | } | | 260 | } |
261 | | | 261 | |
262 | /* | | 262 | /* |
263 | * This routine evaluates arguments from the command line | | 263 | * This routine evaluates arguments from the command line |
264 | */ | | 264 | */ |
265 | static void | | 265 | static void |
266 | getargs(int argc, char **argv) | | 266 | getargs(int argc, char **argv) |
267 | { | | 267 | { |
268 | int ch; | | 268 | int ch; |
269 | extern int optind; | | 269 | extern int optind; |
270 | extern char *optarg; | | 270 | extern char *optarg; |
271 | size_t len; | | 271 | size_t len; |
272 | | | 272 | |
273 | while ((ch = getopt(argc, argv, "c:iorsx")) != -1) | | 273 | while ((ch = getopt(argc, argv, "c:iorsx")) != -1) |
274 | switch(ch) { | | 274 | switch(ch) { |
275 | case 'c': /* new delimiting char */ | | 275 | case 'c': /* new delimiting char */ |
276 | Delimch = *optarg; | | 276 | Delimch = *optarg; |
277 | if (!isascii(Delimch)) { | | 277 | if (!isascii(Delimch)) { |
278 | printf("bad delimiting character: '\\%o\n'", | | 278 | printf("bad delimiting character: '\\%o\n'", |
279 | Delimch); | | 279 | Delimch); |
280 | } | | 280 | } |
281 | break; | | 281 | break; |
282 | case 'i': /* ignore case in ordering */ | | 282 | case 'i': /* ignore case in ordering */ |
283 | Iflag++; | | 283 | Iflag++; |
284 | break; | | 284 | break; |
285 | case 'o': /* order strings */ | | 285 | case 'o': /* order strings */ |
286 | Oflag++; | | 286 | Oflag++; |
287 | break; | | 287 | break; |
288 | case 'r': /* randomize pointers */ | | 288 | case 'r': /* randomize pointers */ |
289 | Rflag++; | | 289 | Rflag++; |
290 | break; | | 290 | break; |
291 | case 's': /* silent */ | | 291 | case 's': /* silent */ |
292 | Sflag++; | | 292 | Sflag++; |
293 | break; | | 293 | break; |
294 | case 'x': /* set the rotated bit */ | | 294 | case 'x': /* set the rotated bit */ |
295 | Xflag++; | | 295 | Xflag++; |
296 | break; | | 296 | break; |
297 | case '?': | | 297 | case '?': |
298 | default: | | 298 | default: |
299 | usage(NULL); | | 299 | usage(NULL); |
300 | } | | 300 | } |
301 | argv += optind; | | 301 | argv += optind; |
302 | | | 302 | |
303 | if (*argv) { | | 303 | if (*argv) { |
304 | Infile = *argv; | | 304 | Infile = *argv; |
305 | if (*++argv) { | | 305 | if (*++argv) { |
306 | len = strlcpy(Outfile, *argv, sizeof(Outfile)); | | 306 | len = strlcpy(Outfile, *argv, sizeof(Outfile)); |
307 | if (len >= sizeof(Outfile)) { | | 307 | if (len >= sizeof(Outfile)) { |
308 | usage("Too long output filename"); | | 308 | usage("Too long output filename"); |
309 | } | | 309 | } |
310 | } | | 310 | } |
311 | } | | 311 | } |
312 | if (!Infile) { | | 312 | if (!Infile) { |
313 | usage("No input file name"); | | 313 | usage("No input file name"); |
314 | } | | 314 | } |
315 | if (*Outfile == '\0') { | | 315 | if (*Outfile == '\0') { |
316 | len = snprintf(Outfile, sizeof(Outfile), "%s.dat", Infile); | | 316 | len = snprintf(Outfile, sizeof(Outfile), "%s.dat", Infile); |
317 | if (len >= sizeof(Outfile)) { | | 317 | if (len >= sizeof(Outfile)) { |
318 | usage("Too long input filename"); | | 318 | usage("Too long input filename"); |
319 | } | | 319 | } |
320 | } | | 320 | } |
321 | } | | 321 | } |
322 | | | 322 | |
/*
 * usage:
 *	Optionally emit a printf-style warning through vwarnx(), then
 *	print the synopsis on stderr and exit with status 1.  Passing a
 *	NULL fmt prints the synopsis only.
 */
static void
usage(const char *fmt, ...)
{
	va_list args;

	if (fmt != NULL) {
		va_start(args, fmt);
		vwarnx(fmt, args);
		va_end(args);
	}

	(void) fprintf(stderr,
	    "Usage: %s [-iorsx] [-c char] sourcefile [datafile]\n",
	    getprogname());
	exit(1);
}
337 | | | 337 | |
338 | /* | | 338 | /* |
339 | * add_offset: | | 339 | * add_offset: |
340 | * Add an offset to the list, or write it out, as appropriate. | | 340 | * Add an offset to the list, or write it out, as appropriate. |
341 | */ | | 341 | */ |
342 | static void | | 342 | static void |
343 | add_offset(FILE *fp, off_t off) | | 343 | add_offset(FILE *fp, off_t off) |
344 | { | | 344 | { |
345 | | | 345 | |
346 | if (!STORING_PTRS) { | | 346 | if (!STORING_PTRS) { |
347 | fwrite_be_offt(off, fp); | | 347 | fwrite_be_offt(off, fp); |
348 | } else { | | 348 | } else { |
349 | ALLOC(Seekpts, Num_pts + 1); | | 349 | ALLOC(Seekpts, Num_pts + 1); |
350 | Seekpts[Num_pts] = off; | | 350 | Seekpts[Num_pts] = off; |
351 | } | | 351 | } |
352 | Num_pts++; | | 352 | Num_pts++; |
353 | } | | 353 | } |
354 | | | 354 | |
355 | /* | | 355 | /* |
356 | * do_order: | | 356 | * do_order: |
357 | * Order the strings alphabetically (possibly ignoring case). | | 357 | * Order the strings alphabetically (possibly ignoring case). |
358 | */ | | 358 | */ |
359 | static void | | 359 | static void |
360 | do_order(void) | | 360 | do_order(void) |
361 | { | | 361 | { |
362 | int i; | | 362 | int i; |
363 | off_t *lp; | | 363 | off_t *lp; |
364 | STR *fp; | | 364 | STR *fp; |
365 | | | 365 | |
366 | Sort_1 = fopen(Infile, "r"); | | 366 | Sort_1 = fopen(Infile, "r"); |
367 | Sort_2 = fopen(Infile, "r"); | | 367 | Sort_2 = fopen(Infile, "r"); |
368 | qsort((char *) Firstch, (int) Tbl.str_numstr, sizeof *Firstch, cmp_str); | | 368 | qsort((char *) Firstch, (int) Tbl.str_numstr, sizeof *Firstch, cmp_str); |
369 | i = Tbl.str_numstr; | | 369 | i = Tbl.str_numstr; |
370 | lp = Seekpts; | | 370 | lp = Seekpts; |
371 | fp = Firstch; | | 371 | fp = Firstch; |
372 | while (i--) | | 372 | while (i--) |
373 | *lp++ = fp++->pos; | | 373 | *lp++ = fp++->pos; |
374 | (void) fclose(Sort_1); | | 374 | (void) fclose(Sort_1); |
375 | (void) fclose(Sort_2); | | 375 | (void) fclose(Sort_2); |
376 | Tbl.str_flags |= STR_ORDERED; | | 376 | Tbl.str_flags |= STR_ORDERED; |
377 | } | | 377 | } |
378 | | | 378 | |
379 | static int | | 379 | static int |
380 | cmp_str(const void *vp1, const void *vp2) | | 380 | cmp_str(const void *vp1, const void *vp2) |
381 | { | | 381 | { |
382 | const STR *p1, *p2; | | 382 | const STR *p1, *p2; |
383 | int c1, c2; | | 383 | int c1, c2; |
384 | int n1, n2; | | 384 | int n1, n2; |
385 | | | 385 | |
386 | p1 = (const STR *)vp1; | | 386 | p1 = (const STR *)vp1; |
387 | p2 = (const STR *)vp2; | | 387 | p2 = (const STR *)vp2; |
388 | | | 388 | |
389 | # define SET_N(nf,ch) (nf = (ch == '\n')) | | 389 | # define SET_N(nf,ch) (nf = (ch == '\n')) |
390 | # define IS_END(ch,nf) (ch == Delimch && nf) | | 390 | # define IS_END(ch,nf) (ch == Delimch && nf) |
391 | | | 391 | |
392 | c1 = p1->first; | | 392 | c1 = p1->first; |
393 | c2 = p2->first; | | 393 | c2 = p2->first; |
394 | if (c1 != c2) | | 394 | if (c1 != c2) |
395 | return c1 - c2; | | 395 | return c1 - c2; |
396 | | | 396 | |
397 | (void) fseek(Sort_1, p1->pos, SEEK_SET); | | 397 | (void) fseek(Sort_1, p1->pos, SEEK_SET); |
398 | (void) fseek(Sort_2, p2->pos, SEEK_SET); | | 398 | (void) fseek(Sort_2, p2->pos, SEEK_SET); |
399 | | | 399 | |
400 | n1 = 0; | | 400 | n1 = 0; |
401 | n2 = 0; | | 401 | n2 = 0; |
402 | while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0') | | 402 | while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0') |
403 | SET_N(n1, c1); | | 403 | SET_N(n1, c1); |
404 | while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0') | | 404 | while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0') |
405 | SET_N(n2, c2); | | 405 | SET_N(n2, c2); |
406 | | | 406 | |
407 | while (!IS_END(c1, n1) && !IS_END(c2, n2)) { | | 407 | while (!IS_END(c1, n1) && !IS_END(c2, n2)) { |
408 | if (Iflag) { | | 408 | if (Iflag) { |
409 | if (isupper(c1)) | | 409 | if (isupper(c1)) |
410 | c1 = tolower(c1); | | 410 | c1 = tolower(c1); |
411 | if (isupper(c2)) | | 411 | if (isupper(c2)) |
412 | c2 = tolower(c2); | | 412 | c2 = tolower(c2); |
413 | } | | 413 | } |
414 | if (c1 != c2) | | 414 | if (c1 != c2) |
415 | return c1 - c2; | | 415 | return c1 - c2; |
416 | SET_N(n1, c1); | | 416 | SET_N(n1, c1); |
417 | SET_N(n2, c2); | | 417 | SET_N(n2, c2); |
418 | c1 = getc(Sort_1); | | 418 | c1 = getc(Sort_1); |
419 | c2 = getc(Sort_2); | | 419 | c2 = getc(Sort_2); |
420 | } | | 420 | } |
421 | if (IS_END(c1, n1)) | | 421 | if (IS_END(c1, n1)) |
422 | c1 = 0; | | 422 | c1 = 0; |
423 | if (IS_END(c2, n2)) | | 423 | if (IS_END(c2, n2)) |
424 | c2 = 0; | | 424 | c2 = 0; |
425 | return c1 - c2; | | 425 | return c1 - c2; |
426 | } | | 426 | } |
427 | | | 427 | |
428 | /* | | 428 | /* |
429 | * randomize: | | 429 | * randomize: |
430 | * Randomize the order of the string table. We must be careful | | 430 | * Randomize the order of the string table. We must be careful |
431 | * not to randomize across delimiter boundaries. All | | 431 | * not to randomize across delimiter boundaries. All |
432 | * randomization is done within each block. | | 432 | * randomization is done within each block. |
433 | */ | | 433 | */ |
434 | static void | | 434 | static void |
435 | randomize(void) | | 435 | randomize(void) |
436 | { | | 436 | { |
437 | int cnt, i; | | 437 | int cnt, i; |
438 | off_t tmp; | | 438 | off_t tmp; |
439 | off_t *sp; | | 439 | off_t *sp; |
440 | | | 440 | |
| | | 441 | srandom((int)(time(NULL) + getpid())); |
| | | 442 | |
441 | Tbl.str_flags |= STR_RANDOM; | | 443 | Tbl.str_flags |= STR_RANDOM; |
442 | cnt = Tbl.str_numstr; | | 444 | cnt = Tbl.str_numstr; |
443 | | | 445 | |
444 | /* | | 446 | /* |
445 | * move things around randomly | | 447 | * move things around randomly |
446 | */ | | 448 | */ |
447 | | | 449 | |
448 | for (sp = Seekpts; cnt > 0; cnt--, sp++) { | | 450 | for (sp = Seekpts; cnt > 0; cnt--, sp++) { |
449 | i = arc4random_uniform(cnt); | | 451 | i = random() % cnt; |
450 | tmp = sp[0]; | | 452 | tmp = sp[0]; |
451 | sp[0] = sp[i]; | | 453 | sp[0] = sp[i]; |
452 | sp[i] = tmp; | | 454 | sp[i] = tmp; |
453 | } | | 455 | } |
454 | } | | 456 | } |
455 | | | 457 | |
/*
 * fwrite_be_offt:
 *	Write out the off parameter as a 64 bit big endian number,
 *	i.e. eight bytes with the most significant byte first.
 */

static void
fwrite_be_offt(off_t off, FILE *f)
{
	unsigned char be[8];
	size_t idx = sizeof(be);

	/* fill from the last byte backwards, peeling off the low 8 bits */
	while (idx-- > 0) {
		be[idx] = off & 0xff;
		off >>= 8;
	}
	fwrite(be, sizeof(be), 1, f);
}
473 | | | 475 | |
/*
 * h2nl:
 *	Return h rearranged so that its in-memory bytes run from most
 *	significant to least significant (network order), whatever the
 *	byte order of the host.
 */
static uint32_t
h2nl(uint32_t h)
{
	const unsigned char be[4] = {
		(unsigned char)(h >> 24),
		(unsigned char)(h >> 16),
		(unsigned char)(h >> 8),
		(unsigned char)h
	};
	uint32_t net;

	memcpy(&net, be, sizeof net);
	return net;
}