| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | /* $NetBSD: wc.c,v 1.31 2008/07/21 14:19:28 lukem Exp $ */ | | 1 | /* $NetBSD: wc.c,v 1.31.4.1 2010/03/06 21:19:11 sborrill Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Copyright (c) 1980, 1987, 1991, 1993 | | 4 | * Copyright (c) 1980, 1987, 1991, 1993 |
5 | * The Regents of the University of California. All rights reserved. | | 5 | * The Regents of the University of California. All rights reserved. |
6 | * | | 6 | * |
7 | * Redistribution and use in source and binary forms, with or without | | 7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions | | 8 | * modification, are permitted provided that the following conditions |
9 | * are met: | | 9 | * are met: |
10 | * 1. Redistributions of source code must retain the above copyright | | 10 | * 1. Redistributions of source code must retain the above copyright |
11 | * notice, this list of conditions and the following disclaimer. | | 11 | * notice, this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright | | 12 | * 2. Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in the | | 13 | * notice, this list of conditions and the following disclaimer in the |
14 | * documentation and/or other materials provided with the distribution. | | 14 | * documentation and/or other materials provided with the distribution. |
| @@ -29,203 +29,212 @@ | | | @@ -29,203 +29,212 @@ |
29 | * SUCH DAMAGE. | | 29 | * SUCH DAMAGE. |
30 | */ | | 30 | */ |
31 | | | 31 | |
32 | #include <sys/cdefs.h> | | 32 | #include <sys/cdefs.h> |
33 | #ifndef lint | | 33 | #ifndef lint |
34 | __COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\ | | 34 | __COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\ |
35 | The Regents of the University of California. All rights reserved."); | | 35 | The Regents of the University of California. All rights reserved."); |
36 | #endif /* not lint */ | | 36 | #endif /* not lint */ |
37 | | | 37 | |
38 | #ifndef lint | | 38 | #ifndef lint |
39 | #if 0 | | 39 | #if 0 |
40 | static char sccsid[] = "@(#)wc.c 8.2 (Berkeley) 5/2/95"; | | 40 | static char sccsid[] = "@(#)wc.c 8.2 (Berkeley) 5/2/95"; |
41 | #else | | 41 | #else |
42 | __RCSID("$NetBSD: wc.c,v 1.31 2008/07/21 14:19:28 lukem Exp $"); | | 42 | __RCSID("$NetBSD: wc.c,v 1.31.4.1 2010/03/06 21:19:11 sborrill Exp $"); |
43 | #endif | | 43 | #endif |
44 | #endif /* not lint */ | | 44 | #endif /* not lint */ |
45 | | | 45 | |
46 | /* wc line, word and char count */ | | 46 | /* wc line, word, char count and optionally longest line. */ |
47 | | | 47 | |
48 | #include <sys/param.h> | | 48 | #include <sys/param.h> |
49 | #include <sys/file.h> | | 49 | #include <sys/file.h> |
50 | #include <sys/stat.h> | | 50 | #include <sys/stat.h> |
51 | | | 51 | |
52 | #include <ctype.h> | | 52 | #include <ctype.h> |
53 | #include <fcntl.h> | | 53 | #include <fcntl.h> |
54 | #include <err.h> | | 54 | #include <err.h> |
55 | #include <errno.h> | | 55 | #include <errno.h> |
56 | #include <locale.h> | | 56 | #include <locale.h> |
| | | 57 | #include <stdbool.h> |
57 | #include <stdio.h> | | 58 | #include <stdio.h> |
58 | #include <stdlib.h> | | 59 | #include <stdlib.h> |
59 | #include <string.h> | | 60 | #include <string.h> |
60 | #include <unistd.h> | | 61 | #include <unistd.h> |
61 | #include <wchar.h> | | 62 | #include <wchar.h> |
62 | #include <wctype.h> | | 63 | #include <wctype.h> |
63 | | | 64 | |
64 | #ifdef NO_QUAD | | 65 | #ifdef NO_QUAD |
65 | typedef u_long wc_count_t; | | 66 | typedef u_long wc_count_t; |
66 | # define WCFMT " %7lu" | | 67 | # define WCFMT " %7lu" |
67 | # define WCCAST unsigned long | | 68 | # define WCCAST unsigned long |
68 | #else | | 69 | #else |
69 | typedef u_quad_t wc_count_t; | | 70 | typedef u_quad_t wc_count_t; |
70 | # define WCFMT " %7llu" | | 71 | # define WCFMT " %7llu" |
71 | # define WCCAST unsigned long long | | 72 | # define WCCAST unsigned long long |
72 | #endif | | 73 | #endif |
73 | | | 74 | |
74 | static wc_count_t tlinect, twordct, tcharct; | | 75 | static wc_count_t tlinect, twordct, tcharct, tlongest; |
75 | static int doline, doword, dobyte, dochar; | | 76 | static bool doline, doword, dobyte, dochar, dolongest; |
76 | static int rval = 0; | | 77 | static int rval = 0; |
77 | | | 78 | |
78 | static void cnt(char *); | | 79 | static void cnt(const char *); |
79 | static void print_counts(wc_count_t, wc_count_t, wc_count_t, char *); | | 80 | static void print_counts(wc_count_t, wc_count_t, wc_count_t, wc_count_t, |
| | | 81 | const char *); |
80 | static void usage(void); | | 82 | static void usage(void); |
81 | static size_t do_mb(wchar_t *, const char *, size_t, mbstate_t *, | | 83 | static size_t do_mb(wchar_t *, const char *, size_t, mbstate_t *, |
82 | size_t *, const char *); | | 84 | size_t *, const char *); |
83 | int main(int, char *[]); | | 85 | int main(int, char *[]); |
84 | | | 86 | |
85 | int | | 87 | int |
86 | main(int argc, char *argv[]) | | 88 | main(int argc, char *argv[]) |
87 | { | | 89 | { |
88 | int ch; | | 90 | int ch; |
89 | | | 91 | |
90 | setlocale(LC_ALL, ""); | | 92 | setlocale(LC_ALL, ""); |
91 | | | 93 | |
92 | while ((ch = getopt(argc, argv, "lwcm")) != -1) | | 94 | while ((ch = getopt(argc, argv, "lwcmL")) != -1) |
93 | switch (ch) { | | 95 | switch (ch) { |
94 | case 'l': | | 96 | case 'l': |
95 | doline = 1; | | 97 | doline = true; |
96 | break; | | 98 | break; |
97 | case 'w': | | 99 | case 'w': |
98 | doword = 1; | | 100 | doword = true; |
99 | break; | | 101 | break; |
100 | case 'm': | | 102 | case 'm': |
101 | dochar = 1; | | 103 | dochar = true; |
102 | dobyte = 0; | | 104 | dobyte = 0; |
103 | break; | | 105 | break; |
104 | case 'c': | | 106 | case 'c': |
105 | dochar = 0; | | 107 | dochar = 0; |
106 | dobyte = 1; | | 108 | dobyte = true; |
| | | 109 | break; |
| | | 110 | case 'L': |
| | | 111 | dolongest = true; |
107 | break; | | 112 | break; |
108 | case '?': | | 113 | case '?': |
109 | default: | | 114 | default: |
110 | usage(); | | 115 | usage(); |
111 | } | | 116 | } |
112 | argv += optind; | | 117 | argv += optind; |
113 | argc -= optind; | | 118 | argc -= optind; |
114 | | | 119 | |
115 | /* Wc's flags are on by default. */ | | 120 | /* Wc's flags are on by default. */ |
116 | if (doline + doword + dobyte + dochar == 0) | | 121 | if (!(doline || doword || dobyte || dochar || dolongest)) |
117 | doline = doword = dobyte = 1; | | 122 | doline = doword = dobyte = true; |
118 | | | 123 | |
119 | if (!*argv) { | | 124 | if (*argv == NULL) { |
120 | cnt(NULL); | | 125 | cnt(NULL); |
121 | } else { | | 126 | } else { |
122 | int dototal = (argc > 1); | | 127 | bool dototal = (argc > 1); |
123 | | | 128 | |
124 | do { | | 129 | do { |
125 | cnt(*argv); | | 130 | cnt(*argv); |
126 | } while(*++argv); | | 131 | } while(*++argv); |
127 | | | 132 | |
128 | if (dototal) | | 133 | if (dototal) { |
129 | print_counts(tlinect, twordct, tcharct, "total"); | | 134 | print_counts(tlinect, twordct, tcharct, tlongest, |
| | | 135 | "total"); |
| | | 136 | } |
130 | } | | 137 | } |
131 | | | 138 | |
132 | exit(rval); | | 139 | exit(rval); |
133 | } | | 140 | } |
134 | | | 141 | |
135 | static size_t | | 142 | static size_t |
136 | do_mb(wchar_t *wc, const char *p, size_t mblen, mbstate_t *st, | | 143 | do_mb(wchar_t *wc, const char *p, size_t len, mbstate_t *st, |
137 | size_t *cnt, const char *file) | | 144 | size_t *retcnt, const char *file) |
138 | { | | 145 | { |
139 | size_t r; | | 146 | size_t r; |
140 | size_t c = 0; | | 147 | size_t c = 0; |
141 | | | 148 | |
142 | do { | | 149 | do { |
143 | r = mbrtowc(wc, p, mblen, st); | | 150 | r = mbrtowc(wc, p, len, st); |
144 | if (r == (size_t)-1) { | | 151 | if (r == (size_t)-1) { |
145 | warnx("%s: invalid byte sequence", file); | | 152 | warnx("%s: invalid byte sequence", file); |
146 | rval = 1; | | 153 | rval = 1; |
147 | | | 154 | |
148 | /* XXX skip 1 byte */ | | 155 | /* XXX skip 1 byte */ |
149 | mblen--; | | 156 | len--; |
150 | p++; | | 157 | p++; |
151 | memset(st, 0, sizeof(*st)); | | 158 | memset(st, 0, sizeof(*st)); |
152 | continue; | | 159 | continue; |
153 | } else if (r == (size_t)-2) | | 160 | } else if (r == (size_t)-2) |
154 | break; | | 161 | break; |
155 | else if (r == 0) | | 162 | else if (r == 0) |
156 | r = 1; | | 163 | r = 1; |
157 | c++; | | 164 | c++; |
158 | if (wc) | | 165 | if (wc) |
159 | wc++; | | 166 | wc++; |
160 | mblen -= r; | | 167 | len -= r; |
161 | p += r; | | 168 | p += r; |
162 | } while (mblen > 0); | | 169 | } while (len > 0); |
163 | | | 170 | |
164 | *cnt = c; | | 171 | *retcnt = c; |
165 | | | 172 | |
166 | return (r); | | 173 | return (r); |
167 | } | | 174 | } |
168 | | | 175 | |
169 | static void | | 176 | static void |
170 | cnt(char *file) | | 177 | cnt(const char *file) |
171 | { | | 178 | { |
172 | u_char buf[MAXBSIZE]; | | 179 | u_char buf[MAXBSIZE]; |
173 | wchar_t wbuf[MAXBSIZE]; | | 180 | wchar_t wbuf[MAXBSIZE]; |
174 | struct stat sb; | | 181 | struct stat sb; |
175 | wc_count_t charct, linect, wordct; | | 182 | wc_count_t charct, linect, wordct, longest; |
176 | mbstate_t st; | | 183 | mbstate_t st; |
177 | u_char *C; | | 184 | u_char *C; |
178 | wchar_t *WC; | | 185 | wchar_t *WC; |
179 | char *name; /* filename or <stdin> */ | | 186 | const char *name; /* filename or <stdin> */ |
180 | size_t r = 0; | | 187 | size_t r = 0; |
181 | int fd, gotsp, len = 0; | | 188 | int fd, len = 0; |
182 | | | 189 | |
183 | linect = wordct = charct = 0; | | 190 | linect = wordct = charct = longest = 0; |
184 | if (file) { | | 191 | if (file != NULL) { |
185 | if ((fd = open(file, O_RDONLY, 0)) < 0) { | | 192 | if ((fd = open(file, O_RDONLY, 0)) < 0) { |
186 | warn("%s", file); | | 193 | warn("%s", file); |
187 | rval = 1; | | 194 | rval = 1; |
188 | return; | | 195 | return; |
189 | } | | 196 | } |
190 | name = file; | | 197 | name = file; |
191 | } else { | | 198 | } else { |
192 | fd = STDIN_FILENO; | | 199 | fd = STDIN_FILENO; |
193 | name = "<stdin>"; | | 200 | name = "<stdin>"; |
194 | } | | 201 | } |
195 | | | 202 | |
196 | if (dochar || doword) | | 203 | if (dochar || doword || dolongest) |
197 | memset(&st, 0, sizeof(st)); | | 204 | (void)memset(&st, 0, sizeof(st)); |
198 | | | 205 | |
199 | if (!doword) { | | 206 | if (!(doword || dolongest)) { |
200 | /* | | 207 | /* |
201 | * line counting is split out because it's a lot | | 208 | * line counting is split out because it's a lot |
202 | * faster to get lines than to get words, since | | 209 | * faster to get lines than to get words, since |
203 | * the word count requires some logic. | | 210 | * the word count requires some logic. |
204 | */ | | 211 | */ |
205 | if (doline || dochar) { | | 212 | if (doline || dochar) { |
206 | while ((len = read(fd, buf, MAXBSIZE)) > 0) { | | 213 | while ((len = read(fd, buf, MAXBSIZE)) > 0) { |
207 | if (dochar) { | | 214 | if (dochar) { |
208 | size_t wlen; | | 215 | size_t wlen; |
209 | | | 216 | |
210 | r = do_mb(0, (char *)buf, (size_t)len, | | 217 | r = do_mb(0, (char *)buf, (size_t)len, |
211 | &st, &wlen, name); | | 218 | &st, &wlen, name); |
212 | charct += wlen; | | 219 | charct += wlen; |
213 | } else if (dobyte) | | 220 | } else if (dobyte) |
214 | charct += len; | | 221 | charct += len; |
215 | if (doline) | | 222 | if (doline) { |
216 | for (C = buf; len--; ++C) | | 223 | for (C = buf; len--; ++C) { |
217 | if (*C == '\n') | | 224 | if (*C == '\n') |
218 | ++linect; | | 225 | ++linect; |
| | | 226 | } |
| | | 227 | } |
219 | } | | 228 | } |
220 | } | | 229 | } |
221 | | | 230 | |
222 | /* | | 231 | /* |
223 | * if all we need is the number of characters and | | 232 | * if all we need is the number of characters and |
224 | * it's a directory or a regular or linked file, just | | 233 | * it's a directory or a regular or linked file, just |
225 | * stat the puppy. We avoid testing for it not being | | 234 | * stat the puppy. We avoid testing for it not being |
226 | * a special device in case someone adds a new type | | 235 | * a special device in case someone adds a new type |
227 | * of inode. | | 236 | * of inode. |
228 | */ | | 237 | */ |
229 | else if (dobyte) { | | 238 | else if (dobyte) { |
230 | if (fstat(fd, &sb)) { | | 239 | if (fstat(fd, &sb)) { |
231 | warn("%s", name); | | 240 | warn("%s", name); |
| @@ -234,96 +243,113 @@ cnt(char *file) | | | @@ -234,96 +243,113 @@ cnt(char *file) |
234 | if (S_ISREG(sb.st_mode) || | | 243 | if (S_ISREG(sb.st_mode) || |
235 | S_ISLNK(sb.st_mode) || | | 244 | S_ISLNK(sb.st_mode) || |
236 | S_ISDIR(sb.st_mode)) { | | 245 | S_ISDIR(sb.st_mode)) { |
237 | charct = sb.st_size; | | 246 | charct = sb.st_size; |
238 | } else { | | 247 | } else { |
239 | while ((len = | | 248 | while ((len = |
240 | read(fd, buf, MAXBSIZE)) > 0) | | 249 | read(fd, buf, MAXBSIZE)) > 0) |
241 | charct += len; | | 250 | charct += len; |
242 | } | | 251 | } |
243 | } | | 252 | } |
244 | } | | 253 | } |
245 | } else { | | 254 | } else { |
246 | /* do it the hard way... */ | | 255 | /* do it the hard way... */ |
247 | gotsp = 1; | | 256 | wc_count_t linelen; |
| | | 257 | bool gotsp; |
| | | 258 | |
| | | 259 | linelen = 0; |
| | | 260 | gotsp = true; |
248 | while ((len = read(fd, buf, MAXBSIZE)) > 0) { | | 261 | while ((len = read(fd, buf, MAXBSIZE)) > 0) { |
249 | size_t wlen; | | 262 | size_t wlen; |
250 | | | 263 | |
251 | r = do_mb(wbuf, (char *)buf, (size_t)len, &st, &wlen, | | 264 | r = do_mb(wbuf, (char *)buf, (size_t)len, &st, &wlen, |
252 | name); | | 265 | name); |
253 | if (dochar) { | | 266 | if (dochar) { |
254 | charct += wlen; | | 267 | charct += wlen; |
255 | } else if (dobyte) | | 268 | } else if (dobyte) { |
256 | charct += len; | | 269 | charct += len; |
| | | 270 | } |
257 | for (WC = wbuf; wlen--; ++WC) { | | 271 | for (WC = wbuf; wlen--; ++WC) { |
258 | if (iswspace(*WC)) { | | 272 | if (iswspace(*WC)) { |
259 | gotsp = 1; | | 273 | gotsp = true; |
260 | if (*WC == L'\n') { | | 274 | if (*WC == L'\n') { |
261 | ++linect; | | 275 | ++linect; |
| | | 276 | if (linelen > longest) |
| | | 277 | longest = linelen; |
| | | 278 | linelen = 0; |
| | | 279 | } else { |
| | | 280 | linelen++; |
262 | } | | 281 | } |
263 | } else { | | 282 | } else { |
264 | /* | | 283 | /* |
265 | * This line implements the POSIX | | 284 | * This line implements the POSIX |
266 | * spec, i.e. a word is a "maximal | | 285 | * spec, i.e. a word is a "maximal |
267 | * string of characters delimited by | | 286 | * string of characters delimited by |
268 | * whitespace." Notice nothing was | | 287 | * whitespace." Notice nothing was |
269 | * said about a character being | | 288 | * said about a character being |
270 | * printing or non-printing. | | 289 | * printing or non-printing. |
271 | */ | | 290 | */ |
272 | if (gotsp) { | | 291 | if (gotsp) { |
273 | gotsp = 0; | | 292 | gotsp = false; |
274 | ++wordct; | | 293 | ++wordct; |
275 | } | | 294 | } |
| | | 295 | |
| | | 296 | linelen++; |
276 | } | | 297 | } |
277 | } | | 298 | } |
278 | } | | 299 | } |
279 | } | | 300 | } |
280 | | | 301 | |
281 | if (len == -1) { | | 302 | if (len == -1) { |
282 | warn("%s", name); | | 303 | warn("%s", name); |
283 | rval = 1; | | 304 | rval = 1; |
284 | } | | 305 | } |
285 | if (dochar && r == (size_t)-2) { | | 306 | if (dochar && r == (size_t)-2) { |
286 | warnx("%s: incomplete multibyte character", name); | | 307 | warnx("%s: incomplete multibyte character", name); |
287 | rval = 1; | | 308 | rval = 1; |
288 | } | | 309 | } |
289 | | | 310 | |
290 | print_counts(linect, wordct, charct, file); | | 311 | print_counts(linect, wordct, charct, longest, file); |
291 | | | 312 | |
292 | /* | | 313 | /* |
293 | * don't bother checkint doline, doword, or dobyte --- speeds | | 314 | * don't bother checkint doline, doword, or dobyte --- speeds |
294 | * up the common case | | 315 | * up the common case |
295 | */ | | 316 | */ |
296 | tlinect += linect; | | 317 | tlinect += linect; |
297 | twordct += wordct; | | 318 | twordct += wordct; |
298 | tcharct += charct; | | 319 | tcharct += charct; |
| | | 320 | if (dolongest && longest > tlongest) |
| | | 321 | tlongest = longest; |
299 | | | 322 | |
300 | if (close(fd)) { | | 323 | if (close(fd)) { |
301 | warn("%s", name); | | 324 | warn("%s", name); |
302 | rval = 1; | | 325 | rval = 1; |
303 | } | | 326 | } |
304 | } | | 327 | } |
305 | | | 328 | |
306 | static void | | 329 | static void |
307 | print_counts(wc_count_t lines, wc_count_t words, wc_count_t chars, char *name) | | 330 | print_counts(wc_count_t lines, wc_count_t words, wc_count_t chars, |
| | | 331 | wc_count_t longest, const char *name) |
308 | { | | 332 | { |
309 | | | 333 | |
310 | if (doline) | | 334 | if (doline) |
311 | printf(WCFMT, (WCCAST)lines); | | 335 | (void)printf(WCFMT, (WCCAST)lines); |
312 | if (doword) | | 336 | if (doword) |
313 | printf(WCFMT, (WCCAST)words); | | 337 | (void)printf(WCFMT, (WCCAST)words); |
314 | if (dobyte || dochar) | | 338 | if (dobyte || dochar) |
315 | printf(WCFMT, (WCCAST)chars); | | 339 | (void)printf(WCFMT, (WCCAST)chars); |
| | | 340 | if (dolongest) |
| | | 341 | (void)printf(WCFMT, (WCCAST)longest); |
316 | | | 342 | |
317 | if (name) | | 343 | if (name != NULL) |
318 | printf(" %s\n", name); | | 344 | (void)printf(" %s\n", name); |
319 | else | | 345 | else |
320 | printf("\n"); | | 346 | (void)putchar('\n'); |
321 | } | | 347 | } |
322 | | | 348 | |
323 | static void | | 349 | static void |
324 | usage(void) | | 350 | usage(void) |
325 | { | | 351 | { |
326 | | | 352 | |
327 | (void)fprintf(stderr, "usage: wc [-c | -m] [-lw] [file ...]\n"); | | 353 | (void)fprintf(stderr, "usage: wc [-c | -m] [-Llw] [file ...]\n"); |
328 | exit(1); | | 354 | exit(1); |
329 | } | | 355 | } |