| @@ -1,257 +1,262 @@ | | | @@ -1,257 +1,262 @@ |
1 | /* $NetBSD: uniq.c,v 1.18 2012/08/26 14:14:16 wiz Exp $ */ | | 1 | /* $NetBSD: uniq.c,v 1.19 2016/10/14 19:43:59 abhinav Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Copyright (c) 1989, 1993 | | 4 | * Copyright (c) 1989, 1993 |
5 | * The Regents of the University of California. All rights reserved. | | 5 | * The Regents of the University of California. All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to Berkeley by | | 7 | * This code is derived from software contributed to Berkeley by |
8 | * Case Larsen. | | 8 | * Case Larsen. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright | | 15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the | | 16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. | | 17 | * documentation and/or other materials provided with the distribution. |
18 | * 3. Neither the name of the University nor the names of its contributors | | 18 | * 3. Neither the name of the University nor the names of its contributors |
19 | * may be used to endorse or promote products derived from this software | | 19 | * may be used to endorse or promote products derived from this software |
20 | * without specific prior written permission. | | 20 | * without specific prior written permission. |
21 | * | | 21 | * |
22 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | | 22 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
23 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 23 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
24 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 24 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
25 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 25 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
27 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 27 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
28 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 28 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
29 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 29 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
31 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 31 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
32 | * SUCH DAMAGE. | | 32 | * SUCH DAMAGE. |
33 | */ | | 33 | */ |
34 | | | 34 | |
35 | #include <sys/cdefs.h> | | 35 | #include <sys/cdefs.h> |
36 | #ifndef lint | | 36 | #ifndef lint |
37 | __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ | | 37 | __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ |
38 | The Regents of the University of California. All rights reserved."); | | 38 | The Regents of the University of California. All rights reserved."); |
39 | #endif /* not lint */ | | 39 | #endif /* not lint */ |
40 | | | 40 | |
41 | #ifndef lint | | 41 | #ifndef lint |
42 | #if 0 | | 42 | #if 0 |
43 | static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95"; | | 43 | static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95"; |
44 | #endif | | 44 | #endif |
45 | __RCSID("$NetBSD: uniq.c,v 1.18 2012/08/26 14:14:16 wiz Exp $"); | | 45 | __RCSID("$NetBSD: uniq.c,v 1.19 2016/10/14 19:43:59 abhinav Exp $"); |
46 | #endif /* not lint */ | | 46 | #endif /* not lint */ |
47 | | | 47 | |
48 | #include <err.h> | | 48 | #include <err.h> |
49 | #include <errno.h> | | 49 | #include <errno.h> |
50 | #include <stdio.h> | | 50 | #include <stdio.h> |
51 | #include <ctype.h> | | 51 | #include <ctype.h> |
52 | #include <stdlib.h> | | 52 | #include <stdlib.h> |
53 | #include <string.h> | | 53 | #include <string.h> |
54 | #include <unistd.h> | | 54 | #include <unistd.h> |
55 | | | 55 | |
56 | static int cflag, dflag, uflag; | | 56 | static int cflag, dflag, uflag; |
57 | static int numchars, numfields, repeats; | | 57 | static int numchars, numfields, repeats; |
58 | | | 58 | |
59 | static FILE *file(const char *, const char *); | | 59 | static FILE *file(const char *, const char *); |
60 | static void show(FILE *, const char *); | | 60 | static void show(FILE *, const char *); |
61 | static const char *skip(const char *); | | 61 | static const char *skip(const char *, size_t *); |
62 | static void obsolete(char *[]); | | 62 | static void obsolete(char *[]); |
63 | static void usage(void) __dead; | | 63 | static void usage(void) __dead; |
64 | | | 64 | |
65 | int | | 65 | int |
66 | main (int argc, char *argv[]) | | 66 | main (int argc, char *argv[]) |
67 | { | | 67 | { |
68 | const char *t1, *t2; | | 68 | const char *t1, *t2; |
69 | FILE *ifp, *ofp; | | 69 | FILE *ifp, *ofp; |
70 | int ch; | | 70 | int ch; |
71 | char *prevline, *thisline, *p; | | 71 | char *prevline, *thisline, *p; |
72 | size_t prevlinesize, thislinesize, psize; | | 72 | size_t prevlinesize, thislinesize, psize; |
| | | 73 | size_t prevlinecompsize, thislinecompsize; |
73 | | | 74 | |
74 | setprogname(argv[0]); | | 75 | setprogname(argv[0]); |
75 | ifp = ofp = NULL; | | 76 | ifp = ofp = NULL; |
76 | obsolete(argv); | | 77 | obsolete(argv); |
77 | while ((ch = getopt(argc, argv, "-cdf:s:u")) != -1) | | 78 | while ((ch = getopt(argc, argv, "-cdf:s:u")) != -1) |
78 | switch (ch) { | | 79 | switch (ch) { |
79 | case '-': | | 80 | case '-': |
80 | --optind; | | 81 | --optind; |
81 | goto done; | | 82 | goto done; |
82 | case 'c': | | 83 | case 'c': |
83 | cflag = 1; | | 84 | cflag = 1; |
84 | break; | | 85 | break; |
85 | case 'd': | | 86 | case 'd': |
86 | dflag = 1; | | 87 | dflag = 1; |
87 | break; | | 88 | break; |
88 | case 'f': | | 89 | case 'f': |
89 | numfields = strtol(optarg, &p, 10); | | 90 | numfields = strtol(optarg, &p, 10); |
90 | if (numfields < 0 || *p) | | 91 | if (numfields < 0 || *p) |
91 | errx(1, "illegal field skip value: %s", optarg); | | 92 | errx(1, "illegal field skip value: %s", optarg); |
92 | break; | | 93 | break; |
93 | case 's': | | 94 | case 's': |
94 | numchars = strtol(optarg, &p, 10); | | 95 | numchars = strtol(optarg, &p, 10); |
95 | if (numchars < 0 || *p) | | 96 | if (numchars < 0 || *p) |
96 | errx(1, "illegal character skip value: %s", | | 97 | errx(1, "illegal character skip value: %s", |
97 | optarg); | | 98 | optarg); |
98 | break; | | 99 | break; |
99 | case 'u': | | 100 | case 'u': |
100 | uflag = 1; | | 101 | uflag = 1; |
101 | break; | | 102 | break; |
102 | case '?': | | 103 | case '?': |
103 | default: | | 104 | default: |
104 | usage(); | | 105 | usage(); |
105 | } | | 106 | } |
106 | | | 107 | |
107 | done: argc -= optind; | | 108 | done: argc -= optind; |
108 | argv +=optind; | | 109 | argv +=optind; |
109 | | | 110 | |
110 | switch(argc) { | | 111 | switch(argc) { |
111 | case 0: | | 112 | case 0: |
112 | ifp = stdin; | | 113 | ifp = stdin; |
113 | ofp = stdout; | | 114 | ofp = stdout; |
114 | break; | | 115 | break; |
115 | case 1: | | 116 | case 1: |
116 | ifp = file(argv[0], "r"); | | 117 | ifp = file(argv[0], "r"); |
117 | ofp = stdout; | | 118 | ofp = stdout; |
118 | break; | | 119 | break; |
119 | case 2: | | 120 | case 2: |
120 | ifp = file(argv[0], "r"); | | 121 | ifp = file(argv[0], "r"); |
121 | ofp = file(argv[1], "w"); | | 122 | ofp = file(argv[1], "w"); |
122 | break; | | 123 | break; |
123 | default: | | 124 | default: |
124 | usage(); | | 125 | usage(); |
125 | } | | 126 | } |
126 | | | 127 | |
127 | if ((p = fgetln(ifp, &psize)) == NULL) | | 128 | if ((p = fgetln(ifp, &psize)) == NULL) |
128 | return 0; | | 129 | return 0; |
129 | prevlinesize = psize; | | 130 | prevlinesize = psize; |
130 | if ((prevline = malloc(prevlinesize + 1)) == NULL) | | 131 | if ((prevline = malloc(prevlinesize + 1)) == NULL) |
131 | err(1, "malloc"); | | 132 | err(1, "malloc"); |
132 | (void)memcpy(prevline, p, prevlinesize); | | 133 | (void)memcpy(prevline, p, prevlinesize); |
133 | prevline[prevlinesize] = '\0'; | | 134 | prevline[prevlinesize] = '\0'; |
134 | | | 135 | |
135 | thislinesize = psize; | | 136 | thislinesize = psize; |
136 | if ((thisline = malloc(thislinesize + 1)) == NULL) | | 137 | if ((thisline = malloc(thislinesize + 1)) == NULL) |
137 | err(1, "malloc"); | | 138 | err(1, "malloc"); |
138 | | | 139 | |
139 | while ((p = fgetln(ifp, &psize)) != NULL) { | | 140 | while ((p = fgetln(ifp, &psize)) != NULL) { |
140 | if (psize > thislinesize) { | | 141 | if (psize > thislinesize) { |
141 | if ((thisline = realloc(thisline, psize + 1)) == NULL) | | 142 | if ((thisline = realloc(thisline, psize + 1)) == NULL) |
142 | err(1, "realloc"); | | 143 | err(1, "realloc"); |
143 | thislinesize = psize; | | 144 | thislinesize = psize; |
144 | } | | 145 | } |
145 | (void)memcpy(thisline, p, psize); | | 146 | (void)memcpy(thisline, p, psize); |
146 | thisline[psize] = '\0'; | | 147 | thisline[psize] = '\0'; |
| | | 148 | thislinecompsize = thislinesize; |
| | | 149 | prevlinecompsize = prevlinesize; |
147 | | | 150 | |
148 | /* If requested get the chosen fields + character offsets. */ | | 151 | /* If requested get the chosen fields + character offsets. */ |
149 | if (numfields || numchars) { | | 152 | if (numfields || numchars) { |
150 | t1 = skip(thisline); | | 153 | t1 = skip(thisline, &thislinecompsize); |
151 | t2 = skip(prevline); | | 154 | t2 = skip(prevline, &prevlinecompsize); |
152 | } else { | | 155 | } else { |
153 | t1 = thisline; | | 156 | t1 = thisline; |
154 | t2 = prevline; | | 157 | t2 = prevline; |
155 | } | | 158 | } |
156 | | | 159 | |
157 | /* If different, print; set previous to new value. */ | | 160 | /* If different, print; set previous to new value. */ |
158 | if (strcmp(t1, t2)) { | | 161 | if (thislinecompsize != prevlinecompsize || strcmp(t1, t2)) { |
159 | char *t; | | 162 | char *t; |
160 | size_t ts; | | 163 | size_t ts; |
161 | | | 164 | |
162 | show(ofp, prevline); | | 165 | show(ofp, prevline); |
163 | t = prevline; | | 166 | t = prevline; |
164 | prevline = thisline; | | 167 | prevline = thisline; |
165 | thisline = t; | | 168 | thisline = t; |
166 | ts = prevlinesize; | | 169 | ts = prevlinesize; |
167 | prevlinesize = thislinesize; | | 170 | prevlinesize = thislinesize; |
168 | thislinesize = ts; | | 171 | thislinesize = ts; |
169 | repeats = 0; | | 172 | repeats = 0; |
170 | } else | | 173 | } else |
171 | ++repeats; | | 174 | ++repeats; |
172 | } | | 175 | } |
173 | show(ofp, prevline); | | 176 | show(ofp, prevline); |
174 | free(prevline); | | 177 | free(prevline); |
175 | free(thisline); | | 178 | free(thisline); |
176 | return 0; | | 179 | return 0; |
177 | } | | 180 | } |
178 | | | 181 | |
179 | /* | | 182 | /* |
180 | * show -- | | 183 | * show -- |
181 | * Output a line depending on the flags and number of repetitions | | 184 | * Output a line depending on the flags and number of repetitions |
182 | * of the line. | | 185 | * of the line. |
183 | */ | | 186 | */ |
184 | static void | | 187 | static void |
185 | show(FILE *ofp, const char *str) | | 188 | show(FILE *ofp, const char *str) |
186 | { | | 189 | { |
187 | | | 190 | |
188 | if ((dflag && repeats == 0) || (uflag && repeats > 0)) | | 191 | if ((dflag && repeats == 0) || (uflag && repeats > 0)) |
189 | return; | | 192 | return; |
190 | if (cflag) { | | 193 | if (cflag) { |
191 | (void)fprintf(ofp, "%4d %s", repeats + 1, str); | | 194 | (void)fprintf(ofp, "%4d %s", repeats + 1, str); |
192 | } else { | | 195 | } else { |
193 | (void)fprintf(ofp, "%s", str); | | 196 | (void)fprintf(ofp, "%s", str); |
194 | } | | 197 | } |
195 | } | | 198 | } |
196 | | | 199 | |
197 | static const char * | | 200 | static const char * |
198 | skip(const char *str) | | 201 | skip(const char *str, size_t *linesize) |
199 | { | | 202 | { |
200 | int infield, nchars, nfields; | | 203 | int infield, nchars, nfields; |
| | | 204 | size_t ls = *linesize; |
201 | | | 205 | |
202 | for (nfields = numfields, infield = 0; nfields && *str; ++str) | | 206 | for (nfields = numfields, infield = 0; nfields && *str; ++str, --ls) |
203 | if (isspace((unsigned char)*str)) { | | 207 | if (isspace((unsigned char)*str)) { |
204 | if (infield) { | | 208 | if (infield) { |
205 | infield = 0; | | 209 | infield = 0; |
206 | --nfields; | | 210 | --nfields; |
207 | } | | 211 | } |
208 | } else if (!infield) | | 212 | } else if (!infield) |
209 | infield = 1; | | 213 | infield = 1; |
210 | for (nchars = numchars; nchars-- && *str; ++str) | | 214 | for (nchars = numchars; nchars-- && *str; ++str, --ls) |
211 | continue; | | 215 | continue; |
| | | 216 | *linesize = ls; |
212 | return str; | | 217 | return str; |
213 | } | | 218 | } |
214 | | | 219 | |
215 | static FILE * | | 220 | static FILE * |
216 | file(const char *name, const char *mode) | | 221 | file(const char *name, const char *mode) |
217 | { | | 222 | { |
218 | FILE *fp; | | 223 | FILE *fp; |
219 | | | 224 | |
220 | if ((fp = fopen(name, mode)) == NULL) | | 225 | if ((fp = fopen(name, mode)) == NULL) |
221 | err(1, "%s", name); | | 226 | err(1, "%s", name); |
222 | return(fp); | | 227 | return(fp); |
223 | } | | 228 | } |
224 | | | 229 | |
225 | static void | | 230 | static void |
226 | obsolete(char *argv[]) | | 231 | obsolete(char *argv[]) |
227 | { | | 232 | { |
228 | char *ap, *p, *start; | | 233 | char *ap, *p, *start; |
229 | | | 234 | |
230 | while ((ap = *++argv) != NULL) { | | 235 | while ((ap = *++argv) != NULL) { |
231 | /* Return if "--" or not an option of any form. */ | | 236 | /* Return if "--" or not an option of any form. */ |
232 | if (ap[0] != '-') { | | 237 | if (ap[0] != '-') { |
233 | if (ap[0] != '+') | | 238 | if (ap[0] != '+') |
234 | return; | | 239 | return; |
235 | } else if (ap[1] == '-') | | 240 | } else if (ap[1] == '-') |
236 | return; | | 241 | return; |
237 | if (!isdigit((unsigned char)ap[1])) | | 242 | if (!isdigit((unsigned char)ap[1])) |
238 | continue; | | 243 | continue; |
239 | /* | | 244 | /* |
240 | * Digit signifies an old-style option. Malloc space for dash, | | 245 | * Digit signifies an old-style option. Malloc space for dash, |
241 | * new option and argument. | | 246 | * new option and argument. |
242 | */ | | 247 | */ |
243 | (void)asprintf(&p, "-%c%s", ap[0] == '+' ? 's' : 'f', ap + 1); | | 248 | (void)asprintf(&p, "-%c%s", ap[0] == '+' ? 's' : 'f', ap + 1); |
244 | if (!p) | | 249 | if (!p) |
245 | err(1, "malloc"); | | 250 | err(1, "malloc"); |
246 | start = p; | | 251 | start = p; |
247 | *argv = start; | | 252 | *argv = start; |
248 | } | | 253 | } |
249 | } | | 254 | } |
250 | | | 255 | |
251 | static void | | 256 | static void |
252 | usage(void) | | 257 | usage(void) |
253 | { | | 258 | { |
254 | (void)fprintf(stderr, "usage: %s [-cdu] [-f fields] [-s chars] " | | 259 | (void)fprintf(stderr, "usage: %s [-cdu] [-f fields] [-s chars] " |
255 | "[input_file [output_file]]\n", getprogname()); | | 260 | "[input_file [output_file]]\n", getprogname()); |
256 | exit(1); | | 261 | exit(1); |
257 | } | | 262 | } |