Fri Oct 14 19:43:59 2016 UTC ()
A small optimization: since we already know the length of the lines, check
if the lengths are equal before calling strcmp(3). Most of the time, the call
to strcmp(3) can be avoided when the lines are not of the same length.

Thanks to Christos for the reviews


(abhinav)
diff -r1.18 -r1.19 src/usr.bin/uniq/uniq.c

cvs diff -r1.18 -r1.19 src/usr.bin/uniq/uniq.c (switch to unified diff)

--- src/usr.bin/uniq/uniq.c 2012/08/26 14:14:16 1.18
+++ src/usr.bin/uniq/uniq.c 2016/10/14 19:43:59 1.19
@@ -1,257 +1,262 @@ @@ -1,257 +1,262 @@
1/* $NetBSD: uniq.c,v 1.18 2012/08/26 14:14:16 wiz Exp $ */ 1/* $NetBSD: uniq.c,v 1.19 2016/10/14 19:43:59 abhinav Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 1989, 1993 4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved. 5 * The Regents of the University of California. All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to Berkeley by 7 * This code is derived from software contributed to Berkeley by
8 * Case Larsen. 8 * Case Larsen.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors 18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software 19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission. 20 * without specific prior written permission.
21 * 21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE. 32 * SUCH DAMAGE.
33 */ 33 */
34 34
35#include <sys/cdefs.h> 35#include <sys/cdefs.h>
36#ifndef lint 36#ifndef lint
37__COPYRIGHT("@(#) Copyright (c) 1989, 1993\ 37__COPYRIGHT("@(#) Copyright (c) 1989, 1993\
38 The Regents of the University of California. All rights reserved."); 38 The Regents of the University of California. All rights reserved.");
39#endif /* not lint */ 39#endif /* not lint */
40 40
41#ifndef lint 41#ifndef lint
42#if 0 42#if 0
43static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95"; 43static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95";
44#endif 44#endif
45__RCSID("$NetBSD: uniq.c,v 1.18 2012/08/26 14:14:16 wiz Exp $"); 45__RCSID("$NetBSD: uniq.c,v 1.19 2016/10/14 19:43:59 abhinav Exp $");
46#endif /* not lint */ 46#endif /* not lint */
47 47
48#include <err.h> 48#include <err.h>
49#include <errno.h> 49#include <errno.h>
50#include <stdio.h> 50#include <stdio.h>
51#include <ctype.h> 51#include <ctype.h>
52#include <stdlib.h> 52#include <stdlib.h>
53#include <string.h> 53#include <string.h>
54#include <unistd.h> 54#include <unistd.h>
55 55
56static int cflag, dflag, uflag; 56static int cflag, dflag, uflag;
57static int numchars, numfields, repeats; 57static int numchars, numfields, repeats;
58 58
59static FILE *file(const char *, const char *); 59static FILE *file(const char *, const char *);
60static void show(FILE *, const char *); 60static void show(FILE *, const char *);
61static const char *skip(const char *); 61static const char *skip(const char *, size_t *);
62static void obsolete(char *[]); 62static void obsolete(char *[]);
63static void usage(void) __dead; 63static void usage(void) __dead;
64 64
65int 65int
66main (int argc, char *argv[]) 66main (int argc, char *argv[])
67{ 67{
68 const char *t1, *t2; 68 const char *t1, *t2;
69 FILE *ifp, *ofp; 69 FILE *ifp, *ofp;
70 int ch; 70 int ch;
71 char *prevline, *thisline, *p; 71 char *prevline, *thisline, *p;
72 size_t prevlinesize, thislinesize, psize; 72 size_t prevlinesize, thislinesize, psize;
 73 size_t prevlinecompsize, thislinecompsize;
73 74
74 setprogname(argv[0]); 75 setprogname(argv[0]);
75 ifp = ofp = NULL; 76 ifp = ofp = NULL;
76 obsolete(argv); 77 obsolete(argv);
77 while ((ch = getopt(argc, argv, "-cdf:s:u")) != -1) 78 while ((ch = getopt(argc, argv, "-cdf:s:u")) != -1)
78 switch (ch) { 79 switch (ch) {
79 case '-': 80 case '-':
80 --optind; 81 --optind;
81 goto done; 82 goto done;
82 case 'c': 83 case 'c':
83 cflag = 1; 84 cflag = 1;
84 break; 85 break;
85 case 'd': 86 case 'd':
86 dflag = 1; 87 dflag = 1;
87 break; 88 break;
88 case 'f': 89 case 'f':
89 numfields = strtol(optarg, &p, 10); 90 numfields = strtol(optarg, &p, 10);
90 if (numfields < 0 || *p) 91 if (numfields < 0 || *p)
91 errx(1, "illegal field skip value: %s", optarg); 92 errx(1, "illegal field skip value: %s", optarg);
92 break; 93 break;
93 case 's': 94 case 's':
94 numchars = strtol(optarg, &p, 10); 95 numchars = strtol(optarg, &p, 10);
95 if (numchars < 0 || *p) 96 if (numchars < 0 || *p)
96 errx(1, "illegal character skip value: %s", 97 errx(1, "illegal character skip value: %s",
97 optarg); 98 optarg);
98 break; 99 break;
99 case 'u': 100 case 'u':
100 uflag = 1; 101 uflag = 1;
101 break; 102 break;
102 case '?': 103 case '?':
103 default: 104 default:
104 usage(); 105 usage();
105 } 106 }
106 107
107done: argc -= optind; 108done: argc -= optind;
108 argv +=optind; 109 argv +=optind;
109 110
110 switch(argc) { 111 switch(argc) {
111 case 0: 112 case 0:
112 ifp = stdin; 113 ifp = stdin;
113 ofp = stdout; 114 ofp = stdout;
114 break; 115 break;
115 case 1: 116 case 1:
116 ifp = file(argv[0], "r"); 117 ifp = file(argv[0], "r");
117 ofp = stdout; 118 ofp = stdout;
118 break; 119 break;
119 case 2: 120 case 2:
120 ifp = file(argv[0], "r"); 121 ifp = file(argv[0], "r");
121 ofp = file(argv[1], "w"); 122 ofp = file(argv[1], "w");
122 break; 123 break;
123 default: 124 default:
124 usage(); 125 usage();
125 } 126 }
126 127
127 if ((p = fgetln(ifp, &psize)) == NULL) 128 if ((p = fgetln(ifp, &psize)) == NULL)
128 return 0; 129 return 0;
129 prevlinesize = psize; 130 prevlinesize = psize;
130 if ((prevline = malloc(prevlinesize + 1)) == NULL) 131 if ((prevline = malloc(prevlinesize + 1)) == NULL)
131 err(1, "malloc"); 132 err(1, "malloc");
132 (void)memcpy(prevline, p, prevlinesize); 133 (void)memcpy(prevline, p, prevlinesize);
133 prevline[prevlinesize] = '\0'; 134 prevline[prevlinesize] = '\0';
134 135
135 thislinesize = psize; 136 thislinesize = psize;
136 if ((thisline = malloc(thislinesize + 1)) == NULL) 137 if ((thisline = malloc(thislinesize + 1)) == NULL)
137 err(1, "malloc"); 138 err(1, "malloc");
138 139
139 while ((p = fgetln(ifp, &psize)) != NULL) { 140 while ((p = fgetln(ifp, &psize)) != NULL) {
140 if (psize > thislinesize) { 141 if (psize > thislinesize) {
141 if ((thisline = realloc(thisline, psize + 1)) == NULL) 142 if ((thisline = realloc(thisline, psize + 1)) == NULL)
142 err(1, "realloc"); 143 err(1, "realloc");
143 thislinesize = psize; 144 thislinesize = psize;
144 } 145 }
145 (void)memcpy(thisline, p, psize); 146 (void)memcpy(thisline, p, psize);
146 thisline[psize] = '\0'; 147 thisline[psize] = '\0';
 148 thislinecompsize = thislinesize;
 149 prevlinecompsize = prevlinesize;
147 150
148 /* If requested get the chosen fields + character offsets. */ 151 /* If requested get the chosen fields + character offsets. */
149 if (numfields || numchars) { 152 if (numfields || numchars) {
150 t1 = skip(thisline); 153 t1 = skip(thisline, &thislinecompsize);
151 t2 = skip(prevline); 154 t2 = skip(prevline, &prevlinecompsize);
152 } else { 155 } else {
153 t1 = thisline; 156 t1 = thisline;
154 t2 = prevline; 157 t2 = prevline;
155 } 158 }
156 159
157 /* If different, print; set previous to new value. */ 160 /* If different, print; set previous to new value. */
158 if (strcmp(t1, t2)) { 161 if (thislinecompsize != prevlinecompsize || strcmp(t1, t2)) {
159 char *t; 162 char *t;
160 size_t ts; 163 size_t ts;
161 164
162 show(ofp, prevline); 165 show(ofp, prevline);
163 t = prevline; 166 t = prevline;
164 prevline = thisline; 167 prevline = thisline;
165 thisline = t; 168 thisline = t;
166 ts = prevlinesize; 169 ts = prevlinesize;
167 prevlinesize = thislinesize; 170 prevlinesize = thislinesize;
168 thislinesize = ts; 171 thislinesize = ts;
169 repeats = 0; 172 repeats = 0;
170 } else 173 } else
171 ++repeats; 174 ++repeats;
172 } 175 }
173 show(ofp, prevline); 176 show(ofp, prevline);
174 free(prevline); 177 free(prevline);
175 free(thisline); 178 free(thisline);
176 return 0; 179 return 0;
177} 180}
178 181
179/* 182/*
180 * show -- 183 * show --
181 * Output a line depending on the flags and number of repetitions 184 * Output a line depending on the flags and number of repetitions
182 * of the line. 185 * of the line.
183 */ 186 */
184static void 187static void
185show(FILE *ofp, const char *str) 188show(FILE *ofp, const char *str)
186{ 189{
187 190
188 if ((dflag && repeats == 0) || (uflag && repeats > 0)) 191 if ((dflag && repeats == 0) || (uflag && repeats > 0))
189 return; 192 return;
190 if (cflag) { 193 if (cflag) {
191 (void)fprintf(ofp, "%4d %s", repeats + 1, str); 194 (void)fprintf(ofp, "%4d %s", repeats + 1, str);
192 } else { 195 } else {
193 (void)fprintf(ofp, "%s", str); 196 (void)fprintf(ofp, "%s", str);
194 } 197 }
195} 198}
196 199
197static const char * 200static const char *
198skip(const char *str) 201skip(const char *str, size_t *linesize)
199{ 202{
200 int infield, nchars, nfields; 203 int infield, nchars, nfields;
 204 size_t ls = *linesize;
201 205
202 for (nfields = numfields, infield = 0; nfields && *str; ++str) 206 for (nfields = numfields, infield = 0; nfields && *str; ++str, --ls)
203 if (isspace((unsigned char)*str)) { 207 if (isspace((unsigned char)*str)) {
204 if (infield) { 208 if (infield) {
205 infield = 0; 209 infield = 0;
206 --nfields; 210 --nfields;
207 } 211 }
208 } else if (!infield) 212 } else if (!infield)
209 infield = 1; 213 infield = 1;
210 for (nchars = numchars; nchars-- && *str; ++str) 214 for (nchars = numchars; nchars-- && *str; ++str, --ls)
211 continue; 215 continue;
 216 *linesize = ls;
212 return str; 217 return str;
213} 218}
214 219
215static FILE * 220static FILE *
216file(const char *name, const char *mode) 221file(const char *name, const char *mode)
217{ 222{
218 FILE *fp; 223 FILE *fp;
219 224
220 if ((fp = fopen(name, mode)) == NULL) 225 if ((fp = fopen(name, mode)) == NULL)
221 err(1, "%s", name); 226 err(1, "%s", name);
222 return(fp); 227 return(fp);
223} 228}
224 229
225static void 230static void
226obsolete(char *argv[]) 231obsolete(char *argv[])
227{ 232{
228 char *ap, *p, *start; 233 char *ap, *p, *start;
229 234
230 while ((ap = *++argv) != NULL) { 235 while ((ap = *++argv) != NULL) {
231 /* Return if "--" or not an option of any form. */ 236 /* Return if "--" or not an option of any form. */
232 if (ap[0] != '-') { 237 if (ap[0] != '-') {
233 if (ap[0] != '+') 238 if (ap[0] != '+')
234 return; 239 return;
235 } else if (ap[1] == '-') 240 } else if (ap[1] == '-')
236 return; 241 return;
237 if (!isdigit((unsigned char)ap[1])) 242 if (!isdigit((unsigned char)ap[1]))
238 continue; 243 continue;
239 /* 244 /*
240 * Digit signifies an old-style option. Malloc space for dash, 245 * Digit signifies an old-style option. Malloc space for dash,
241 * new option and argument. 246 * new option and argument.
242 */ 247 */
243 (void)asprintf(&p, "-%c%s", ap[0] == '+' ? 's' : 'f', ap + 1); 248 (void)asprintf(&p, "-%c%s", ap[0] == '+' ? 's' : 'f', ap + 1);
244 if (!p) 249 if (!p)
245 err(1, "malloc"); 250 err(1, "malloc");
246 start = p; 251 start = p;
247 *argv = start; 252 *argv = start;
248 } 253 }
249} 254}
250 255
251static void 256static void
252usage(void) 257usage(void)
253{ 258{
254 (void)fprintf(stderr, "usage: %s [-cdu] [-f fields] [-s chars] " 259 (void)fprintf(stderr, "usage: %s [-cdu] [-f fields] [-s chars] "
255 "[input_file [output_file]]\n", getprogname()); 260 "[input_file [output_file]]\n", getprogname());
256 exit(1); 261 exit(1);
257} 262}