Tue Jul 21 01:35:02 2009 UTC ()
Modify to support multibyte characters.


(ahoka)
diff -r1.10 -r1.11 src/usr.bin/fold/fold.1
diff -r1.15 -r1.16 src/usr.bin/fold/fold.c

cvs diff -r1.10 -r1.11 src/usr.bin/fold/fold.1 (expand / switch to unified diff)

--- src/usr.bin/fold/fold.1 2003/08/07 11:13:47 1.10
+++ src/usr.bin/fold/fold.1 2009/07/21 01:35:02 1.11
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1.\" $NetBSD: fold.1,v 1.10 2003/08/07 11:13:47 agc Exp $ 1.\" $NetBSD: fold.1,v 1.11 2009/07/21 01:35:02 ahoka Exp $
2.\" 2.\"
3.\" Copyright (c) 1980, 1993 3.\" Copyright (c) 1980, 1993
4.\" The Regents of the University of California. All rights reserved. 4.\" The Regents of the University of California. All rights reserved.
5.\" 5.\"
6.\" Redistribution and use in source and binary forms, with or without 6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions 7.\" modification, are permitted provided that the following conditions
8.\" are met: 8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright 9.\" 1. Redistributions of source code must retain the above copyright
10.\" notice, this list of conditions and the following disclaimer. 10.\" notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright 11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\" notice, this list of conditions and the following disclaimer in the 12.\" notice, this list of conditions and the following disclaimer in the
13.\" documentation and/or other materials provided with the distribution. 13.\" documentation and/or other materials provided with the distribution.
14.\" 3. Neither the name of the University nor the names of its contributors 14.\" 3. Neither the name of the University nor the names of its contributors
@@ -19,27 +19,27 @@ @@ -19,27 +19,27 @@
19.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28.\" SUCH DAMAGE. 28.\" SUCH DAMAGE.
29.\" 29.\"
30.\" @(#)fold.1 8.1 (Berkeley) 6/6/93 30.\" @(#)fold.1 8.1 (Berkeley) 6/6/93
31.\" 31.\"
32.Dd June 6, 1993 32.Dd July 21, 2009
33.Dt FOLD 1 33.Dt FOLD 1
34.Os 34.Os
35.Sh NAME 35.Sh NAME
36.Nm fold 36.Nm fold
37.Nd "fold long lines for finite width output device" 37.Nd "fold long lines for finite width output device"
38.Sh SYNOPSIS 38.Sh SYNOPSIS
39.Nm 39.Nm
40.Op Fl bs 40.Op Fl bs
41.Op Fl w Ar width 41.Op Fl w Ar width
42.Ar 42.Ar
43.Sh DESCRIPTION 43.Sh DESCRIPTION
44.Nm 44.Nm
45is a filter which folds the contents of the specified files, 45is a filter which folds the contents of the specified files,
@@ -49,33 +49,38 @@ breaking the lines to have maximum of 80 @@ -49,33 +49,38 @@ breaking the lines to have maximum of 80
49The options are as follows: 49The options are as follows:
50.Bl -tag -width indent 50.Bl -tag -width indent
51.It Fl b 51.It Fl b
52Count 52Count
53.Ar width 53.Ar width
54in bytes rather than column positions. 54in bytes rather than column positions.
55.It Fl s 55.It Fl s
56Fold line after the last blank character within the first 56Fold line after the last blank character within the first
57.Ar width 57.Ar width
58column positions (or bytes). 58column positions (or bytes).
59.It Fl w 59.It Fl w
60Specifies a line width to use instead of the default 80 characters. 60Specifies a line width to use instead of the default 80 characters.
61.El 61.El
 62.Sh EXIT STATUS
62.Pp 63.Pp
63The 64The
64.Nm 65.Nm
65utility exits 0 on success, and \*[Gt]0 if an error occurs. 66utility exits 0 on success, and \*[Gt]0 if an error occurs.
 67.Sh ENVIRONMENT
 68.Bl -tag -width indent
 69.It Ev LC_CTYPE
 70.El
66.Sh SEE ALSO 71.Sh SEE ALSO
67.Xr expand 1 72.Xr expand 1
68.Sh STANDARDS 73.Sh STANDARDS
69The 74The
70.Nm 75.Nm
71utility conforms to 76utility conforms to
72.St -p1003.2-92 . 77.St -p1003.1-2008 .
73.Sh BUGS 78.Sh BUGS
74If underlining is present it may be messed up by folding. 79If underlining is present it may be messed up by folding.
75.Pp 80.Pp
76.Ar Width 81.Ar Width
77should be a multiple of 8 if tabs are present, or the tabs should 82should be a multiple of 8 if tabs are present, or the tabs should
78be expanded using 83be expanded using
79.Xr expand 1 84.Xr expand 1
80before using 85before using
81.Nm . 86.Nm .

cvs diff -r1.15 -r1.16 src/usr.bin/fold/fold.c (expand / switch to unified diff)

--- src/usr.bin/fold/fold.c 2008/10/29 01:31:09 1.15
+++ src/usr.bin/fold/fold.c 2009/07/21 01:35:02 1.16
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: fold.c,v 1.15 2008/10/29 01:31:09 ahoka Exp $ */ 1/* $NetBSD: fold.c,v 1.16 2009/07/21 01:35:02 ahoka Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1990, 1993 4 * Copyright (c) 1990, 1993
5 * The Regents of the University of California. All rights reserved. 5 * The Regents of the University of California. All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to Berkeley by 7 * This code is derived from software contributed to Berkeley by
8 * Kevin Ruddy. 8 * Kevin Ruddy.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
@@ -32,52 +32,57 @@ @@ -32,52 +32,57 @@
32 * SUCH DAMAGE. 32 * SUCH DAMAGE.
33 */ 33 */
34 34
35#include <sys/cdefs.h> 35#include <sys/cdefs.h>
36#ifndef lint 36#ifndef lint
37__COPYRIGHT("@(#) Copyright (c) 1990, 1993\ 37__COPYRIGHT("@(#) Copyright (c) 1990, 1993\
38 The Regents of the University of California. All rights reserved."); 38 The Regents of the University of California. All rights reserved.");
39#endif /* not lint */ 39#endif /* not lint */
40 40
41#ifndef lint 41#ifndef lint
42#if 0 42#if 0
43static char sccsid[] = "@(#)fold.c 8.1 (Berkeley) 6/6/93"; 43static char sccsid[] = "@(#)fold.c 8.1 (Berkeley) 6/6/93";
44#endif 44#endif
45__RCSID("$NetBSD: fold.c,v 1.15 2008/10/29 01:31:09 ahoka Exp $"); 45__RCSID("$NetBSD: fold.c,v 1.16 2009/07/21 01:35:02 ahoka Exp $");
46#endif /* not lint */ 46#endif /* not lint */
47 47
 48#include <limits.h>
 49#include <locale.h>
48#include <stdio.h> 50#include <stdio.h>
49#include <stdlib.h> 51#include <stdlib.h>
50#include <string.h> 
51#include <unistd.h> 52#include <unistd.h>
 53#include <wchar.h>
52#include <err.h> 54#include <err.h>
53 55
54#define DEFLINEWIDTH 80 56#define DEFLINEWIDTH 80
55 57
56 int main(int, char **); 58 int main(int, char **);
57static void fold(int); 59static void fold(int);
58static int new_column_position(int, int); 60static int new_column_position(int, wint_t);
59static void usage(void); 61static void usage(void);
60 62
61int count_bytes = 0; 63int count_bytes = 0;
62int split_words = 0; 64int split_words = 0;
63 65
64int 66int
65main(int argc, char **argv) 67main(int argc, char **argv)
66{ 68{
67 int ch; 69 int ch;
68 int width; 70 int width;
69 char *p; 71 char *p;
70 72
 73 setlocale(LC_CTYPE, "");
 74 setprogname(argv[0]);
 75
71 width = -1; 76 width = -1;
72 while ((ch = getopt(argc, argv, "0123456789bsw:")) != -1) 77 while ((ch = getopt(argc, argv, "0123456789bsw:")) != -1)
73 switch (ch) { 78 switch (ch) {
74 case 'b': 79 case 'b':
75 count_bytes = 1; 80 count_bytes = 1;
76 break; 81 break;
77 case 's': 82 case 's':
78 split_words = 1; 83 split_words = 1;
79 break; 84 break;
80 case 'w': 85 case 'w':
81 if ((width = atoi(optarg)) <= 0) 86 if ((width = atoi(optarg)) <= 0)
82 errx(1, "illegal width value"); 87 errx(1, "illegal width value");
83 break; 88 break;
@@ -115,117 +120,130 @@ main(int argc, char **argv) @@ -115,117 +120,130 @@ main(int argc, char **argv)
115 * Fold the contents of standard input to fit within WIDTH columns 120 * Fold the contents of standard input to fit within WIDTH columns
116 * (or bytes) and write to standard output. 121 * (or bytes) and write to standard output.
117 * 122 *
118 * If split_words is set, split the line at the last space character 123 * If split_words is set, split the line at the last space character
119 * on the line. This flag necessitates storing the line in a buffer 124 * on the line. This flag necessitates storing the line in a buffer
120 * until the current column > width, or a newline or EOF is read. 125 * until the current column > width, or a newline or EOF is read.
121 * 126 *
122 * The buffer can grow larger than WIDTH due to backspaces and carriage 127 * The buffer can grow larger than WIDTH due to backspaces and carriage
123 * returns embedded in the input stream. 128 * returns embedded in the input stream.
124 */ 129 */
125static void 130static void
126fold(int width) 131fold(int width)
127{ 132{
128 static char *buf = NULL; 133 static wchar_t *buf = NULL;
129 char *nbuf; 134 wchar_t *nbuf;
130 static int buf_max = 0; 135 static int buf_max = 0;
131 int ch, col; 136 wint_t ch;
132 int indx; 137 int col, indx, i;
133 138
134 col = indx = 0; 139 col = indx = 0;
135 while ((ch = getchar()) != EOF) { 140 while ((ch = getwchar()) != WEOF) {
136 if (ch == '\n') { 141 if (ch == L'\n') {
137 if (indx != 0) 142 if (indx != 0) {
138 fwrite (buf, 1, indx, stdout); 143 for (i = 0; i < indx; i++)
139 putchar('\n'); 144 putwchar(buf[i]);
 145 }
 146 putwchar(L'\n');
140 col = indx = 0; 147 col = indx = 0;
141 continue; 148 continue;
142 } 149 }
143 150
144 col = new_column_position (col, ch); 151 col = new_column_position (col, ch);
145 if (col > width) { 152 if (col > width) {
146 int i, last_space; 153 int last_space;
147 154
148#ifdef __GNUC__ 155#ifdef __GNUC__
149 last_space = 0; /* XXX gcc */ 156 last_space = 0; /* XXX gcc */
150#endif 157#endif
151 if (split_words) { 158 if (split_words) {
152 for (i = 0, last_space = -1; i < indx; i++) 159 for (i = 0, last_space = -1; i < indx; i++)
153 if (buf[i] == ' ') 160 if (buf[i] == L' ')
154 last_space = i; 161 last_space = i;
155 } 162 }
156 163
157 if (split_words && last_space != -1) { 164 if (split_words && last_space != -1) {
158 fwrite (buf, 1, last_space, stdout); 165 for (i = 0; i < last_space; i++)
 166 putwchar(buf[i]);
159 167
160 /* increase last_space here, so we skip trailing whitespace */ 168 /* increase last_space here, so we skip trailing whitespace */
161 last_space++; 169 last_space++;
162 memmove (buf, buf+last_space, indx-last_space); 170 wmemmove (buf, buf+last_space, indx-last_space);
163 171
164 indx -= last_space; 172 indx -= last_space;
165 col = 0; 173 col = 0;
166 for (i = 0; i < indx; i++) { 174 for (i = 0; i < indx; i++) {
167 col = new_column_position (col, buf[i]); 175 col = new_column_position (col, buf[i]);
168 } 176 }
169 } else { 177 } else {
170 fwrite (buf, 1, indx, stdout); 178 for (i = 0; i < indx; i++)
 179 putwchar(buf[i]);
171 col = indx = 0; 180 col = indx = 0;
172 } 181 }
173 putchar('\n'); 182 putwchar('\n');
174 183
175 /* calculate the column position for the next line. */ 184 /* calculate the column position for the next line. */
176 col = new_column_position (col, ch); 185 col = new_column_position (col, ch);
177 } 186 }
178 187
179 if (indx + 1 > buf_max) { 188 if (indx + 1 > buf_max) {
180 /* Allocate buffer in LINE_MAX increments */ 189 /* Allocate buffer in LINE_MAX increments */
181 if ((nbuf = realloc (buf, buf_max + 2048)) == NULL) { 190 if ((nbuf = realloc (buf, buf_max + 2048)) == NULL) {
182 err (1, "realloc"); 191 err (1, "realloc");
183 /* NOTREACHED */ 192 /* NOTREACHED */
184 } 193 }
185 buf = nbuf; 194 buf = nbuf;
186 buf_max += 2048; 195 buf_max += 2048;
187 } 196 }
188 buf[indx++] = ch; 197 buf[indx++] = ch;
189 } 198 }
190 199
191 if (indx != 0) 200 if (indx != 0) {
192 fwrite (buf, 1, indx, stdout); 201 for (i = 0; i < indx; i++)
 202 putwchar(buf[i]);
 203 }
193} 204}
194 205
195/* 206/*
196 * calculate the column position  207 * calculate the column position
197 */ 208 */
198static int 209static int
199new_column_position (int col, int ch) 210new_column_position (int col, wint_t ch)
200{ 211{
 212 int w;
 213
201 if (!count_bytes) { 214 if (!count_bytes) {
202 switch (ch) { 215 switch (ch) {
203 case '\b': 216 case L'\b':
204 if (col > 0) 217 if (col > 0)
205 --col; 218 --col;
206 break; 219 break;
207 case '\r': 220 case L'\r':
208 col = 0; 221 col = 0;
209 break; 222 break;
210 case '\t': 223 case L'\t':
211 col = (col + 8) & ~7; 224 col = (col + 8) & ~7;
212 break; 225 break;
213 default: 226 default:
214 ++col; 227 w = wcwidth(ch);
 228 if (w > 0)
 229 col += w;
215 break; 230 break;
216 } 231 }
217 } else { 232 } else {
218 ++col; 233 char dummy[MB_LEN_MAX];
 234
 235 /* XXX: we assume stateless encoding */
 236 col += wcrtomb(dummy, ch, NULL);
219 } 237 }
220 238
221 return col; 239 return col;
222} 240}
223 241
/*
 * Print the command-line synopsis, prefixed with the program name,
 * to standard error and terminate with exit status 1.
 */
static void
usage(void)
{
	const char *progname = getprogname();

	(void)fprintf(stderr, "usage: %s [-bs] [-w width] [file ...]\n",
	    progname);
	exit(1);
}
231 249