Sun Nov 1 22:27:15 2020 UTC ()
Replace col with the FreeBSD version that adds wide char support


(christos)
diff -r1.5 -r1.6 src/usr.bin/col/Makefile
diff -r1.10 -r1.11 src/usr.bin/col/col.1
diff -r1.18 -r1.19 src/usr.bin/col/col.c

cvs diff -r1.5 -r1.6 src/usr.bin/col/Makefile (expand / switch to unified diff)

--- src/usr.bin/col/Makefile 2009/04/14 22:15:18 1.5
+++ src/usr.bin/col/Makefile 2020/11/01 22:27:15 1.6
@@ -1,6 +1,7 @@ @@ -1,6 +1,7 @@
1# $NetBSD: Makefile,v 1.5 2009/04/14 22:15:18 lukem Exp $ 1# $NetBSD: Makefile,v 1.6 2020/11/01 22:27:15 christos Exp $
2# @(#)Makefile 8.1 (Berkeley) 6/6/93 2# @(#)Makefile 8.1 (Berkeley) 6/6/93
3 3
 4WARNS=6
4PROG= col 5PROG= col
5 6
6.include <bsd.prog.mk> 7.include <bsd.prog.mk>

cvs diff -r1.10 -r1.11 src/usr.bin/col/col.1 (expand / switch to unified diff)

--- src/usr.bin/col/col.1 2012/03/22 07:58:18 1.10
+++ src/usr.bin/col/col.1 2020/11/01 22:27:15 1.11
@@ -1,15 +1,14 @@ @@ -1,15 +1,14 @@
1.\" $NetBSD: col.1,v 1.10 2012/03/22 07:58:18 wiz Exp $ 1.\" $NetBSD: col.1,v 1.11 2020/11/01 22:27:15 christos Exp $
2.\" 
3.\" Copyright (c) 1990, 1993 2.\" Copyright (c) 1990, 1993
4.\" The Regents of the University of California. All rights reserved. 3.\" The Regents of the University of California. All rights reserved.
5.\" 4.\"
6.\" This code is derived from software contributed to Berkeley by 5.\" This code is derived from software contributed to Berkeley by
7.\" Michael Rendell. 6.\" Michael Rendell.
8.\" 7.\"
9.\" Redistribution and use in source and binary forms, with or without 8.\" Redistribution and use in source and binary forms, with or without
10.\" modification, are permitted provided that the following conditions 9.\" modification, are permitted provided that the following conditions
11.\" are met: 10.\" are met:
12.\" 1. Redistributions of source code must retain the above copyright 11.\" 1. Redistributions of source code must retain the above copyright
13.\" notice, this list of conditions and the following disclaimer. 12.\" notice, this list of conditions and the following disclaimer.
14.\" 2. Redistributions in binary form must reproduce the above copyright 13.\" 2. Redistributions in binary form must reproduce the above copyright
15.\" notice, this list of conditions and the following disclaimer in the 14.\" notice, this list of conditions and the following disclaimer in the
@@ -21,118 +20,153 @@ @@ -21,118 +20,153 @@
21.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31.\" SUCH DAMAGE. 30.\" SUCH DAMAGE.
32.\" 31.\"
33.\" @(#)col.1 8.1 (Berkeley) 6/29/93 32.\" @(#)col.1 8.1 (Berkeley) 6/29/93
 33.\" $FreeBSD: head/usr.bin/col/col.1 366913 2020-10-21 16:30:34Z fernape $
34.\" 34.\"
35.Dd February 22, 1999 35.Dd November 1, 2020
36.Dt COL 1 36.Dt COL 1
37.Os 37.Os
38.Sh NAME 38.Sh NAME
39.Nm col 39.Nm col
40.Nd filter reverse line feeds from input 40.Nd filter reverse line feeds from input
41.Sh SYNOPSIS 41.Sh SYNOPSIS
42.Nm 42.Nm
43.Op Fl bfpx 43.Op Fl bfhpx
44.Op Fl l Ar num 44.Op Fl l Ar num
45.Sh DESCRIPTION 45.Sh DESCRIPTION
 46The
46.Nm 47.Nm
47filters out reverse (and half reverse) line feeds so that the output is 48utility filters out reverse (and half reverse) line feeds so that the output is
48in the correct order with only forward and half forward line 49in the correct order with only forward and half forward line
49feeds, and replaces white-space characters with tabs where possible. 50feeds, and replaces white-space characters with tabs where possible.
50This can be useful in processing the output of 
51.Xr nroff 1 
52and 
53.Xr tbl 1 . 
54.Pp 51.Pp
 52The
55.Nm 53.Nm
56reads from the standard input and writes to the standard output. 54utility reads from the standard input and writes to the standard output.
57.Pp 55.Pp
58The options are as follows: 56The options are as follows:
59.Bl -tag -width "-l num " 57.Bl -tag -width indent
60.It Fl b 58.It Fl b
61Do not output any backspaces, printing only the last character 59Do not output any backspaces, printing only the last character
62written to each column position. 60written to each column position.
63.It Fl f 61.It Fl f
64Forward half line feeds are permitted (``fine'' mode). 62Forward half line feeds are permitted (``fine'' mode).
65Normally characters printed on a half line boundary are printed 63Normally characters printed on a half line boundary are printed
66on the following line. 64on the following line.
 65.It Fl h
 66Output tabs instead of multiple spaces where possible (default).
 67.It Fl l Ar num
 68Buffer at least
 69.Ar num
 70lines in memory.
 71By default, 128 lines are buffered.
67.It Fl p 72.It Fl p
68Force unknown control sequences to be passed through unchanged. 73Force unknown control sequences to be passed through unchanged.
69Normally, 74Normally,
70.Nm 75.Nm
71will filter out any control sequences from the input other than those 76will filter out any control sequences from the input other than those
72recognized and interpreted by itself, which are listed below. 77recognized and interpreted by itself, which are listed below.
73.It Fl x 78.It Fl x
74Output multiple spaces instead of tabs. 79Output multiple spaces instead of tabs.
75.It Fl l Ar num 
76Buffer at least 
77.Ar num 
78lines in memory. 
79By default, 128 lines are buffered. 
80.El 80.El
81.Pp 81.Pp
82The control sequences for carriage motion that 82In the input stream,
83.Nm 83.Nm
84understands and their decimal values are listed in the following 84understands both the escape sequences of the form escape-digit
85table: 85mandated by
 86.St -susv2
 87and the traditional
 88.Bx
 89format escape-control-character.
 90The control sequences for carriage motion and their ASCII values
 91are as follows:
86.Pp 92.Pp
87.Bl -tag -width "carriage return" -compact 93.Bl -tag -width "carriage return" -compact
 94.It ESC\-BELL
 95reverse line feed (escape then bell).
88.It ESC\-7 96.It ESC\-7
89reverse line feed (escape then 7) 97reverse line feed (escape then 7).
 98.It ESC\-BACKSPACE
 99half reverse line feed (escape then backspace).
90.It ESC\-8 100.It ESC\-8
91half reverse line feed (escape then 8) 101half reverse line feed (escape then 8).
 102.It ESC\-TAB
 103half forward line feed (escape then tab).
92.It ESC\-9 104.It ESC\-9
93half forward line feed (escape then 9) 105half forward line feed (escape then 9).
 106In
 107.Fl f
 108mode, this sequence may also occur in the output stream.
94.It backspace 109.It backspace
95moves back one column (8); ignored in the first column 110moves back one column (8); ignored in the first column
96.It carriage return 111.It carriage return
97(13) 112(13)
98.It newline 113.It newline
99forward line feed (10); also does carriage return 114forward line feed (10); also does carriage return
100.It shift in 115.It shift in
101shift to normal character set (15) 116shift to normal character set (15)
102.It shift out 117.It shift out
103shift to alternative character set (14) 118shift to alternate character set (14)
104.It space 119.It space
105moves forward one column (32) 120moves forward one column (32)
106.It tab 121.It tab
107moves forward to next tab stop (9) 122moves forward to next tab stop (9)
108.It vertical tab 123.It vertical tab
109reverse line feed (11) 124reverse line feed (11)
110.El 125.El
111.Pp 126.Pp
112All unrecognized control characters and escape sequences are 127All unrecognized control characters and escape sequences are
113discarded. 128discarded.
114.Pp 129.Pp
 130The
115.Nm 131.Nm
116keeps track of the character set as characters are read and makes 132utility keeps track of the character set as characters are read and makes
117sure the character set is correct when they are output. 133sure the character set is correct when they are output.
118.Pp 134.Pp
119If the input attempts to back up to the last flushed line, 135If the input attempts to back up to the last flushed line,
120.Nm 136.Nm
121will display a warning message. 137will display a warning message.
 138.Sh ENVIRONMENT
 139The
 140.Ev LANG , LC_ALL
 141and
 142.Ev LC_CTYPE
 143environment variables affect the execution of
 144.Nm
 145as described in
 146.Xr environ 7 .
 147.Sh EXIT STATUS
 148.Ex -std
 149.Sh EXAMPLES
 150We can use
 151.Nm
 152to filter the output of
 153.Xr man 1
 154and remove the backspace characters (
 155.Em ^H
 156) before searching for some text:
 157.Bd -literal -offset indent
 158man ls | col -b | grep HISTORY
 159.Ed
122.Sh SEE ALSO 160.Sh SEE ALSO
123.Xr expand 1 , 161.Xr expand 1
124.Xr nroff 1 , 
125.Xr tbl 1 
126.Sh STANDARDS 162.Sh STANDARDS
127The 163The
128.Nm 164.Nm
129utility conforms to 165utility conforms to
130.St -xpg4.2 . 166.St -susv2 .
131The 
132.Fl l 
133option is an extension to the standard. 
134.Sh HISTORY 167.Sh HISTORY
135A 168A
136.Nm 169.Nm
137command appeared in 170command
 171appeared in
138.At v6 . 172.At v6 .

cvs diff -r1.18 -r1.19 src/usr.bin/col/col.c (expand / switch to unified diff)

--- src/usr.bin/col/col.c 2014/10/18 14:56:14 1.18
+++ src/usr.bin/col/col.c 2020/11/01 22:27:15 1.19
@@ -1,16 +1,18 @@ @@ -1,16 +1,18 @@
1/* $NetBSD: col.c,v 1.18 2014/10/18 14:56:14 christos Exp $ */ 1/* $NetBSD: col.c,v 1.19 2020/11/01 22:27:15 christos Exp $ */
2 2
3/*- 3/*-
 4 * SPDX-License-Identifier: BSD-3-Clause
 5 *
4 * Copyright (c) 1990, 1993, 1994 6 * Copyright (c) 1990, 1993, 1994
5 * The Regents of the University of California. All rights reserved. 7 * The Regents of the University of California. All rights reserved.
6 * 8 *
7 * This code is derived from software contributed to Berkeley by 9 * This code is derived from software contributed to Berkeley by
8 * Michael Rendell of the Memorial University of Newfoundland. 10 * Michael Rendell of the Memorial University of Newfoundland.
9 * 11 *
10 * Redistribution and use in source and binary forms, with or without 12 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 13 * modification, are permitted provided that the following conditions
12 * are met: 14 * are met:
13 * 1. Redistributions of source code must retain the above copyright 15 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 16 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 17 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 18 * notice, this list of conditions and the following disclaimer in the
@@ -27,516 +29,575 @@ @@ -27,516 +29,575 @@
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE. 34 * SUCH DAMAGE.
33 */ 35 */
34 36
35#include <sys/cdefs.h> 37#include <sys/cdefs.h>
36#ifndef lint 38#ifndef lint
37__COPYRIGHT("@(#) Copyright (c) 1990, 1993, 1994\ 39__COPYRIGHT("@(#) Copyright (c) 1990, 1993, 1994\
38 The Regents of the University of California. All rights reserved."); 40 The Regents of the University of California. All rights reserved.");
39#endif /* not lint */ 41#endif /* not lint */
40 42
41#ifndef lint 43#ifndef lint
42#if 0 44#if 0
43static char sccsid[] = "@(#)col.c 8.5 (Berkeley) 5/4/95"; 45static char sccsid[] = "@(#)col.c 8.5 (Berkeley) 5/4/95";
 46__FBSDID("$FreeBSD: head/usr.bin/col/col.c 366577 2020-10-09 15:27:37Z markj $")
 47;
 48
44#endif 49#endif
45__RCSID("$NetBSD: col.c,v 1.18 2014/10/18 14:56:14 christos Exp $"); 50__RCSID("$NetBSD: col.c,v 1.19 2020/11/01 22:27:15 christos Exp $");
46#endif /* not lint */ 51#endif /* not lint */
47 52
48#include <ctype.h> 
49#include <err.h> 53#include <err.h>
50#include <string.h> 54#include <errno.h>
 55#include <inttypes.h>
 56#include <limits.h>
 57#include <locale.h>
51#include <stdio.h> 58#include <stdio.h>
52#include <stdlib.h> 59#include <stdlib.h>
 60#include <string.h>
 61#include <termios.h>
53#include <unistd.h> 62#include <unistd.h>
 63#include <wchar.h>
 64#include <wctype.h>
54 65
55#define BS '\b' /* backspace */ 66#define BS '\b' /* backspace */
56#define TAB '\t' /* tab */ 67#define TAB '\t' /* tab */
57#define SPACE ' ' /* space */ 68#define SPACE ' ' /* space */
58#define NL '\n' /* newline */ 69#define NL '\n' /* newline */
59#define CR '\r' /* carriage return */ 70#define CR '\r' /* carriage return */
60#define ESC '\033' /* escape */ 71#define ESC '\033' /* escape */
61#define SI '\017' /* shift in to normal character set */ 72#define SI '\017' /* shift in to normal character set */
62#define SO '\016' /* shift out to alternate character set */ 73#define SO '\016' /* shift out to alternate character set */
63#define VT '\013' /* vertical tab (aka reverse line feed) */ 74#define VT '\013' /* vertical tab (aka reverse line feed) */
64#define RLF '\007' /* ESC-07 reverse line feed */ 75#define RLF '7' /* ESC-7 reverse line feed */
65#define RHLF '\010' /* ESC-010 reverse half-line feed */ 76#define RHLF '8' /* ESC-8 reverse half-line feed */
66#define FHLF '\011' /* ESC-011 forward half-line feed */ 77#define FHLF '9' /* ESC-9 forward half-line feed */
67 78
68/* build up at least this many lines before flushing them out */ 79/* build up at least this many lines before flushing them out */
69#define BUFFER_MARGIN 32 80#define BUFFER_MARGIN 32
70 81
71typedef char CSET; 82typedef char CSET;
72 83
73typedef struct char_str { 84typedef struct char_str {
74#define CS_NORMAL 1 85#define CS_NORMAL 1
75#define CS_ALTERNATE 2 86#define CS_ALTERNATE 2
76 short c_column; /* column character is in */ 87 int c_column; /* column character is in */
77 CSET c_set; /* character set (currently only 2) */ 88 CSET c_set; /* character set (currently only 2) */
78 char c_char; /* character in question */ 89 wchar_t c_char; /* character in question */
 90 int c_width; /* character width */
79} CHAR; 91} CHAR;
80 92
81typedef struct line_str LINE; 93typedef struct line_str LINE;
82struct line_str { 94struct line_str {
83 CHAR *l_line; /* characters on the line */ 95 CHAR *l_line; /* characters on the line */
84 LINE *l_prev; /* previous line */ 96 LINE *l_prev; /* previous line */
85 LINE *l_next; /* next line */ 97 LINE *l_next; /* next line */
86 int l_lsize; /* allocated sizeof l_line */ 98 int l_lsize; /* allocated sizeof l_line */
87 int l_line_len; /* strlen(l_line) */ 99 int l_line_len; /* strlen(l_line) */
88 int l_needs_sort; /* set if chars went in out of order */ 100 int l_needs_sort; /* set if chars went in out of order */
89 int l_max_col; /* max column in the line */ 101 int l_max_col; /* max column in the line */
90}; 102};
91 103
92static LINE *alloc_line(void); 104static void addto_lineno(int *, int);
 105static LINE *alloc_line(void);
93static void dowarn(int); 106static void dowarn(int);
94static void flush_line(LINE *); 107static void flush_line(LINE *);
95static void flush_lines(int); 108static void flush_lines(int);
96static void flush_blanks(void); 109static void flush_blanks(void);
97static void free_line(LINE *); 110static void free_line(LINE *);
98__dead static void usage(void); 111__dead static void usage(void);
99__dead static void wrerr(void); 
100static void *xmalloc(void *, size_t); 
101 112
102static CSET last_set; /* char_set of last char printed */ 113static CSET last_set; /* char_set of last char printed */
103static LINE *lines; 114static LINE *lines;
104static int compress_spaces; /* if doing space -> tab conversion */ 115static int compress_spaces; /* if doing space -> tab conversion */
105static int fine; /* if `fine' resolution (half lines) */ 116static int fine; /* if `fine' resolution (half lines) */
106static int max_bufd_lines; /* max # lines to keep in memory */ 117static int max_bufd_lines; /* max # of half lines to keep in memory */
107static int nblank_lines; /* # blanks after last flushed line */ 118static int nblank_lines; /* # blanks after last flushed line */
108static int no_backspaces; /* if not to output any backspaces */ 119static int no_backspaces; /* if not to output any backspaces */
109static int pass_unknown_seqs; /* whether to pass unknown control sequences */ 120static int pass_unknown_seqs; /* pass unknown control sequences */
110 121
111#define PUTC(ch) \ 122#define PUTC(ch) \
112 if (putchar(ch) == EOF) \ 123 do { \
113 wrerr(); 124 if (putwchar(ch) == WEOF) \
 125 errx(EXIT_FAILURE, "write error"); \
 126 } while (/*CONSTCOND*/0)
114 127
115int 128int
116main(int argc, char **argv) 129main(int argc, char **argv)
117{ 130{
118 int ch; 131 wint_t ch;
119 CHAR *c; 132 CHAR *c;
120 CSET cur_set; /* current character set */ 133 CSET cur_set; /* current character set */
121 LINE *l; /* current line */ 134 LINE *l; /* current line */
122 int extra_lines; /* # of lines above first line */ 135 int extra_lines; /* # of lines above first line */
123 int cur_col; /* current column */ 136 int cur_col; /* current column */
124 int cur_line; /* line number of current position */ 137 int cur_line; /* line number of current position */
125 int max_line; /* max value of cur_line */ 138 int max_line; /* max value of cur_line */
126 int this_line; /* line l points to */ 139 int this_line; /* line l points to */
127 int nflushd_lines; /* number of lines that were flushed */ 140 int nflushd_lines; /* number of lines that were flushed */
128 int adjust, opt, warned; 141 int adjust, opt, warned, width;
 142 int e;
129 143
130 max_bufd_lines = 128; 144 (void)setlocale(LC_CTYPE, "");
 145
 146 max_bufd_lines = 256;
131 compress_spaces = 1; /* compress spaces into tabs */ 147 compress_spaces = 1; /* compress spaces into tabs */
132 pass_unknown_seqs = 0; /* remove unknown escape sequences */ 
133 while ((opt = getopt(argc, argv, "bfhl:px")) != -1) 148 while ((opt = getopt(argc, argv, "bfhl:px")) != -1)
134 switch (opt) { 149 switch (opt) {
135 case 'b': /* do not output backspaces */ 150 case 'b': /* do not output backspaces */
136 no_backspaces = 1; 151 no_backspaces = 1;
137 break; 152 break;
138 case 'f': /* allow half forward line feeds */ 153 case 'f': /* allow half forward line feeds */
139 fine = 1; 154 fine = 1;
140 break; 155 break;
141 case 'h': /* compress spaces into tabs */ 156 case 'h': /* compress spaces into tabs */
142 compress_spaces = 1; 157 compress_spaces = 1;
143 break; 158 break;
144 case 'l': /* buffered line count */ 159 case 'l': /* buffered line count */
145 if ((max_bufd_lines = atoi(optarg)) <= 0) { 160 max_bufd_lines = (int)strtoi(optarg, NULL, 0, 1,
146 (void)fprintf(stderr, 161 (INT_MAX - BUFFER_MARGIN) / 2, &e) * 2;
147 "col: bad -l argument %s.\n", optarg); 162 if (e)
148 exit(EXIT_FAILURE); 163 errc(EXIT_FAILURE, e, "bad -l argument `%s'",
149 } 164 optarg);
150 break; 165 break;
151 case 'p': /* pass unknown control sequences */ 166 case 'p': /* pass unknown control sequences */
152 pass_unknown_seqs = 1; 167 pass_unknown_seqs = 1;
153 break; 168 break;
154 case 'x': /* do not compress spaces into tabs */ 169 case 'x': /* do not compress spaces into tabs */
155 compress_spaces = 0; 170 compress_spaces = 0;
156 break; 171 break;
157 case '?': 172 case '?':
158 default: 173 default:
159 usage(); 174 usage();
160 } 175 }
161 176
162 if (optind != argc) 177 if (optind != argc)
163 usage(); 178 usage();
164 179
165 /* this value is in half lines */ 
166 max_bufd_lines *= 2; 
167 
168 adjust = cur_col = extra_lines = warned = 0; 180 adjust = cur_col = extra_lines = warned = 0;
169 cur_line = max_line = nflushd_lines = this_line = 0; 181 cur_line = max_line = nflushd_lines = this_line = 0;
170 cur_set = last_set = CS_NORMAL; 182 cur_set = last_set = CS_NORMAL;
171 lines = l = alloc_line(); 183 lines = l = alloc_line();
172 184
173 while ((ch = getchar()) != EOF) { 185 while ((ch = getwchar()) != WEOF) {
174 if (!isgraph(ch)) { 186 if (!iswgraph(ch)) {
175 switch (ch) { 187 switch (ch) {
176 case BS: /* can't go back further */ 188 case BS: /* can't go back further */
177 if (cur_col == 0) 189 if (cur_col == 0)
178 continue; 190 continue;
179 --cur_col; 191 --cur_col;
180 continue; 192 continue;
181 case CR: 193 case CR:
182 cur_col = 0; 194 cur_col = 0;
183 continue; 195 continue;
184 case ESC: /* just ignore EOF */ 196 case ESC: /* just ignore EOF */
185 switch(getchar()) { 197 switch(getwchar()) {
 198 /*
 199 * In the input stream, accept both the
 200 * XPG5 sequences ESC-digit and the
 201 * traditional BSD sequences ESC-ctrl.
 202 */
 203 case '\007':
 204 /* FALLTHROUGH */
186 case RLF: 205 case RLF:
187 cur_line -= 2; 206 addto_lineno(&cur_line, -2);
188 break; 207 break;
 208 case '\010':
 209 /* FALLTHROUGH */
189 case RHLF: 210 case RHLF:
190 cur_line--; 211 addto_lineno(&cur_line, -1);
191 break; 212 break;
 213 case '\011':
 214 /* FALLTHROUGH */
192 case FHLF: 215 case FHLF:
193 cur_line++; 216 addto_lineno(&cur_line, 1);
194 if (cur_line > max_line) 217 if (cur_line > max_line)
195 max_line = cur_line; 218 max_line = cur_line;
196 } 219 }
197 continue; 220 continue;
198 case NL: 221 case NL:
199 cur_line += 2; 222 addto_lineno(&cur_line, 2);
200 if (cur_line > max_line) 223 if (cur_line > max_line)
201 max_line = cur_line; 224 max_line = cur_line;
202 cur_col = 0; 225 cur_col = 0;
203 continue; 226 continue;
204 case SPACE: 227 case SPACE:
205 ++cur_col; 228 ++cur_col;
206 continue; 229 continue;
207 case SI: 230 case SI:
208 cur_set = CS_NORMAL; 231 cur_set = CS_NORMAL;
209 continue; 232 continue;
210 case SO: 233 case SO:
211 cur_set = CS_ALTERNATE; 234 cur_set = CS_ALTERNATE;
212 continue; 235 continue;
213 case TAB: /* adjust column */ 236 case TAB: /* adjust column */
214 cur_col |= 7; 237 cur_col |= 7;
215 ++cur_col; 238 ++cur_col;
216 continue; 239 continue;
217 case VT: 240 case VT:
218 cur_line -= 2; 241 addto_lineno(&cur_line, -2);
 242 continue;
 243 }
 244 if (iswspace(ch)) {
 245 if ((width = wcwidth(ch)) > 0)
 246 cur_col += width;
219 continue; 247 continue;
220 } 248 }
221 if (!pass_unknown_seqs) 249 if (!pass_unknown_seqs)
222 continue; 250 continue;
223 } 251 }
224 252
225 /* Must stuff ch in a line - are we at the right one? */ 253 /* Must stuff ch in a line - are we at the right one? */
226 if (cur_line != this_line - adjust) { 254 if (cur_line + adjust != this_line) {
227 LINE *lnew; 255 LINE *lnew;
228 int nmove; 
229 256
230 adjust = 0; 257 /* round up to next line */
231 nmove = cur_line - this_line; 258 adjust = !fine && (cur_line & 1);
232 if (!fine) { 259
233 /* round up to next line */ 260 if (cur_line + adjust < this_line) {
234 if (cur_line & 1) { 261 while (cur_line + adjust < this_line &&
235 adjust = 1; 262 l->l_prev != NULL) {
236 nmove++; 
237 } 
238 } 
239 if (nmove < 0) { 
240 for (; nmove < 0 && l->l_prev; nmove++) 
241 l = l->l_prev; 263 l = l->l_prev;
242 if (nmove) { 264 this_line--;
 265 }
 266 if (cur_line + adjust < this_line) {
243 if (nflushd_lines == 0) { 267 if (nflushd_lines == 0) {
244 /* 268 /*
245 * Allow backup past first 269 * Allow backup past first
246 * line if nothing has been 270 * line if nothing has been
247 * flushed yet. 271 * flushed yet.
248 */ 272 */
249 for (; nmove < 0; nmove++) { 273 while (cur_line + adjust
 274 < this_line) {
250 lnew = alloc_line(); 275 lnew = alloc_line();
251 l->l_prev = lnew; 276 l->l_prev = lnew;
252 lnew->l_next = l; 277 lnew->l_next = l;
253 l = lines = lnew; 278 l = lines = lnew;
254 extra_lines++; 279 extra_lines++;
 280 this_line--;
255 } 281 }
256 } else { 282 } else {
257 if (!warned++) 283 if (!warned++)
258 dowarn(cur_line); 284 dowarn(cur_line);
259 cur_line -= nmove; 285 cur_line = this_line - adjust;
260 } 286 }
261 } 287 }
262 } else { 288 } else {
263 /* may need to allocate here */ 289 /* may need to allocate here */
264 for (; nmove > 0 && l->l_next; nmove--) 290 while (cur_line + adjust > this_line) {
 291 if (l->l_next == NULL) {
 292 l->l_next = alloc_line();
 293 l->l_next->l_prev = l;
 294 }
265 l = l->l_next; 295 l = l->l_next;
266 for (; nmove > 0; nmove--) { 296 this_line++;
267 lnew = alloc_line(); 
268 lnew->l_prev = l; 
269 l->l_next = lnew; 
270 l = lnew; 
271 } 297 }
272 } 298 }
273 this_line = cur_line + adjust; 299 if (this_line > nflushd_lines &&
274 nmove = this_line - nflushd_lines; 300 this_line - nflushd_lines >=
275 if (nmove >= max_bufd_lines + BUFFER_MARGIN) { 301 max_bufd_lines + BUFFER_MARGIN) {
276 nflushd_lines += nmove - max_bufd_lines; 302 if (extra_lines) {
277 flush_lines(nmove - max_bufd_lines); 303 flush_lines(extra_lines);
 304 extra_lines = 0;
 305 }
 306 flush_lines(this_line - nflushd_lines -
 307 max_bufd_lines);
 308 nflushd_lines = this_line - max_bufd_lines;
278 } 309 }
279 } 310 }
280 /* grow line's buffer? */ 311 /* grow line's buffer? */
281 if (l->l_line_len + 1 >= l->l_lsize) { 312 if (l->l_line_len + 1 >= l->l_lsize) {
282 int need; 313 int need;
283 314
284 need = l->l_lsize ? l->l_lsize * 2 : 90; 315 need = l->l_lsize ? l->l_lsize * 2 : 90;
285 l->l_line = (CHAR *)xmalloc((void *) l->l_line, 316 if ((l->l_line = realloc(l->l_line,
286 (unsigned) need * sizeof(CHAR)); 317 (unsigned)need * sizeof(CHAR))) == NULL)
 318 err(EXIT_FAILURE, NULL);
287 l->l_lsize = need; 319 l->l_lsize = need;
288 } 320 }
289 c = &l->l_line[l->l_line_len++]; 321 c = &l->l_line[l->l_line_len++];
290 c->c_char = ch; 322 c->c_char = ch;
291 c->c_set = cur_set; 323 c->c_set = cur_set;
292 c->c_column = cur_col; 324 c->c_column = cur_col;
 325 c->c_width = wcwidth(ch);
293 /* 326 /*
294 * If things are put in out of order, they will need sorting 327 * If things are put in out of order, they will need sorting
295 * when it is flushed. 328 * when it is flushed.
296 */ 329 */
297 if (cur_col < l->l_max_col) 330 if (cur_col < l->l_max_col)
298 l->l_needs_sort = 1; 331 l->l_needs_sort = 1;
299 else 332 else
300 l->l_max_col = cur_col; 333 l->l_max_col = cur_col;
301 cur_col++; 334 if (c->c_width > 0)
 335 cur_col += c->c_width;
 336 }
 337 if (ferror(stdin))
 338 err(EXIT_FAILURE, NULL);
 339 if (extra_lines) {
 340 /*
 341 * Extra lines only exist if no lines have been flushed
 342 * yet. This means that 'lines' must point to line zero
 343 * after we flush the extra lines.
 344 */
 345 flush_lines(extra_lines);
 346 l = lines;
 347 this_line = 0;
302 } 348 }
303 if (max_line == 0) 
304 exit(EXIT_SUCCESS); /* no lines, so just exit */ 
305 349
306 /* goto the last line that had a character on it */ 350 /* goto the last line that had a character on it */
307 for (; l->l_next; l = l->l_next) 351 for (; l->l_next; l = l->l_next)
308 this_line++; 352 this_line++;
309 flush_lines(this_line - nflushd_lines + extra_lines + 1); 353 flush_lines(this_line - nflushd_lines + 1);
310 354
311 /* make sure we leave things in a sane state */ 355 /* make sure we leave things in a sane state */
312 if (last_set != CS_NORMAL) 356 if (last_set != CS_NORMAL)
313 PUTC('\017'); 357 PUTC(SI);
314 358
315 /* flush out the last few blank lines */ 359 /* flush out the last few blank lines */
316 nblank_lines = max_line - this_line; 360 if (max_line >= this_line)
317 if (max_line & 1) 361 nblank_lines = max_line - this_line + (max_line & 1);
318 nblank_lines++; 362 if (nblank_lines == 0)
319 else if (!nblank_lines) 363 /* end with a newline even if the source doesn't */
320 /* missing a \n on the last line? */ 
321 nblank_lines = 2; 364 nblank_lines = 2;
322 flush_blanks(); 365 flush_blanks();
323 exit(EXIT_SUCCESS); 366 exit(EXIT_SUCCESS);
324 /* NOTREACHED */ 
325} 367}
326 368
 369/*
 370 * Prints the first 'nflush' lines. Printed lines are freed.
 371 * After this function returns, 'lines' points to the first
 372 * of the remaining lines, and 'nblank_lines' will have the
 373 * number of half line feeds between the final flushed line
 374 * and the first remaining line.
 375 */
327static void 376static void
328flush_lines(int nflush) 377flush_lines(int nflush)
329{ 378{
330 LINE *l; 379 LINE *l;
331 380
332 while (--nflush >= 0) { 381 while (--nflush >= 0) {
333 l = lines; 382 l = lines;
334 lines = l->l_next; 383 lines = l->l_next;
335 if (l->l_line) { 384 if (l->l_line) {
336 flush_blanks(); 385 flush_blanks();
337 flush_line(l); 386 flush_line(l);
 387 free(l->l_line);
338 } 388 }
339 nblank_lines++; 389 if (l->l_next)
340 if (l->l_line) 390 nblank_lines++;
341 (void)free((void *)l->l_line); 
342 free_line(l); 391 free_line(l);
343 } 392 }
344 if (lines) 393 if (lines)
345 lines->l_prev = NULL; 394 lines->l_prev = NULL;
346} 395}
347 396
348/* 397/*
349 * Print a number of newline/half newlines. If fine flag is set, nblank_lines 398 * Print a number of newline/half newlines.
350 * is the number of half line feeds, otherwise it is the number of whole line 399 * nblank_lines is the number of half line feeds.
351 * feeds. 
352 */ 400 */
353static void 401static void
354flush_blanks(void) 402flush_blanks(void)
355{ 403{
356 int half, i, nb; 404 int half, i, nb;
357 405
358 half = 0; 406 half = 0;
359 nb = nblank_lines; 407 nb = nblank_lines;
360 if (nb & 1) { 408 if (nb & 1) {
361 if (fine) 409 if (fine)
362 half = 1; 410 half = 1;
363 else 411 else
364 nb++; 412 nb++;
365 } 413 }
366 nb /= 2; 414 nb /= 2;
367 for (i = nb; --i >= 0;) 415 for (i = nb; --i >= 0;)
368 PUTC('\n'); 416 PUTC('\n');
369 if (half) { 417 if (half) {
370 PUTC('\033'); 418 PUTC(ESC);
371 PUTC('\011'); 419 PUTC(FHLF);
372 if (!nb) 420 if (!nb)
373 PUTC('\r'); 421 PUTC('\r');
374 } 422 }
375 nblank_lines = 0; 423 nblank_lines = 0;
376} 424}
377 425
378/* 426/*
379 * Write a line to stdout taking care of space to tab conversion (-h flag) 427 * Write a line to stdout taking care of space to tab conversion (-h flag)
380 * and character set shifts. 428 * and character set shifts.
381 */ 429 */
382static void 430static void
383flush_line(LINE *l) 431flush_line(LINE *l)
384{ 432{
385 CHAR *c, *endc; 433 CHAR *c, *endc;
386 int nchars, last_col, this_col; 434 int i, j, nchars, last_col, save, this_col, tot;
387 435
388 last_col = 0; 436 last_col = 0;
389 nchars = l->l_line_len; 437 nchars = l->l_line_len;
390 438
391 if (l->l_needs_sort) { 439 if (l->l_needs_sort) {
392 static CHAR *sorted; 440 static CHAR *sorted;
393 static int count_size, *count, i, save, sorted_size, tot; 441 static int count_size, *count, sorted_size;
394 442
395 /* 443 /*
396 * Do an O(n) sort on l->l_line by column being careful to 444 * Do an O(n) sort on l->l_line by column being careful to
397 * preserve the order of characters in the same column. 445 * preserve the order of characters in the same column.
398 */ 446 */
399 if (l->l_lsize > sorted_size) { 447 if (l->l_lsize > sorted_size) {
400 sorted_size = l->l_lsize; 448 sorted_size = l->l_lsize;
401 sorted = (CHAR *)xmalloc((void *)sorted, 449 if ((sorted = realloc(sorted,
402 (unsigned)sizeof(CHAR) * sorted_size); 450 sizeof(CHAR) * (size_t)sorted_size)) == NULL)
 451 err(EXIT_FAILURE, NULL);
403 } 452 }
404 if (l->l_max_col >= count_size) { 453 if (l->l_max_col >= count_size) {
405 count_size = l->l_max_col + 1; 454 count_size = l->l_max_col + 1;
406 count = (int *)xmalloc((void *)count, 455 if ((count = realloc(count,
407 (unsigned)sizeof(int) * count_size); 456 sizeof(int) * (size_t)count_size)) == NULL)
 457 err(EXIT_FAILURE, NULL);
408 } 458 }
409 (void)memset(count, 0, sizeof(int) * l->l_max_col + 1); 459 memset(count, 0, sizeof(int) * (size_t)l->l_max_col + 1);
410 for (i = nchars, c = l->l_line; --i >= 0; c++) 460 for (i = nchars, c = l->l_line; --i >= 0; c++)
411 count[c->c_column]++; 461 count[c->c_column]++;
412 462
413 /* 463 /*
414 * calculate running total (shifted down by 1) to use as 464 * calculate running total (shifted down by 1) to use as
415 * indices into new line. 465 * indices into new line.
416 */ 466 */
417 for (tot = 0, i = 0; i <= l->l_max_col; i++) { 467 for (tot = 0, i = 0; i <= l->l_max_col; i++) {
418 save = count[i]; 468 save = count[i];
419 count[i] = tot; 469 count[i] = tot;
420 tot += save; 470 tot += save;
421 } 471 }
422 472
423 for (i = nchars, c = l->l_line; --i >= 0; c++) 473 for (i = nchars, c = l->l_line; --i >= 0; c++)
424 sorted[count[c->c_column]++] = *c; 474 sorted[count[c->c_column]++] = *c;
425 c = sorted; 475 c = sorted;
426 } else 476 } else
427 c = l->l_line; 477 c = l->l_line;
428 while (nchars > 0) { 478 while (nchars > 0) {
429 this_col = c->c_column; 479 this_col = c->c_column;
430 endc = c; 480 endc = c;
431 do { 481 do {
432 ++endc; 482 ++endc;
433 } while (--nchars > 0 && this_col == endc->c_column); 483 } while (--nchars > 0 && this_col == endc->c_column);
434 484
435 /* if -b only print last character */ 485 /* if -b only print last character */
436 if (no_backspaces) 486 if (no_backspaces) {
437 c = endc - 1; 487 c = endc - 1;
 488 if (nchars > 0 &&
 489 this_col + c->c_width > endc->c_column)
 490 continue;
 491 }
438 492
439 if (this_col > last_col) { 493 if (this_col > last_col) {
440 int nspace = this_col - last_col; 494 int nspace = this_col - last_col;
441 495
442 if (compress_spaces && nspace > 1) { 496 if (compress_spaces && nspace > 1) {
443 int ntabs; 497 while (1) {
 498 int tab_col, tab_size;
444 499
445 ntabs = ((last_col % 8) + nspace) / 8; 500 tab_col = (last_col + 8) & ~7;
446 if (ntabs) { 501 if (tab_col > this_col)
447 nspace -= (ntabs * 8) - (last_col % 8); 502 break;
448 while (--ntabs >= 0) 503 tab_size = tab_col - last_col;
 504 if (tab_size == 1)
 505 PUTC(' ');
 506 else
449 PUTC('\t'); 507 PUTC('\t');
 508 nspace -= tab_size;
 509 last_col = tab_col;
450 } 510 }
451 } 511 }
452 while (--nspace >= 0) 512 while (--nspace >= 0)
453 PUTC(' '); 513 PUTC(' ');
454 last_col = this_col; 514 last_col = this_col;
455 } 515 }
456 last_col++; 
457 516
458 for (;;) { 517 for (;;) {
459 if (c->c_set != last_set) { 518 if (c->c_set != last_set) {
460 switch (c->c_set) { 519 switch (c->c_set) {
461 case CS_NORMAL: 520 case CS_NORMAL:
462 PUTC('\017'); 521 PUTC(SI);
463 break; 522 break;
464 case CS_ALTERNATE: 523 case CS_ALTERNATE:
465 PUTC('\016'); 524 PUTC(SO);
466 } 525 }
467 last_set = c->c_set; 526 last_set = c->c_set;
468 } 527 }
469 PUTC(c->c_char); 528 PUTC(c->c_char);
 529 if ((c + 1) < endc)
 530 for (j = 0; j < c->c_width; j++)
 531 PUTC('\b');
470 if (++c >= endc) 532 if (++c >= endc)
471 break; 533 break;
472 PUTC('\b'); 
473 } 534 }
 535 last_col += (c - 1)->c_width;
 536 }
 537}
 538
 539/*
 540 * Increment or decrement a line number, checking for overflow.
 541 * Stop one below INT_MAX such that the adjust variable is safe.
 542 */
 543void
 544addto_lineno(int *lno, int offset)
 545{
 546 if (offset > 0) {
 547 if (*lno >= INT_MAX - offset)
 548 errx(EXIT_FAILURE, "too many lines");
 549 } else {
 550 if (*lno < INT_MIN - offset)
 551 errx(EXIT_FAILURE, "too many reverse line feeds");
474 } 552 }
 553 *lno += offset;
475} 554}
476 555
477#define NALLOC 64 556#define NALLOC 64
478 557
479static LINE *line_freelist; 558static LINE *line_freelist;
480 559
481static LINE * 560static LINE *
482alloc_line(void) 561alloc_line(void)
483{ 562{
484 LINE *l; 563 LINE *l;
485 int i; 564 int i;
486 565
487 if (!line_freelist) { 566 if (!line_freelist) {
488 l = (LINE *)xmalloc(NULL, sizeof(LINE) * NALLOC); 567 if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL)
 568 err(EXIT_FAILURE, NULL);
489 line_freelist = l; 569 line_freelist = l;
490 for (i = 1; i < NALLOC; i++, l++) 570 for (i = 1; i < NALLOC; i++, l++)
491 l->l_next = l + 1; 571 l->l_next = l + 1;
492 l->l_next = NULL; 572 l->l_next = NULL;
493 } 573 }
494 l = line_freelist; 574 l = line_freelist;
495 line_freelist = l->l_next; 575 line_freelist = l->l_next;
496 576
497 (void)memset(l, 0, sizeof(LINE)); 577 memset(l, 0, sizeof(LINE));
498 return (l); 578 return (l);
499} 579}
500 580
501static void 581static void
502free_line(LINE *l) 582free_line(LINE *l)
503{ 583{
504 584
505 l->l_next = line_freelist; 585 l->l_next = line_freelist;
506 line_freelist = l; 586 line_freelist = l;
507} 587}
508 588
509static void * 
510xmalloc(void *p, size_t size) 
511{ 
512 void *q; 
513 
514 if (!(q = (void *)realloc(p, size))) 
515 err(EXIT_FAILURE, "realloc"); 
516 p = q; 
517 return (p); 
518} 
519 
520static void 589static void
521usage(void) 590usage(void)
522{ 591{
523 592
524 (void)fprintf(stderr, "usage: col [-bfpx] [-l nline]\n"); 593 (void)fprintf(stderr, "Usage: %s [-bfhpx] [-l nline]\n", getprogname());
525 exit(EXIT_FAILURE); 
526} 
527 
528static void 
529wrerr(void) 
530{ 
531 
532 (void)fprintf(stderr, "col: write error.\n"); 
533 exit(EXIT_FAILURE); 594 exit(EXIT_FAILURE);
534} 595}
535 596
536static void 597static void
537dowarn(int line) 598dowarn(int line)
538{ 599{
539 600
540 warnx("warning: can't back up %s", 601 warnx("warning: can't back up %s",
541 line < 0 ? "past first line" : "-- line already flushed"); 602 line < 0 ? "past first line" : "-- line already flushed");
542} 603}