| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | .\" $NetBSD: ctype.3,v 1.23 2017/12/12 14:13:52 abhinav Exp $ | | 1 | .\" $NetBSD: ctype.3,v 1.24 2019/01/15 00:31:19 riastradh Exp $ |
2 | .\" | | 2 | .\" |
3 | .\" Copyright (c) 1991 Regents of the University of California. | | 3 | .\" Copyright (c) 1991 Regents of the University of California. |
4 | .\" All rights reserved. | | 4 | .\" All rights reserved. |
5 | .\" | | 5 | .\" |
6 | .\" | | 6 | .\" |
7 | .\" Redistribution and use in source and binary forms, with or without | | 7 | .\" Redistribution and use in source and binary forms, with or without |
8 | .\" modification, are permitted provided that the following conditions | | 8 | .\" modification, are permitted provided that the following conditions |
9 | .\" are met: | | 9 | .\" are met: |
10 | .\" 1. Redistributions of source code must retain the above copyright | | 10 | .\" 1. Redistributions of source code must retain the above copyright |
11 | .\" notice, this list of conditions and the following disclaimer. | | 11 | .\" notice, this list of conditions and the following disclaimer. |
12 | .\" 2. Redistributions in binary form must reproduce the above copyright | | 12 | .\" 2. Redistributions in binary form must reproduce the above copyright |
13 | .\" notice, this list of conditions and the following disclaimer in the | | 13 | .\" notice, this list of conditions and the following disclaimer in the |
14 | .\" documentation and/or other materials provided with the distribution. | | 14 | .\" documentation and/or other materials provided with the distribution. |
| @@ -20,27 +20,27 @@ | | | @@ -20,27 +20,27 @@ |
20 | .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 20 | .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
21 | .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 21 | .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
22 | .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 22 | .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
23 | .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 23 | .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
24 | .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 24 | .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
25 | .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 25 | .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
26 | .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 26 | .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 | .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 27 | .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
28 | .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 28 | .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
29 | .\" SUCH DAMAGE. | | 29 | .\" SUCH DAMAGE. |
30 | .\" | | 30 | .\" |
31 | .\" @(#)ctype.3 6.5 (Berkeley) 4/19/91 | | 31 | .\" @(#)ctype.3 6.5 (Berkeley) 4/19/91 |
32 | .\" | | 32 | .\" |
33 | .Dd December 8, 2017 | | 33 | .Dd January 15, 2019 |
34 | .Dt CTYPE 3 | | 34 | .Dt CTYPE 3 |
35 | .Os | | 35 | .Os |
36 | .Sh NAME | | 36 | .Sh NAME |
37 | .Nm ctype | | 37 | .Nm ctype |
38 | .Nd character classification and mapping functions | | 38 | .Nd character classification and mapping functions |
39 | .Sh LIBRARY | | 39 | .Sh LIBRARY |
40 | .Lb libc | | 40 | .Lb libc |
41 | .Sh SYNOPSIS | | 41 | .Sh SYNOPSIS |
42 | .In ctype.h | | 42 | .In ctype.h |
43 | .Fn isalpha "int c" | | 43 | .Fn isalpha "int c" |
44 | .Fn isupper "int c" | | 44 | .Fn isupper "int c" |
45 | .Fn islower "int c" | | 45 | .Fn islower "int c" |
46 | .Fn isdigit "int c" | | 46 | .Fn isdigit "int c" |
| @@ -126,13 +126,83 @@ must first be cast to | | | @@ -126,13 +126,83 @@ must first be cast to |
126 | to ensure that the values are within the correct range. | | 126 | to ensure that the values are within the correct range. |
127 | Casting a negative-valued | | 127 | Casting a negative-valued |
128 | .Vt char | | 128 | .Vt char |
129 | or | | 129 | or |
130 | .Vt signed char | | 130 | .Vt signed char |
131 | directly to | | 131 | directly to |
132 | .Vt int | | 132 | .Vt int |
133 | will produce a negative-valued | | 133 | will produce a negative-valued |
134 | .Vt int , | | 134 | .Vt int , |
135 | which will be outside the range of allowed values | | 135 | which will be outside the range of allowed values |
136 | (unless it happens to be equal to | | 136 | (unless it happens to be equal to |
137 | .Dv EOF , | | 137 | .Dv EOF , |
138 | but even that would not give the desired result). | | 138 | but even that would not give the desired result). |
| | | 139 | .Pp |
| | | 140 | Because the bugs may manifest as silent misbehavior or as crashes only |
| | | 141 | when fed input outside the US-ASCII range, the |
| | | 142 | .Nx |
| | | 143 | implementation of the |
| | | 144 | .Nm |
| | | 145 | functions is designed to elicit a compiler warning for code that passes |
| | | 146 | inputs of type |
| | | 147 | .Vt char |
| | | 148 | in order to flag code that may pass negative values at runtime that |
| | | 149 | would lead to undefined behavior: |
| | | 150 | .Bd -literal -offset indent |
| | | 151 | #include <ctype.h> |
| | | 152 | #include <locale.h> |
| | | 153 | #include <stdio.h> |
| | | 154 | |
| | | 155 | int |
| | | 156 | main(int argc, char **argv) |
| | | 157 | { |
| | | 158 | |
| | | 159 | if (argc < 2) |
| | | 160 | return 1; |
| | | 161 | setlocale(LC_ALL, ""); |
| | | 162 | printf("%d %d\en", *argv[1], isprint(*argv[1])); |
| | | 163 | printf("%d %d\en", (int)(unsigned char)*argv[1], |
| | | 164 | isprint((int)(unsigned char)*argv[1])); |
| | | 165 | return 0; |
| | | 166 | } |
| | | 167 | .Ed |
| | | 168 | .Pp |
| | | 169 | When compiling this program, GCC reports a warning for the line that |
| | | 170 | passes |
| | | 171 | .Vt char . |
| | | 172 | At runtime, you may get nonsense answers for some inputs without the |
| | | 173 | cast -- if you're lucky and it doesn't crash or make demons come flying |
| | | 174 | out of your nose: |
| | | 175 | .Bd -literal -offset indent |
| | | 176 | % gcc -Wall -o test test.c |
| | | 177 | test.c: In function 'main': |
| | | 178 | test.c:12:2: warning: array subscript has type 'char' |
| | | 179 | % LANG=C ./test "`printf '\e270'`" |
| | | 180 | -72 5 |
| | | 181 | 184 0 |
| | | 182 | % LC_CTYPE=C ./test "`printf '\e377'`" |
| | | 183 | -1 0 |
| | | 184 | 255 0 |
| | | 185 | % LC_CTYPE=fr_FR.ISO8859-1 ./test "`printf '\e377'`" |
| | | 186 | -1 0 |
| | | 187 | 255 2 |
| | | 188 | .Ed |
| | | 189 | .Pp |
| | | 190 | Some implementations of libc, such as glibc as of 2018, attempt to |
| | | 191 | avoid the worst of the undefined behavior by defining the functions to |
| | | 192 | work for all integer inputs representable by either |
| | | 193 | .Vt unsigned char |
| | | 194 | or |
| | | 195 | .Vt char , |
| | | 196 | and suppress the warning. |
| | | 197 | However, this is not an excuse for avoiding conversion to |
| | | 198 | .Vt unsigned char : |
| | | 199 | if |
| | | 200 | .Dv EOF |
| | | 201 | coincides with any such value, as it does when it is -1 on platforms |
| | | 202 | with signed |
| | | 203 | .Vt char ,
| | | 204 | programs that pass |
| | | 205 | .Vt char |
| | | 206 | will still necessarily confuse the classification and mapping of |
| | | 207 | .Dv EOF |
| | | 208 | with the classification and mapping of some non-EOF inputs. |