| @@ -1,847 +1,859 @@ | | | @@ -1,847 +1,859 @@ |
1 | .\" $NetBSD: regex.3,v 1.28 2021/02/24 09:10:12 wiz Exp $ | | 1 | .\" $NetBSD: regex.3,v 1.29 2021/03/11 15:12:51 christos Exp $ |
2 | .\" | | 2 | .\" |
3 | .\" Copyright (c) 1992, 1993, 1994 Henry Spencer. | | 3 | .\" Copyright (c) 1992, 1993, 1994 Henry Spencer. |
4 | .\" Copyright (c) 1992, 1993, 1994 | | 4 | .\" Copyright (c) 1992, 1993, 1994 |
5 | .\" The Regents of the University of California. All rights reserved. | | 5 | .\" The Regents of the University of California. All rights reserved. |
6 | .\" | | 6 | .\" |
7 | .\" This code is derived from software contributed to Berkeley by | | 7 | .\" This code is derived from software contributed to Berkeley by |
8 | .\" Henry Spencer. | | 8 | .\" Henry Spencer. |
9 | .\" | | 9 | .\" |
10 | .\" Redistribution and use in source and binary forms, with or without | | 10 | .\" Redistribution and use in source and binary forms, with or without |
11 | .\" modification, are permitted provided that the following conditions | | 11 | .\" modification, are permitted provided that the following conditions |
12 | .\" are met: | | 12 | .\" are met: |
13 | .\" 1. Redistributions of source code must retain the above copyright | | 13 | .\" 1. Redistributions of source code must retain the above copyright |
14 | .\" notice, this list of conditions and the following disclaimer. | | 14 | .\" notice, this list of conditions and the following disclaimer. |
15 | .\" 2. Redistributions in binary form must reproduce the above copyright | | 15 | .\" 2. Redistributions in binary form must reproduce the above copyright |
16 | .\" notice, this list of conditions and the following disclaimer in the | | 16 | .\" notice, this list of conditions and the following disclaimer in the |
17 | .\" documentation and/or other materials provided with the distribution. | | 17 | .\" documentation and/or other materials provided with the distribution. |
18 | .\" 3. Neither the name of the University nor the names of its contributors | | 18 | .\" 3. Neither the name of the University nor the names of its contributors |
19 | .\" may be used to endorse or promote products derived from this software | | 19 | .\" may be used to endorse or promote products derived from this software |
20 | .\" without specific prior written permission. | | 20 | .\" without specific prior written permission. |
21 | .\" | | 21 | .\" |
22 | .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | | 22 | .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
23 | .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 23 | .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
24 | .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 24 | .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
25 | .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 25 | .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
26 | .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 26 | .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
27 | .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 27 | .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
28 | .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 28 | .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
29 | .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 29 | .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
30 | .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 30 | .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
31 | .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 31 | .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
32 | .\" SUCH DAMAGE. | | 32 | .\" SUCH DAMAGE. |
33 | .\" | | 33 | .\" |
34 | .\" @(#)regex.3 8.4 (Berkeley) 3/20/94 | | 34 | .\" @(#)regex.3 8.4 (Berkeley) 3/20/94 |
35 | .\" $FreeBSD: head/lib/libc/regex/regex.3 363817 2020-08-04 02:06:49Z kevans $ | | 35 | .\" $FreeBSD: head/lib/libc/regex/regex.3 363817 2020-08-04 02:06:49Z kevans $ |
36 | .\" | | 36 | .\" |
37 | .Dd February 22, 2021 | | 37 | .Dd March 11, 2021 |
38 | .Dt REGEX 3 | | 38 | .Dt REGEX 3 |
39 | .Os | | 39 | .Os |
40 | .Sh NAME | | 40 | .Sh NAME |
41 | .Nm regcomp , | | 41 | .Nm regcomp , |
42 | .Nm regexec , | | 42 | .Nm regexec , |
43 | .Nm regerror , | | 43 | .Nm regerror , |
44 | .Nm regfree , | | 44 | .Nm regfree , |
45 | .Nm regasub , | | 45 | .Nm regasub , |
46 | .Nm regnsub | | 46 | .Nm regnsub |
47 | .Nd regular-expression library | | 47 | .Nd regular-expression library |
48 | .Sh LIBRARY | | 48 | .Sh LIBRARY |
49 | .Lb libc | | 49 | .Lb libc |
50 | .Sh SYNOPSIS | | 50 | .Sh SYNOPSIS |
51 | .In regex.h | | 51 | .In regex.h |
52 | .Ft int | | 52 | .Ft int |
53 | .Fo regcomp | | 53 | .Fo regcomp |
54 | .Fa "regex_t * restrict preg" "const char * restrict pattern" "int cflags" | | 54 | .Fa "regex_t * restrict preg" "const char * restrict pattern" "int cflags" |
55 | .Fc | | 55 | .Fc |
56 | .Ft int | | 56 | .Ft int |
57 | .Fo regexec | | 57 | .Fo regexec |
58 | .Fa "const regex_t * restrict preg" "const char * restrict string" | | 58 | .Fa "const regex_t * restrict preg" "const char * restrict string" |
59 | .Fa "size_t nmatch" "regmatch_t pmatch[restrict]" "int eflags" | | 59 | .Fa "size_t nmatch" "regmatch_t pmatch[restrict]" "int eflags" |
60 | .Fc | | 60 | .Fc |
61 | .Ft size_t | | 61 | .Ft size_t |
62 | .Fo regerror | | 62 | .Fo regerror |
63 | .Fa "int errcode" "const regex_t * restrict preg" | | 63 | .Fa "int errcode" "const regex_t * restrict preg" |
64 | .Fa "char * restrict errbuf" "size_t errbuf_size" | | 64 | .Fa "char * restrict errbuf" "size_t errbuf_size" |
65 | .Fc | | 65 | .Fc |
66 | .Ft void | | 66 | .Ft void |
67 | .Fn regfree "regex_t *preg" | | 67 | .Fn regfree "regex_t *preg" |
68 | .Ft ssize_t | | 68 | .Ft ssize_t |
69 | .Fn regnsub "char *buf" "size_t bufsiz" "const char *sub" "const regmatch_t *rm" "const char *str" | | 69 | .Fn regnsub "char *buf" "size_t bufsiz" "const char *sub" "const regmatch_t *rm" "const char *str" |
70 | .Ft ssize_t | | 70 | .Ft ssize_t |
71 | .Fn regasub "char **buf" "const char *sub" "const regmatch_t *rm" "const char *sstr" | | 71 | .Fn regasub "char **buf" "const char *sub" "const regmatch_t *rm" "const char *sstr" |
72 | .Sh DESCRIPTION | | 72 | .Sh DESCRIPTION |
73 | These routines implement | | 73 | These routines implement |
74 | .St -p1003.2 | | 74 | .St -p1003.2 |
75 | regular expressions | | 75 | regular expressions |
76 | .Pq Do RE Dc Ns s ; | | 76 | .Pq Do RE Dc Ns s ; |
77 | see | | 77 | see |
78 | .Xr re_format 7 . | | 78 | .Xr re_format 7 . |
79 | The | | 79 | The |
80 | .Fn regcomp | | 80 | .Fn regcomp |
81 | function | | 81 | function |
82 | compiles an RE written as a string into an internal form, | | 82 | compiles an RE written as a string into an internal form, |
83 | .Fn regexec | | 83 | .Fn regexec |
84 | matches that internal form against a string and reports results, | | 84 | matches that internal form against a string and reports results, |
85 | .Fn regerror | | 85 | .Fn regerror |
86 | transforms error codes from either into human-readable messages, | | 86 | transforms error codes from either into human-readable messages, |
87 | and | | 87 | and |
88 | .Fn regfree | | 88 | .Fn regfree |
89 | frees any dynamically-allocated storage used by the internal form | | 89 | frees any dynamically-allocated storage used by the internal form |
90 | of an RE. | | 90 | of an RE. |
91 | .Pp | | 91 | .Pp |
92 | The header | | 92 | The header |
93 | .In regex.h | | 93 | .In regex.h |
94 | declares two structure types, | | 94 | declares two structure types, |
95 | .Ft regex_t | | 95 | .Ft regex_t |
96 | and | | 96 | and |
97 | .Ft regmatch_t , | | 97 | .Ft regmatch_t , |
98 | the former for compiled internal forms and the latter for match reporting. | | 98 | the former for compiled internal forms and the latter for match reporting. |
99 | It also declares the four functions, | | 99 | It also declares the four functions, |
100 | a type | | 100 | a type |
101 | .Ft regoff_t , | | 101 | .Ft regoff_t , |
102 | and a number of constants with names starting with | | 102 | and a number of constants with names starting with |
103 | .Dq Dv REG_ . | | 103 | .Dq Dv REG_ . |
104 | .Pp | | 104 | .Pp |
105 | The | | 105 | The |
106 | .Fn regcomp | | 106 | .Fn regcomp |
107 | function | | 107 | function |
108 | compiles the regular expression contained in the | | 108 | compiles the regular expression contained in the |
109 | .Fa pattern | | 109 | .Fa pattern |
110 | string, | | 110 | string, |
111 | subject to the flags in | | 111 | subject to the flags in |
112 | .Fa cflags , | | 112 | .Fa cflags , |
113 | and places the results in the | | 113 | and places the results in the |
114 | .Ft regex_t | | 114 | .Ft regex_t |
115 | structure pointed to by | | 115 | structure pointed to by |
116 | .Fa preg . | | 116 | .Fa preg . |
117 | The | | 117 | The |
118 | .Fa cflags | | 118 | .Fa cflags |
119 | argument | | 119 | argument |
120 | is the bitwise OR of zero or more of the following flags: | | 120 | is the bitwise OR of zero or more of the following flags: |
121 | .Bl -tag -width REG_EXTENDED | | 121 | .Bl -tag -width REG_EXTENDED |
122 | .It Dv REG_EXTENDED | | 122 | .It Dv REG_EXTENDED |
123 | Compile modern | | 123 | Compile modern |
124 | .Pq Dq extended | | 124 | .Pq Dq extended |
125 | REs, | | 125 | REs, |
126 | rather than the obsolete | | 126 | rather than the obsolete |
127 | .Pq Dq basic | | 127 | .Pq Dq basic |
128 | REs that | | 128 | REs that |
129 | are the default. | | 129 | are the default. |
130 | .It Dv REG_BASIC | | 130 | .It Dv REG_BASIC |
131 | This is a synonym for 0, | | 131 | This is a synonym for 0, |
132 | provided as a counterpart to | | 132 | provided as a counterpart to |
133 | .Dv REG_EXTENDED | | 133 | .Dv REG_EXTENDED |
134 | to improve readability. | | 134 | to improve readability. |
135 | .It Dv REG_NOSPEC | | 135 | .It Dv REG_NOSPEC |
136 | Compile with recognition of all special characters turned off. | | 136 | Compile with recognition of all special characters turned off. |
137 | All characters are thus considered ordinary, | | 137 | All characters are thus considered ordinary, |
138 | so the | | 138 | so the |
139 | .Dq RE | | 139 | .Dq RE |
140 | is a literal string. | | 140 | is a literal string. |
141 | This is an extension, | | 141 | This is an extension, |
142 | compatible with but not specified by | | 142 | compatible with but not specified by |
143 | .St -p1003.2 , | | 143 | .St -p1003.2 , |
144 | and should be used with | | 144 | and should be used with |
145 | caution in software intended to be portable to other systems. | | 145 | caution in software intended to be portable to other systems. |
146 | .Dv REG_EXTENDED | | 146 | .Dv REG_EXTENDED |
147 | and | | 147 | and |
148 | .Dv REG_NOSPEC | | 148 | .Dv REG_NOSPEC |
149 | may not be used | | 149 | may not be used |
150 | in the same call to | | 150 | in the same call to |
151 | .Fn regcomp . | | 151 | .Fn regcomp . |
152 | .It Dv REG_ICASE | | 152 | .It Dv REG_ICASE |
153 | Compile for matching that ignores upper/lower case distinctions. | | 153 | Compile for matching that ignores upper/lower case distinctions. |
154 | See | | 154 | See |
155 | .Xr re_format 7 . | | 155 | .Xr re_format 7 . |
156 | .It Dv REG_NOSUB | | 156 | .It Dv REG_NOSUB |
157 | Compile for matching that need only report success or failure, | | 157 | Compile for matching that need only report success or failure, |
158 | not what was matched. | | 158 | not what was matched. |
159 | .It Dv REG_NEWLINE | | 159 | .It Dv REG_NEWLINE |
160 | Compile for newline-sensitive matching. | | 160 | Compile for newline-sensitive matching. |
161 | By default, newline is a completely ordinary character with no special | | 161 | By default, newline is a completely ordinary character with no special |
162 | meaning in either REs or strings. | | 162 | meaning in either REs or strings. |
163 | With this flag, | | 163 | With this flag, |
164 | .Ql [^ | | 164 | .Ql [^ |
165 | bracket expressions and | | 165 | bracket expressions and |
166 | .Ql .\& | | 166 | .Ql .\& |
167 | never match newline, | | 167 | never match newline, |
168 | a | | 168 | a |
169 | .Ql ^\& | | 169 | .Ql ^\& |
170 | anchor matches the null string after any newline in the string | | 170 | anchor matches the null string after any newline in the string |
171 | in addition to its normal function, | | 171 | in addition to its normal function, |
172 | and the | | 172 | and the |
173 | .Ql $\& | | 173 | .Ql $\& |
174 | anchor matches the null string before any newline in the | | 174 | anchor matches the null string before any newline in the |
175 | string in addition to its normal function. | | 175 | string in addition to its normal function. |
176 | .It Dv REG_PEND | | 176 | .It Dv REG_PEND |
177 | The regular expression ends, | | 177 | The regular expression ends, |
178 | not at the first NUL, | | 178 | not at the first NUL, |
179 | but just before the character pointed to by the | | 179 | but just before the character pointed to by the |
180 | .Va re_endp | | 180 | .Va re_endp |
181 | member of the structure pointed to by | | 181 | member of the structure pointed to by |
182 | .Fa preg . | | 182 | .Fa preg . |
183 | The | | 183 | The |
184 | .Va re_endp | | 184 | .Va re_endp |
185 | member is of type | | 185 | member is of type |
186 | .Ft "const char *" . | | 186 | .Ft "const char *" . |
187 | This flag permits inclusion of NULs in the RE; | | 187 | This flag permits inclusion of NULs in the RE; |
188 | they are considered ordinary characters. | | 188 | they are considered ordinary characters. |
189 | This is an extension, | | 189 | This is an extension, |
190 | compatible with but not specified by | | 190 | compatible with but not specified by |
191 | .St -p1003.2 , | | 191 | .St -p1003.2 , |
192 | and should be used with | | 192 | and should be used with |
193 | caution in software intended to be portable to other systems. | | 193 | caution in software intended to be portable to other systems. |
194 | .It Dv REG_GNU | | 194 | .It Dv REG_GNU |
195 | Include GNU-inspired extensions: | | 195 | Include GNU-inspired extensions: |
196 | .Pp | | 196 | .Pp |
197 | .Bl -tag -offset indent -width XX -compact | | 197 | .Bl -tag -offset indent -width XX -compact |
198 | .It \eN | | 198 | .It \eN |
199 | Use backreference | | 199 | Use backreference |
200 | .Dv N | | 200 | .Dv N |
201 | where | | 201 | where |
202 | .Dv N | | 202 | .Dv N |
203 | is a digit in the range | | 204 | is a digit in the range |
204 | .Dv [1-9] . | | 204 | .Dv [1-9] . |
| | | 205 | .It \ea |
| | | 206 | Alert (bell) character |
205 | .It \eb | | 207 | .It \eb |
206 | Match a position that is a word boundary. | | 208 | Match a position that is a word boundary. |
207 | .It \eB | | 209 | .It \eB |
208 | Match a position that is not a word boundary. | | 210 | Match a position that is not a word boundary. |
| | | 211 | .It \ef |
| | | 212 | Form Feed |
| | | 213 | .It \en |
| | | 214 | Line Feed |
| | | 215 | .It \er |
| | | 216 | Carriage return |
209 | .It \es | | 217 | .It \es |
210 | Alias for [[:space:]] | | 218 | Alias for [[:space:]] |
211 | .It \eS | | 219 | .It \eS |
212 | Alias for [^[:space:]] | | 220 | Alias for [^[:space:]] |
| | | 221 | .It \et |
| | | 222 | Horizontal Tab |
| | | 223 | .It \ev |
| | | 224 | Vertical Tab |
213 | .It \ew | | 225 | .It \ew |
214 | Alias for [[:alnum:]] | | 226 | Alias for [[:alnum:]] |
215 | .It \eW | | 227 | .It \eW |
216 | Alias for [^[:alnum:]] | | 228 | Alias for [^[:alnum:]] |
217 | .It \e' | | 229 | .It \e' |
218 | Matches the end of the subject. | | 230 | Matches the end of the subject. |
219 | .It \e` | | 231 | .It \e` |
220 | Matches the beginning of the subject. | | 232 | Matches the beginning of the subject. |
221 | .El | | 233 | .El |
222 | .Pp | | 234 | .Pp |
223 | This is an extension, | | 235 | This is an extension, |
224 | compatible with but not specified by | | 236 | compatible with but not specified by |
225 | .St -p1003.2 , | | 237 | .St -p1003.2 , |
226 | and should be used with | | 238 | and should be used with |
227 | caution in software intended to be portable to other systems. | | 239 | caution in software intended to be portable to other systems. |
228 | .El | | 240 | .El |
229 | .Pp | | 241 | .Pp |
230 | When successful, | | 242 | When successful, |
231 | .Fn regcomp | | 243 | .Fn regcomp |
232 | returns 0 and fills in the structure pointed to by | | 244 | returns 0 and fills in the structure pointed to by |
233 | .Fa preg . | | 245 | .Fa preg . |
234 | One member of that structure | | 246 | One member of that structure |
235 | (other than | | 247 | (other than |
236 | .Va re_endp ) | | 248 | .Va re_endp ) |
237 | is publicized: | | 249 | is publicized: |
238 | .Va re_nsub , | | 250 | .Va re_nsub , |
239 | of type | | 251 | of type |
240 | .Ft size_t , | | 252 | .Ft size_t , |
241 | contains the number of parenthesized subexpressions within the RE | | 253 | contains the number of parenthesized subexpressions within the RE |
242 | (except that the value of this member is undefined if the | | 254 | (except that the value of this member is undefined if the |
243 | .Dv REG_NOSUB | | 255 | .Dv REG_NOSUB |
244 | flag was used). | | 256 | flag was used). |
245 | If | | 257 | If |
246 | .Fn regcomp | | 258 | .Fn regcomp |
247 | fails, it returns a non-zero error code; | | 259 | fails, it returns a non-zero error code; |
248 | see | | 260 | see |
249 | .Sx DIAGNOSTICS . | | 261 | .Sx DIAGNOSTICS . |
250 | .Pp | | 262 | .Pp |
251 | The | | 263 | The |
252 | .Fn regexec | | 264 | .Fn regexec |
253 | function | | 265 | function |
254 | matches the compiled RE pointed to by | | 266 | matches the compiled RE pointed to by |
255 | .Fa preg | | 267 | .Fa preg |
256 | against the | | 268 | against the |
257 | .Fa string , | | 269 | .Fa string , |
258 | subject to the flags in | | 270 | subject to the flags in |
259 | .Fa eflags , | | 271 | .Fa eflags , |
260 | and reports results using | | 272 | and reports results using |
261 | .Fa nmatch , | | 273 | .Fa nmatch , |
262 | .Fa pmatch , | | 274 | .Fa pmatch , |
263 | and the returned value. | | 275 | and the returned value. |
264 | The RE must have been compiled by a previous invocation of | | 276 | The RE must have been compiled by a previous invocation of |
265 | .Fn regcomp . | | 277 | .Fn regcomp . |
266 | The compiled form is not altered during execution of | | 278 | The compiled form is not altered during execution of |
267 | .Fn regexec , | | 279 | .Fn regexec , |
268 | so a single compiled RE can be used simultaneously by multiple threads. | | 280 | so a single compiled RE can be used simultaneously by multiple threads. |
269 | .Pp | | 281 | .Pp |
270 | By default, | | 282 | By default, |
271 | the NUL-terminated string pointed to by | | 283 | the NUL-terminated string pointed to by |
272 | .Fa string | | 284 | .Fa string |
273 | is considered to be the text of an entire line, minus any terminating | | 285 | is considered to be the text of an entire line, minus any terminating |
274 | newline. | | 286 | newline. |
275 | The | | 287 | The |
276 | .Fa eflags | | 288 | .Fa eflags |
277 | argument is the bitwise OR of zero or more of the following flags: | | 289 | argument is the bitwise OR of zero or more of the following flags: |
278 | .Bl -tag -width REG_STARTEND | | 290 | .Bl -tag -width REG_STARTEND |
279 | .It Dv REG_NOTBOL | | 291 | .It Dv REG_NOTBOL |
280 | The first character of the string is treated as the continuation | | 292 | The first character of the string is treated as the continuation |
281 | of a line. | | 293 | of a line. |
282 | This means that the anchors | | 294 | This means that the anchors |
283 | .Ql ^\& , | | 295 | .Ql ^\& , |
284 | .Ql [[:<:]] , | | 296 | .Ql [[:<:]] , |
285 | and | | 297 | and |
286 | .Ql \e< | | 298 | .Ql \e< |
287 | do not match before it; but see | | 299 | do not match before it; but see |
288 | .Dv REG_STARTEND | | 300 | .Dv REG_STARTEND |
289 | below. | | 301 | below. |
290 | This does not affect the behavior of newlines under | | 302 | This does not affect the behavior of newlines under |
291 | .Dv REG_NEWLINE . | | 303 | .Dv REG_NEWLINE . |
292 | .It Dv REG_NOTEOL | | 304 | .It Dv REG_NOTEOL |
293 | The NUL terminating | | 305 | The NUL terminating |
294 | the string | | 306 | the string |
295 | does not end a line, so the | | 307 | does not end a line, so the |
296 | .Ql $\& | | 308 | .Ql $\& |
297 | anchor does not match before it. | | 309 | anchor does not match before it. |
298 | This does not affect the behavior of newlines under | | 310 | This does not affect the behavior of newlines under |
299 | .Dv REG_NEWLINE . | | 311 | .Dv REG_NEWLINE . |
300 | .It Dv REG_STARTEND | | 312 | .It Dv REG_STARTEND |
301 | The string is considered to start at | | 313 | The string is considered to start at |
302 | .Fa string No + | | 314 | .Fa string No + |
303 | .Fa pmatch Ns [0]. Ns Fa rm_so | | 315 | .Fa pmatch Ns [0]. Ns Fa rm_so |
304 | and to end before the byte located at | | 316 | and to end before the byte located at |
305 | .Fa string No + | | 317 | .Fa string No + |
306 | .Fa pmatch Ns [0]. Ns Fa rm_eo , | | 318 | .Fa pmatch Ns [0]. Ns Fa rm_eo , |
307 | regardless of the value of | | 319 | regardless of the value of |
308 | .Fa nmatch . | | 320 | .Fa nmatch . |
309 | See below for the definition of | | 321 | See below for the definition of |
310 | .Fa pmatch | | 322 | .Fa pmatch |
311 | and | | 323 | and |
312 | .Fa nmatch . | | 324 | .Fa nmatch . |
313 | This is an extension, | | 325 | This is an extension, |
314 | compatible with but not specified by | | 326 | compatible with but not specified by |
315 | .St -p1003.2 , | | 327 | .St -p1003.2 , |
316 | and should be used with | | 328 | and should be used with |
317 | caution in software intended to be portable to other systems. | | 329 | caution in software intended to be portable to other systems. |
318 | .Pp | | 330 | .Pp |
319 | Without | | 331 | Without |
320 | .Dv REG_NOTBOL , | | 332 | .Dv REG_NOTBOL , |
321 | the position | | 333 | the position |
322 | .Fa rm_so | | 334 | .Fa rm_so |
323 | is considered the beginning of a line, such that | | 335 | is considered the beginning of a line, such that |
324 | .Ql ^ | | 336 | .Ql ^ |
325 | matches before it, and the beginning of a word if there is a word | | 337 | matches before it, and the beginning of a word if there is a word |
326 | character at this position, such that | | 338 | character at this position, such that |
327 | .Ql [[:<:]] | | 339 | .Ql [[:<:]] |
328 | and | | 340 | and |
329 | .Ql \e< | | 341 | .Ql \e< |
330 | match before it. | | 342 | match before it. |
331 | .Pp | | 343 | .Pp |
332 | With | | 344 | With |
333 | .Dv REG_NOTBOL , | | 345 | .Dv REG_NOTBOL , |
334 | the character at position | | 346 | the character at position |
335 | .Fa rm_so | | 347 | .Fa rm_so |
336 | is treated as the continuation of a line, and if | | 348 | is treated as the continuation of a line, and if |
337 | .Fa rm_so | | 349 | .Fa rm_so |
338 | is greater than 0, the preceding character is taken into consideration. | | 350 | is greater than 0, the preceding character is taken into consideration. |
339 | If the preceding character is a newline and the regular expression was compiled | | 351 | If the preceding character is a newline and the regular expression was compiled |
340 | with | | 352 | with |
341 | .Dv REG_NEWLINE , | | 353 | .Dv REG_NEWLINE , |
342 | .Ql ^ | | 354 | .Ql ^ |
343 | matches before the string; if the preceding character is not a word character | | 355 | matches before the string; if the preceding character is not a word character |
344 | but the string starts with a word character, | | 356 | but the string starts with a word character, |
345 | .Ql [[:<:]] | | 357 | .Ql [[:<:]] |
346 | and | | 358 | and |
347 | .Ql \e< | | 359 | .Ql \e< |
348 | match before the string. | | 360 | match before the string. |
349 | .El | | 361 | .El |
350 | .Pp | | 362 | .Pp |
351 | See | | 363 | See |
352 | .Xr re_format 7 | | 364 | .Xr re_format 7 |
353 | for a discussion of what is matched in situations where an RE or a | | 365 | for a discussion of what is matched in situations where an RE or a |
354 | portion thereof could match any of several substrings of | | 366 | portion thereof could match any of several substrings of |
355 | .Fa string . | | 367 | .Fa string . |
356 | .Pp | | 368 | .Pp |
357 | Normally, | | 369 | Normally, |
358 | .Fn regexec | | 370 | .Fn regexec |
359 | returns 0 for success and the non-zero code | | 371 | returns 0 for success and the non-zero code |
360 | .Dv REG_NOMATCH | | 372 | .Dv REG_NOMATCH |
361 | for failure. | | 373 | for failure. |
362 | Other non-zero error codes may be returned in exceptional situations; | | 374 | Other non-zero error codes may be returned in exceptional situations; |
363 | see | | 375 | see |
364 | .Sx DIAGNOSTICS . | | 376 | .Sx DIAGNOSTICS . |
365 | .Pp | | 377 | .Pp |
366 | If | | 378 | If |
367 | .Dv REG_NOSUB | | 379 | .Dv REG_NOSUB |
368 | was specified in the compilation of the RE, | | 380 | was specified in the compilation of the RE, |
369 | or if | | 381 | or if |
370 | .Fa nmatch | | 382 | .Fa nmatch |
371 | is 0, | | 383 | is 0, |
372 | .Fn regexec | | 384 | .Fn regexec |
373 | ignores the | | 385 | ignores the |
374 | .Fa pmatch | | 386 | .Fa pmatch |
375 | argument (but see below for the case where | | 387 | argument (but see below for the case where |
376 | .Dv REG_STARTEND | | 388 | .Dv REG_STARTEND |
377 | is specified). | | 389 | is specified). |
378 | Otherwise, | | 390 | Otherwise, |
379 | .Fa pmatch | | 391 | .Fa pmatch |
380 | points to an array of | | 392 | points to an array of |
381 | .Fa nmatch | | 393 | .Fa nmatch |
382 | structures of type | | 394 | structures of type |
383 | .Ft regmatch_t . | | 395 | .Ft regmatch_t . |
384 | Such a structure has at least the members | | 396 | Such a structure has at least the members |
385 | .Va rm_so | | 397 | .Va rm_so |
386 | and | | 398 | and |
387 | .Va rm_eo , | | 399 | .Va rm_eo , |
388 | both of type | | 400 | both of type |
389 | .Ft regoff_t | | 401 | .Ft regoff_t |
390 | (a signed arithmetic type at least as large as an | | 402 | (a signed arithmetic type at least as large as an |
391 | .Ft off_t | | 403 | .Ft off_t |
392 | and a | | 404 | and a |
393 | .Ft ssize_t ) , | | 405 | .Ft ssize_t ) , |
394 | containing respectively the offset of the first character of a substring | | 406 | containing respectively the offset of the first character of a substring |
395 | and the offset of the first character after the end of the substring. | | 407 | and the offset of the first character after the end of the substring. |
396 | Offsets are measured from the beginning of the | | 408 | Offsets are measured from the beginning of the |
397 | .Fa string | | 409 | .Fa string |
398 | argument given to | | 410 | argument given to |
399 | .Fn regexec . | | 411 | .Fn regexec . |
400 | An empty substring is denoted by equal offsets, | | 412 | An empty substring is denoted by equal offsets, |
401 | both indicating the character following the empty substring. | | 413 | both indicating the character following the empty substring. |
402 | .Pp | | 414 | .Pp |
403 | The 0th member of the | | 415 | The 0th member of the |
404 | .Fa pmatch | | 416 | .Fa pmatch |
405 | array is filled in to indicate what substring of | | 417 | array is filled in to indicate what substring of |
406 | .Fa string | | 418 | .Fa string |
407 | was matched by the entire RE. | | 419 | was matched by the entire RE. |
408 | Remaining members report what substring was matched by parenthesized | | 420 | Remaining members report what substring was matched by parenthesized |
409 | subexpressions within the RE; | | 421 | subexpressions within the RE; |
410 | member | | 422 | member |
411 | .Va i | | 423 | .Va i |
412 | reports subexpression | | 424 | reports subexpression |
413 | .Va i , | | 425 | .Va i , |
414 | with subexpressions counted (starting at 1) by the order of their opening | | 426 | with subexpressions counted (starting at 1) by the order of their opening |
415 | parentheses in the RE, left to right. | | 427 | parentheses in the RE, left to right. |
416 | Unused entries in the array (corresponding either to subexpressions that | | 428 | Unused entries in the array (corresponding either to subexpressions that |
417 | did not participate in the match at all, or to subexpressions that do not | | 429 | did not participate in the match at all, or to subexpressions that do not |
418 | exist in the RE (that is, | | 430 | exist in the RE (that is, |
419 | .Va i | | 431 | .Va i |
420 | > | | 432 | > |
421 | .Fa preg Ns -> Ns Va re_nsub ) ) | | 433 | .Fa preg Ns -> Ns Va re_nsub ) ) |
422 | have both | | 434 | have both |
423 | .Va rm_so | | 435 | .Va rm_so |
424 | and | | 436 | and |
425 | .Va rm_eo | | 437 | .Va rm_eo |
426 | set to -1. | | 438 | set to -1. |
427 | If a subexpression participated in the match several times, | | 439 | If a subexpression participated in the match several times, |
428 | the reported substring is the last one it matched. | | 440 | the reported substring is the last one it matched. |
429 | (Note, as an example in particular, that when the RE | | 441 | (Note, as an example in particular, that when the RE |
430 | .Ql "(b*)+" | | 442 | .Ql "(b*)+" |
431 | matches | | 443 | matches |
432 | .Ql bbb , | | 444 | .Ql bbb , |
433 | the parenthesized subexpression matches each of the three | | 445 | the parenthesized subexpression matches each of the three |
434 | .So Li b Sc Ns s | | 446 | .So Li b Sc Ns s |
435 | and then | | 447 | and then |
436 | an infinite number of empty strings following the last | | 448 | an infinite number of empty strings following the last |
437 | .Ql b , | | 449 | .Ql b , |
438 | so the reported substring is one of the empties.) | | 450 | so the reported substring is one of the empties.) |
439 | .Pp | | 451 | .Pp |
440 | If | | 452 | If |
441 | .Dv REG_STARTEND | | 453 | .Dv REG_STARTEND |
442 | is specified, | | 454 | is specified, |
443 | .Fa pmatch | | 455 | .Fa pmatch |
444 | must point to at least one | | 456 | must point to at least one |
445 | .Ft regmatch_t | | 457 | .Ft regmatch_t |
446 | (even if | | 458 | (even if |
447 | .Fa nmatch | | 459 | .Fa nmatch |
448 | is 0 or | | 460 | is 0 or |
449 | .Dv REG_NOSUB | | 461 | .Dv REG_NOSUB |
450 | was specified), | | 462 | was specified), |
451 | to hold the input offsets for | | 463 | to hold the input offsets for |
452 | .Dv REG_STARTEND . | | 464 | .Dv REG_STARTEND . |
453 | Use for output is still entirely controlled by | | 465 | Use for output is still entirely controlled by |
454 | .Fa nmatch ; | | 466 | .Fa nmatch ; |
455 | if | | 467 | if |
456 | .Fa nmatch | | 468 | .Fa nmatch |
457 | is 0 or | | 469 | is 0 or |
458 | .Dv REG_NOSUB | | 470 | .Dv REG_NOSUB |
459 | was specified, | | 471 | was specified, |
460 | the value of | | 472 | the value of |
461 | .Fa pmatch Ns [0] | | 473 | .Fa pmatch Ns [0] |
462 | will not be changed by a successful | | 474 | will not be changed by a successful |
463 | .Fn regexec . | | 475 | .Fn regexec . |
464 | .Pp | | 476 | .Pp |
465 | The | | 477 | The |
466 | .Fn regerror | | 478 | .Fn regerror |
467 | function | | 479 | function |
468 | maps a non-zero | | 480 | maps a non-zero |
469 | .Fa errcode | | 481 | .Fa errcode |
470 | from either | | 482 | from either |
471 | .Fn regcomp | | 483 | .Fn regcomp |
472 | or | | 484 | or |
473 | .Fn regexec | | 485 | .Fn regexec |
474 | to a human-readable, printable message. | | 486 | to a human-readable, printable message. |
475 | If | | 487 | If |
476 | .Fa preg | | 488 | .Fa preg |
477 | is | | 489 | is |
478 | .No non\- Ns Dv NULL , | | 490 | .No non\- Ns Dv NULL , |
479 | the error code should have arisen from use of | | 491 | the error code should have arisen from use of |
480 | the | | 492 | the |
481 | .Ft regex_t | | 493 | .Ft regex_t |
482 | pointed to by | | 494 | pointed to by |
483 | .Fa preg , | | 495 | .Fa preg , |
484 | and if the error code came from | | 496 | and if the error code came from |
485 | .Fn regcomp , | | 497 | .Fn regcomp , |
486 | it should have been the result from the most recent | | 498 | it should have been the result from the most recent |
487 | .Fn regcomp | | 499 | .Fn regcomp |
488 | using that | | 500 | using that |
489 | .Ft regex_t . | | 501 | .Ft regex_t . |
490 | .Po The | | 502 | .Po The |
491 | .Fn regerror | | 503 | .Fn regerror |
492 | function | | 504 | function |
493 | may be able to supply a more detailed message using information | | 505 | may be able to supply a more detailed message using information |
494 | from the | | 506 | from the |
495 | .Ft regex_t . | | 507 | .Ft regex_t . |
496 | .Pc | | 508 | .Pc |
497 | The | | 509 | The |
498 | .Fn regerror | | 510 | .Fn regerror |
499 | function | | 511 | function |
500 | places the NUL-terminated message into the buffer pointed to by | | 512 | places the NUL-terminated message into the buffer pointed to by |
501 | .Fa errbuf , | | 513 | .Fa errbuf , |
502 | limiting the length (including the NUL) to at most | | 514 | limiting the length (including the NUL) to at most |
503 | .Fa errbuf_size | | 515 | .Fa errbuf_size |
504 | bytes. | | 516 | bytes. |
505 | If the whole message will not fit, | | 517 | If the whole message will not fit, |
506 | as much of it as will fit before the terminating NUL is supplied. | | 518 | as much of it as will fit before the terminating NUL is supplied. |
507 | In any case, | | 519 | In any case, |
508 | the returned value is the size of buffer needed to hold the whole | | 520 | the returned value is the size of buffer needed to hold the whole |
509 | message (including terminating NUL). | | 521 | message (including terminating NUL). |
510 | If | | 522 | If |
511 | .Fa errbuf_size | | 523 | .Fa errbuf_size |
512 | is 0, | | 524 | is 0, |
513 | .Fa errbuf | | 525 | .Fa errbuf |
514 | is ignored but the return value is still correct. | | 526 | is ignored but the return value is still correct. |
515 | .Pp | | 527 | .Pp |
516 | If the | | 528 | If the |
517 | .Fa errcode | | 529 | .Fa errcode |
518 | given to | | 530 | given to |
519 | .Fn regerror | | 531 | .Fn regerror |
520 | is first ORed with | | 532 | is first ORed with |
521 | .Dv REG_ITOA , | | 533 | .Dv REG_ITOA , |
522 | the | | 534 | the |
523 | .Dq message | | 535 | .Dq message |
524 | that results is the printable name of the error code, | | 536 | that results is the printable name of the error code, |
525 | e.g.\& | | 537 | e.g.\& |
526 | .Dq Dv REG_NOMATCH , | | 538 | .Dq Dv REG_NOMATCH , |
527 | rather than an explanation thereof. | | 539 | rather than an explanation thereof. |
528 | If | | 540 | If |
529 | .Fa errcode | | 541 | .Fa errcode |
530 | is | | 542 | is |
531 | .Dv REG_ATOI , | | 543 | .Dv REG_ATOI , |
532 | then | | 544 | then |
533 | .Fa preg | | 545 | .Fa preg |
534 | shall be | | 546 | shall be |
535 | .No non\- Ns Dv NULL | | 547 | .No non\- Ns Dv NULL |
536 | and the | | 548 | and the |
537 | .Va re_endp | | 549 | .Va re_endp |
538 | member of the structure it points to | | 550 | member of the structure it points to |
539 | must point to the printable name of an error code; | | 551 | must point to the printable name of an error code; |
540 | in this case, the result in | | 552 | in this case, the result in |
541 | .Fa errbuf | | 553 | .Fa errbuf |
542 | is the decimal digits of | | 554 | is the decimal digits of |
543 | the numeric value of the error code | | 555 | the numeric value of the error code |
544 | (0 if the name is not recognized). | | 556 | (0 if the name is not recognized). |
545 | .Dv REG_ITOA | | 557 | .Dv REG_ITOA |
546 | and | | 558 | and |
547 | .Dv REG_ATOI | | 559 | .Dv REG_ATOI |
548 | are intended primarily as debugging facilities; | | 560 | are intended primarily as debugging facilities; |
549 | they are extensions, | | 561 | they are extensions, |
550 | compatible with but not specified by | | 562 | compatible with but not specified by |
551 | .St -p1003.2 , | | 563 | .St -p1003.2 , |
552 | and should be used with | | 564 | and should be used with |
553 | caution in software intended to be portable to other systems. | | 565 | caution in software intended to be portable to other systems. |
554 | Be warned also that they are considered experimental and changes are possible. | | 566 | Be warned also that they are considered experimental and changes are possible. |
555 | .Pp | | 567 | .Pp |
556 | The | | 568 | The |
557 | .Fn regfree | | 569 | .Fn regfree |
558 | function | | 570 | function |
559 | frees any dynamically-allocated storage associated with the compiled RE | | 571 | frees any dynamically-allocated storage associated with the compiled RE |
560 | pointed to by | | 572 | pointed to by |
561 | .Fa preg . | | 573 | .Fa preg . |
562 | The remaining | | 574 | The remaining |
563 | .Ft regex_t | | 575 | .Ft regex_t |
564 | is no longer a valid compiled RE | | 576 | is no longer a valid compiled RE |
565 | and the effect of supplying it to | | 577 | and the effect of supplying it to |
566 | .Fn regexec | | 578 | .Fn regexec |
567 | or | | 579 | or |
568 | .Fn regerror | | 580 | .Fn regerror |
569 | is undefined. | | 581 | is undefined. |
570 | .Pp | | 582 | .Pp |
571 | None of these functions references global variables except for tables | | 583 | None of these functions references global variables except for tables |
572 | of constants; | | 584 | of constants; |
573 | all are safe for use from multiple threads if the arguments are safe. | | 585 | all are safe for use from multiple threads if the arguments are safe. |
574 | .Pp | | 586 | .Pp |
575 | The | | 587 | The |
576 | .Fn regnsub | | 588 | .Fn regnsub |
577 | and | | 589 | and |
578 | .Fn regasub | | 590 | .Fn regasub |
579 | functions perform substitutions using | | 591 | functions perform substitutions using |
580 | .Xr sed 1 Ns -like | | 592 | .Xr sed 1 Ns -like |
581 | syntax. | | 593 | syntax. |
582 | They return the length of the string that would have been created | | 594 | They return the length of the string that would have been created |
583 | if there were enough space, or | | 595 | if there were enough space, or |
584 | .Dv \-1 | | 596 | .Dv \-1 |
585 | on error, setting | | 597 | on error, setting |
586 | .Va errno . | | 598 | .Va errno . |
587 | The result | | 599 | The result |
588 | is placed in | | 600 | is placed in |
589 | .Fa buf | | 601 | .Fa buf |
590 | which is user-supplied in | | 602 | which is user-supplied in |
591 | .Fn regnsub | | 603 | .Fn regnsub |
592 | and dynamically allocated in | | 604 | and dynamically allocated in |
593 | .Fn regasub . | | 605 | .Fn regasub . |
594 | The | | 606 | The |
595 | .Fa sub | | 607 | .Fa sub |
596 | argument contains a substitution string which may refer to the first | | 608 | argument contains a substitution string which may refer to the first |
597 | 9 parenthesized subexpression matches using | | 609 | 9 parenthesized subexpression matches using |
598 | .Dq \e<n> | | 610 | .Dq \e<n> |
599 | to refer to the nth matched | | 611 | to refer to the nth matched |
600 | item, or | | 612 | item, or |
601 | .Dq & | | 613 | .Dq & |
602 | (which is equivalent to | | 614 | (which is equivalent to |
603 | .Dq \e0 ) | | 615 | .Dq \e0 ) |
604 | to refer to the full match. | | 616 | to refer to the full match. |
605 | The | | 617 | The |
606 | .Fa rm | | 618 | .Fa rm |
607 | array must be at least 10 elements long, and should contain the result | | 619 | array must be at least 10 elements long, and should contain the result |
608 | of the matches from a previous | | 620 | of the matches from a previous |
609 | .Fn regexec | | 621 | .Fn regexec |
610 | call. | | 622 | call. |
611 | Only the first 10 elements of the | | 623 | Only the first 10 elements of the |
612 | .Fa rm | | 624 | .Fa rm |
613 | array are used. | | 625 | array are used. |
614 | The | | 626 | The |
615 | .Fa str | | 627 | .Fa str |
616 | argument contains the source string to apply the transformation to. | | 628 | argument contains the source string to apply the transformation to. |
617 | .Sh IMPLEMENTATION CHOICES | | 629 | .Sh IMPLEMENTATION CHOICES |
618 | There are a number of decisions that | | 630 | There are a number of decisions that |
619 | .St -p1003.2 | | 631 | .St -p1003.2 |
620 | leaves up to the implementor, | | 632 | leaves up to the implementor, |
621 | either by explicitly saying | | 633 | either by explicitly saying |
622 | .Dq undefined | | 634 | .Dq undefined |
623 | or by virtue of them being | | 635 | or by virtue of them being |
624 | forbidden by the RE grammar. | | 636 | forbidden by the RE grammar. |
625 | This implementation treats them as follows. | | 637 | This implementation treats them as follows. |
626 | .Pp | | 638 | .Pp |
627 | See | | 639 | See |
628 | .Xr re_format 7 | | 640 | .Xr re_format 7 |
629 | for a discussion of the definition of case-independent matching. | | 641 | for a discussion of the definition of case-independent matching. |
630 | .Pp | | 642 | .Pp |
631 | There is no particular limit on the length of REs, | | 643 | There is no particular limit on the length of REs, |
632 | except insofar as memory is limited. | | 644 | except insofar as memory is limited. |
633 | Memory usage is approximately linear in RE size, and largely insensitive | | 645 | Memory usage is approximately linear in RE size, and largely insensitive |
634 | to RE complexity, except for bounded repetitions. | | 646 | to RE complexity, except for bounded repetitions. |
635 | See | | 647 | See |
636 | .Sx BUGS | | 648 | .Sx BUGS |
637 | for one short RE using them | | 649 | for one short RE using them |
638 | that will run almost any system out of memory. | | 650 | that will run almost any system out of memory. |
639 | .Pp | | 651 | .Pp |
640 | A backslashed character other than one specifically given a magic meaning | | 652 | A backslashed character other than one specifically given a magic meaning |
641 | by | | 653 | by |
642 | .St -p1003.2 | | 654 | .St -p1003.2 |
643 | (such magic meanings occur only in obsolete | | 655 | (such magic meanings occur only in obsolete |
644 | .Bq Dq basic | | 656 | .Bq Dq basic |
645 | REs) | | 657 | REs) |
646 | is taken as an ordinary character. | | 658 | is taken as an ordinary character. |
647 | .Pp | | 659 | .Pp |
648 | Any unmatched | | 660 | Any unmatched |
649 | .Ql [\& | | 661 | .Ql [\& |
650 | is a | | 662 | is a |
651 | .Dv REG_EBRACK | | 663 | .Dv REG_EBRACK |
652 | error. | | 664 | error. |
653 | .Pp | | 665 | .Pp |
654 | Equivalence classes cannot begin or end bracket-expression ranges. | | 666 | Equivalence classes cannot begin or end bracket-expression ranges. |
655 | The endpoint of one range cannot begin another. | | 667 | The endpoint of one range cannot begin another. |
656 | .Pp | | 668 | .Pp |
657 | .Dv RE_DUP_MAX , | | 669 | .Dv RE_DUP_MAX , |
658 | the limit on repetition counts in bounded repetitions, is 255. | | 670 | the limit on repetition counts in bounded repetitions, is 255. |
659 | .Pp | | 671 | .Pp |
660 | A repetition operator | | 672 | A repetition operator |
661 | .Ql ( ?\& , | | 673 | .Ql ( ?\& , |
662 | .Ql *\& , | | 674 | .Ql *\& , |
663 | .Ql +\& , | | 675 | .Ql +\& , |
664 | or bounds) | | 676 | or bounds) |
665 | cannot follow another | | 677 | cannot follow another |
666 | repetition operator. | | 678 | repetition operator. |
667 | A repetition operator cannot begin an expression or subexpression | | 679 | A repetition operator cannot begin an expression or subexpression |
668 | or follow | | 680 | or follow |
669 | .Ql ^\& | | 681 | .Ql ^\& |
670 | or | | 682 | or |
671 | .Ql |\& . | | 683 | .Ql |\& . |
672 | .Pp | | 684 | .Pp |
673 | .Ql |\& | | 685 | .Ql |\& |
674 | cannot appear first or last in a (sub)expression or after another | | 686 | cannot appear first or last in a (sub)expression or after another |
675 | .Ql |\& , | | 687 | .Ql |\& , |
676 | i.e., an operand of | | 688 | i.e., an operand of |
677 | .Ql |\& | | 689 | .Ql |\& |
678 | cannot be an empty subexpression. | | 690 | cannot be an empty subexpression. |
679 | An empty parenthesized subexpression, | | 691 | An empty parenthesized subexpression, |
680 | .Ql "()" , | | 692 | .Ql "()" , |
681 | is legal and matches an | | 693 | is legal and matches an |
682 | empty (sub)string. | | 694 | empty (sub)string. |
683 | An empty string is not a legal RE. | | 695 | An empty string is not a legal RE. |
684 | .Pp | | 696 | .Pp |
685 | A | | 697 | A |
686 | .Ql {\& | | 698 | .Ql {\& |
687 | followed by a digit is considered the beginning of bounds for a | | 699 | followed by a digit is considered the beginning of bounds for a |
688 | bounded repetition, which must then follow the syntax for bounds. | | 700 | bounded repetition, which must then follow the syntax for bounds. |
689 | A | | 701 | A |
690 | .Ql {\& | | 702 | .Ql {\& |
691 | .Em not | | 703 | .Em not |
692 | followed by a digit is considered an ordinary character. | | 704 | followed by a digit is considered an ordinary character. |
693 | .Pp | | 705 | .Pp |
694 | .Ql ^\& | | 706 | .Ql ^\& |
695 | and | | 707 | and |
696 | .Ql $\& | | 708 | .Ql $\& |
697 | beginning and ending subexpressions in obsolete | | 709 | beginning and ending subexpressions in obsolete |
698 | .Pq Dq basic | | 710 | .Pq Dq basic |
699 | REs are anchors, not ordinary characters. | | 711 | REs are anchors, not ordinary characters. |
700 | .Sh DIAGNOSTICS | | 712 | .Sh DIAGNOSTICS |
701 | Non-zero error codes from | | 713 | Non-zero error codes from |
702 | .Fn regcomp | | 714 | .Fn regcomp |
703 | and | | 715 | and |
704 | .Fn regexec | | 716 | .Fn regexec |
705 | include the following: | | 717 | include the following: |
706 | .Pp | | 718 | .Pp |
707 | .Bl -tag -width REG_ECOLLATE -compact | | 719 | .Bl -tag -width REG_ECOLLATE -compact |
708 | .It Dv REG_NOMATCH | | 720 | .It Dv REG_NOMATCH |
709 | The | | 721 | The |
710 | .Fn regexec | | 722 | .Fn regexec |
711 | function | | 723 | function |
712 | failed to match | | 724 | failed to match |
713 | .It Dv REG_BADPAT | | 725 | .It Dv REG_BADPAT |
714 | invalid regular expression | | 726 | invalid regular expression |
715 | .It Dv REG_ECOLLATE | | 727 | .It Dv REG_ECOLLATE |
716 | invalid collating element | | 728 | invalid collating element |
717 | .It Dv REG_ECTYPE | | 729 | .It Dv REG_ECTYPE |
718 | invalid character class | | 730 | invalid character class |
719 | .It Dv REG_EESCAPE | | 731 | .It Dv REG_EESCAPE |
720 | .Ql \e | | 732 | .Ql \e |
721 | applied to unescapable character | | 733 | applied to unescapable character |
722 | .It Dv REG_ESUBREG | | 734 | .It Dv REG_ESUBREG |
723 | invalid backreference number | | 735 | invalid backreference number |
724 | .It Dv REG_EBRACK | | 736 | .It Dv REG_EBRACK |
725 | brackets | | 737 | brackets |
726 | .Ql "[ ]" | | 738 | .Ql "[ ]" |
727 | not balanced | | 739 | not balanced |
728 | .It Dv REG_EPAREN | | 740 | .It Dv REG_EPAREN |
729 | parentheses | | 741 | parentheses |
730 | .Ql "( )" | | 742 | .Ql "( )" |
731 | not balanced | | 743 | not balanced |
732 | .It Dv REG_EBRACE | | 744 | .It Dv REG_EBRACE |
733 | braces | | 745 | braces |
734 | .Ql "{ }" | | 746 | .Ql "{ }" |
735 | not balanced | | 747 | not balanced |
736 | .It Dv REG_BADBR | | 748 | .It Dv REG_BADBR |
737 | invalid repetition count(s) in | | 749 | invalid repetition count(s) in |
738 | .Ql "{ }" | | 750 | .Ql "{ }" |
739 | .It Dv REG_ERANGE | | 751 | .It Dv REG_ERANGE |
740 | invalid character range in | | 752 | invalid character range in |
741 | .Ql "[ ]" | | 753 | .Ql "[ ]" |
742 | .It Dv REG_ESPACE | | 754 | .It Dv REG_ESPACE |
743 | ran out of memory | | 755 | ran out of memory |
744 | .It Dv REG_BADRPT | | 756 | .It Dv REG_BADRPT |
745 | .Ql ?\& , | | 757 | .Ql ?\& , |
746 | .Ql *\& , | | 758 | .Ql *\& , |
747 | or | | 759 | or |
748 | .Ql +\& | | 760 | .Ql +\& |
749 | operand invalid | | 761 | operand invalid |
750 | .It Dv REG_EMPTY | | 762 | .It Dv REG_EMPTY |
751 | empty (sub)expression | | 763 | empty (sub)expression |
752 | .It Dv REG_ASSERT | | 764 | .It Dv REG_ASSERT |
753 | cannot happen - you found a bug | | 765 | cannot happen - you found a bug |
754 | .It Dv REG_INVARG | | 766 | .It Dv REG_INVARG |
755 | invalid argument, e.g.\& negative-length string | | 767 | invalid argument, e.g.\& negative-length string |
756 | .It Dv REG_ILLSEQ | | 768 | .It Dv REG_ILLSEQ |
757 | illegal byte sequence (bad multibyte character) | | 769 | illegal byte sequence (bad multibyte character) |
758 | .El | | 770 | .El |
759 | .Sh SEE ALSO | | 771 | .Sh SEE ALSO |
760 | .Xr grep 1 , | | 772 | .Xr grep 1 , |
761 | .Xr re_format 7 | | 773 | .Xr re_format 7 |
762 | .Pp | | 774 | .Pp |
763 | .St -p1003.2 , | | 775 | .St -p1003.2 , |
764 | sections 2.8 (Regular Expression Notation) | | 776 | sections 2.8 (Regular Expression Notation) |
765 | and | | 777 | and |
766 | B.5 (C Binding for Regular Expression Matching). | | 778 | B.5 (C Binding for Regular Expression Matching). |
767 | .Sh HISTORY | | 779 | .Sh HISTORY |
768 | Originally written by | | 780 | Originally written by |
769 | .An Henry Spencer . | | 781 | .An Henry Spencer . |
770 | Altered for inclusion in the | | 782 | Altered for inclusion in the |
771 | .Bx 4.4 | | 783 | .Bx 4.4 |
772 | distribution. | | 784 | distribution. |
773 | .Pp | | 785 | .Pp |
774 | The | | 786 | The |
775 | .Fn regnsub | | 787 | .Fn regnsub |
776 | and | | 788 | and |
777 | .Fn regasub | | 789 | .Fn regasub |
778 | functions appeared in | | 790 | functions appeared in |
779 | .Nx 8 . | | 791 | .Nx 8 . |
780 | .Sh BUGS | | 792 | .Sh BUGS |
781 | This is an alpha release with known defects. | | 793 | This is an alpha release with known defects. |
782 | Please report problems. | | 794 | Please report problems. |
783 | .Pp | | 795 | .Pp |
784 | The back-reference code is subtle and doubts linger about its correctness | | 796 | The back-reference code is subtle and doubts linger about its correctness |
785 | in complex cases. | | 797 | in complex cases. |
786 | .Pp | | 798 | .Pp |
787 | The performance of the | | 799 | The performance of the |
788 | .Fn regexec | | 800 | .Fn regexec |
789 | function | | 801 | function |
790 | is poor. | | 802 | is poor. |
791 | This will improve with later releases. | | 803 | This will improve with later releases. |
792 | The | | 804 | The |
793 | .Fa nmatch | | 805 | .Fa nmatch |
794 | argument | | 806 | argument |
795 | exceeding 0 is expensive; | | 807 | exceeding 0 is expensive; |
796 | .Fa nmatch | | 808 | .Fa nmatch |
797 | exceeding 1 is worse. | | 809 | exceeding 1 is worse. |
798 | The | | 810 | The |
799 | .Fn regexec | | 811 | .Fn regexec |
800 | function | | 812 | function |
801 | is largely insensitive to RE complexity | | 813 | is largely insensitive to RE complexity |
802 | .Em except | | 814 | .Em except |
803 | that back | | 815 | that back |
804 | references are massively expensive. | | 816 | references are massively expensive. |
805 | RE length does matter; in particular, there is a strong speed bonus | | 817 | RE length does matter; in particular, there is a strong speed bonus |
806 | for keeping RE length under about 30 characters, | | 818 | for keeping RE length under about 30 characters, |
807 | with most special characters counting roughly double. | | 819 | with most special characters counting roughly double. |
808 | .Pp | | 820 | .Pp |
809 | The | | 821 | The |
810 | .Fn regcomp | | 822 | .Fn regcomp |
811 | function | | 823 | function |
812 | implements bounded repetitions by macro expansion, | | 824 | implements bounded repetitions by macro expansion, |
813 | which is costly in time and space if counts are large | | 825 | which is costly in time and space if counts are large |
814 | or bounded repetitions are nested. | | 826 | or bounded repetitions are nested. |
815 | An RE like, say, | | 827 | An RE like, say, |
816 | .Ql "((((a{1,100}){1,100}){1,100}){1,100}){1,100}" | | 828 | .Ql "((((a{1,100}){1,100}){1,100}){1,100}){1,100}" |
817 | will (eventually) run almost any existing machine out of swap space. | | 829 | will (eventually) run almost any existing machine out of swap space. |
818 | .Pp | | 830 | .Pp |
819 | There are suspected problems with response to obscure error conditions. | | 831 | There are suspected problems with response to obscure error conditions. |
820 | Notably, | | 832 | Notably, |
821 | certain kinds of internal overflow, | | 833 | certain kinds of internal overflow, |
822 | produced only by truly enormous REs or by multiply nested bounded repetitions, | | 834 | produced only by truly enormous REs or by multiply nested bounded repetitions, |
823 | are probably not handled well. | | 835 | are probably not handled well. |
824 | .Pp | | 836 | .Pp |
825 | Due to a mistake in | | 837 | Due to a mistake in |
826 | .St -p1003.2 , | | 838 | .St -p1003.2 , |
827 | things like | | 839 | things like |
828 | .Ql "a)b" | | 840 | .Ql "a)b" |
829 | are legal REs because | | 841 | are legal REs because |
830 | .Ql )\& | | 842 | .Ql )\& |
831 | is | | 843 | is |
832 | a special character only in the presence of a previous unmatched | | 844 | a special character only in the presence of a previous unmatched |
833 | .Ql (\& . | | 845 | .Ql (\& . |
834 | This cannot be fixed until the spec is fixed. | | 846 | This cannot be fixed until the spec is fixed. |
835 | .Pp | | 847 | .Pp |
836 | The standard's definition of back references is vague. | | 848 | The standard's definition of back references is vague. |
837 | For example, does | | 849 | For example, does |
838 | .Ql "a\e(\e(b\e)*\e2\e)*d" | | 850 | .Ql "a\e(\e(b\e)*\e2\e)*d" |
839 | match | | 851 | match |
840 | .Ql "abbbd" ? | | 852 | .Ql "abbbd" ? |
841 | Until the standard is clarified, | | 853 | Until the standard is clarified, |
842 | behavior in such cases should not be relied on. | | 854 | behavior in such cases should not be relied on. |
843 | .Pp | | 855 | .Pp |
844 | The implementation of word-boundary matching is a bit of a kludge, | | 856 | The implementation of word-boundary matching is a bit of a kludge, |
845 | and bugs may lurk in combinations of word-boundary matching and anchoring. | | 857 | and bugs may lurk in combinations of word-boundary matching and anchoring. |
846 | .Pp | | 858 | .Pp |
847 | Word-boundary matching does not work properly in multibyte locales. | | 859 | Word-boundary matching does not work properly in multibyte locales. |