| @@ -1,861 +1,861 @@ | | | @@ -1,861 +1,861 @@ |
1 | .\" $NetBSD: regex.3,v 1.30 2021/03/11 16:36:41 christos Exp $ | | 1 | .\" $NetBSD: regex.3,v 1.31 2021/03/11 17:13:29 wiz Exp $ |
2 | .\" | | 2 | .\" |
3 | .\" Copyright (c) 1992, 1993, 1994 Henry Spencer. | | 3 | .\" Copyright (c) 1992, 1993, 1994 Henry Spencer. |
4 | .\" Copyright (c) 1992, 1993, 1994 | | 4 | .\" Copyright (c) 1992, 1993, 1994 |
5 | .\" The Regents of the University of California. All rights reserved. | | 5 | .\" The Regents of the University of California. All rights reserved. |
6 | .\" | | 6 | .\" |
7 | .\" This code is derived from software contributed to Berkeley by | | 7 | .\" This code is derived from software contributed to Berkeley by |
8 | .\" Henry Spencer. | | 8 | .\" Henry Spencer. |
9 | .\" | | 9 | .\" |
10 | .\" Redistribution and use in source and binary forms, with or without | | 10 | .\" Redistribution and use in source and binary forms, with or without |
11 | .\" modification, are permitted provided that the following conditions | | 11 | .\" modification, are permitted provided that the following conditions |
12 | .\" are met: | | 12 | .\" are met: |
13 | .\" 1. Redistributions of source code must retain the above copyright | | 13 | .\" 1. Redistributions of source code must retain the above copyright |
14 | .\" notice, this list of conditions and the following disclaimer. | | 14 | .\" notice, this list of conditions and the following disclaimer. |
15 | .\" 2. Redistributions in binary form must reproduce the above copyright | | 15 | .\" 2. Redistributions in binary form must reproduce the above copyright |
16 | .\" notice, this list of conditions and the following disclaimer in the | | 16 | .\" notice, this list of conditions and the following disclaimer in the |
17 | .\" documentation and/or other materials provided with the distribution. | | 17 | .\" documentation and/or other materials provided with the distribution. |
18 | .\" 3. Neither the name of the University nor the names of its contributors | | 18 | .\" 3. Neither the name of the University nor the names of its contributors |
19 | .\" may be used to endorse or promote products derived from this software | | 19 | .\" may be used to endorse or promote products derived from this software |
20 | .\" without specific prior written permission. | | 20 | .\" without specific prior written permission. |
21 | .\" | | 21 | .\" |
22 | .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | | 22 | .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
23 | .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 23 | .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
24 | .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 24 | .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
25 | .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 25 | .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
26 | .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 26 | .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
27 | .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 27 | .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
28 | .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 28 | .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
29 | .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 29 | .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
30 | .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 30 | .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
31 | .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 31 | .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
32 | .\" SUCH DAMAGE. | | 32 | .\" SUCH DAMAGE. |
33 | .\" | | 33 | .\" |
34 | .\" @(#)regex.3 8.4 (Berkeley) 3/20/94 | | 34 | .\" @(#)regex.3 8.4 (Berkeley) 3/20/94 |
35 | .\" $FreeBSD: head/lib/libc/regex/regex.3 363817 2020-08-04 02:06:49Z kevans $ | | 35 | .\" $FreeBSD: head/lib/libc/regex/regex.3 363817 2020-08-04 02:06:49Z kevans $ |
36 | .\" | | 36 | .\" |
37 | .Dd March 11, 2021 | | 37 | .Dd March 11, 2021 |
38 | .Dt REGEX 3 | | 38 | .Dt REGEX 3 |
39 | .Os | | 39 | .Os |
40 | .Sh NAME | | 40 | .Sh NAME |
41 | .Nm regcomp , | | 41 | .Nm regcomp , |
42 | .Nm regexec , | | 42 | .Nm regexec , |
43 | .Nm regerror , | | 43 | .Nm regerror , |
44 | .Nm regfree , | | 44 | .Nm regfree , |
45 | .Nm regasub , | | 45 | .Nm regasub , |
46 | .Nm regnsub | | 46 | .Nm regnsub |
47 | .Nd regular-expression library | | 47 | .Nd regular-expression library |
48 | .Sh LIBRARY | | 48 | .Sh LIBRARY |
49 | .Lb libc | | 49 | .Lb libc |
50 | .Sh SYNOPSIS | | 50 | .Sh SYNOPSIS |
51 | .In regex.h | | 51 | .In regex.h |
52 | .Ft int | | 52 | .Ft int |
53 | .Fo regcomp | | 53 | .Fo regcomp |
54 | .Fa "regex_t * restrict preg" "const char * restrict pattern" "int cflags" | | 54 | .Fa "regex_t * restrict preg" "const char * restrict pattern" "int cflags" |
55 | .Fc | | 55 | .Fc |
56 | .Ft int | | 56 | .Ft int |
57 | .Fo regexec | | 57 | .Fo regexec |
58 | .Fa "const regex_t * restrict preg" "const char * restrict string" | | 58 | .Fa "const regex_t * restrict preg" "const char * restrict string" |
59 | .Fa "size_t nmatch" "regmatch_t pmatch[restrict]" "int eflags" | | 59 | .Fa "size_t nmatch" "regmatch_t pmatch[restrict]" "int eflags" |
60 | .Fc | | 60 | .Fc |
61 | .Ft size_t | | 61 | .Ft size_t |
62 | .Fo regerror | | 62 | .Fo regerror |
63 | .Fa "int errcode" "const regex_t * restrict preg" | | 63 | .Fa "int errcode" "const regex_t * restrict preg" |
64 | .Fa "char * restrict errbuf" "size_t errbuf_size" | | 64 | .Fa "char * restrict errbuf" "size_t errbuf_size" |
65 | .Fc | | 65 | .Fc |
66 | .Ft void | | 66 | .Ft void |
67 | .Fn regfree "regex_t *preg" | | 67 | .Fn regfree "regex_t *preg" |
68 | .Ft ssize_t | | 68 | .Ft ssize_t |
69 | .Fn regnsub "char *buf" "size_t bufsiz" "const char *sub" "const regmatch_t *rm" "const char *str" | | 69 | .Fn regnsub "char *buf" "size_t bufsiz" "const char *sub" "const regmatch_t *rm" "const char *str" |
70 | .Ft ssize_t | | 70 | .Ft ssize_t |
71 | .Fn regasub "char **buf" "const char *sub" "const regmatch_t *rm" "const char *sstr" | | 71 | .Fn regasub "char **buf" "const char *sub" "const regmatch_t *rm" "const char *sstr" |
72 | .Sh DESCRIPTION | | 72 | .Sh DESCRIPTION |
73 | These routines implement | | 73 | These routines implement |
74 | .St -p1003.2 | | 74 | .St -p1003.2 |
75 | regular expressions | | 75 | regular expressions |
76 | .Pq Do RE Dc Ns s ; | | 76 | .Pq Do RE Dc Ns s ; |
77 | see | | 77 | see |
78 | .Xr re_format 7 . | | 78 | .Xr re_format 7 . |
79 | The | | 79 | The |
80 | .Fn regcomp | | 80 | .Fn regcomp |
81 | function | | 81 | function |
82 | compiles an RE written as a string into an internal form, | | 82 | compiles an RE written as a string into an internal form, |
83 | .Fn regexec | | 83 | .Fn regexec |
84 | matches that internal form against a string and reports results, | | 84 | matches that internal form against a string and reports results, |
85 | .Fn regerror | | 85 | .Fn regerror |
86 | transforms error codes from either into human-readable messages, | | 86 | transforms error codes from either into human-readable messages, |
87 | and | | 87 | and |
88 | .Fn regfree | | 88 | .Fn regfree |
89 | frees any dynamically-allocated storage used by the internal form | | 89 | frees any dynamically-allocated storage used by the internal form |
90 | of an RE. | | 90 | of an RE. |
91 | .Pp | | 91 | .Pp |
92 | The header | | 92 | The header |
93 | .In regex.h | | 93 | .In regex.h |
94 | declares two structure types, | | 94 | declares two structure types, |
95 | .Ft regex_t | | 95 | .Ft regex_t |
96 | and | | 96 | and |
97 | .Ft regmatch_t , | | 97 | .Ft regmatch_t , |
98 | the former for compiled internal forms and the latter for match reporting. | | 98 | the former for compiled internal forms and the latter for match reporting. |
99 | It also declares the four functions, | | 99 | It also declares the four functions, |
100 | a type | | 100 | a type |
101 | .Ft regoff_t , | | 101 | .Ft regoff_t , |
102 | and a number of constants with names starting with | | 102 | and a number of constants with names starting with |
103 | .Dq Dv REG_ . | | 103 | .Dq Dv REG_ . |
104 | .Pp | | 104 | .Pp |
105 | The | | 105 | The |
106 | .Fn regcomp | | 106 | .Fn regcomp |
107 | function | | 107 | function |
108 | compiles the regular expression contained in the | | 108 | compiles the regular expression contained in the |
109 | .Fa pattern | | 109 | .Fa pattern |
110 | string, | | 110 | string, |
111 | subject to the flags in | | 111 | subject to the flags in |
112 | .Fa cflags , | | 112 | .Fa cflags , |
113 | and places the results in the | | 113 | and places the results in the |
114 | .Ft regex_t | | 114 | .Ft regex_t |
115 | structure pointed to by | | 115 | structure pointed to by |
116 | .Fa preg . | | 116 | .Fa preg . |
117 | The | | 117 | The |
118 | .Fa cflags | | 118 | .Fa cflags |
119 | argument | | 119 | argument |
120 | is the bitwise OR of zero or more of the following flags: | | 120 | is the bitwise OR of zero or more of the following flags: |
121 | .Bl -tag -width REG_EXTENDED | | 121 | .Bl -tag -width REG_EXTENDED |
122 | .It Dv REG_EXTENDED | | 122 | .It Dv REG_EXTENDED |
123 | Compile modern | | 123 | Compile modern |
124 | .Pq Dq extended | | 124 | .Pq Dq extended |
125 | REs, | | 125 | REs, |
126 | rather than the obsolete | | 126 | rather than the obsolete |
127 | .Pq Dq basic | | 127 | .Pq Dq basic |
128 | REs that | | 128 | REs that |
129 | are the default. | | 129 | are the default. |
130 | .It Dv REG_BASIC | | 130 | .It Dv REG_BASIC |
131 | This is a synonym for 0, | | 131 | This is a synonym for 0, |
132 | provided as a counterpart to | | 132 | provided as a counterpart to |
133 | .Dv REG_EXTENDED | | 133 | .Dv REG_EXTENDED |
134 | to improve readability. | | 134 | to improve readability. |
135 | .It Dv REG_NOSPEC | | 135 | .It Dv REG_NOSPEC |
136 | Compile with recognition of all special characters turned off. | | 136 | Compile with recognition of all special characters turned off. |
137 | All characters are thus considered ordinary, | | 137 | All characters are thus considered ordinary, |
138 | so the | | 138 | so the |
139 | .Dq RE | | 139 | .Dq RE |
140 | is a literal string. | | 140 | is a literal string. |
141 | This is an extension, | | 141 | This is an extension, |
142 | compatible with but not specified by | | 142 | compatible with but not specified by |
143 | .St -p1003.2 , | | 143 | .St -p1003.2 , |
144 | and should be used with | | 144 | and should be used with |
145 | caution in software intended to be portable to other systems. | | 145 | caution in software intended to be portable to other systems. |
146 | .Dv REG_EXTENDED | | 146 | .Dv REG_EXTENDED |
147 | and | | 147 | and |
148 | .Dv REG_NOSPEC | | 148 | .Dv REG_NOSPEC |
149 | may not be used | | 149 | may not be used |
150 | in the same call to | | 150 | in the same call to |
151 | .Fn regcomp . | | 151 | .Fn regcomp . |
152 | .It Dv REG_ICASE | | 152 | .It Dv REG_ICASE |
153 | Compile for matching that ignores upper/lower case distinctions. | | 153 | Compile for matching that ignores upper/lower case distinctions. |
154 | See | | 154 | See |
155 | .Xr re_format 7 . | | 155 | .Xr re_format 7 . |
156 | .It Dv REG_NOSUB | | 156 | .It Dv REG_NOSUB |
157 | Compile for matching that need only report success or failure, | | 157 | Compile for matching that need only report success or failure, |
158 | not what was matched. | | 158 | not what was matched. |
159 | .It Dv REG_NEWLINE | | 159 | .It Dv REG_NEWLINE |
160 | Compile for newline-sensitive matching. | | 160 | Compile for newline-sensitive matching. |
161 | By default, newline is a completely ordinary character with no special | | 161 | By default, newline is a completely ordinary character with no special |
162 | meaning in either REs or strings. | | 162 | meaning in either REs or strings. |
163 | With this flag, | | 163 | With this flag, |
164 | .Ql [^ | | 164 | .Ql [^ |
165 | bracket expressions and | | 165 | bracket expressions and |
166 | .Ql .\& | | 166 | .Ql .\& |
167 | never match newline, | | 167 | never match newline, |
168 | a | | 168 | a |
169 | .Ql ^\& | | 169 | .Ql ^\& |
170 | anchor matches the null string after any newline in the string | | 170 | anchor matches the null string after any newline in the string |
171 | in addition to its normal function, | | 171 | in addition to its normal function, |
172 | and the | | 172 | and the |
173 | .Ql $\& | | 173 | .Ql $\& |
174 | anchor matches the null string before any newline in the | | 174 | anchor matches the null string before any newline in the |
175 | string in addition to its normal function. | | 175 | string in addition to its normal function. |
176 | .It Dv REG_PEND | | 176 | .It Dv REG_PEND |
177 | The regular expression ends, | | 177 | The regular expression ends, |
178 | not at the first NUL, | | 178 | not at the first NUL, |
179 | but just before the character pointed to by the | | 179 | but just before the character pointed to by the |
180 | .Va re_endp | | 180 | .Va re_endp |
181 | member of the structure pointed to by | | 181 | member of the structure pointed to by |
182 | .Fa preg . | | 182 | .Fa preg . |
183 | The | | 183 | The |
184 | .Va re_endp | | 184 | .Va re_endp |
185 | member is of type | | 185 | member is of type |
186 | .Ft "const char *" . | | 186 | .Ft "const char *" . |
187 | This flag permits inclusion of NULs in the RE; | | 187 | This flag permits inclusion of NULs in the RE; |
188 | they are considered ordinary characters. | | 188 | they are considered ordinary characters. |
189 | This is an extension, | | 189 | This is an extension, |
190 | compatible with but not specified by | | 190 | compatible with but not specified by |
191 | .St -p1003.2 , | | 191 | .St -p1003.2 , |
192 | and should be used with | | 192 | and should be used with |
193 | caution in software intended to be portable to other systems. | | 193 | caution in software intended to be portable to other systems. |
194 | .It Dv REG_GNU | | 194 | .It Dv REG_GNU |
195 | Include GNU-inspired extensions: | | 195 | Include GNU-inspired extensions: |
196 | .Pp | | 196 | .Pp |
197 | .Bl -tag -offset indent -width XX -compact | | 197 | .Bl -tag -offset indent -width XX -compact |
198 | .It \eN | | 198 | .It \eN |
199 | Use backreference | | 199 | Use backreference |
200 | .Dv N | | 200 | .Dv N |
201 | where | | 201 | where |
202 | .Dv N | | 202 | .Dv N |
203 | is a single-digit number between | | 203 | is a single-digit number between |
204 | .Dv 1 | | 204 | .Dv 1 |
205 | and | | 205 | and |
206 | .Dv 9 . | | 206 | .Dv 9 . |
207 | .It \ea | | 207 | .It \ea |
208 | Visual Bell | | 208 | Visual Bell |
209 | .It \eb | | 209 | .It \eb |
210 | Match a position that is a word boundary. | | 210 | Match a position that is a word boundary. |
211 | .It \eB | | 211 | .It \eB |
212 | Match a position that is not a word boundary. | | 212 | Match a position that is not a word boundary. |
213 | .It \ef | | 213 | .It \ef |
214 | Form Feed | | 214 | Form Feed |
215 | .It \en | | 215 | .It \en |
216 | Line Feed | | 216 | Line Feed |
217 | .It \er | | 217 | .It \er |
218 | Carriage return | | 218 | Carriage return |
219 | .It \es | | 219 | .It \es |
220 | Alias for [[:space:]] | | 220 | Alias for [[:space:]] |
221 | .It \eS | | 221 | .It \eS |
222 | Alias for [^[:space:]] | | 222 | Alias for [^[:space:]] |
223 | .It \et | | 223 | .It \et |
224 | Horizontal Tab | | 224 | Horizontal Tab |
225 | .It \ev | | 225 | .It \ev |
226 | Vertical Tab | | 226 | Vertical Tab |
227 | .It \ew | | 227 | .It \ew |
228 | Alias for [[:alnum:]] | | 228 | Alias for [[:alnum:]] |
229 | .It \eW | | 229 | .It \eW |
230 | Alias for [^[:alnum:]] | | 230 | Alias for [^[:alnum:]] |
231 | .It \e' | | 231 | .It \e' |
232 | Matches the end of the subject string (the string to be matched). | | 232 | Matches the end of the subject string (the string to be matched). |
233 | .It \e` | | 233 | .It \e` |
234 | Matches the beginning of the subject string. | | 234 | Matches the beginning of the subject string. |
235 | .El | | 235 | .El |
236 | .Pp | | 236 | .Pp |
237 | This is an extension, | | 237 | This is an extension, |
238 | compatible with but not specified by | | 238 | compatible with but not specified by |
239 | .St -p1003.2 , | | 239 | .St -p1003.2 , |
240 | and should be used with | | 240 | and should be used with |
241 | caution in software intended to be portable to other systems. | | 241 | caution in software intended to be portable to other systems. |
242 | .El | | 242 | .El |
243 | .Pp | | 243 | .Pp |
244 | When successful, | | 244 | When successful, |
245 | .Fn regcomp | | 245 | .Fn regcomp |
246 | returns 0 and fills in the structure pointed to by | | 246 | returns 0 and fills in the structure pointed to by |
247 | .Fa preg . | | 247 | .Fa preg . |
248 | One member of that structure | | 248 | One member of that structure |
249 | (other than | | 249 | (other than |
250 | .Va re_endp ) | | 250 | .Va re_endp ) |
251 | is publicized: | | 251 | is publicized: |
252 | .Va re_nsub , | | 252 | .Va re_nsub , |
253 | of type | | 253 | of type |
254 | .Ft size_t , | | 254 | .Ft size_t , |
255 | contains the number of parenthesized subexpressions within the RE | | 255 | contains the number of parenthesized subexpressions within the RE |
256 | (except that the value of this member is undefined if the | | 256 | (except that the value of this member is undefined if the |
257 | .Dv REG_NOSUB | | 257 | .Dv REG_NOSUB |
258 | flag was used). | | 258 | flag was used). |
259 | If | | 259 | If |
260 | .Fn regcomp | | 260 | .Fn regcomp |
261 | fails, it returns a non-zero error code; | | 261 | fails, it returns a non-zero error code; |
262 | see | | 262 | see |
263 | .Sx DIAGNOSTICS . | | 263 | .Sx DIAGNOSTICS . |
264 | .Pp | | 264 | .Pp |
265 | The | | 265 | The |
266 | .Fn regexec | | 266 | .Fn regexec |
267 | function | | 267 | function |
268 | matches the compiled RE pointed to by | | 268 | matches the compiled RE pointed to by |
269 | .Fa preg | | 269 | .Fa preg |
270 | against the | | 270 | against the |
271 | .Fa string , | | 271 | .Fa string , |
272 | subject to the flags in | | 272 | subject to the flags in |
273 | .Fa eflags , | | 273 | .Fa eflags , |
274 | and reports results using | | 274 | and reports results using |
275 | .Fa nmatch , | | 275 | .Fa nmatch , |
276 | .Fa pmatch , | | 276 | .Fa pmatch , |
277 | and the returned value. | | 277 | and the returned value. |
278 | The RE must have been compiled by a previous invocation of | | 278 | The RE must have been compiled by a previous invocation of |
279 | .Fn regcomp . | | 279 | .Fn regcomp . |
280 | The compiled form is not altered during execution of | | 280 | The compiled form is not altered during execution of |
281 | .Fn regexec , | | 281 | .Fn regexec , |
282 | so a single compiled RE can be used simultaneously by multiple threads. | | 282 | so a single compiled RE can be used simultaneously by multiple threads. |
283 | .Pp | | 283 | .Pp |
284 | By default, | | 284 | By default, |
285 | the NUL-terminated string pointed to by | | 285 | the NUL-terminated string pointed to by |
286 | .Fa string | | 286 | .Fa string |
287 | is considered to be the text of an entire line, minus any terminating | | 287 | is considered to be the text of an entire line, minus any terminating |
288 | newline. | | 288 | newline. |
289 | The | | 289 | The |
290 | .Fa eflags | | 290 | .Fa eflags |
291 | argument is the bitwise OR of zero or more of the following flags: | | 291 | argument is the bitwise OR of zero or more of the following flags: |
292 | .Bl -tag -width REG_STARTEND | | 292 | .Bl -tag -width REG_STARTEND |
293 | .It Dv REG_NOTBOL | | 293 | .It Dv REG_NOTBOL |
294 | The first character of the string is treated as the continuation | | 294 | The first character of the string is treated as the continuation |
295 | of a line. | | 295 | of a line. |
296 | This means that the anchors | | 296 | This means that the anchors |
297 | .Ql ^\& , | | 297 | .Ql ^\& , |
298 | .Ql [[:<:]] , | | 298 | .Ql [[:<:]] , |
299 | and | | 299 | and |
300 | .Ql \e< | | 300 | .Ql \e< |
301 | do not match before it; but see | | 301 | do not match before it; but see |
302 | .Dv REG_STARTEND | | 302 | .Dv REG_STARTEND |
303 | below. | | 303 | below. |
304 | This does not affect the behavior of newlines under | | 304 | This does not affect the behavior of newlines under |
305 | .Dv REG_NEWLINE . | | 305 | .Dv REG_NEWLINE . |
306 | .It Dv REG_NOTEOL | | 306 | .It Dv REG_NOTEOL |
307 | The NUL terminating | | 307 | The NUL terminating |
308 | the string | | 308 | the string |
309 | does not end a line, so the | | 309 | does not end a line, so the |
310 | .Ql $\& | | 310 | .Ql $\& |
311 | anchor does not match before it. | | 311 | anchor does not match before it. |
312 | This does not affect the behavior of newlines under | | 312 | This does not affect the behavior of newlines under |
313 | .Dv REG_NEWLINE . | | 313 | .Dv REG_NEWLINE . |
314 | .It Dv REG_STARTEND | | 314 | .It Dv REG_STARTEND |
315 | The string is considered to start at | | 315 | The string is considered to start at |
316 | .Fa string No + | | 316 | .Fa string No + |
317 | .Fa pmatch Ns [0]. Ns Fa rm_so | | 317 | .Fa pmatch Ns [0]. Ns Fa rm_so |
318 | and to end before the byte located at | | 318 | and to end before the byte located at |
319 | .Fa string No + | | 319 | .Fa string No + |
320 | .Fa pmatch Ns [0]. Ns Fa rm_eo , | | 320 | .Fa pmatch Ns [0]. Ns Fa rm_eo , |
321 | regardless of the value of | | 321 | regardless of the value of |
322 | .Fa nmatch . | | 322 | .Fa nmatch . |
323 | See below for the definition of | | 323 | See below for the definition of |
324 | .Fa pmatch | | 324 | .Fa pmatch |
325 | and | | 325 | and |
326 | .Fa nmatch . | | 326 | .Fa nmatch . |
327 | This is an extension, | | 327 | This is an extension, |
328 | compatible with but not specified by | | 328 | compatible with but not specified by |
329 | .St -p1003.2 , | | 329 | .St -p1003.2 , |
330 | and should be used with | | 330 | and should be used with |
331 | caution in software intended to be portable to other systems. | | 331 | caution in software intended to be portable to other systems. |
332 | .Pp | | 332 | .Pp |
333 | Without | | 333 | Without |
334 | .Dv REG_NOTBOL , | | 334 | .Dv REG_NOTBOL , |
335 | the position | | 335 | the position |
336 | .Fa rm_so | | 336 | .Fa rm_so |
337 | is considered the beginning of a line, such that | | 337 | is considered the beginning of a line, such that |
338 | .Ql ^ | | 338 | .Ql ^ |
339 | matches before it, and the beginning of a word if there is a word | | 339 | matches before it, and the beginning of a word if there is a word |
340 | character at this position, such that | | 340 | character at this position, such that |
341 | .Ql [[:<:]] | | 341 | .Ql [[:<:]] |
342 | and | | 342 | and |
343 | .Ql \e< | | 343 | .Ql \e< |
344 | match before it. | | 344 | match before it. |
345 | .Pp | | 345 | .Pp |
346 | With | | 346 | With |
347 | .Dv REG_NOTBOL , | | 347 | .Dv REG_NOTBOL , |
348 | the character at position | | 348 | the character at position |
349 | .Fa rm_so | | 349 | .Fa rm_so |
350 | is treated as the continuation of a line, and if | | 350 | is treated as the continuation of a line, and if |
351 | .Fa rm_so | | 351 | .Fa rm_so |
352 | is greater than 0, the preceding character is taken into consideration. | | 352 | is greater than 0, the preceding character is taken into consideration. |
353 | If the preceding character is a newline and the regular expression was compiled | | 353 | If the preceding character is a newline and the regular expression was compiled |
354 | with | | 354 | with |
355 | .Dv REG_NEWLINE , | | 355 | .Dv REG_NEWLINE , |
356 | .Ql ^ | | 356 | .Ql ^ |
357 | matches before the string; if the preceding character is not a word character | | 357 | matches before the string; if the preceding character is not a word character |
358 | but the string starts with a word character, | | 358 | but the string starts with a word character, |
359 | .Ql [[:<:]] | | 359 | .Ql [[:<:]] |
360 | and | | 360 | and |
361 | .Ql \e< | | 361 | .Ql \e< |
362 | match before the string. | | 362 | match before the string. |
363 | .El | | 363 | .El |
364 | .Pp | | 364 | .Pp |
365 | See | | 365 | See |
366 | .Xr re_format 7 | | 366 | .Xr re_format 7 |
367 | for a discussion of what is matched in situations where an RE or a | | 367 | for a discussion of what is matched in situations where an RE or a |
368 | portion thereof could match any of several substrings of | | 368 | portion thereof could match any of several substrings of |
369 | .Fa string . | | 369 | .Fa string . |
370 | .Pp | | 370 | .Pp |
371 | Normally, | | 371 | Normally, |
372 | .Fn regexec | | 372 | .Fn regexec |
373 | returns 0 for success and the non-zero code | | 373 | returns 0 for success and the non-zero code |
374 | .Dv REG_NOMATCH | | 374 | .Dv REG_NOMATCH |
375 | for failure. | | 375 | for failure. |
376 | Other non-zero error codes may be returned in exceptional situations; | | 376 | Other non-zero error codes may be returned in exceptional situations; |
377 | see | | 377 | see |
378 | .Sx DIAGNOSTICS . | | 378 | .Sx DIAGNOSTICS . |
379 | .Pp | | 379 | .Pp |
380 | If | | 380 | If |
381 | .Dv REG_NOSUB | | 381 | .Dv REG_NOSUB |
382 | was specified in the compilation of the RE, | | 382 | was specified in the compilation of the RE, |
383 | or if | | 383 | or if |
384 | .Fa nmatch | | 384 | .Fa nmatch |
385 | is 0, | | 385 | is 0, |
386 | .Fn regexec | | 386 | .Fn regexec |
387 | ignores the | | 387 | ignores the |
388 | .Fa pmatch | | 388 | .Fa pmatch |
389 | argument (but see below for the case where | | 389 | argument (but see below for the case where |
390 | .Dv REG_STARTEND | | 390 | .Dv REG_STARTEND |
391 | is specified). | | 391 | is specified). |
392 | Otherwise, | | 392 | Otherwise, |
393 | .Fa pmatch | | 393 | .Fa pmatch |
394 | points to an array of | | 394 | points to an array of |
395 | .Fa nmatch | | 395 | .Fa nmatch |
396 | structures of type | | 396 | structures of type |
397 | .Ft regmatch_t . | | 397 | .Ft regmatch_t . |
398 | Such a structure has at least the members | | 398 | Such a structure has at least the members |
399 | .Va rm_so | | 399 | .Va rm_so |
400 | and | | 400 | and |
401 | .Va rm_eo , | | 401 | .Va rm_eo , |
402 | both of type | | 402 | both of type |
403 | .Ft regoff_t | | 403 | .Ft regoff_t |
404 | (a signed arithmetic type at least as large as an | | 404 | (a signed arithmetic type at least as large as an |
405 | .Ft off_t | | 405 | .Ft off_t |
406 | and a | | 406 | and a |
407 | .Ft ssize_t ) , | | 407 | .Ft ssize_t ) , |
408 | containing respectively the offset of the first character of a substring | | 408 | containing respectively the offset of the first character of a substring |
409 | and the offset of the first character after the end of the substring. | | 409 | and the offset of the first character after the end of the substring. |
410 | Offsets are measured from the beginning of the | | 410 | Offsets are measured from the beginning of the |
411 | .Fa string | | 411 | .Fa string |
412 | argument given to | | 412 | argument given to |
413 | .Fn regexec . | | 413 | .Fn regexec . |
414 | An empty substring is denoted by equal offsets, | | 414 | An empty substring is denoted by equal offsets, |
415 | both indicating the character following the empty substring. | | 415 | both indicating the character following the empty substring. |
416 | .Pp | | 416 | .Pp |
417 | The 0th member of the | | 417 | The 0th member of the |
418 | .Fa pmatch | | 418 | .Fa pmatch |
419 | array is filled in to indicate what substring of | | 419 | array is filled in to indicate what substring of |
420 | .Fa string | | 420 | .Fa string |
421 | was matched by the entire RE. | | 421 | was matched by the entire RE. |
422 | Remaining members report what substring was matched by parenthesized | | 422 | Remaining members report what substring was matched by parenthesized |
423 | subexpressions within the RE; | | 423 | subexpressions within the RE; |
424 | member | | 424 | member |
425 | .Va i | | 425 | .Va i |
426 | reports subexpression | | 426 | reports subexpression |
427 | .Va i , | | 427 | .Va i , |
428 | with subexpressions counted (starting at 1) by the order of their opening | | 428 | with subexpressions counted (starting at 1) by the order of their opening |
429 | parentheses in the RE, left to right. | | 429 | parentheses in the RE, left to right. |
430 | Unused entries in the array (corresponding either to subexpressions that | | 430 | Unused entries in the array (corresponding either to subexpressions that |
431 | did not participate in the match at all, or to subexpressions that do not | | 431 | did not participate in the match at all, or to subexpressions that do not |
432 | exist in the RE (that is, | | 432 | exist in the RE (that is, |
433 | .Va i | | 433 | .Va i |
434 | > | | 434 | > |
435 | .Fa preg Ns -> Ns Va re_nsub ) ) | | 435 | .Fa preg Ns -> Ns Va re_nsub ) ) |
436 | have both | | 436 | have both |
437 | .Va rm_so | | 437 | .Va rm_so |
438 | and | | 438 | and |
439 | .Va rm_eo | | 439 | .Va rm_eo |
440 | set to -1. | | 440 | set to -1. |
441 | If a subexpression participated in the match several times, | | 441 | If a subexpression participated in the match several times, |
442 | the reported substring is the last one it matched. | | 442 | the reported substring is the last one it matched. |
443 | (Note, as an example in particular, that when the RE | | 443 | (Note, as an example in particular, that when the RE |
444 | .Ql "(b*)+" | | 444 | .Ql "(b*)+" |
445 | matches | | 445 | matches |
446 | .Ql bbb , | | 446 | .Ql bbb , |
447 | the parenthesized subexpression matches each of the three | | 447 | the parenthesized subexpression matches each of the three |
448 | .So Li b Sc Ns s | | 448 | .So Li b Sc Ns s |
449 | and then | | 449 | and then |
450 | an infinite number of empty strings following the last | | 450 | an infinite number of empty strings following the last |
451 | .Ql b , | | 451 | .Ql b , |
452 | so the reported substring is one of the empties.) | | 452 | so the reported substring is one of the empties.) |
453 | .Pp | | 453 | .Pp |
454 | If | | 454 | If |
455 | .Dv REG_STARTEND | | 455 | .Dv REG_STARTEND |
456 | is specified, | | 456 | is specified, |
457 | .Fa pmatch | | 457 | .Fa pmatch |
458 | must point to at least one | | 458 | must point to at least one |
459 | .Ft regmatch_t | | 459 | .Ft regmatch_t |
460 | (even if | | 460 | (even if |
461 | .Fa nmatch | | 461 | .Fa nmatch |
462 | is 0 or | | 462 | is 0 or |
463 | .Dv REG_NOSUB | | 463 | .Dv REG_NOSUB |
464 | was specified), | | 464 | was specified), |
465 | to hold the input offsets for | | 465 | to hold the input offsets for |
466 | .Dv REG_STARTEND . | | 466 | .Dv REG_STARTEND . |
467 | Use for output is still entirely controlled by | | 467 | Use for output is still entirely controlled by |
468 | .Fa nmatch ; | | 468 | .Fa nmatch ; |
469 | if | | 469 | if |
470 | .Fa nmatch | | 470 | .Fa nmatch |
471 | is 0 or | | 471 | is 0 or |
472 | .Dv REG_NOSUB | | 472 | .Dv REG_NOSUB |
473 | was specified, | | 473 | was specified, |
474 | the value of | | 474 | the value of |
475 | .Fa pmatch Ns [0] | | 475 | .Fa pmatch Ns [0] |
476 | will not be changed by a successful | | 476 | will not be changed by a successful |
477 | .Fn regexec . | | 477 | .Fn regexec . |
478 | .Pp | | 478 | .Pp |
479 | The | | 479 | The |
480 | .Fn regerror | | 480 | .Fn regerror |
481 | function | | 481 | function |
482 | maps a non-zero | | 482 | maps a non-zero |
483 | .Fa errcode | | 483 | .Fa errcode |
484 | from either | | 484 | from either |
485 | .Fn regcomp | | 485 | .Fn regcomp |
486 | or | | 486 | or |
487 | .Fn regexec | | 487 | .Fn regexec |
488 | to a human-readable, printable message. | | 488 | to a human-readable, printable message. |
489 | If | | 489 | If |
490 | .Fa preg | | 490 | .Fa preg |
491 | is | | 491 | is |
492 | .No non\- Ns Dv NULL , | | 492 | .No non\- Ns Dv NULL , |
493 | the error code should have arisen from use of | | 493 | the error code should have arisen from use of |
494 | the | | 494 | the |
495 | .Ft regex_t | | 495 | .Ft regex_t |
496 | pointed to by | | 496 | pointed to by |
497 | .Fa preg , | | 497 | .Fa preg , |
498 | and if the error code came from | | 498 | and if the error code came from |
499 | .Fn regcomp , | | 499 | .Fn regcomp , |
500 | it should have been the result from the most recent | | 500 | it should have been the result from the most recent |
501 | .Fn regcomp | | 501 | .Fn regcomp |
502 | using that | | 502 | using that |
503 | .Ft regex_t . | | 503 | .Ft regex_t . |
504 | .Po | | 504 | .Po |
505 | The | | 505 | The |
506 | .Fn regerror | | 506 | .Fn regerror |
507 | may be able to supply a more detailed message using information | | 507 | may be able to supply a more detailed message using information |
508 | from the | | 508 | from the |
509 | .Ft regex_t . | | 509 | .Ft regex_t . |
510 | .Pc | | 510 | .Pc |
511 | The | | 511 | The |
512 | .Fn regerror | | 512 | .Fn regerror |
513 | function | | 513 | function |
514 | places the NUL-terminated message into the buffer pointed to by | | 514 | places the NUL-terminated message into the buffer pointed to by |
515 | .Fa errbuf , | | 515 | .Fa errbuf , |
516 | limiting the length (including the NUL) to at most | | 516 | limiting the length (including the NUL) to at most |
517 | .Fa errbuf_size | | 517 | .Fa errbuf_size |
518 | bytes. | | 518 | bytes. |
519 | If the whole message will not fit, | | 519 | If the whole message will not fit, |
520 | as much of it as will fit before the terminating NUL is supplied. | | 520 | as much of it as will fit before the terminating NUL is supplied. |
521 | In any case, | | 521 | In any case, |
522 | the returned value is the size of buffer needed to hold the whole | | 522 | the returned value is the size of buffer needed to hold the whole |
523 | message (including terminating NUL). | | 523 | message (including terminating NUL). |
524 | If | | 524 | If |
525 | .Fa errbuf_size | | 525 | .Fa errbuf_size |
526 | is 0, | | 526 | is 0, |
527 | .Fa errbuf | | 527 | .Fa errbuf |
528 | is ignored but the return value is still correct. | | 528 | is ignored but the return value is still correct. |
529 | .Pp | | 529 | .Pp |
530 | If the | | 530 | If the |
531 | .Fa errcode | | 531 | .Fa errcode |
532 | given to | | 532 | given to |
533 | .Fn regerror | | 533 | .Fn regerror |
534 | is first ORed with | | 534 | is first ORed with |
535 | .Dv REG_ITOA , | | 535 | .Dv REG_ITOA , |
536 | the | | 536 | the |
537 | .Dq message | | 537 | .Dq message |
538 | that results is the printable name of the error code, | | 538 | that results is the printable name of the error code, |
539 | e.g.\& | | 539 | e.g.\& |
540 | .Dq Dv REG_NOMATCH , | | 540 | .Dq Dv REG_NOMATCH , |
541 | rather than an explanation thereof. | | 541 | rather than an explanation thereof. |
542 | If | | 542 | If |
543 | .Fa errcode | | 543 | .Fa errcode |
544 | is | | 544 | is |
545 | .Dv REG_ATOI , | | 545 | .Dv REG_ATOI , |
546 | then | | 546 | then |
547 | .Fa preg | | 547 | .Fa preg |
548 | shall be | | 548 | shall be |
549 | .No non\- Ns Dv NULL | | 549 | .No non\- Ns Dv NULL |
550 | and the | | 550 | and the |
551 | .Va re_endp | | 551 | .Va re_endp |
552 | member of the structure it points to | | 552 | member of the structure it points to |
553 | must point to the printable name of an error code; | | 553 | must point to the printable name of an error code; |
554 | in this case, the result in | | 554 | in this case, the result in |
555 | .Fa errbuf | | 555 | .Fa errbuf |
556 | is the decimal digits of | | 556 | is the decimal digits of |
557 | the numeric value of the error code | | 557 | the numeric value of the error code |
558 | (0 if the name is not recognized). | | 558 | (0 if the name is not recognized). |
559 | .Dv REG_ITOA | | 559 | .Dv REG_ITOA |
560 | and | | 560 | and |
561 | .Dv REG_ATOI | | 561 | .Dv REG_ATOI |
562 | are intended primarily as debugging facilities; | | 562 | are intended primarily as debugging facilities; |
563 | they are extensions, | | 563 | they are extensions, |
564 | compatible with but not specified by | | 564 | compatible with but not specified by |
565 | .St -p1003.2 , | | 565 | .St -p1003.2 , |
566 | and should be used with | | 566 | and should be used with |
567 | caution in software intended to be portable to other systems. | | 567 | caution in software intended to be portable to other systems. |
568 | Be warned also that they are considered experimental and changes are possible. | | 568 | Be warned also that they are considered experimental and changes are possible. |
569 | .Pp | | 569 | .Pp |
570 | The | | 570 | The |
571 | .Fn regfree | | 571 | .Fn regfree |
572 | function | | 572 | function |
573 | frees any dynamically-allocated storage associated with the compiled RE | | 573 | frees any dynamically-allocated storage associated with the compiled RE |
574 | pointed to by | | 574 | pointed to by |
575 | .Fa preg . | | 575 | .Fa preg . |
576 | The remaining | | 576 | The remaining |
577 | .Ft regex_t | | 577 | .Ft regex_t |
578 | is no longer a valid compiled RE | | 578 | is no longer a valid compiled RE |
579 | and the effect of supplying it to | | 579 | and the effect of supplying it to |
580 | .Fn regexec | | 580 | .Fn regexec |
581 | or | | 581 | or |
582 | .Fn regerror | | 582 | .Fn regerror |
583 | is undefined. | | 583 | is undefined. |
584 | .Pp | | 584 | .Pp |
585 | None of these functions references global variables except for tables | | 585 | None of these functions references global variables except for tables |
586 | of constants; | | 586 | of constants; |
587 | all are safe for use from multiple threads if the arguments are safe. | | 587 | all are safe for use from multiple threads if the arguments are safe. |
588 | .Pp | | 588 | .Pp |
589 | The | | 589 | The |
590 | .Fn regnsub | | 590 | .Fn regnsub |
591 | and | | 591 | and |
592 | .Fn regasub | | 592 | .Fn regasub |
593 | functions perform substitutions using | | 593 | functions perform substitutions using |
594 | .Xr sed 1 Ns -like | | 594 | .Xr sed 1 Ns -like |
595 | syntax. | | 595 | syntax. |
596 | They return the length of the string that would have been created | | 596 | They return the length of the string that would have been created |
597 | if there were enough space, or | | 597 | if there were enough space, or |
598 | .Dv \-1 | | 598 | .Dv \-1 |
599 | on error, setting | | 599 | on error, setting |
600 | .Va errno . | | 600 | .Va errno . |
601 | The result | | 601 | The result |
602 | is placed in | | 602 | is placed in |
603 | .Fa buf | | 603 | .Fa buf |
604 | which is user-supplied in | | 604 | which is user-supplied in |
605 | .Fn regnsub | | 605 | .Fn regnsub |
606 | and dynamically allocated in | | 606 | and dynamically allocated in |
607 | .Fn regasub . | | 607 | .Fn regasub . |
608 | The | | 608 | The |
609 | .Fa sub | | 609 | .Fa sub |
610 | argument contains a substitution string which might refer to the first | | 610 | argument contains a substitution string which might refer to the first |
611 | 9 matched subexpressions using | | 611 | 9 matched subexpressions using |
612 | .Dq \e<n> | | 612 | .Dq \e<n> |
613 | to refer to the nth matched | | 613 | to refer to the nth matched |
614 | item, or | | 614 | item, or |
615 | .Dq & | | 615 | .Dq & |
616 | (which is equivalent to | | 616 | (which is equivalent to |
617 | .Dq \e0 ) | | 617 | .Dq \e0 ) |
618 | to refer to the full match. | | 618 | to refer to the full match. |
619 | The | | 619 | The |
620 | .Fa rm | | 620 | .Fa rm |
621 | array must be at least 10 elements long, and should contain the result | | 621 | array must be at least 10 elements long, and should contain the result |
622 | of the matches from a previous | | 622 | of the matches from a previous |
623 | .Fn regexec | | 623 | .Fn regexec |
624 | call. | | 624 | call. |
625 | Only 10 elements of the | | 625 | Only 10 elements of the |
626 | .Fa rm | | 626 | .Fa rm |
627 | array can be used. | | 627 | array can be used. |
628 | The | | 628 | The |
629 | .Fa str | | 629 | .Fa str |
630 | argument contains the source string to apply the transformation to. | | 630 | argument contains the source string to apply the transformation to. |
631 | .Sh IMPLEMENTATION CHOICES | | 631 | .Sh IMPLEMENTATION CHOICES |
632 | There are a number of decisions that | | 632 | There are a number of decisions that |
633 | .St -p1003.2 | | 633 | .St -p1003.2 |
634 | leaves up to the implementor, | | 634 | leaves up to the implementor, |
635 | either by explicitly saying | | 635 | either by explicitly saying |
636 | .Dq undefined | | 636 | .Dq undefined |
637 | or by virtue of them being | | 637 | or by virtue of them being |
638 | forbidden by the RE grammar. | | 638 | forbidden by the RE grammar. |
639 | This implementation treats them as follows. | | 639 | This implementation treats them as follows. |
640 | .Pp | | 640 | .Pp |
641 | See | | 641 | See |
642 | .Xr re_format 7 | | 642 | .Xr re_format 7 |
643 | for a discussion of the definition of case-independent matching. | | 643 | for a discussion of the definition of case-independent matching. |
644 | .Pp | | 644 | .Pp |
645 | There is no particular limit on the length of REs, | | 645 | There is no particular limit on the length of REs, |
646 | except insofar as memory is limited. | | 646 | except insofar as memory is limited. |
647 | Memory usage is approximately linear in RE size, and largely insensitive | | 647 | Memory usage is approximately linear in RE size, and largely insensitive |
648 | to RE complexity, except for bounded repetitions. | | 648 | to RE complexity, except for bounded repetitions. |
649 | See | | 649 | See |
650 | .Sx BUGS | | 650 | .Sx BUGS |
651 | for one short RE using them | | 651 | for one short RE using them |
652 | that will run almost any system out of memory. | | 652 | that will run almost any system out of memory. |
653 | .Pp | | 653 | .Pp |
654 | A backslashed character other than one specifically given a magic meaning | | 654 | A backslashed character other than one specifically given a magic meaning |
655 | by | | 655 | by |
656 | .St -p1003.2 | | 656 | .St -p1003.2 |
657 | (such magic meanings occur only in obsolete | | 657 | (such magic meanings occur only in obsolete |
658 | .Bq Dq basic | | 658 | .Bq Dq basic |
659 | REs) | | 659 | REs) |
660 | is taken as an ordinary character. | | 660 | is taken as an ordinary character. |
661 | .Pp | | 661 | .Pp |
662 | Any unmatched | | 662 | Any unmatched |
663 | .Ql [\& | | 663 | .Ql [\& |
664 | is a | | 664 | is a |
665 | .Dv REG_EBRACK | | 665 | .Dv REG_EBRACK |
666 | error. | | 666 | error. |
667 | .Pp | | 667 | .Pp |
668 | Equivalence classes cannot begin or end bracket-expression ranges. | | 668 | Equivalence classes cannot begin or end bracket-expression ranges. |
669 | The endpoint of one range cannot begin another. | | 669 | The endpoint of one range cannot begin another. |
670 | .Pp | | 670 | .Pp |
671 | .Dv RE_DUP_MAX , | | 671 | .Dv RE_DUP_MAX , |
672 | the limit on repetition counts in bounded repetitions, is 255. | | 672 | the limit on repetition counts in bounded repetitions, is 255. |
673 | .Pp | | 673 | .Pp |
674 | A repetition operator | | 674 | A repetition operator |
675 | .Ql ( ?\& , | | 675 | .Ql ( ?\& , |
676 | .Ql *\& , | | 676 | .Ql *\& , |
677 | .Ql +\& , | | 677 | .Ql +\& , |
678 | or bounds) | | 678 | or bounds) |
679 | cannot follow another | | 679 | cannot follow another |
680 | repetition operator. | | 680 | repetition operator. |
681 | A repetition operator cannot begin an expression or subexpression | | 681 | A repetition operator cannot begin an expression or subexpression |
682 | or follow | | 682 | or follow |
683 | .Ql ^\& | | 683 | .Ql ^\& |
684 | or | | 684 | or |
685 | .Ql |\& . | | 685 | .Ql |\& . |
686 | .Pp | | 686 | .Pp |
687 | .Ql |\& | | 687 | .Ql |\& |
688 | cannot appear first or last in a (sub)expression or after another | | 688 | cannot appear first or last in a (sub)expression or after another |
689 | .Ql |\& , | | 689 | .Ql |\& , |
690 | i.e., an operand of | | 690 | i.e., an operand of |
691 | .Ql |\& | | 691 | .Ql |\& |
692 | cannot be an empty subexpression. | | 692 | cannot be an empty subexpression. |
693 | An empty parenthesized subexpression, | | 693 | An empty parenthesized subexpression, |
694 | .Ql "()" , | | 694 | .Ql "()" , |
695 | is legal and matches an | | 695 | is legal and matches an |
696 | empty (sub)string. | | 696 | empty (sub)string. |
697 | An empty string is not a legal RE. | | 697 | An empty string is not a legal RE. |
698 | .Pp | | 698 | .Pp |
699 | A | | 699 | A |
700 | .Ql {\& | | 700 | .Ql {\& |
701 | followed by a digit is considered the beginning of bounds for a | | 701 | followed by a digit is considered the beginning of bounds for a |
702 | bounded repetition, which must then follow the syntax for bounds. | | 702 | bounded repetition, which must then follow the syntax for bounds. |
703 | A | | 703 | A |
704 | .Ql {\& | | 704 | .Ql {\& |
705 | .Em not | | 705 | .Em not |
706 | followed by a digit is considered an ordinary character. | | 706 | followed by a digit is considered an ordinary character. |
707 | .Pp | | 707 | .Pp |
708 | .Ql ^\& | | 708 | .Ql ^\& |
709 | and | | 709 | and |
710 | .Ql $\& | | 710 | .Ql $\& |
711 | beginning and ending subexpressions in obsolete | | 711 | beginning and ending subexpressions in obsolete |
712 | .Pq Dq basic | | 712 | .Pq Dq basic |
713 | REs are anchors, not ordinary characters. | | 713 | REs are anchors, not ordinary characters. |
714 | .Sh DIAGNOSTICS | | 714 | .Sh DIAGNOSTICS |
715 | Non-zero error codes from | | 715 | Non-zero error codes from |
716 | .Fn regcomp | | 716 | .Fn regcomp |
717 | and | | 717 | and |
718 | .Fn regexec | | 718 | .Fn regexec |
719 | include the following: | | 719 | include the following: |
720 | .Pp | | 720 | .Pp |
721 | .Bl -tag -width REG_ECOLLATE -compact | | 721 | .Bl -tag -width REG_ECOLLATE -compact |
722 | .It Dv REG_NOMATCH | | 722 | .It Dv REG_NOMATCH |
723 | The | | 723 | The |
724 | .Fn regexec | | 724 | .Fn regexec |
725 | function | | 725 | function |
726 | failed to match | | 726 | failed to match |
727 | .It Dv REG_BADPAT | | 727 | .It Dv REG_BADPAT |
728 | invalid regular expression | | 728 | invalid regular expression |
729 | .It Dv REG_ECOLLATE | | 729 | .It Dv REG_ECOLLATE |
730 | invalid collating element | | 730 | invalid collating element |
731 | .It Dv REG_ECTYPE | | 731 | .It Dv REG_ECTYPE |
732 | invalid character class | | 732 | invalid character class |
733 | .It Dv REG_EESCAPE | | 733 | .It Dv REG_EESCAPE |
734 | .Ql \e | | 734 | .Ql \e |
735 | applied to unescapable character | | 735 | applied to unescapable character |
736 | .It Dv REG_ESUBREG | | 736 | .It Dv REG_ESUBREG |
737 | invalid backreference number | | 737 | invalid backreference number |
738 | .It Dv REG_EBRACK | | 738 | .It Dv REG_EBRACK |
739 | brackets | | 739 | brackets |
740 | .Ql "[ ]" | | 740 | .Ql "[ ]" |
741 | not balanced | | 741 | not balanced |
742 | .It Dv REG_EPAREN | | 742 | .It Dv REG_EPAREN |
743 | parentheses | | 743 | parentheses |
744 | .Ql "( )" | | 744 | .Ql "( )" |
745 | not balanced | | 745 | not balanced |
746 | .It Dv REG_EBRACE | | 746 | .It Dv REG_EBRACE |
747 | braces | | 747 | braces |
748 | .Ql "{ }" | | 748 | .Ql "{ }" |
749 | not balanced | | 749 | not balanced |
750 | .It Dv REG_BADBR | | 750 | .It Dv REG_BADBR |
751 | invalid repetition count(s) in | | 751 | invalid repetition count(s) in |
752 | .Ql "{ }" | | 752 | .Ql "{ }" |
753 | .It Dv REG_ERANGE | | 753 | .It Dv REG_ERANGE |
754 | invalid character range in | | 754 | invalid character range in |
755 | .Ql "[ ]" | | 755 | .Ql "[ ]" |
756 | .It Dv REG_ESPACE | | 756 | .It Dv REG_ESPACE |
757 | ran out of memory | | 757 | ran out of memory |
758 | .It Dv REG_BADRPT | | 758 | .It Dv REG_BADRPT |
759 | .Ql ?\& , | | 759 | .Ql ?\& , |
760 | .Ql *\& , | | 760 | .Ql *\& , |
761 | or | | 761 | or |
762 | .Ql +\& | | 762 | .Ql +\& |
763 | operand invalid | | 763 | operand invalid |
764 | .It Dv REG_EMPTY | | 764 | .It Dv REG_EMPTY |
765 | empty (sub)expression | | 765 | empty (sub)expression |
766 | .It Dv REG_ASSERT | | 766 | .It Dv REG_ASSERT |
767 | cannot happen - you found a bug | | 767 | cannot happen - you found a bug |
768 | .It Dv REG_INVARG | | 768 | .It Dv REG_INVARG |
769 | invalid argument, e.g.\& negative-length string | | 769 | invalid argument, e.g.\& negative-length string |
770 | .It Dv REG_ILLSEQ | | 770 | .It Dv REG_ILLSEQ |
771 | illegal byte sequence (bad multibyte character) | | 771 | illegal byte sequence (bad multibyte character) |
772 | .El | | 772 | .El |
773 | .Sh SEE ALSO | | 773 | .Sh SEE ALSO |
774 | .Xr grep 1 , | | 774 | .Xr grep 1 , |
775 | .Xr re_format 7 | | 775 | .Xr re_format 7 |
776 | .Pp | | 776 | .Pp |
777 | .St -p1003.2 , | | 777 | .St -p1003.2 , |
778 | sections 2.8 (Regular Expression Notation) | | 778 | sections 2.8 (Regular Expression Notation) |
779 | and | | 779 | and |
780 | B.5 (C Binding for Regular Expression Matching). | | 780 | B.5 (C Binding for Regular Expression Matching). |
781 | .Sh HISTORY | | 781 | .Sh HISTORY |
782 | Originally written by | | 782 | Originally written by |
783 | .An Henry Spencer . | | 783 | .An Henry Spencer . |
784 | Altered for inclusion in the | | 784 | Altered for inclusion in the |
785 | .Bx 4.4 | | 785 | .Bx 4.4 |
786 | distribution. | | 786 | distribution. |
787 | .Pp | | 787 | .Pp |
788 | The | | 788 | The |
789 | .Fn regnsub | | 789 | .Fn regnsub |
790 | and | | 790 | and |
791 | .Fn regasub | | 791 | .Fn regasub |
792 | functions appeared in | | 792 | functions appeared in |
793 | .Nx 8 . | | 793 | .Nx 8 . |
794 | .Sh BUGS | | 794 | .Sh BUGS |
795 | This is an alpha release with known defects. | | 795 | This is an alpha release with known defects. |
796 | Please report problems. | | 796 | Please report problems. |
797 | .Pp | | 797 | .Pp |
798 | The back-reference code is subtle and doubts linger about its correctness | | 798 | The back-reference code is subtle and doubts linger about its correctness |
799 | in complex cases. | | 799 | in complex cases. |
800 | .Pp | | 800 | .Pp |
801 | The performance of the | | 801 | The performance of the |
802 | .Fn regexec | | 802 | .Fn regexec |
803 | function | | 803 | function |
804 | is poor. | | 804 | is poor. |
805 | This will improve with later releases. | | 805 | This will improve with later releases. |
806 | The | | 806 | The |
807 | .Fa nmatch | | 807 | .Fa nmatch |
808 | argument | | 808 | argument |
809 | exceeding 0 is expensive; | | 809 | exceeding 0 is expensive; |
810 | .Fa nmatch | | 810 | .Fa nmatch |
811 | exceeding 1 is worse. | | 811 | exceeding 1 is worse. |
812 | The | | 812 | The |
813 | .Fn regexec | | 813 | .Fn regexec |
814 | function | | 814 | function |
815 | is largely insensitive to RE complexity | | 815 | is largely insensitive to RE complexity |
816 | .Em except | | 816 | .Em except |
817 | that back | | 817 | that back |
818 | references are massively expensive. | | 818 | references are massively expensive. |
819 | RE length does matter; in particular, there is a strong speed bonus | | 819 | RE length does matter; in particular, there is a strong speed bonus |
820 | for keeping RE length under about 30 characters, | | 820 | for keeping RE length under about 30 characters, |
821 | with most special characters counting roughly double. | | 821 | with most special characters counting roughly double. |
822 | .Pp | | 822 | .Pp |
823 | The | | 823 | The |
824 | .Fn regcomp | | 824 | .Fn regcomp |
825 | function | | 825 | function |
826 | implements bounded repetitions by macro expansion, | | 826 | implements bounded repetitions by macro expansion, |
827 | which is costly in time and space if counts are large | | 827 | which is costly in time and space if counts are large |
828 | or bounded repetitions are nested. | | 828 | or bounded repetitions are nested. |
829 | An RE like, say, | | 829 | An RE like, say, |
830 | .Ql "((((a{1,100}){1,100}){1,100}){1,100}){1,100}" | | 830 | .Ql "((((a{1,100}){1,100}){1,100}){1,100}){1,100}" |
831 | will (eventually) run almost any existing machine out of swap space. | | 831 | will (eventually) run almost any existing machine out of swap space. |
832 | .Pp | | 832 | .Pp |
833 | There are suspected problems with response to obscure error conditions. | | 833 | There are suspected problems with response to obscure error conditions. |
834 | Notably, | | 834 | Notably, |
835 | certain kinds of internal overflow, | | 835 | certain kinds of internal overflow, |
836 | produced only by truly enormous REs or by multiply nested bounded repetitions, | | 836 | produced only by truly enormous REs or by multiply nested bounded repetitions, |
837 | are probably not handled well. | | 837 | are probably not handled well. |
838 | .Pp | | 838 | .Pp |
839 | Due to a mistake in | | 839 | Due to a mistake in |
840 | .St -p1003.2 , | | 840 | .St -p1003.2 , |
841 | things like | | 841 | things like |
842 | .Ql "a)b" | | 842 | .Ql "a)b" |
843 | are legal REs because | | 843 | are legal REs because |
844 | .Ql )\& | | 844 | .Ql )\& |
845 | is | | 845 | is |
846 | a special character only in the presence of a previous unmatched | | 846 | a special character only in the presence of a previous unmatched |
847 | .Ql (\& . | | 847 | .Ql (\& . |
848 | This cannot be fixed until the spec is fixed. | | 848 | This cannot be fixed until the spec is fixed. |
849 | .Pp | | 849 | .Pp |
850 | The standard's definition of back references is vague. | | 850 | The standard's definition of back references is vague. |
851 | For example, does | | 851 | For example, does |
852 | .Ql "a\e(\e(b\e)*\e2\e)*d" | | 852 | .Ql "a\e(\e(b\e)*\e2\e)*d" |
853 | match | | 853 | match |
854 | .Ql "abbbd" ? | | 854 | .Ql "abbbd" ? |
855 | Until the standard is clarified, | | 855 | Until the standard is clarified, |
856 | behavior in such cases should not be relied on. | | 856 | behavior in such cases should not be relied on. |
857 | .Pp | | 857 | .Pp |
858 | The implementation of word-boundary matching is a bit of a kludge, | | 858 | The implementation of word-boundary matching is a bit of a kludge, |
859 | and bugs may lurk in combinations of word-boundary matching and anchoring. | | 859 | and bugs may lurk in combinations of word-boundary matching and anchoring. |
860 | .Pp | | 860 | .Pp |
861 | Word-boundary matching does not work properly in multibyte locales. | | 861 | Word-boundary matching does not work properly in multibyte locales. |