| @@ -1,458 +1,458 @@ | | | @@ -1,458 +1,458 @@ |
1 | /* $NetBSD: sys_memfd.c,v 1.6 2023/07/29 17:54:54 christos Exp $ */ | | 1 | /* $NetBSD: sys_memfd.c,v 1.7 2023/07/29 23:51:29 rin Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 2023 The NetBSD Foundation, Inc. | | 4 | * Copyright (c) 2023 The NetBSD Foundation, Inc. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation | | 7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Theodore Preduta. | | 8 | * by Theodore Preduta. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright | | 15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the | | 16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. | | 17 | * documentation and/or other materials provided with the distribution. |
18 | * | | 18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | * POSSIBILITY OF SUCH DAMAGE. | | 29 | * POSSIBILITY OF SUCH DAMAGE. |
30 | */ | | 30 | */ |
31 | | | 31 | |
32 | #include <sys/cdefs.h> | | 32 | #include <sys/cdefs.h> |
33 | __KERNEL_RCSID(0, "$NetBSD: sys_memfd.c,v 1.6 2023/07/29 17:54:54 christos Exp $"); | | 33 | __KERNEL_RCSID(0, "$NetBSD: sys_memfd.c,v 1.7 2023/07/29 23:51:29 rin Exp $"); |
34 | | | 34 | |
35 | #include <sys/param.h> | | 35 | #include <sys/param.h> |
36 | #include <sys/types.h> | | 36 | #include <sys/types.h> |
37 | | | 37 | |
38 | #include <sys/fcntl.h> | | 38 | #include <sys/fcntl.h> |
39 | #include <sys/file.h> | | 39 | #include <sys/file.h> |
40 | #include <sys/filedesc.h> | | 40 | #include <sys/filedesc.h> |
41 | #include <sys/memfd.h> | | 41 | #include <sys/memfd.h> |
42 | #include <sys/mman.h> | | 42 | #include <sys/mman.h> |
43 | #include <sys/syscallargs.h> | | 43 | #include <sys/syscallargs.h> |
44 | | | 44 | |
45 | #include <uvm/uvm_extern.h> | | 45 | #include <uvm/uvm_extern.h> |
46 | #include <uvm/uvm_object.h> | | 46 | #include <uvm/uvm_object.h> |
47 | | | 47 | |
48 | #define F_SEAL_ANY_WRITE (F_SEAL_WRITE|F_SEAL_FUTURE_WRITE) | | 48 | #define F_SEAL_ANY_WRITE (F_SEAL_WRITE|F_SEAL_FUTURE_WRITE) |
49 | #define MFD_KNOWN_SEALS (F_SEAL_SEAL|F_SEAL_SHRINK|F_SEAL_GROW \ | | 49 | #define MFD_KNOWN_SEALS (F_SEAL_SEAL|F_SEAL_SHRINK|F_SEAL_GROW \ |
50 | |F_SEAL_WRITE|F_SEAL_FUTURE_WRITE) | | 50 | |F_SEAL_WRITE|F_SEAL_FUTURE_WRITE) |
51 | | | 51 | |
52 | static const char memfd_prefix[] = "memfd:"; | | 52 | static const char memfd_prefix[] = "memfd:"; |
53 | | | 53 | |
54 | static int memfd_read(file_t *, off_t *, struct uio *, kauth_cred_t, int); | | 54 | static int memfd_read(file_t *, off_t *, struct uio *, kauth_cred_t, int); |
55 | static int memfd_write(file_t *, off_t *, struct uio *, kauth_cred_t, int); | | 55 | static int memfd_write(file_t *, off_t *, struct uio *, kauth_cred_t, int); |
56 | static int memfd_ioctl(file_t *, u_long, void *); | | 56 | static int memfd_ioctl(file_t *, u_long, void *); |
57 | static int memfd_fcntl(file_t *, u_int, void *); | | 57 | static int memfd_fcntl(file_t *, u_int, void *); |
58 | static int memfd_stat(file_t *, struct stat *); | | 58 | static int memfd_stat(file_t *, struct stat *); |
59 | static int memfd_close(file_t *); | | 59 | static int memfd_close(file_t *); |
60 | static int memfd_mmap(file_t *, off_t *, size_t, int, int *, int *, | | 60 | static int memfd_mmap(file_t *, off_t *, size_t, int, int *, int *, |
61 | struct uvm_object **, int *); | | 61 | struct uvm_object **, int *); |
62 | static int memfd_seek(file_t *, off_t, int, off_t *, int); | | 62 | static int memfd_seek(file_t *, off_t, int, off_t *, int); |
63 | static int do_memfd_truncate(file_t *, off_t); | | 63 | static int do_memfd_truncate(file_t *, off_t); |
64 | static int memfd_truncate(file_t *, off_t); | | 64 | static int memfd_truncate(file_t *, off_t); |
65 | | | 65 | |
66 | static const struct fileops memfd_fileops = { | | 66 | static const struct fileops memfd_fileops = { |
67 | .fo_name = "memfd", | | 67 | .fo_name = "memfd", |
68 | .fo_read = memfd_read, | | 68 | .fo_read = memfd_read, |
69 | .fo_write = memfd_write, | | 69 | .fo_write = memfd_write, |
70 | .fo_ioctl = memfd_ioctl, | | 70 | .fo_ioctl = memfd_ioctl, |
71 | .fo_fcntl = memfd_fcntl, | | 71 | .fo_fcntl = memfd_fcntl, |
72 | .fo_poll = fnullop_poll, | | 72 | .fo_poll = fnullop_poll, |
73 | .fo_stat = memfd_stat, | | 73 | .fo_stat = memfd_stat, |
74 | .fo_close = memfd_close, | | 74 | .fo_close = memfd_close, |
75 | .fo_kqfilter = fnullop_kqfilter, | | 75 | .fo_kqfilter = fnullop_kqfilter, |
76 | .fo_restart = fnullop_restart, | | 76 | .fo_restart = fnullop_restart, |
77 | .fo_mmap = memfd_mmap, | | 77 | .fo_mmap = memfd_mmap, |
78 | .fo_seek = memfd_seek, | | 78 | .fo_seek = memfd_seek, |
79 | .fo_fpathconf = (void *)eopnotsupp, | | 79 | .fo_fpathconf = (void *)eopnotsupp, |
80 | .fo_posix_fadvise = (void *)eopnotsupp, | | 80 | .fo_posix_fadvise = (void *)eopnotsupp, |
81 | .fo_truncate = memfd_truncate, | | 81 | .fo_truncate = memfd_truncate, |
82 | }; | | 82 | }; |
83 | | | 83 | |
84 | /* | | 84 | /* |
85 | * memfd_create(2). Create a file descriptor associated with anonymous | | 85 | * memfd_create(2). Create a file descriptor associated with anonymous |
86 | * memory. | | 86 | * memory. |
87 | */ | | 87 | */ |
88 | int | | 88 | int |
89 | sys_memfd_create(struct lwp *l, const struct sys_memfd_create_args *uap, | | 89 | sys_memfd_create(struct lwp *l, const struct sys_memfd_create_args *uap, |
90 | register_t *retval) | | 90 | register_t *retval) |
91 | { | | 91 | { |
92 | /* { | | 92 | /* { |
93 | syscallarg(const char *) name; | | 93 | syscallarg(const char *) name; |
94 | syscallarg(unsigned int) flags; | | 94 | syscallarg(unsigned int) flags; |
95 | } */ | | 95 | } */ |
96 | int error, fd; | | 96 | int error, fd; |
97 | file_t *fp; | | 97 | file_t *fp; |
98 | struct memfd *mfd; | | 98 | struct memfd *mfd; |
99 | struct proc *p = l->l_proc; | | 99 | struct proc *p = l->l_proc; |
100 | const unsigned int flags = SCARG(uap, flags); | | 100 | const unsigned int flags = SCARG(uap, flags); |
101 | | | 101 | |
102 | if (flags & ~(MFD_CLOEXEC|MFD_ALLOW_SEALING)) | | 102 | if (flags & ~(MFD_CLOEXEC|MFD_ALLOW_SEALING)) |
103 | return EINVAL; | | 103 | return EINVAL; |
104 | | | 104 | |
105 | mfd = kmem_zalloc(sizeof(*mfd), KM_SLEEP); | | 105 | mfd = kmem_zalloc(sizeof(*mfd), KM_SLEEP); |
106 | mfd->mfd_size = 0; | | 106 | mfd->mfd_size = 0; |
107 | mfd->mfd_uobj = uao_create(INT64_MAX - PAGE_SIZE, 0); /* same as tmpfs */ | | 107 | mfd->mfd_uobj = uao_create(INT64_MAX - PAGE_SIZE, 0); /* same as tmpfs */ |
108 | | | 108 | |
109 | CTASSERT(sizeof(memfd_prefix) < NAME_MAX); /* sanity check */ | | 109 | CTASSERT(sizeof(memfd_prefix) < NAME_MAX); /* sanity check */ |
110 | strcpy(mfd->mfd_name, memfd_prefix); | | 110 | strcpy(mfd->mfd_name, memfd_prefix); |
111 | error = copyinstr(SCARG(uap, name), | | 111 | error = copyinstr(SCARG(uap, name), |
112 | &mfd->mfd_name[sizeof(memfd_prefix) - 1], | | 112 | &mfd->mfd_name[sizeof(memfd_prefix) - 1], |
113 | sizeof(mfd->mfd_name) - sizeof(memfd_prefix), NULL); | | 113 | sizeof(mfd->mfd_name) - sizeof(memfd_prefix), NULL); |
114 | if (error != 0) | | 114 | if (error != 0) |
115 | goto leave; | | 115 | goto leave; |
116 | | | 116 | |
117 | getnanotime(&mfd->mfd_btime); | | 117 | getnanotime(&mfd->mfd_btime); |
118 | | | 118 | |
119 | if ((flags & MFD_ALLOW_SEALING) == 0) | | 119 | if ((flags & MFD_ALLOW_SEALING) == 0) |
120 | mfd->mfd_seals |= F_SEAL_SEAL; | | 120 | mfd->mfd_seals |= F_SEAL_SEAL; |
121 | | | 121 | |
122 | error = fd_allocfile(&fp, &fd); | | 122 | error = fd_allocfile(&fp, &fd); |
123 | if (error != 0) | | 123 | if (error != 0) |
124 | goto leave; | | 124 | goto leave; |
125 | | | 125 | |
126 | fp->f_flag = FREAD|FWRITE; | | 126 | fp->f_flag = FREAD|FWRITE; |
127 | fp->f_type = DTYPE_MEMFD; | | 127 | fp->f_type = DTYPE_MEMFD; |
128 | fp->f_ops = &memfd_fileops; | | 128 | fp->f_ops = &memfd_fileops; |
129 | fp->f_memfd = mfd; | | 129 | fp->f_memfd = mfd; |
130 | fd_set_exclose(l, fd, (flags & MFD_CLOEXEC) != 0); | | 130 | fd_set_exclose(l, fd, (flags & MFD_CLOEXEC) != 0); |
131 | fd_affix(p, fp, fd); | | 131 | fd_affix(p, fp, fd); |
132 | | | 132 | |
133 | *retval = fd; | | 133 | *retval = fd; |
134 | return 0; | | 134 | return 0; |
135 | | | 135 | |
136 | leave: | | 136 | leave: |
137 | uao_detach(mfd->mfd_uobj); | | 137 | uao_detach(mfd->mfd_uobj); |
138 | kmem_free(mfd, sizeof(*mfd)); | | 138 | kmem_free(mfd, sizeof(*mfd)); |
139 | return error; | | 139 | return error; |
140 | } | | 140 | } |
141 | | | 141 | |
142 | static int | | 142 | static int |
143 | memfd_read(file_t *fp, off_t *offp, struct uio *uio, kauth_cred_t cred, | | 143 | memfd_read(file_t *fp, off_t *offp, struct uio *uio, kauth_cred_t cred, |
144 | int flags) | | 144 | int flags) |
145 | { | | 145 | { |
146 | int error; | | 146 | int error; |
147 | vsize_t todo; | | 147 | vsize_t todo; |
148 | struct memfd *mfd = fp->f_memfd; | | 148 | struct memfd *mfd = fp->f_memfd; |
149 | | | 149 | |
150 | mutex_enter(&fp->f_lock); | | 150 | mutex_enter(&fp->f_lock); |
151 | | | 151 | |
152 | if (*offp < 0) { | | 152 | if (*offp < 0) { |
153 | error = EINVAL; | | 153 | error = EINVAL; |
154 | goto leave; | | 154 | goto leave; |
155 | } | | 155 | } |
156 | | | 156 | |
157 | /* Trying to read past the end does nothing. */ | | 157 | /* Trying to read past the end does nothing. */ |
158 | if (*offp >= mfd->mfd_size) { | | 158 | if (*offp >= mfd->mfd_size) { |
159 | error = 0; | | 159 | error = 0; |
160 | goto leave; | | 160 | goto leave; |
161 | } | | 161 | } |
162 | | | 162 | |
163 | if (flags & FOF_UPDATE_OFFSET) | | 163 | uio->uio_offset = *offp; |
164 | *offp = uio->uio_offset; | | | |
165 | todo = MIN(uio->uio_resid, mfd->mfd_size - *offp); | | 164 | todo = MIN(uio->uio_resid, mfd->mfd_size - *offp); |
166 | error = ubc_uiomove(mfd->mfd_uobj, uio, todo, UVM_ADV_SEQUENTIAL, | | 165 | error = ubc_uiomove(mfd->mfd_uobj, uio, todo, UVM_ADV_SEQUENTIAL, |
167 | UBC_READ|UBC_PARTIALOK); | | 166 | UBC_READ|UBC_PARTIALOK); |
168 | *offp = uio->uio_offset; | | 167 | if (flags & FOF_UPDATE_OFFSET) |
| | | 168 | *offp = uio->uio_offset; |
169 | | | 169 | |
170 | leave: | | 170 | leave: |
171 | getnanotime(&mfd->mfd_atime); | | 171 | getnanotime(&mfd->mfd_atime); |
172 | | | 172 | |
173 | | | 173 | |
174 | mutex_exit(&fp->f_lock); | | 174 | mutex_exit(&fp->f_lock); |
175 | | | 175 | |
176 | return error; | | 176 | return error; |
177 | } | | 177 | } |
178 | | | 178 | |
179 | static int | | 179 | static int |
180 | memfd_write(file_t *fp, off_t *offp, struct uio *uio, kauth_cred_t cred, | | 180 | memfd_write(file_t *fp, off_t *offp, struct uio *uio, kauth_cred_t cred, |
181 | int flags) | | 181 | int flags) |
182 | { | | 182 | { |
183 | int error; | | 183 | int error; |
184 | vsize_t todo; | | 184 | vsize_t todo; |
185 | struct memfd *mfd = fp->f_memfd; | | 185 | struct memfd *mfd = fp->f_memfd; |
186 | | | 186 | |
187 | mutex_enter(&fp->f_lock); | | 187 | mutex_enter(&fp->f_lock); |
188 | | | 188 | |
189 | if (mfd->mfd_seals & F_SEAL_ANY_WRITE) { | | 189 | if (mfd->mfd_seals & F_SEAL_ANY_WRITE) { |
190 | error = EPERM; | | 190 | error = EPERM; |
191 | goto leave; | | 191 | goto leave; |
192 | } | | 192 | } |
193 | | | 193 | |
194 | if (*offp < 0) { | | 194 | if (*offp < 0) { |
195 | error = EINVAL; | | 195 | error = EINVAL; |
196 | goto leave; | | 196 | goto leave; |
197 | } | | 197 | } |
198 | | | 198 | |
199 | uio->uio_offset = *offp; | | 199 | uio->uio_offset = *offp; |
200 | todo = uio->uio_resid; | | 200 | todo = uio->uio_resid; |
201 | | | 201 | |
202 | if (mfd->mfd_seals & F_SEAL_GROW) { | | 202 | if (mfd->mfd_seals & F_SEAL_GROW) { |
203 | if (*offp >= mfd->mfd_size) { | | 203 | if (*offp >= mfd->mfd_size) { |
204 | error = EPERM; | | 204 | error = EPERM; |
205 | goto leave; | | 205 | goto leave; |
206 | } | | 206 | } |
207 | | | 207 | |
208 | /* Truncate the write to fit in mfd_size */ | | 208 | /* Truncate the write to fit in mfd_size */ |
209 | if (*offp + uio->uio_resid >= mfd->mfd_size) | | 209 | if (*offp + uio->uio_resid >= mfd->mfd_size) |
210 | todo = mfd->mfd_size - *offp; | | 210 | todo = mfd->mfd_size - *offp; |
211 | } else if (*offp + uio->uio_resid >= mfd->mfd_size) { | | 211 | } else if (*offp + uio->uio_resid >= mfd->mfd_size) { |
212 | /* Grow to accommodate the write request. */ | | 212 | /* Grow to accommodate the write request. */ |
213 | error = do_memfd_truncate(fp, *offp + uio->uio_resid); | | 213 | error = do_memfd_truncate(fp, *offp + uio->uio_resid); |
214 | if (error != 0) | | 214 | if (error != 0) |
215 | goto leave; | | 215 | goto leave; |
216 | } | | 216 | } |
217 | | | 217 | |
218 | error = ubc_uiomove(mfd->mfd_uobj, uio, todo, UVM_ADV_SEQUENTIAL, | | 218 | error = ubc_uiomove(mfd->mfd_uobj, uio, todo, UVM_ADV_SEQUENTIAL, |
219 | UBC_WRITE|UBC_PARTIALOK); | | 219 | UBC_WRITE|UBC_PARTIALOK); |
220 | if (flags & FOF_UPDATE_OFFSET) | | 220 | if (flags & FOF_UPDATE_OFFSET) |
221 | *offp = uio->uio_offset; | | 221 | *offp = uio->uio_offset; |
222 | | | 222 | |
223 | getnanotime(&mfd->mfd_mtime); | | 223 | getnanotime(&mfd->mfd_mtime); |
224 | | | 224 | |
225 | leave: | | 225 | leave: |
226 | mutex_exit(&fp->f_lock); | | 226 | mutex_exit(&fp->f_lock); |
227 | | | 227 | |
228 | return error; | | 228 | return error; |
229 | } | | 229 | } |
230 | | | 230 | |
231 | static int | | 231 | static int |
232 | memfd_ioctl(file_t *fp, u_long cmd, void *data) | | 232 | memfd_ioctl(file_t *fp, u_long cmd, void *data) |
233 | { | | 233 | { |
234 | | | 234 | |
235 | return EINVAL; | | 235 | return EINVAL; |
236 | } | | 236 | } |
237 | | | 237 | |
238 | static int | | 238 | static int |
239 | memfd_fcntl(file_t *fp, u_int cmd, void *data) | | 239 | memfd_fcntl(file_t *fp, u_int cmd, void *data) |
240 | { | | 240 | { |
241 | struct memfd *mfd = fp->f_memfd; | | 241 | struct memfd *mfd = fp->f_memfd; |
242 | int error = 0; | | 242 | int error = 0; |
243 | | | 243 | |
244 | switch (cmd) { | | 244 | switch (cmd) { |
245 | case F_ADD_SEALS: | | 245 | case F_ADD_SEALS: |
246 | mutex_enter(&fp->f_lock); | | 246 | mutex_enter(&fp->f_lock); |
247 | | | 247 | |
248 | if (mfd->mfd_seals & F_SEAL_SEAL) { | | 248 | if (mfd->mfd_seals & F_SEAL_SEAL) { |
249 | error = EPERM; | | 249 | error = EPERM; |
250 | goto leave_add_seals; | | 250 | goto leave_add_seals; |
251 | } | | 251 | } |
252 | | | 252 | |
253 | if (*(int *)data & ~MFD_KNOWN_SEALS) { | | 253 | if (*(int *)data & ~MFD_KNOWN_SEALS) { |
254 | error = EINVAL; | | 254 | error = EINVAL; |
255 | goto leave_add_seals; | | 255 | goto leave_add_seals; |
256 | } | | 256 | } |
257 | | | 257 | |
258 | /* | | 258 | /* |
259 | * Can only add F_SEAL_WRITE if there are no currently | | 259 | * Can only add F_SEAL_WRITE if there are no currently |
260 | * open mmaps. | | 260 | * open mmaps. |
261 | * | | 261 | * |
263 | * XXX should only disallow if there are currently | | 263 | * XXX should only disallow if there are currently |
263 | * open mmaps with PROT_WRITE. | | 263 | * open mmaps with PROT_WRITE. |
264 | */ | | 264 | */ |
265 | if ((mfd->mfd_seals & F_SEAL_WRITE) == 0 && | | 265 | if ((mfd->mfd_seals & F_SEAL_WRITE) == 0 && |
266 | (*(int *)data & F_SEAL_WRITE) != 0 && | | 266 | (*(int *)data & F_SEAL_WRITE) != 0 && |
267 | mfd->mfd_uobj->uo_refs > 1) | | 267 | mfd->mfd_uobj->uo_refs > 1) |
268 | { | | 268 | { |
269 | error = EBUSY; | | 269 | error = EBUSY; |
270 | goto leave_add_seals; | | 270 | goto leave_add_seals; |
271 | } | | 271 | } |
272 | | | 272 | |
273 | mfd->mfd_seals |= *(int *)data; | | 273 | mfd->mfd_seals |= *(int *)data; |
274 | | | 274 | |
275 | leave_add_seals: | | 275 | leave_add_seals: |
276 | mutex_exit(&fp->f_lock); | | 276 | mutex_exit(&fp->f_lock); |
277 | return error; | | 277 | return error; |
278 | | | 278 | |
279 | case F_GET_SEALS: | | 279 | case F_GET_SEALS: |
280 | mutex_enter(&fp->f_lock); | | 280 | mutex_enter(&fp->f_lock); |
281 | *(int *)data = mfd->mfd_seals; | | 281 | *(int *)data = mfd->mfd_seals; |
282 | mutex_exit(&fp->f_lock); | | 282 | mutex_exit(&fp->f_lock); |
283 | return 0; | | 283 | return 0; |
284 | | | 284 | |
285 | default: | | 285 | default: |
286 | return EINVAL; | | 286 | return EINVAL; |
287 | } | | 287 | } |
288 | } | | 288 | } |
289 | | | 289 | |
290 | static int | | 290 | static int |
291 | memfd_stat(file_t *fp, struct stat *st) | | 291 | memfd_stat(file_t *fp, struct stat *st) |
292 | { | | 292 | { |
293 | struct memfd *mfd = fp->f_memfd; | | 293 | struct memfd *mfd = fp->f_memfd; |
294 | | | 294 | |
295 | mutex_enter(&fp->f_lock); | | 295 | mutex_enter(&fp->f_lock); |
296 | | | 296 | |
297 | memset(st, 0, sizeof(*st)); | | 297 | memset(st, 0, sizeof(*st)); |
298 | st->st_uid = kauth_cred_geteuid(fp->f_cred); | | 298 | st->st_uid = kauth_cred_geteuid(fp->f_cred); |
299 | st->st_gid = kauth_cred_getegid(fp->f_cred); | | 299 | st->st_gid = kauth_cred_getegid(fp->f_cred); |
300 | st->st_size = mfd->mfd_size; | | 300 | st->st_size = mfd->mfd_size; |
301 | | | 301 | |
302 | st->st_mode = S_IREAD; | | 302 | st->st_mode = S_IREAD; |
303 | if ((mfd->mfd_seals & F_SEAL_ANY_WRITE) == 0) | | 303 | if ((mfd->mfd_seals & F_SEAL_ANY_WRITE) == 0) |
304 | st->st_mode |= S_IWRITE; | | 304 | st->st_mode |= S_IWRITE; |
305 | | | 305 | |
306 | st->st_birthtimespec = mfd->mfd_btime; | | 306 | st->st_birthtimespec = mfd->mfd_btime; |
307 | st->st_ctimespec = mfd->mfd_mtime; | | 307 | st->st_ctimespec = mfd->mfd_mtime; |
308 | st->st_atimespec = mfd->mfd_atime; | | 308 | st->st_atimespec = mfd->mfd_atime; |
309 | st->st_mtimespec = mfd->mfd_mtime; | | 309 | st->st_mtimespec = mfd->mfd_mtime; |
310 | | | 310 | |
311 | mutex_exit(&fp->f_lock); | | 311 | mutex_exit(&fp->f_lock); |
312 | | | 312 | |
313 | return 0; | | 313 | return 0; |
314 | } | | 314 | } |
315 | | | 315 | |
316 | static int | | 316 | static int |
317 | memfd_close(file_t *fp) | | 317 | memfd_close(file_t *fp) |
318 | { | | 318 | { |
319 | struct memfd *mfd = fp->f_memfd; | | 319 | struct memfd *mfd = fp->f_memfd; |
320 | | | 320 | |
321 | uao_detach(mfd->mfd_uobj); | | 321 | uao_detach(mfd->mfd_uobj); |
322 | | | 322 | |
323 | kmem_free(mfd, sizeof(*mfd)); | | 323 | kmem_free(mfd, sizeof(*mfd)); |
324 | fp->f_memfd = NULL; | | 324 | fp->f_memfd = NULL; |
325 | | | 325 | |
326 | return 0; | | 326 | return 0; |
327 | } | | 327 | } |
328 | | | 328 | |
329 | static int | | 329 | static int |
330 | memfd_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp, | | 330 | memfd_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp, |
331 | int *advicep, struct uvm_object **uobjp, int *maxprotp) | | 331 | int *advicep, struct uvm_object **uobjp, int *maxprotp) |
332 | { | | 332 | { |
333 | struct memfd *mfd = fp->f_memfd; | | 333 | struct memfd *mfd = fp->f_memfd; |
334 | int error = 0; | | 334 | int error = 0; |
335 | | | 335 | |
336 | /* uvm_mmap guarantees page-aligned offset and size. */ | | 336 | /* uvm_mmap guarantees page-aligned offset and size. */ |
337 | KASSERT(*offp == round_page(*offp)); | | 337 | KASSERT(*offp == round_page(*offp)); |
338 | KASSERT(size == round_page(size)); | | 338 | KASSERT(size == round_page(size)); |
339 | KASSERT(size > 0); | | 339 | KASSERT(size > 0); |
340 | | | 340 | |
341 | mutex_enter(&fp->f_lock); | | 341 | mutex_enter(&fp->f_lock); |
342 | | | 342 | |
343 | if (*offp < 0) { | | 343 | if (*offp < 0) { |
344 | error = EINVAL; | | 344 | error = EINVAL; |
345 | goto leave; | | 345 | goto leave; |
346 | } | | 346 | } |
347 | if (*offp + size > mfd->mfd_size) { | | 347 | if (*offp + size > mfd->mfd_size) { |
348 | error = EINVAL; | | 348 | error = EINVAL; |
349 | goto leave; | | 349 | goto leave; |
350 | } | | 350 | } |
351 | | | 351 | |
352 | if ((mfd->mfd_seals & F_SEAL_ANY_WRITE) && | | 352 | if ((mfd->mfd_seals & F_SEAL_ANY_WRITE) && |
353 | (prot & VM_PROT_WRITE) && (*flagsp & MAP_PRIVATE) == 0) { | | 353 | (prot & VM_PROT_WRITE) && (*flagsp & MAP_PRIVATE) == 0) { |
354 | error = EPERM; | | 354 | error = EPERM; |
355 | goto leave; | | 355 | goto leave; |
356 | } | | 356 | } |
357 | | | 357 | |
358 | uao_reference(fp->f_memfd->mfd_uobj); | | 358 | uao_reference(fp->f_memfd->mfd_uobj); |
359 | *uobjp = fp->f_memfd->mfd_uobj; | | 359 | *uobjp = fp->f_memfd->mfd_uobj; |
360 | | | 360 | |
361 | *maxprotp = prot; | | 361 | *maxprotp = prot; |
362 | *advicep = UVM_ADV_RANDOM; | | 362 | *advicep = UVM_ADV_RANDOM; |
363 | | | 363 | |
364 | leave: | | 364 | leave: |
365 | mutex_exit(&fp->f_lock); | | 365 | mutex_exit(&fp->f_lock); |
366 | | | 366 | |
367 | return error; | | 367 | return error; |
368 | } | | 368 | } |
369 | | | 369 | |
370 | static int | | 370 | static int |
371 | memfd_seek(file_t *fp, off_t delta, int whence, off_t *newoffp, | | 371 | memfd_seek(file_t *fp, off_t delta, int whence, off_t *newoffp, |
372 | int flags) | | 372 | int flags) |
373 | { | | 373 | { |
374 | off_t newoff; | | 374 | off_t newoff; |
375 | int error = 0; | | 375 | int error = 0; |
376 | | | 376 | |
377 | mutex_enter(&fp->f_lock); | | 377 | mutex_enter(&fp->f_lock); |
378 | | | 378 | |
379 | switch (whence) { | | 379 | switch (whence) { |
380 | case SEEK_CUR: | | 380 | case SEEK_CUR: |
381 | newoff = fp->f_offset + delta; | | 381 | newoff = fp->f_offset + delta; |
382 | break; | | 382 | break; |
383 | | | 383 | |
384 | case SEEK_END: | | 384 | case SEEK_END: |
385 | newoff = fp->f_memfd->mfd_size + delta; | | 385 | newoff = fp->f_memfd->mfd_size + delta; |
386 | break; | | 386 | break; |
387 | | | 387 | |
388 | case SEEK_SET: | | 388 | case SEEK_SET: |
389 | newoff = delta; | | 389 | newoff = delta; |
390 | break; | | 390 | break; |
391 | | | 391 | |
392 | default: | | 392 | default: |
393 | error = EINVAL; | | 393 | error = EINVAL; |
394 | goto leave; | | 394 | goto leave; |
395 | } | | 395 | } |
396 | | | 396 | |
397 | if (newoffp) | | 397 | if (newoffp) |
398 | *newoffp = newoff; | | 398 | *newoffp = newoff; |
399 | if (flags & FOF_UPDATE_OFFSET) | | 399 | if (flags & FOF_UPDATE_OFFSET) |
400 | fp->f_offset = newoff; | | 400 | fp->f_offset = newoff; |
401 | | | 401 | |
402 | leave: | | 402 | leave: |
403 | mutex_exit(&fp->f_lock); | | 403 | mutex_exit(&fp->f_lock); |
404 | | | 404 | |
405 | return error; | | 405 | return error; |
406 | } | | 406 | } |
407 | | | 407 | |
408 | static int | | 408 | static int |
409 | do_memfd_truncate(file_t *fp, off_t length) | | 409 | do_memfd_truncate(file_t *fp, off_t length) |
410 | { | | 410 | { |
411 | struct memfd *mfd = fp->f_memfd; | | 411 | struct memfd *mfd = fp->f_memfd; |
412 | voff_t start, end; | | 412 | voff_t start, end; |
413 | int error = 0; | | 413 | int error = 0; |
414 | | | 414 | |
415 | KASSERT(mutex_owned(&fp->f_lock)); | | 415 | KASSERT(mutex_owned(&fp->f_lock)); |
416 | | | 416 | |
417 | if (length < 0) | | 417 | if (length < 0) |
418 | return EINVAL; | | 418 | return EINVAL; |
419 | if (length == mfd->mfd_size) | | 419 | if (length == mfd->mfd_size) |
420 | return 0; | | 420 | return 0; |
421 | | | 421 | |
422 | if ((mfd->mfd_seals & F_SEAL_SHRINK) && length < mfd->mfd_size) | | 422 | if ((mfd->mfd_seals & F_SEAL_SHRINK) && length < mfd->mfd_size) |
423 | return EPERM; | | 423 | return EPERM; |
424 | if ((mfd->mfd_seals & F_SEAL_GROW) && length > mfd->mfd_size) | | 424 | if ((mfd->mfd_seals & F_SEAL_GROW) && length > mfd->mfd_size) |
425 | return EPERM; | | 425 | return EPERM; |
426 | | | 426 | |
427 | if (length > mfd->mfd_size) | | 427 | if (length > mfd->mfd_size) |
428 | ubc_zerorange(mfd->mfd_uobj, mfd->mfd_size, | | 428 | ubc_zerorange(mfd->mfd_uobj, mfd->mfd_size, |
429 | length - mfd->mfd_size, 0); | | 429 | length - mfd->mfd_size, 0); |
430 | else { | | 430 | else { |
431 | /* length < mfd->mfd_size, so try to get rid of excess pages */ | | 431 | /* length < mfd->mfd_size, so try to get rid of excess pages */ |
432 | start = round_page(length); | | 432 | start = round_page(length); |
433 | end = round_page(mfd->mfd_size); | | 433 | end = round_page(mfd->mfd_size); |
434 | | | 434 | |
435 | if (start < end) { /* we actually have pages to remove */ | | 435 | if (start < end) { /* we actually have pages to remove */ |
436 | rw_enter(mfd->mfd_uobj->vmobjlock, RW_WRITER); | | 436 | rw_enter(mfd->mfd_uobj->vmobjlock, RW_WRITER); |
437 | error = (*mfd->mfd_uobj->pgops->pgo_put)(mfd->mfd_uobj, | | 437 | error = (*mfd->mfd_uobj->pgops->pgo_put)(mfd->mfd_uobj, |
438 | start, end, PGO_FREE); | | 438 | start, end, PGO_FREE); |
439 | /* pgo_put drops vmobjlock */ | | 439 | /* pgo_put drops vmobjlock */ |
440 | } | | 440 | } |
441 | } | | 441 | } |
442 | | | 442 | |
443 | getnanotime(&mfd->mfd_mtime); | | 443 | getnanotime(&mfd->mfd_mtime); |
444 | mfd->mfd_size = length; | | 444 | mfd->mfd_size = length; |
445 | | | 445 | |
446 | return error; | | 446 | return error; |
447 | } | | 447 | } |
448 | | | 448 | |
449 | static int | | 449 | static int |
450 | memfd_truncate(file_t *fp, off_t length) | | 450 | memfd_truncate(file_t *fp, off_t length) |
451 | { | | 451 | { |
452 | int error; | | 452 | int error; |
453 | | | 453 | |
454 | mutex_enter(&fp->f_lock); | | 454 | mutex_enter(&fp->f_lock); |
455 | error = do_memfd_truncate(fp, length); | | 455 | error = do_memfd_truncate(fp, length); |
456 | mutex_exit(&fp->f_lock); | | 456 | mutex_exit(&fp->f_lock); |
457 | return error; | | 457 | return error; |
458 | } | | 458 | } |