Sat Jul 29 23:51:29 2023 UTC ()
sys_memfd: Fix logic errors in offset handling introduced by the previous commit (r1.6).


(rin)
diff -r1.6 -r1.7 src/sys/kern/sys_memfd.c

cvs diff -r1.6 -r1.7 src/sys/kern/sys_memfd.c (switch to unified diff)

--- src/sys/kern/sys_memfd.c 2023/07/29 17:54:54 1.6
+++ src/sys/kern/sys_memfd.c 2023/07/29 23:51:29 1.7
@@ -1,458 +1,458 @@ @@ -1,458 +1,458 @@
1/* $NetBSD: sys_memfd.c,v 1.6 2023/07/29 17:54:54 christos Exp $ */ 1/* $NetBSD: sys_memfd.c,v 1.7 2023/07/29 23:51:29 rin Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2023 The NetBSD Foundation, Inc. 4 * Copyright (c) 2023 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Theodore Preduta. 8 * by Theodore Preduta.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32#include <sys/cdefs.h> 32#include <sys/cdefs.h>
33__KERNEL_RCSID(0, "$NetBSD: sys_memfd.c,v 1.6 2023/07/29 17:54:54 christos Exp $"); 33__KERNEL_RCSID(0, "$NetBSD: sys_memfd.c,v 1.7 2023/07/29 23:51:29 rin Exp $");
34 34
35#include <sys/param.h> 35#include <sys/param.h>
36#include <sys/types.h> 36#include <sys/types.h>
37 37
38#include <sys/fcntl.h> 38#include <sys/fcntl.h>
39#include <sys/file.h> 39#include <sys/file.h>
40#include <sys/filedesc.h> 40#include <sys/filedesc.h>
41#include <sys/memfd.h> 41#include <sys/memfd.h>
42#include <sys/mman.h> 42#include <sys/mman.h>
43#include <sys/syscallargs.h> 43#include <sys/syscallargs.h>
44 44
45#include <uvm/uvm_extern.h> 45#include <uvm/uvm_extern.h>
46#include <uvm/uvm_object.h> 46#include <uvm/uvm_object.h>
47 47
48#define F_SEAL_ANY_WRITE (F_SEAL_WRITE|F_SEAL_FUTURE_WRITE) 48#define F_SEAL_ANY_WRITE (F_SEAL_WRITE|F_SEAL_FUTURE_WRITE)
49#define MFD_KNOWN_SEALS (F_SEAL_SEAL|F_SEAL_SHRINK|F_SEAL_GROW \ 49#define MFD_KNOWN_SEALS (F_SEAL_SEAL|F_SEAL_SHRINK|F_SEAL_GROW \
50 |F_SEAL_WRITE|F_SEAL_FUTURE_WRITE) 50 |F_SEAL_WRITE|F_SEAL_FUTURE_WRITE)
51 51
52static const char memfd_prefix[] = "memfd:"; 52static const char memfd_prefix[] = "memfd:";
53 53
54static int memfd_read(file_t *, off_t *, struct uio *, kauth_cred_t, int); 54static int memfd_read(file_t *, off_t *, struct uio *, kauth_cred_t, int);
55static int memfd_write(file_t *, off_t *, struct uio *, kauth_cred_t, int); 55static int memfd_write(file_t *, off_t *, struct uio *, kauth_cred_t, int);
56static int memfd_ioctl(file_t *, u_long, void *); 56static int memfd_ioctl(file_t *, u_long, void *);
57static int memfd_fcntl(file_t *, u_int, void *); 57static int memfd_fcntl(file_t *, u_int, void *);
58static int memfd_stat(file_t *, struct stat *); 58static int memfd_stat(file_t *, struct stat *);
59static int memfd_close(file_t *); 59static int memfd_close(file_t *);
60static int memfd_mmap(file_t *, off_t *, size_t, int, int *, int *, 60static int memfd_mmap(file_t *, off_t *, size_t, int, int *, int *,
61 struct uvm_object **, int *); 61 struct uvm_object **, int *);
62static int memfd_seek(file_t *, off_t, int, off_t *, int); 62static int memfd_seek(file_t *, off_t, int, off_t *, int);
63static int do_memfd_truncate(file_t *, off_t); 63static int do_memfd_truncate(file_t *, off_t);
64static int memfd_truncate(file_t *, off_t); 64static int memfd_truncate(file_t *, off_t);
65 65
66static const struct fileops memfd_fileops = { 66static const struct fileops memfd_fileops = {
67 .fo_name = "memfd", 67 .fo_name = "memfd",
68 .fo_read = memfd_read, 68 .fo_read = memfd_read,
69 .fo_write = memfd_write, 69 .fo_write = memfd_write,
70 .fo_ioctl = memfd_ioctl, 70 .fo_ioctl = memfd_ioctl,
71 .fo_fcntl = memfd_fcntl, 71 .fo_fcntl = memfd_fcntl,
72 .fo_poll = fnullop_poll, 72 .fo_poll = fnullop_poll,
73 .fo_stat = memfd_stat, 73 .fo_stat = memfd_stat,
74 .fo_close = memfd_close, 74 .fo_close = memfd_close,
75 .fo_kqfilter = fnullop_kqfilter, 75 .fo_kqfilter = fnullop_kqfilter,
76 .fo_restart = fnullop_restart, 76 .fo_restart = fnullop_restart,
77 .fo_mmap = memfd_mmap, 77 .fo_mmap = memfd_mmap,
78 .fo_seek = memfd_seek, 78 .fo_seek = memfd_seek,
79 .fo_fpathconf = (void *)eopnotsupp, 79 .fo_fpathconf = (void *)eopnotsupp,
80 .fo_posix_fadvise = (void *)eopnotsupp, 80 .fo_posix_fadvise = (void *)eopnotsupp,
81 .fo_truncate = memfd_truncate, 81 .fo_truncate = memfd_truncate,
82}; 82};
83 83
84/* 84/*
85 * memfd_create(2). Create a file descriptor associated with anonymous 85 * memfd_create(2). Create a file descriptor associated with anonymous
86 * memory. 86 * memory.
87 */ 87 */
88int 88int
89sys_memfd_create(struct lwp *l, const struct sys_memfd_create_args *uap, 89sys_memfd_create(struct lwp *l, const struct sys_memfd_create_args *uap,
90 register_t *retval) 90 register_t *retval)
91{ 91{
92 /* { 92 /* {
93 syscallarg(const char *) name; 93 syscallarg(const char *) name;
94 syscallarg(unsigned int) flags; 94 syscallarg(unsigned int) flags;
95 } */ 95 } */
96 int error, fd; 96 int error, fd;
97 file_t *fp; 97 file_t *fp;
98 struct memfd *mfd; 98 struct memfd *mfd;
99 struct proc *p = l->l_proc; 99 struct proc *p = l->l_proc;
100 const unsigned int flags = SCARG(uap, flags); 100 const unsigned int flags = SCARG(uap, flags);
101 101
102 if (flags & ~(MFD_CLOEXEC|MFD_ALLOW_SEALING)) 102 if (flags & ~(MFD_CLOEXEC|MFD_ALLOW_SEALING))
103 return EINVAL; 103 return EINVAL;
104 104
105 mfd = kmem_zalloc(sizeof(*mfd), KM_SLEEP); 105 mfd = kmem_zalloc(sizeof(*mfd), KM_SLEEP);
106 mfd->mfd_size = 0; 106 mfd->mfd_size = 0;
107 mfd->mfd_uobj = uao_create(INT64_MAX - PAGE_SIZE, 0); /* same as tmpfs */ 107 mfd->mfd_uobj = uao_create(INT64_MAX - PAGE_SIZE, 0); /* same as tmpfs */
108 108
109 CTASSERT(sizeof(memfd_prefix) < NAME_MAX); /* sanity check */ 109 CTASSERT(sizeof(memfd_prefix) < NAME_MAX); /* sanity check */
110 strcpy(mfd->mfd_name, memfd_prefix); 110 strcpy(mfd->mfd_name, memfd_prefix);
111 error = copyinstr(SCARG(uap, name), 111 error = copyinstr(SCARG(uap, name),
112 &mfd->mfd_name[sizeof(memfd_prefix) - 1], 112 &mfd->mfd_name[sizeof(memfd_prefix) - 1],
113 sizeof(mfd->mfd_name) - sizeof(memfd_prefix), NULL); 113 sizeof(mfd->mfd_name) - sizeof(memfd_prefix), NULL);
114 if (error != 0) 114 if (error != 0)
115 goto leave; 115 goto leave;
116 116
117 getnanotime(&mfd->mfd_btime); 117 getnanotime(&mfd->mfd_btime);
118 118
119 if ((flags & MFD_ALLOW_SEALING) == 0) 119 if ((flags & MFD_ALLOW_SEALING) == 0)
120 mfd->mfd_seals |= F_SEAL_SEAL; 120 mfd->mfd_seals |= F_SEAL_SEAL;
121 121
122 error = fd_allocfile(&fp, &fd); 122 error = fd_allocfile(&fp, &fd);
123 if (error != 0) 123 if (error != 0)
124 goto leave; 124 goto leave;
125 125
126 fp->f_flag = FREAD|FWRITE; 126 fp->f_flag = FREAD|FWRITE;
127 fp->f_type = DTYPE_MEMFD; 127 fp->f_type = DTYPE_MEMFD;
128 fp->f_ops = &memfd_fileops; 128 fp->f_ops = &memfd_fileops;
129 fp->f_memfd = mfd; 129 fp->f_memfd = mfd;
130 fd_set_exclose(l, fd, (flags & MFD_CLOEXEC) != 0); 130 fd_set_exclose(l, fd, (flags & MFD_CLOEXEC) != 0);
131 fd_affix(p, fp, fd); 131 fd_affix(p, fp, fd);
132 132
133 *retval = fd; 133 *retval = fd;
134 return 0; 134 return 0;
135 135
136leave: 136leave:
137 uao_detach(mfd->mfd_uobj); 137 uao_detach(mfd->mfd_uobj);
138 kmem_free(mfd, sizeof(*mfd)); 138 kmem_free(mfd, sizeof(*mfd));
139 return error; 139 return error;
140} 140}
141 141
142static int 142static int
143memfd_read(file_t *fp, off_t *offp, struct uio *uio, kauth_cred_t cred, 143memfd_read(file_t *fp, off_t *offp, struct uio *uio, kauth_cred_t cred,
144 int flags) 144 int flags)
145{ 145{
146 int error; 146 int error;
147 vsize_t todo; 147 vsize_t todo;
148 struct memfd *mfd = fp->f_memfd; 148 struct memfd *mfd = fp->f_memfd;
149 149
150 mutex_enter(&fp->f_lock); 150 mutex_enter(&fp->f_lock);
151 151
152 if (*offp < 0) { 152 if (*offp < 0) {
153 error = EINVAL; 153 error = EINVAL;
154 goto leave; 154 goto leave;
155 } 155 }
156 156
157 /* Trying to read past the end does nothing. */ 157 /* Trying to read past the end does nothing. */
158 if (*offp >= mfd->mfd_size) { 158 if (*offp >= mfd->mfd_size) {
159 error = 0; 159 error = 0;
160 goto leave; 160 goto leave;
161 } 161 }
162 162
163 if (flags & FOF_UPDATE_OFFSET) 163 uio->uio_offset = *offp;
164 *offp = uio->uio_offset; 
165 todo = MIN(uio->uio_resid, mfd->mfd_size - *offp); 164 todo = MIN(uio->uio_resid, mfd->mfd_size - *offp);
166 error = ubc_uiomove(mfd->mfd_uobj, uio, todo, UVM_ADV_SEQUENTIAL, 165 error = ubc_uiomove(mfd->mfd_uobj, uio, todo, UVM_ADV_SEQUENTIAL,
167 UBC_READ|UBC_PARTIALOK); 166 UBC_READ|UBC_PARTIALOK);
168 *offp = uio->uio_offset; 167 if (flags & FOF_UPDATE_OFFSET)
 168 *offp = uio->uio_offset;
169 169
170leave: 170leave:
171 getnanotime(&mfd->mfd_atime); 171 getnanotime(&mfd->mfd_atime);
172 172
173 173
174 mutex_exit(&fp->f_lock); 174 mutex_exit(&fp->f_lock);
175 175
176 return error; 176 return error;
177} 177}
178 178
179static int 179static int
180memfd_write(file_t *fp, off_t *offp, struct uio *uio, kauth_cred_t cred, 180memfd_write(file_t *fp, off_t *offp, struct uio *uio, kauth_cred_t cred,
181 int flags) 181 int flags)
182{ 182{
183 int error; 183 int error;
184 vsize_t todo; 184 vsize_t todo;
185 struct memfd *mfd = fp->f_memfd; 185 struct memfd *mfd = fp->f_memfd;
186 186
187 mutex_enter(&fp->f_lock); 187 mutex_enter(&fp->f_lock);
188 188
189 if (mfd->mfd_seals & F_SEAL_ANY_WRITE) { 189 if (mfd->mfd_seals & F_SEAL_ANY_WRITE) {
190 error = EPERM; 190 error = EPERM;
191 goto leave; 191 goto leave;
192 } 192 }
193 193
194 if (*offp < 0) { 194 if (*offp < 0) {
195 error = EINVAL; 195 error = EINVAL;
196 goto leave; 196 goto leave;
197 } 197 }
198 198
199 uio->uio_offset = *offp; 199 uio->uio_offset = *offp;
200 todo = uio->uio_resid; 200 todo = uio->uio_resid;
201 201
202 if (mfd->mfd_seals & F_SEAL_GROW) { 202 if (mfd->mfd_seals & F_SEAL_GROW) {
203 if (*offp >= mfd->mfd_size) { 203 if (*offp >= mfd->mfd_size) {
204 error = EPERM; 204 error = EPERM;
205 goto leave; 205 goto leave;
206 } 206 }
207 207
208 /* Truncate the write to fit in mfd_size */ 208 /* Truncate the write to fit in mfd_size */
209 if (*offp + uio->uio_resid >= mfd->mfd_size) 209 if (*offp + uio->uio_resid >= mfd->mfd_size)
210 todo = mfd->mfd_size - *offp; 210 todo = mfd->mfd_size - *offp;
211 } else if (*offp + uio->uio_resid >= mfd->mfd_size) { 211 } else if (*offp + uio->uio_resid >= mfd->mfd_size) {
212 /* Grow to accommodate the write request. */ 212 /* Grow to accommodate the write request. */
213 error = do_memfd_truncate(fp, *offp + uio->uio_resid); 213 error = do_memfd_truncate(fp, *offp + uio->uio_resid);
214 if (error != 0) 214 if (error != 0)
215 goto leave; 215 goto leave;
216 } 216 }
217 217
218 error = ubc_uiomove(mfd->mfd_uobj, uio, todo, UVM_ADV_SEQUENTIAL, 218 error = ubc_uiomove(mfd->mfd_uobj, uio, todo, UVM_ADV_SEQUENTIAL,
219 UBC_WRITE|UBC_PARTIALOK); 219 UBC_WRITE|UBC_PARTIALOK);
220 if (flags & FOF_UPDATE_OFFSET) 220 if (flags & FOF_UPDATE_OFFSET)
221 *offp = uio->uio_offset; 221 *offp = uio->uio_offset;
222 222
223 getnanotime(&mfd->mfd_mtime); 223 getnanotime(&mfd->mfd_mtime);
224 224
225leave: 225leave:
226 mutex_exit(&fp->f_lock); 226 mutex_exit(&fp->f_lock);
227 227
228 return error; 228 return error;
229} 229}
230 230
231static int 231static int
232memfd_ioctl(file_t *fp, u_long cmd, void *data) 232memfd_ioctl(file_t *fp, u_long cmd, void *data)
233{ 233{
234 234
235 return EINVAL; 235 return EINVAL;
236} 236}
237 237
238static int 238static int
239memfd_fcntl(file_t *fp, u_int cmd, void *data) 239memfd_fcntl(file_t *fp, u_int cmd, void *data)
240{ 240{
241 struct memfd *mfd = fp->f_memfd; 241 struct memfd *mfd = fp->f_memfd;
242 int error = 0; 242 int error = 0;
243 243
244 switch (cmd) { 244 switch (cmd) {
245 case F_ADD_SEALS: 245 case F_ADD_SEALS:
246 mutex_enter(&fp->f_lock); 246 mutex_enter(&fp->f_lock);
247 247
248 if (mfd->mfd_seals & F_SEAL_SEAL) { 248 if (mfd->mfd_seals & F_SEAL_SEAL) {
249 error = EPERM; 249 error = EPERM;
250 goto leave_add_seals; 250 goto leave_add_seals;
251 } 251 }
252 252
253 if (*(int *)data & ~MFD_KNOWN_SEALS) { 253 if (*(int *)data & ~MFD_KNOWN_SEALS) {
254 error = EINVAL; 254 error = EINVAL;
255 goto leave_add_seals; 255 goto leave_add_seals;
256 } 256 }
257 257
258 /* 258 /*
259 * Can only add F_SEAL_WRITE if there are no currently 259 * Can only add F_SEAL_WRITE if there are no currently
260 * open mmaps. 260 * open mmaps.
261 * 261 *
262 * XXX should only disallow if there are currently 262 * XXX should only disallow if there are currently
263 * open mmaps with PROT_WRITE. 263 * open mmaps with PROT_WRITE.
264 */ 264 */
265 if ((mfd->mfd_seals & F_SEAL_WRITE) == 0 && 265 if ((mfd->mfd_seals & F_SEAL_WRITE) == 0 &&
266 (*(int *)data & F_SEAL_WRITE) != 0 && 266 (*(int *)data & F_SEAL_WRITE) != 0 &&
267 mfd->mfd_uobj->uo_refs > 1) 267 mfd->mfd_uobj->uo_refs > 1)
268 { 268 {
269 error = EBUSY; 269 error = EBUSY;
270 goto leave_add_seals; 270 goto leave_add_seals;
271 } 271 }
272 272
273 mfd->mfd_seals |= *(int *)data; 273 mfd->mfd_seals |= *(int *)data;
274 274
275 leave_add_seals: 275 leave_add_seals:
276 mutex_exit(&fp->f_lock); 276 mutex_exit(&fp->f_lock);
277 return error; 277 return error;
278 278
279 case F_GET_SEALS: 279 case F_GET_SEALS:
280 mutex_enter(&fp->f_lock); 280 mutex_enter(&fp->f_lock);
281 *(int *)data = mfd->mfd_seals; 281 *(int *)data = mfd->mfd_seals;
282 mutex_exit(&fp->f_lock); 282 mutex_exit(&fp->f_lock);
283 return 0; 283 return 0;
284 284
285 default: 285 default:
286 return EINVAL; 286 return EINVAL;
287 } 287 }
288} 288}
289 289
290static int 290static int
291memfd_stat(file_t *fp, struct stat *st) 291memfd_stat(file_t *fp, struct stat *st)
292{ 292{
293 struct memfd *mfd = fp->f_memfd; 293 struct memfd *mfd = fp->f_memfd;
294 294
295 mutex_enter(&fp->f_lock); 295 mutex_enter(&fp->f_lock);
296 296
297 memset(st, 0, sizeof(*st)); 297 memset(st, 0, sizeof(*st));
298 st->st_uid = kauth_cred_geteuid(fp->f_cred); 298 st->st_uid = kauth_cred_geteuid(fp->f_cred);
299 st->st_gid = kauth_cred_getegid(fp->f_cred); 299 st->st_gid = kauth_cred_getegid(fp->f_cred);
300 st->st_size = mfd->mfd_size; 300 st->st_size = mfd->mfd_size;
301 301
302 st->st_mode = S_IREAD; 302 st->st_mode = S_IREAD;
303 if ((mfd->mfd_seals & F_SEAL_ANY_WRITE) == 0) 303 if ((mfd->mfd_seals & F_SEAL_ANY_WRITE) == 0)
304 st->st_mode |= S_IWRITE; 304 st->st_mode |= S_IWRITE;
305 305
306 st->st_birthtimespec = mfd->mfd_btime; 306 st->st_birthtimespec = mfd->mfd_btime;
307 st->st_ctimespec = mfd->mfd_mtime; 307 st->st_ctimespec = mfd->mfd_mtime;
308 st->st_atimespec = mfd->mfd_atime; 308 st->st_atimespec = mfd->mfd_atime;
309 st->st_mtimespec = mfd->mfd_mtime; 309 st->st_mtimespec = mfd->mfd_mtime;
310 310
311 mutex_exit(&fp->f_lock); 311 mutex_exit(&fp->f_lock);
312 312
313 return 0; 313 return 0;
314} 314}
315 315
316static int 316static int
317memfd_close(file_t *fp) 317memfd_close(file_t *fp)
318{ 318{
319 struct memfd *mfd = fp->f_memfd; 319 struct memfd *mfd = fp->f_memfd;
320 320
321 uao_detach(mfd->mfd_uobj); 321 uao_detach(mfd->mfd_uobj);
322 322
323 kmem_free(mfd, sizeof(*mfd)); 323 kmem_free(mfd, sizeof(*mfd));
324 fp->f_memfd = NULL; 324 fp->f_memfd = NULL;
325 325
326 return 0; 326 return 0;
327} 327}
328 328
329static int 329static int
330memfd_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp, 330memfd_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
331 int *advicep, struct uvm_object **uobjp, int *maxprotp) 331 int *advicep, struct uvm_object **uobjp, int *maxprotp)
332{ 332{
333 struct memfd *mfd = fp->f_memfd; 333 struct memfd *mfd = fp->f_memfd;
334 int error = 0; 334 int error = 0;
335 335
336 /* uvm_mmap guarantees page-aligned offset and size. */ 336 /* uvm_mmap guarantees page-aligned offset and size. */
337 KASSERT(*offp == round_page(*offp)); 337 KASSERT(*offp == round_page(*offp));
338 KASSERT(size == round_page(size)); 338 KASSERT(size == round_page(size));
339 KASSERT(size > 0); 339 KASSERT(size > 0);
340 340
341 mutex_enter(&fp->f_lock); 341 mutex_enter(&fp->f_lock);
342 342
343 if (*offp < 0) { 343 if (*offp < 0) {
344 error = EINVAL; 344 error = EINVAL;
345 goto leave; 345 goto leave;
346 } 346 }
347 if (*offp + size > mfd->mfd_size) { 347 if (*offp + size > mfd->mfd_size) {
348 error = EINVAL; 348 error = EINVAL;
349 goto leave; 349 goto leave;
350 } 350 }
351 351
352 if ((mfd->mfd_seals & F_SEAL_ANY_WRITE) && 352 if ((mfd->mfd_seals & F_SEAL_ANY_WRITE) &&
353 (prot & VM_PROT_WRITE) && (*flagsp & MAP_PRIVATE) == 0) { 353 (prot & VM_PROT_WRITE) && (*flagsp & MAP_PRIVATE) == 0) {
354 error = EPERM; 354 error = EPERM;
355 goto leave; 355 goto leave;
356 } 356 }
357 357
358 uao_reference(fp->f_memfd->mfd_uobj); 358 uao_reference(fp->f_memfd->mfd_uobj);
359 *uobjp = fp->f_memfd->mfd_uobj; 359 *uobjp = fp->f_memfd->mfd_uobj;
360 360
361 *maxprotp = prot; 361 *maxprotp = prot;
362 *advicep = UVM_ADV_RANDOM; 362 *advicep = UVM_ADV_RANDOM;
363 363
364leave: 364leave:
365 mutex_exit(&fp->f_lock); 365 mutex_exit(&fp->f_lock);
366 366
367 return error; 367 return error;
368} 368}
369 369
370static int 370static int
371memfd_seek(file_t *fp, off_t delta, int whence, off_t *newoffp, 371memfd_seek(file_t *fp, off_t delta, int whence, off_t *newoffp,
372 int flags) 372 int flags)
373{ 373{
374 off_t newoff; 374 off_t newoff;
375 int error = 0; 375 int error = 0;
376 376
377 mutex_enter(&fp->f_lock); 377 mutex_enter(&fp->f_lock);
378 378
379 switch (whence) { 379 switch (whence) {
380 case SEEK_CUR: 380 case SEEK_CUR:
381 newoff = fp->f_offset + delta; 381 newoff = fp->f_offset + delta;
382 break; 382 break;
383 383
384 case SEEK_END: 384 case SEEK_END:
385 newoff = fp->f_memfd->mfd_size + delta; 385 newoff = fp->f_memfd->mfd_size + delta;
386 break; 386 break;
387 387
388 case SEEK_SET: 388 case SEEK_SET:
389 newoff = delta; 389 newoff = delta;
390 break; 390 break;
391 391
392 default: 392 default:
393 error = EINVAL; 393 error = EINVAL;
394 goto leave; 394 goto leave;
395 } 395 }
396 396
397 if (newoffp) 397 if (newoffp)
398 *newoffp = newoff; 398 *newoffp = newoff;
399 if (flags & FOF_UPDATE_OFFSET) 399 if (flags & FOF_UPDATE_OFFSET)
400 fp->f_offset = newoff; 400 fp->f_offset = newoff;
401 401
402leave: 402leave:
403 mutex_exit(&fp->f_lock); 403 mutex_exit(&fp->f_lock);
404 404
405 return error; 405 return error;
406} 406}
407 407
408static int 408static int
409do_memfd_truncate(file_t *fp, off_t length) 409do_memfd_truncate(file_t *fp, off_t length)
410{ 410{
411 struct memfd *mfd = fp->f_memfd; 411 struct memfd *mfd = fp->f_memfd;
412 voff_t start, end; 412 voff_t start, end;
413 int error = 0; 413 int error = 0;
414 414
415 KASSERT(mutex_owned(&fp->f_lock)); 415 KASSERT(mutex_owned(&fp->f_lock));
416 416
417 if (length < 0) 417 if (length < 0)
418 return EINVAL; 418 return EINVAL;
419 if (length == mfd->mfd_size) 419 if (length == mfd->mfd_size)
420 return 0; 420 return 0;
421 421
422 if ((mfd->mfd_seals & F_SEAL_SHRINK) && length < mfd->mfd_size) 422 if ((mfd->mfd_seals & F_SEAL_SHRINK) && length < mfd->mfd_size)
423 return EPERM; 423 return EPERM;
424 if ((mfd->mfd_seals & F_SEAL_GROW) && length > mfd->mfd_size) 424 if ((mfd->mfd_seals & F_SEAL_GROW) && length > mfd->mfd_size)
425 return EPERM; 425 return EPERM;
426 426
427 if (length > mfd->mfd_size) 427 if (length > mfd->mfd_size)
428 ubc_zerorange(mfd->mfd_uobj, mfd->mfd_size, 428 ubc_zerorange(mfd->mfd_uobj, mfd->mfd_size,
429 length - mfd->mfd_size, 0); 429 length - mfd->mfd_size, 0);
430 else { 430 else {
431 /* length < mfd->mfd_size, so try to get rid of excess pages */ 431 /* length < mfd->mfd_size, so try to get rid of excess pages */
432 start = round_page(length); 432 start = round_page(length);
433 end = round_page(mfd->mfd_size); 433 end = round_page(mfd->mfd_size);
434 434
435 if (start < end) { /* we actually have pages to remove */ 435 if (start < end) { /* we actually have pages to remove */
436 rw_enter(mfd->mfd_uobj->vmobjlock, RW_WRITER); 436 rw_enter(mfd->mfd_uobj->vmobjlock, RW_WRITER);
437 error = (*mfd->mfd_uobj->pgops->pgo_put)(mfd->mfd_uobj, 437 error = (*mfd->mfd_uobj->pgops->pgo_put)(mfd->mfd_uobj,
438 start, end, PGO_FREE); 438 start, end, PGO_FREE);
439 /* pgo_put drops vmobjlock */ 439 /* pgo_put drops vmobjlock */
440 } 440 }
441 } 441 }
442 442
443 getnanotime(&mfd->mfd_mtime); 443 getnanotime(&mfd->mfd_mtime);
444 mfd->mfd_size = length; 444 mfd->mfd_size = length;
445 445
446 return error; 446 return error;
447} 447}
448 448
449static int 449static int
450memfd_truncate(file_t *fp, off_t length) 450memfd_truncate(file_t *fp, off_t length)
451{ 451{
452 int error; 452 int error;
453 453
454 mutex_enter(&fp->f_lock); 454 mutex_enter(&fp->f_lock);
455 error = do_memfd_truncate(fp, length); 455 error = do_memfd_truncate(fp, length);
456 mutex_exit(&fp->f_lock); 456 mutex_exit(&fp->f_lock);
457 return error; 457 return error;
458} 458}