Sun May 17 19:39:15 2020 UTC
PR kern/55268: tmpfs is slow

tmpfs_getpages(): handle the PGO_LOCKED case and implement lazy update of
atime/mtime.


(ad)
diff -r1.55 -r1.56 src/sys/fs/tmpfs/tmpfs.h
diff -r1.111 -r1.112 src/sys/fs/tmpfs/tmpfs_subr.c
diff -r1.138 -r1.139 src/sys/fs/tmpfs/tmpfs_vnops.c
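
The idea behind the change: hot paths such as tmpfs_getpages() no longer read the clock or take a lock just to note that atime/mtime are stale.  They only OR a TMPFS_UPDATE_* bit into the new tn_tflags word, and the bits are turned into real timestamps later, under the new tn_timelock, by the next synchronous update (tmpfs_getattr, tmpfs_putpages, tmpfs_inactive, or any direct tmpfs_update call).  A minimal userspace sketch of that pattern follows; the names (UPD_*, struct node, update_lazily, update_locked) and the C11-atomics/pthread types are simplifications for illustration only, not the kernel code in the diffs below.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>

#define UPD_ATIME 0x01u
#define UPD_MTIME 0x02u
#define UPD_CTIME 0x04u

struct node {
	_Atomic unsigned tflags;       /* pending UPD_* bits, set lock-free */
	pthread_mutex_t  timelock;     /* guards the timespecs below */
	struct timespec  atime, mtime, ctime;
};

/* Cheap path (e.g. every read fault): only record that atime is stale. */
static void
update_lazily(struct node *n, unsigned tflags)
{
	if ((atomic_load_explicit(&n->tflags, memory_order_relaxed) & tflags) != tflags)
		atomic_fetch_or(&n->tflags, tflags);
}

/* Expensive path: fold the pending bits in and read the clock once. */
static void
update_locked(struct node *n, unsigned tflags)
{
	struct timespec now;

	tflags |= atomic_exchange(&n->tflags, 0);
	if (tflags == 0)
		return;
	clock_gettime(CLOCK_REALTIME, &now);
	if (tflags & UPD_ATIME)
		n->atime = now;
	if (tflags & UPD_MTIME)
		n->mtime = now;
	if (tflags & UPD_CTIME)
		n->ctime = now;
}

int
main(void)
{
	struct node n = { .timelock = PTHREAD_MUTEX_INITIALIZER };

	update_lazily(&n, UPD_ATIME);     /* page-in on a read fault */
	update_lazily(&n, UPD_ATIME);     /* another fault: bit already set, no store */

	pthread_mutex_lock(&n.timelock);  /* a stat()-like consumer makes it real */
	update_locked(&n, 0);
	pthread_mutex_unlock(&n.timelock);

	printf("atime %lld.%09ld\n", (long long)n.atime.tv_sec, (long)n.atime.tv_nsec);
	return 0;
}

The fault path pays at most an occasional atomic OR; the clock read and the timestamp stores are deferred to the first consumer that actually needs accurate times.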

cvs diff -r1.55 -r1.56 src/sys/fs/tmpfs/tmpfs.h

--- src/sys/fs/tmpfs/tmpfs.h 2018/04/19 21:50:09 1.55
+++ src/sys/fs/tmpfs/tmpfs.h 2020/05/17 19:39:15 1.56
@@ -1,17 +1,17 @@ @@ -1,17 +1,17 @@
1/* $NetBSD: tmpfs.h,v 1.55 2018/04/19 21:50:09 christos Exp $ */ 1/* $NetBSD: tmpfs.h,v 1.56 2020/05/17 19:39:15 ad Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. 4 * Copyright (c) 2005, 2006, 2007, 2020 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program. 9 * 2005 program.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
@@ -102,30 +102,32 @@ typedef struct tmpfs_node { @@ -102,30 +102,32 @@ typedef struct tmpfs_node {
102 /* Inode identifier and generation number. */ 102 /* Inode identifier and generation number. */
103 ino_t tn_id; 103 ino_t tn_id;
104 uint32_t tn_gen; 104 uint32_t tn_gen;
105 105
106 /* The inode size. */ 106 /* The inode size. */
107 off_t tn_size; 107 off_t tn_size;
108 108
109 /* Generic node attributes. */ 109 /* Generic node attributes. */
110 uid_t tn_uid; 110 uid_t tn_uid;
111 gid_t tn_gid; 111 gid_t tn_gid;
112 mode_t tn_mode; 112 mode_t tn_mode;
113 int tn_flags; 113 int tn_flags;
114 nlink_t tn_links; 114 nlink_t tn_links;
 115 unsigned tn_tflags;
115 struct timespec tn_atime; 116 struct timespec tn_atime;
116 struct timespec tn_mtime; 117 struct timespec tn_mtime;
117 struct timespec tn_ctime; 118 struct timespec tn_ctime;
118 struct timespec tn_birthtime; 119 struct timespec tn_birthtime;
 120 kmutex_t tn_timelock;
119 121
120 /* Head of byte-level lock list (used by tmpfs_advlock). */ 122 /* Head of byte-level lock list (used by tmpfs_advlock). */
121 struct lockf * tn_lockf; 123 struct lockf * tn_lockf;
122 124
123 union { 125 union {
124 /* Type case: VBLK or VCHR. */ 126 /* Type case: VBLK or VCHR. */
125 struct { 127 struct {
126 dev_t tn_rdev; 128 dev_t tn_rdev;
127 } tn_dev; 129 } tn_dev;
128 130
129 /* Type case: VDIR. */ 131 /* Type case: VDIR. */
130 struct { 132 struct {
131 /* Parent directory (root inode points to itself). */ 133 /* Parent directory (root inode points to itself). */
@@ -264,26 +266,28 @@ uint32_t tmpfs_dir_getseq(tmpfs_node_t * @@ -264,26 +266,28 @@ uint32_t tmpfs_dir_getseq(tmpfs_node_t *
264tmpfs_dirent_t *tmpfs_dir_lookupbyseq(tmpfs_node_t *, off_t); 266tmpfs_dirent_t *tmpfs_dir_lookupbyseq(tmpfs_node_t *, off_t);
265int tmpfs_dir_getdents(tmpfs_node_t *, struct uio *, off_t *); 267int tmpfs_dir_getdents(tmpfs_node_t *, struct uio *, off_t *);
266 268
267int tmpfs_reg_resize(vnode_t *, off_t); 269int tmpfs_reg_resize(vnode_t *, off_t);
268 270
269int tmpfs_chflags(vnode_t *, int, kauth_cred_t, lwp_t *); 271int tmpfs_chflags(vnode_t *, int, kauth_cred_t, lwp_t *);
270int tmpfs_chmod(vnode_t *, mode_t, kauth_cred_t, lwp_t *); 272int tmpfs_chmod(vnode_t *, mode_t, kauth_cred_t, lwp_t *);
271int tmpfs_chown(vnode_t *, uid_t, gid_t, kauth_cred_t, lwp_t *); 273int tmpfs_chown(vnode_t *, uid_t, gid_t, kauth_cred_t, lwp_t *);
272int tmpfs_chsize(vnode_t *, u_quad_t, kauth_cred_t, lwp_t *); 274int tmpfs_chsize(vnode_t *, u_quad_t, kauth_cred_t, lwp_t *);
273int tmpfs_chtimes(vnode_t *, const struct timespec *, 275int tmpfs_chtimes(vnode_t *, const struct timespec *,
274 const struct timespec *, const struct timespec *, int, 276 const struct timespec *, const struct timespec *, int,
275 kauth_cred_t, lwp_t *); 277 kauth_cred_t, lwp_t *);
276void tmpfs_update(vnode_t *, unsigned); 278void tmpfs_update(vnode_t *, unsigned);
 279void tmpfs_update_locked(vnode_t *, unsigned);
 280void tmpfs_update_lazily(vnode_t *, unsigned);
277 281
278/* 282/*
279 * Prototypes for tmpfs_mem.c. 283 * Prototypes for tmpfs_mem.c.
280 */ 284 */
281 285
282void tmpfs_mntmem_init(tmpfs_mount_t *, uint64_t); 286void tmpfs_mntmem_init(tmpfs_mount_t *, uint64_t);
283void tmpfs_mntmem_destroy(tmpfs_mount_t *); 287void tmpfs_mntmem_destroy(tmpfs_mount_t *);
284int tmpfs_mntmem_set(tmpfs_mount_t *, uint64_t); 288int tmpfs_mntmem_set(tmpfs_mount_t *, uint64_t);
285 289
286size_t tmpfs_mem_info(bool); 290size_t tmpfs_mem_info(bool);
287uint64_t tmpfs_bytes_max(tmpfs_mount_t *); 291uint64_t tmpfs_bytes_max(tmpfs_mount_t *);
288size_t tmpfs_pages_avail(tmpfs_mount_t *); 292size_t tmpfs_pages_avail(tmpfs_mount_t *);
289bool tmpfs_mem_incr(tmpfs_mount_t *, size_t); 293bool tmpfs_mem_incr(tmpfs_mount_t *, size_t);
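
The header now carries the pending-flag word (tn_tflags) and a leaf mutex (tn_timelock) next to the timestamps, and splits the update interface three ways: tmpfs_update() takes the lock and applies flags, tmpfs_update_locked() does the same for a caller that already holds tn_timelock, and tmpfs_update_lazily() only records flags for later.  Note in the tmpfs_subr.c diff below how tmpfs_update() first peeks at tn_tflags with a relaxed atomic load and returns without touching tn_timelock when there is nothing to apply.  A small standalone sketch of that lock-avoidance pre-check, with made-up names and a plain pthread mutex standing in for the kernel primitives:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
	_Atomic bool    dirty;    /* set cheaply by producers */
	pthread_mutex_t lock;     /* guards the flush work below */
	unsigned long   flushes;
};

static void
flush_if_dirty(struct obj *o)
{
	/* Fast path: nothing pending, skip the mutex entirely. */
	if (!atomic_load_explicit(&o->dirty, memory_order_relaxed))
		return;

	pthread_mutex_lock(&o->lock);
	if (atomic_exchange(&o->dirty, false))  /* re-read under the lock */
		o->flushes++;
	pthread_mutex_unlock(&o->lock);
}

int
main(void)
{
	struct obj o = { .lock = PTHREAD_MUTEX_INITIALIZER };

	flush_if_dirty(&o);              /* clean: no lock taken */
	atomic_store(&o.dirty, true);    /* a producer marks work pending */
	flush_if_dirty(&o);              /* dirty: lock, flush, clear */
	printf("flushes: %lu\n", o.flushes);
	return 0;
}

The relaxed pre-check can miss a flag set concurrently, which is harmless with these semantics: the bit simply stays pending and is picked up by the next flush.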

cvs diff -r1.111 -r1.112 src/sys/fs/tmpfs/tmpfs_subr.c

--- src/sys/fs/tmpfs/tmpfs_subr.c 2020/05/16 18:31:49 1.111
+++ src/sys/fs/tmpfs/tmpfs_subr.c 2020/05/17 19:39:15 1.112
@@ -1,17 +1,17 @@ @@ -1,17 +1,17 @@
1/* $NetBSD: tmpfs_subr.c,v 1.111 2020/05/16 18:31:49 christos Exp $ */ 1/* $NetBSD: tmpfs_subr.c,v 1.112 2020/05/17 19:39:15 ad Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2005-2013 The NetBSD Foundation, Inc. 4 * Copyright (c) 2005-2020 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program, and by Mindaugas Rasiukevicius. 9 * 2005 program, and by Mindaugas Rasiukevicius.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
@@ -63,27 +63,27 @@ @@ -63,27 +63,27 @@
63 * 63 *
64 * If an inode has references within the file system (tn_links > 0) and 64 * If an inode has references within the file system (tn_links > 0) and
65 * its inactive vnode gets reclaimed/recycled - then the association is 65 * its inactive vnode gets reclaimed/recycled - then the association is
66 * broken in tmpfs_reclaim(). In such case, an inode will always pass 66 * broken in tmpfs_reclaim(). In such case, an inode will always pass
67 * tmpfs_lookup() and thus vcache_get() to associate a new vnode. 67 * tmpfs_lookup() and thus vcache_get() to associate a new vnode.
68 * 68 *
69 * Lock order 69 * Lock order
70 * 70 *
71 * vnode_t::v_vlock -> 71 * vnode_t::v_vlock ->
72 * vnode_t::v_interlock 72 * vnode_t::v_interlock
73 */ 73 */
74 74
75#include <sys/cdefs.h> 75#include <sys/cdefs.h>
76__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.111 2020/05/16 18:31:49 christos Exp $"); 76__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.112 2020/05/17 19:39:15 ad Exp $");
77 77
78#include <sys/param.h> 78#include <sys/param.h>
79#include <sys/cprng.h> 79#include <sys/cprng.h>
80#include <sys/dirent.h> 80#include <sys/dirent.h>
81#include <sys/event.h> 81#include <sys/event.h>
82#include <sys/kmem.h> 82#include <sys/kmem.h>
83#include <sys/mount.h> 83#include <sys/mount.h>
84#include <sys/namei.h> 84#include <sys/namei.h>
85#include <sys/time.h> 85#include <sys/time.h>
86#include <sys/stat.h> 86#include <sys/stat.h>
87#include <sys/systm.h> 87#include <sys/systm.h>
88#include <sys/vnode.h> 88#include <sys/vnode.h>
89#include <sys/kauth.h> 89#include <sys/kauth.h>
@@ -220,30 +220,32 @@ tmpfs_newvnode(struct mount *mp, struct  @@ -220,30 +220,32 @@ tmpfs_newvnode(struct mount *mp, struct
220 * tmpfs_inactive() uses generation zero to mark dead nodes. 220 * tmpfs_inactive() uses generation zero to mark dead nodes.
221 */ 221 */
222 do { 222 do {
223 node->tn_gen = TMPFS_NODE_GEN_MASK & cprng_fast32(); 223 node->tn_gen = TMPFS_NODE_GEN_MASK & cprng_fast32();
224 } while (node->tn_gen == 0); 224 } while (node->tn_gen == 0);
225 225
226 /* Generic initialization. */ 226 /* Generic initialization. */
227 KASSERT((int)vap->va_type != VNOVAL); 227 KASSERT((int)vap->va_type != VNOVAL);
228 node->tn_type = vap->va_type; 228 node->tn_type = vap->va_type;
229 node->tn_size = 0; 229 node->tn_size = 0;
230 node->tn_flags = 0; 230 node->tn_flags = 0;
231 node->tn_lockf = NULL; 231 node->tn_lockf = NULL;
232 232
 233 node->tn_tflags = 0;
233 vfs_timestamp(&node->tn_atime); 234 vfs_timestamp(&node->tn_atime);
234 node->tn_birthtime = node->tn_atime; 235 node->tn_birthtime = node->tn_atime;
235 node->tn_ctime = node->tn_atime; 236 node->tn_ctime = node->tn_atime;
236 node->tn_mtime = node->tn_atime; 237 node->tn_mtime = node->tn_atime;
 238 mutex_init(&node->tn_timelock, MUTEX_DEFAULT, IPL_NONE);
237 239
238 if (dvp == NULL) { 240 if (dvp == NULL) {
239 KASSERT(vap->va_uid != VNOVAL && vap->va_gid != VNOVAL); 241 KASSERT(vap->va_uid != VNOVAL && vap->va_gid != VNOVAL);
240 node->tn_uid = vap->va_uid; 242 node->tn_uid = vap->va_uid;
241 node->tn_gid = vap->va_gid; 243 node->tn_gid = vap->va_gid;
242 vp->v_vflag |= VV_ROOT; 244 vp->v_vflag |= VV_ROOT;
243 } else { 245 } else {
244 KASSERT(dnode != NULL); 246 KASSERT(dnode != NULL);
245 node->tn_uid = kauth_cred_geteuid(cred); 247 node->tn_uid = kauth_cred_geteuid(cred);
246 node->tn_gid = dnode->tn_gid; 248 node->tn_gid = dnode->tn_gid;
247 } 249 }
248 KASSERT(vap->va_mode != VNOVAL); 250 KASSERT(vap->va_mode != VNOVAL);
249 node->tn_mode = vap->va_mode; 251 node->tn_mode = vap->va_mode;
@@ -340,26 +342,27 @@ tmpfs_free_node(tmpfs_mount_t *tmp, tmpf @@ -340,26 +342,27 @@ tmpfs_free_node(tmpfs_mount_t *tmp, tmpf
340 case VDIR: 342 case VDIR:
341 KASSERT(node->tn_size == 0); 343 KASSERT(node->tn_size == 0);
342 KASSERT(node->tn_spec.tn_dir.tn_seq_arena == NULL); 344 KASSERT(node->tn_spec.tn_dir.tn_seq_arena == NULL);
343 KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir)); 345 KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir));
344 KASSERT(node->tn_spec.tn_dir.tn_parent == NULL || 346 KASSERT(node->tn_spec.tn_dir.tn_parent == NULL ||
345 node == tmp->tm_root); 347 node == tmp->tm_root);
346 break; 348 break;
347 default: 349 default:
348 break; 350 break;
349 } 351 }
350 KASSERT(node->tn_vnode == NULL); 352 KASSERT(node->tn_vnode == NULL);
351 KASSERT(node->tn_links == 0); 353 KASSERT(node->tn_links == 0);
352 354
 355 mutex_destroy(&node->tn_timelock);
353 tmpfs_node_put(tmp, node); 356 tmpfs_node_put(tmp, node);
354} 357}
355 358
356/* 359/*
357 * tmpfs_construct_node: allocate a new file of specified type and adds it 360 * tmpfs_construct_node: allocate a new file of specified type and adds it
358 * into the parent directory. 361 * into the parent directory.
359 * 362 *
360 * => Credentials of the caller are used. 363 * => Credentials of the caller are used.
361 */ 364 */
362int 365int
363tmpfs_construct_node(vnode_t *dvp, vnode_t **vpp, struct vattr *vap, 366tmpfs_construct_node(vnode_t *dvp, vnode_t **vpp, struct vattr *vap,
364 struct componentname *cnp, char *target) 367 struct componentname *cnp, char *target)
365{ 368{
@@ -1157,50 +1160,89 @@ tmpfs_chtimes(vnode_t *vp, const struct  @@ -1157,50 +1160,89 @@ tmpfs_chtimes(vnode_t *vp, const struct
1157 /* Disallow this operation if the file system is mounted read-only. */ 1160 /* Disallow this operation if the file system is mounted read-only. */
1158 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1161 if (vp->v_mount->mnt_flag & MNT_RDONLY)
1159 return EROFS; 1162 return EROFS;
1160 1163
1161 /* Immutable or append-only files cannot be modified, either. */ 1164 /* Immutable or append-only files cannot be modified, either. */
1162 if (node->tn_flags & (IMMUTABLE | APPEND)) 1165 if (node->tn_flags & (IMMUTABLE | APPEND))
1163 return EPERM; 1166 return EPERM;
1164 1167
1165 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, NULL, 1168 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, NULL,
1166 genfs_can_chtimes(vp, cred, node->tn_uid, vaflags)); 1169 genfs_can_chtimes(vp, cred, node->tn_uid, vaflags));
1167 if (error) 1170 if (error)
1168 return error; 1171 return error;
1169 1172
 1173 mutex_enter(&node->tn_timelock);
1170 if (atime->tv_sec != VNOVAL) { 1174 if (atime->tv_sec != VNOVAL) {
 1175 atomic_and_uint(&node->tn_tflags, ~TMPFS_UPDATE_ATIME);
1171 node->tn_atime = *atime; 1176 node->tn_atime = *atime;
1172 } 1177 }
1173 if (mtime->tv_sec != VNOVAL) { 1178 if (mtime->tv_sec != VNOVAL) {
 1179 atomic_and_uint(&node->tn_tflags, ~TMPFS_UPDATE_MTIME);
1174 node->tn_mtime = *mtime; 1180 node->tn_mtime = *mtime;
1175 } 1181 }
1176 if (btime->tv_sec != VNOVAL) { 1182 if (btime->tv_sec != VNOVAL) {
1177 node->tn_birthtime = *btime; 1183 node->tn_birthtime = *btime;
1178 } 1184 }
 1185 mutex_exit(&node->tn_timelock);
1179 VN_KNOTE(vp, NOTE_ATTRIB); 1186 VN_KNOTE(vp, NOTE_ATTRIB);
1180 return 0; 1187 return 0;
1181} 1188}
1182 1189
1183/* 1190/*
1184 * tmpfs_update: update the timestamps as indicated by the flags. 1191 * tmpfs_update_locked: update the timestamps as indicated by the flags.
1185 */ 1192 */
1186void 1193void
1187tmpfs_update(vnode_t *vp, unsigned tflags) 1194tmpfs_update_locked(vnode_t *vp, unsigned tflags)
1188{ 1195{
1189 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 1196 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1190 struct timespec nowtm; 1197 struct timespec nowtm;
1191 1198
1192 if (tflags == 0) { 1199 KASSERT(mutex_owned(&node->tn_timelock));
 1200
 1201 if ((tflags |= atomic_swap_uint(&node->tn_tflags, 0)) == 0) {
1193 return; 1202 return;
1194 } 1203 }
1195 vfs_timestamp(&nowtm); 1204 vfs_timestamp(&nowtm);
1196 1205
1197 if (tflags & TMPFS_UPDATE_ATIME) { 1206 if (tflags & TMPFS_UPDATE_ATIME) {
1198 node->tn_atime = nowtm; 1207 node->tn_atime = nowtm;
1199 } 1208 }
1200 if (tflags & TMPFS_UPDATE_MTIME) { 1209 if (tflags & TMPFS_UPDATE_MTIME) {
1201 node->tn_mtime = nowtm; 1210 node->tn_mtime = nowtm;
1202 } 1211 }
1203 if (tflags & TMPFS_UPDATE_CTIME) { 1212 if (tflags & TMPFS_UPDATE_CTIME) {
1204 node->tn_ctime = nowtm; 1213 node->tn_ctime = nowtm;
1205 } 1214 }
1206} 1215}
 1216
 1217/*
 1218 * tmpfs_update: update the timestamps as indicated by the flags.
 1219 */
 1220void
 1221tmpfs_update(vnode_t *vp, unsigned tflags)
 1222{
 1223 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
 1224
 1225 if ((tflags | atomic_load_relaxed(&node->tn_tflags)) == 0) {
 1226 return;
 1227 }
 1228
 1229 mutex_enter(&node->tn_timelock);
 1230 tmpfs_update_locked(vp, tflags);
 1231 mutex_exit(&node->tn_timelock);
 1232}
 1233
 1234/*
 1235 * tmpfs_update_lazily: schedule a deferred timestamp update.
 1236 */
 1237void
 1238tmpfs_update_lazily(vnode_t *vp, unsigned tflags)
 1239{
 1240 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
 1241 unsigned cur;
 1242
 1243 cur = atomic_load_relaxed(&node->tn_tflags);
 1244 if ((cur & tflags) != tflags) {
 1245 atomic_or_uint(&node->tn_tflags, tflags);
 1246 return;
 1247 }
 1248}
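
The tmpfs_vnops.c diff below is the consumer side: tmpfs_getattr(), tmpfs_putpages() and tmpfs_inactive() flush pending bits via tmpfs_update()/tmpfs_update_locked(), while tmpfs_getpages() queues them with tmpfs_update_lazily() and, crucially for the PR, no longer rejects PGO_LOCKED requests out of hand.  With PGO_LOCKED the fault handler already holds the object lock and the pager must answer without sleeping; always returning EBUSY, as the old code did, pushed every fault onto the slower unlocked retry.  A rough standalone analogue of that non-blocking-fast-path/blocking-fallback shape, using a hypothetical cache_get() and a pthread mutex rather than anything from the UVM interface:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct cache {
	pthread_mutex_t lock;
	int             value;
};

/*
 * nowait != 0 means "answer only if it can be done without blocking";
 * otherwise report EBUSY so the caller can retry where sleeping is allowed.
 */
static int
cache_get(struct cache *c, int nowait, int *out)
{
	if (nowait) {
		if (pthread_mutex_trylock(&c->lock) != 0)
			return EBUSY;
	} else {
		pthread_mutex_lock(&c->lock);
	}
	*out = c->value;
	pthread_mutex_unlock(&c->lock);
	return 0;
}

int
main(void)
{
	struct cache c = { .lock = PTHREAD_MUTEX_INITIALIZER, .value = 42 };
	int v;

	if (cache_get(&c, 1, &v) == EBUSY)  /* fast path could not answer... */
		cache_get(&c, 0, &v);       /* ...fall back to the blocking path */
	printf("%d\n", v);
	return 0;
}

Only the shape carries over: fail fast with EBUSY when the request cannot be completed without waiting, and let the caller retry from a context that may sleep.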

cvs diff -r1.138 -r1.139 src/sys/fs/tmpfs/tmpfs_vnops.c

--- src/sys/fs/tmpfs/tmpfs_vnops.c 2020/05/16 18:31:49 1.138
+++ src/sys/fs/tmpfs/tmpfs_vnops.c 2020/05/17 19:39:15 1.139
@@ -1,17 +1,17 @@ @@ -1,17 +1,17 @@
1/* $NetBSD: tmpfs_vnops.c,v 1.138 2020/05/16 18:31:49 christos Exp $ */ 1/* $NetBSD: tmpfs_vnops.c,v 1.139 2020/05/17 19:39:15 ad Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. 4 * Copyright (c) 2005, 2006, 2007, 2020 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program. 9 * 2005 program.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
@@ -25,27 +25,27 @@ @@ -25,27 +25,27 @@
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE. 30 * POSSIBILITY OF SUCH DAMAGE.
31 */ 31 */
32 32
33/* 33/*
34 * tmpfs vnode interface. 34 * tmpfs vnode interface.
35 */ 35 */
36 36
37#include <sys/cdefs.h> 37#include <sys/cdefs.h>
38__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.138 2020/05/16 18:31:49 christos Exp $"); 38__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.139 2020/05/17 19:39:15 ad Exp $");
39 39
40#include <sys/param.h> 40#include <sys/param.h>
41#include <sys/dirent.h> 41#include <sys/dirent.h>
42#include <sys/fcntl.h> 42#include <sys/fcntl.h>
43#include <sys/event.h> 43#include <sys/event.h>
44#include <sys/malloc.h> 44#include <sys/malloc.h>
45#include <sys/namei.h> 45#include <sys/namei.h>
46#include <sys/stat.h> 46#include <sys/stat.h>
47#include <sys/uio.h> 47#include <sys/uio.h>
48#include <sys/unistd.h> 48#include <sys/unistd.h>
49#include <sys/vnode.h> 49#include <sys/vnode.h>
50#include <sys/lockf.h> 50#include <sys/lockf.h>
51#include <sys/kauth.h> 51#include <sys/kauth.h>
@@ -428,39 +428,43 @@ tmpfs_getattr(void *v) @@ -428,39 +428,43 @@ tmpfs_getattr(void *v)
428 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); 428 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
429 429
430 vattr_null(vap); 430 vattr_null(vap);
431 431
432 vap->va_type = vp->v_type; 432 vap->va_type = vp->v_type;
433 vap->va_mode = node->tn_mode; 433 vap->va_mode = node->tn_mode;
434 vap->va_nlink = node->tn_links; 434 vap->va_nlink = node->tn_links;
435 vap->va_uid = node->tn_uid; 435 vap->va_uid = node->tn_uid;
436 vap->va_gid = node->tn_gid; 436 vap->va_gid = node->tn_gid;
437 vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0]; 437 vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
438 vap->va_fileid = node->tn_id; 438 vap->va_fileid = node->tn_id;
439 vap->va_size = node->tn_size; 439 vap->va_size = node->tn_size;
440 vap->va_blocksize = PAGE_SIZE; 440 vap->va_blocksize = PAGE_SIZE;
441 vap->va_atime = node->tn_atime; 
442 vap->va_mtime = node->tn_mtime; 
443 vap->va_ctime = node->tn_ctime; 
444 vap->va_birthtime = node->tn_birthtime; 
445 vap->va_gen = TMPFS_NODE_GEN(node); 441 vap->va_gen = TMPFS_NODE_GEN(node);
446 vap->va_flags = node->tn_flags; 442 vap->va_flags = node->tn_flags;
447 vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ? 443 vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
448 node->tn_spec.tn_dev.tn_rdev : VNOVAL; 444 node->tn_spec.tn_dev.tn_rdev : VNOVAL;
449 vap->va_bytes = round_page(node->tn_size); 445 vap->va_bytes = round_page(node->tn_size);
450 vap->va_filerev = VNOVAL; 446 vap->va_filerev = VNOVAL;
451 vap->va_vaflags = 0; 447 vap->va_vaflags = 0;
452 vap->va_spare = VNOVAL; /* XXX */ 448 vap->va_spare = VNOVAL; /* XXX */
453 449
 450 mutex_enter(&node->tn_timelock);
 451 tmpfs_update_locked(vp, 0);
 452 vap->va_atime = node->tn_atime;
 453 vap->va_mtime = node->tn_mtime;
 454 vap->va_ctime = node->tn_ctime;
 455 vap->va_birthtime = node->tn_birthtime;
 456 mutex_exit(&node->tn_timelock);
 457
454 return 0; 458 return 0;
455} 459}
456 460
457int 461int
458tmpfs_setattr(void *v) 462tmpfs_setattr(void *v)
459{ 463{
460 struct vop_setattr_args /* { 464 struct vop_setattr_args /* {
461 struct vnode *a_vp; 465 struct vnode *a_vp;
462 struct vattr *a_vap; 466 struct vattr *a_vap;
463 kauth_cred_t a_cred; 467 kauth_cred_t a_cred;
464 } */ *ap = v; 468 } */ *ap = v;
465 vnode_t *vp = ap->a_vp; 469 vnode_t *vp = ap->a_vp;
466 struct vattr *vap = ap->a_vap; 470 struct vattr *vap = ap->a_vap;
@@ -651,27 +655,27 @@ tmpfs_fsync(void *v) @@ -651,27 +655,27 @@ tmpfs_fsync(void *v)
651 * => We unlock and drop the reference on both. 655 * => We unlock and drop the reference on both.
652 */ 656 */
653int 657int
654tmpfs_remove(void *v) 658tmpfs_remove(void *v)
655{ 659{
656 struct vop_remove_v2_args /* { 660 struct vop_remove_v2_args /* {
657 struct vnode *a_dvp; 661 struct vnode *a_dvp;
658 struct vnode *a_vp; 662 struct vnode *a_vp;
659 struct componentname *a_cnp; 663 struct componentname *a_cnp;
660 } */ *ap = v; 664 } */ *ap = v;
661 vnode_t *dvp = ap->a_dvp, *vp = ap->a_vp; 665 vnode_t *dvp = ap->a_dvp, *vp = ap->a_vp;
662 tmpfs_node_t *dnode, *node; 666 tmpfs_node_t *dnode, *node;
663 tmpfs_dirent_t *de; 667 tmpfs_dirent_t *de;
664 int error; 668 int error, tflags;
665 669
666 KASSERT(VOP_ISLOCKED(dvp)); 670 KASSERT(VOP_ISLOCKED(dvp));
667 KASSERT(VOP_ISLOCKED(vp)); 671 KASSERT(VOP_ISLOCKED(vp));
668 672
669 if (vp->v_type == VDIR) { 673 if (vp->v_type == VDIR) {
670 error = EPERM; 674 error = EPERM;
671 goto out; 675 goto out;
672 } 676 }
673 dnode = VP_TO_TMPFS_DIR(dvp); 677 dnode = VP_TO_TMPFS_DIR(dvp);
674 node = VP_TO_TMPFS_NODE(vp); 678 node = VP_TO_TMPFS_NODE(vp);
675 679
676 /* 680 /*
677 * Files marked as immutable or append-only cannot be deleted. 681 * Files marked as immutable or append-only cannot be deleted.
@@ -700,31 +704,32 @@ tmpfs_remove(void *v) @@ -700,31 +704,32 @@ tmpfs_remove(void *v)
700 * destroy it or replace with a whiteout. 704 * destroy it or replace with a whiteout.
701 * 705 *
702 * Note: the inode referred by it will not be destroyed until the 706 * Note: the inode referred by it will not be destroyed until the
703 * vnode is reclaimed/recycled. 707 * vnode is reclaimed/recycled.
704 */ 708 */
705 709
706 tmpfs_dir_detach(dnode, de); 710 tmpfs_dir_detach(dnode, de);
707 711
708 if (ap->a_cnp->cn_flags & DOWHITEOUT) 712 if (ap->a_cnp->cn_flags & DOWHITEOUT)
709 tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT); 713 tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
710 else 714 else
711 tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de); 715 tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
712 716
 717 tflags = TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME;
713 if (node->tn_links > 0) { 718 if (node->tn_links > 0) {
714 /* We removed a hard link. */ 719 /* We removed a hard link. */
715 tmpfs_update(vp, TMPFS_UPDATE_CTIME); 720 tflags |= TMPFS_UPDATE_CTIME;
716 } 721 }
717 tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME); 722 tmpfs_update(dvp, tflags);
718 error = 0; 723 error = 0;
719out: 724out:
720 /* Drop the reference and unlock the node. */ 725 /* Drop the reference and unlock the node. */
721 if (dvp == vp) { 726 if (dvp == vp) {
722 vrele(vp); 727 vrele(vp);
723 } else { 728 } else {
724 vput(vp); 729 vput(vp);
725 } 730 }
726 return error; 731 return error;
727} 732}
728 733
729/* 734/*
730 * tmpfs_link: create a hard link. 735 * tmpfs_link: create a hard link.
@@ -1052,26 +1057,27 @@ tmpfs_inactive(void *v) @@ -1052,26 +1057,27 @@ tmpfs_inactive(void *v)
1052 */ 1057 */
1053 atomic_and_32(&node->tn_gen, ~TMPFS_NODE_GEN_MASK); 1058 atomic_and_32(&node->tn_gen, ~TMPFS_NODE_GEN_MASK);
1054 1059
1055 /* 1060 /*
1056 * If the file has been deleted, truncate it, otherwise VFS 1061 * If the file has been deleted, truncate it, otherwise VFS
1057 * will quite rightly try to write back dirty data, which in 1062 * will quite rightly try to write back dirty data, which in
1058 * the case of tmpfs/UAO means needless page deactivations. 1063 * the case of tmpfs/UAO means needless page deactivations.
1059 */ 1064 */
1060 if (vp->v_type == VREG) { 1065 if (vp->v_type == VREG) {
1061 error = tmpfs_reg_resize(vp, 0); 1066 error = tmpfs_reg_resize(vp, 0);
1062 } 1067 }
1063 *ap->a_recycle = true; 1068 *ap->a_recycle = true;
1064 } else { 1069 } else {
 1070 tmpfs_update(vp, 0);
1065 *ap->a_recycle = false; 1071 *ap->a_recycle = false;
1066 } 1072 }
1067 1073
1068 return error; 1074 return error;
1069} 1075}
1070 1076
1071int 1077int
1072tmpfs_reclaim(void *v) 1078tmpfs_reclaim(void *v)
1073{ 1079{
1074 struct vop_reclaim_v2_args /* { 1080 struct vop_reclaim_v2_args /* {
1075 struct vnode *a_vp; 1081 struct vnode *a_vp;
1076 } */ *ap = v; 1082 } */ *ap = v;
1077 vnode_t *vp = ap->a_vp; 1083 vnode_t *vp = ap->a_vp;
@@ -1160,91 +1166,101 @@ tmpfs_getpages(void *v) @@ -1160,91 +1166,101 @@ tmpfs_getpages(void *v)
1160 int *a_count; 1166 int *a_count;
1161 int a_centeridx; 1167 int a_centeridx;
1162 vm_prot_t a_access_type; 1168 vm_prot_t a_access_type;
1163 int a_advice; 1169 int a_advice;
1164 int a_flags; 1170 int a_flags;
1165 } */ * const ap = v; 1171 } */ * const ap = v;
1166 vnode_t *vp = ap->a_vp; 1172 vnode_t *vp = ap->a_vp;
1167 const voff_t offset = ap->a_offset; 1173 const voff_t offset = ap->a_offset;
1168 struct vm_page **pgs = ap->a_m; 1174 struct vm_page **pgs = ap->a_m;
1169 const int centeridx = ap->a_centeridx; 1175 const int centeridx = ap->a_centeridx;
1170 const vm_prot_t access_type = ap->a_access_type; 1176 const vm_prot_t access_type = ap->a_access_type;
1171 const int advice = ap->a_advice; 1177 const int advice = ap->a_advice;
1172 const int flags = ap->a_flags; 1178 const int flags = ap->a_flags;
1173 int error, npages = *ap->a_count; 1179 int error, iflag, npages = *ap->a_count;
1174 tmpfs_node_t *node; 1180 tmpfs_node_t *node;
1175 struct uvm_object *uobj; 1181 struct uvm_object *uobj;
1176 1182
1177 KASSERT(vp->v_type == VREG); 1183 KASSERT(vp->v_type == VREG);
1178 KASSERT(rw_lock_held(vp->v_uobj.vmobjlock)); 1184 KASSERT(rw_lock_held(vp->v_uobj.vmobjlock));
1179 1185
1180 /* 1186 /*
1181 * Currently, PGO_PASTEOF is not supported. 1187 * Currently, PGO_PASTEOF is not supported.
1182 */ 1188 */
1183 if (vp->v_size <= offset + (centeridx << PAGE_SHIFT)) { 1189 if (vp->v_size <= offset + (centeridx << PAGE_SHIFT)) {
1184 if ((flags & PGO_LOCKED) == 0) 1190 if ((flags & PGO_LOCKED) == 0)
1185 rw_exit(vp->v_uobj.vmobjlock); 1191 rw_exit(vp->v_uobj.vmobjlock);
1186 return EINVAL; 1192 return EINVAL;
1187 } 1193 }
1188 1194
1189 if (vp->v_size < offset + (npages << PAGE_SHIFT)) { 1195 if (vp->v_size < offset + (npages << PAGE_SHIFT)) {
1190 npages = (round_page(vp->v_size) - offset) >> PAGE_SHIFT; 1196 npages = (round_page(vp->v_size) - offset) >> PAGE_SHIFT;
1191 } 1197 }
1192 1198
1193 if ((flags & PGO_LOCKED) != 0) 1199 /*
1194 return EBUSY; 1200 * Check for reclaimed vnode. v_interlock is not held here, but
1195 1201 * VI_DEADCHECK is set with vmobjlock held.
1196 mutex_enter(vp->v_interlock); 1202 */
1197 error = vdead_check(vp, VDEAD_NOWAIT); 1203 iflag = atomic_load_relaxed(&vp->v_iflag);
1198 mutex_exit(vp->v_interlock); 1204 if (__predict_false((iflag & VI_DEADCHECK) != 0)) {
1199 if (error != 0) 1205 mutex_enter(vp->v_interlock);
1200 return ENOENT; 1206 error = vdead_check(vp, VDEAD_NOWAIT);
 1207 mutex_exit(vp->v_interlock);
 1208 if (error) {
 1209 if ((flags & PGO_LOCKED) == 0)
 1210 rw_exit(vp->v_uobj.vmobjlock);
 1211 return error;
 1212 }
 1213 }
1201 1214
1202 node = VP_TO_TMPFS_NODE(vp); 1215 node = VP_TO_TMPFS_NODE(vp);
1203 uobj = node->tn_spec.tn_reg.tn_aobj; 1216 uobj = node->tn_spec.tn_reg.tn_aobj;
1204 1217
 1218 /*
 1219 * Update timestamp lazily. The update will be made real when
 1220 * a synchronous update is next made -- or by tmpfs_getattr,
 1221 * tmpfs_putpages, and tmpfs_inactive.
 1222 */
1205 if ((flags & PGO_NOTIMESTAMP) == 0) { 1223 if ((flags & PGO_NOTIMESTAMP) == 0) {
1206 u_int tflags = 0; 1224 u_int tflags = 0;
1207 1225
1208 if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0) 1226 if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
1209 tflags |= TMPFS_UPDATE_ATIME; 1227 tflags |= TMPFS_UPDATE_ATIME;
1210 1228
1211 if ((access_type & VM_PROT_WRITE) != 0) { 1229 if ((access_type & VM_PROT_WRITE) != 0) {
1212 tflags |= TMPFS_UPDATE_MTIME; 1230 tflags |= TMPFS_UPDATE_MTIME;
1213 if (vp->v_mount->mnt_flag & MNT_RELATIME) 1231 if (vp->v_mount->mnt_flag & MNT_RELATIME)
1214 tflags |= TMPFS_UPDATE_ATIME; 1232 tflags |= TMPFS_UPDATE_ATIME;
1215 } 1233 }
1216 tmpfs_update(vp, tflags); 1234 tmpfs_update_lazily(vp, tflags);
1217 } 1235 }
1218 1236
1219 /* 1237 /*
1220 * Invoke the pager. 1238 * Invoke the pager.
1221 * 1239 *
1222 * Clean the array of pages before. XXX: PR/32166 1240 * Clean the array of pages before. XXX: PR/32166
1223 * Note that vnode lock is shared with underlying UVM object. 1241 * Note that vnode lock is shared with underlying UVM object.
1224 */ 1242 */
1225 if (pgs) { 1243 if ((flags & PGO_LOCKED) == 0 && pgs) {
1226 memset(pgs, 0, sizeof(struct vm_pages *) * npages); 1244 memset(pgs, 0, sizeof(struct vm_pages *) * npages);
1227 } 1245 }
1228 KASSERT(vp->v_uobj.vmobjlock == uobj->vmobjlock); 1246 KASSERT(vp->v_uobj.vmobjlock == uobj->vmobjlock);
1229 1247
1230 error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, centeridx, 1248 error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, centeridx,
1231 access_type, advice, flags | PGO_ALLPAGES); 1249 access_type, advice, flags);
1232 1250
1233#if defined(DEBUG) 1251#if defined(DEBUG)
1234 if (!error && pgs) { 1252 if (!error && pgs) {
1235 for (int i = 0; i < npages; i++) { 1253 KASSERT(pgs[centeridx] != NULL);
1236 KASSERT(pgs[i] != NULL); 
1237 } 
1238 } 1254 }
1239#endif 1255#endif
1240 return error; 1256 return error;
1241} 1257}
1242 1258
1243int 1259int
1244tmpfs_putpages(void *v) 1260tmpfs_putpages(void *v)
1245{ 1261{
1246 struct vop_putpages_args /* { 1262 struct vop_putpages_args /* {
1247 struct vnode *a_vp; 1263 struct vnode *a_vp;
1248 voff_t a_offlo; 1264 voff_t a_offlo;
1249 voff_t a_offhi; 1265 voff_t a_offhi;
1250 int a_flags; 1266 int a_flags;
@@ -1262,26 +1278,28 @@ tmpfs_putpages(void *v) @@ -1262,26 +1278,28 @@ tmpfs_putpages(void *v)
1262 if (vp->v_type != VREG) { 1278 if (vp->v_type != VREG) {
1263 rw_exit(vp->v_uobj.vmobjlock); 1279 rw_exit(vp->v_uobj.vmobjlock);
1264 return 0; 1280 return 0;
1265 } 1281 }
1266 1282
1267 node = VP_TO_TMPFS_NODE(vp); 1283 node = VP_TO_TMPFS_NODE(vp);
1268 uobj = node->tn_spec.tn_reg.tn_aobj; 1284 uobj = node->tn_spec.tn_reg.tn_aobj;
1269 1285
1270 KASSERT(vp->v_uobj.vmobjlock == uobj->vmobjlock); 1286 KASSERT(vp->v_uobj.vmobjlock == uobj->vmobjlock);
1271 error = (*uobj->pgops->pgo_put)(uobj, offlo, offhi, flags); 1287 error = (*uobj->pgops->pgo_put)(uobj, offlo, offhi, flags);
1272 1288
1273 /* XXX mtime */ 1289 /* XXX mtime */
1274 1290
 1291 /* Process deferred updates. */
 1292 tmpfs_update(vp, 0);
1275 return error; 1293 return error;
1276} 1294}
1277 1295
1278int 1296int
1279tmpfs_whiteout(void *v) 1297tmpfs_whiteout(void *v)
1280{ 1298{
1281 struct vop_whiteout_args /* { 1299 struct vop_whiteout_args /* {
1282 struct vnode *a_dvp; 1300 struct vnode *a_dvp;
1283 struct componentname *a_cnp; 1301 struct componentname *a_cnp;
1284 int a_flags; 1302 int a_flags;
1285 } */ *ap = v; 1303 } */ *ap = v;
1286 vnode_t *dvp = ap->a_dvp; 1304 vnode_t *dvp = ap->a_dvp;
1287 struct componentname *cnp = ap->a_cnp; 1305 struct componentname *cnp = ap->a_cnp;