Mon Oct 21 15:31:26 2013 UTC ()
NetBSD does not have vn_in_dnlc


(christos)
diff -r1.14 -r1.15 src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_znode.c

cvs diff -r1.14 -r1.15 src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_znode.c (switch to unified diff)

--- src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_znode.c 2013/09/23 19:42:42 1.14
+++ src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_znode.c 2013/10/21 15:31:26 1.15
@@ -1,1499 +1,1501 @@ @@ -1,1499 +1,1501 @@
1/* 1/*
2 * CDDL HEADER START 2 * CDDL HEADER START
3 * 3 *
4 * The contents of this file are subject to the terms of the 4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License"). 5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License. 6 * You may not use this file except in compliance with the License.
7 * 7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing. 9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions 10 * See the License for the specific language governing permissions
11 * and limitations under the License. 11 * and limitations under the License.
12 * 12 *
13 * When distributing Covered Code, include this CDDL HEADER in each 13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the 15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner] 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 * 18 *
19 * CDDL HEADER END 19 * CDDL HEADER END
20 */ 20 */
21/* 21/*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms. 23 * Use is subject to license terms.
24 */ 24 */
25 25
26/* Portions Copyright 2007 Jeremy Teo */ 26/* Portions Copyright 2007 Jeremy Teo */
27 27
28#ifdef _KERNEL 28#ifdef _KERNEL
29#include <sys/types.h> 29#include <sys/types.h>
30#include <sys/param.h> 30#include <sys/param.h>
31#include <sys/time.h> 31#include <sys/time.h>
32#include <sys/systm.h> 32#include <sys/systm.h>
33#include <sys/sysmacros.h> 33#include <sys/sysmacros.h>
34#include <sys/resource.h> 34#include <sys/resource.h>
35#include <sys/mntent.h> 35#include <sys/mntent.h>
36#include <sys/u8_textprep.h> 36#include <sys/u8_textprep.h>
37#include <sys/dsl_dataset.h> 37#include <sys/dsl_dataset.h>
38#include <sys/vfs.h> 38#include <sys/vfs.h>
39#include <sys/vnode.h> 39#include <sys/vnode.h>
40#include <sys/file.h> 40#include <sys/file.h>
41#include <sys/kmem.h> 41#include <sys/kmem.h>
42#include <sys/errno.h> 42#include <sys/errno.h>
43#include <sys/unistd.h> 43#include <sys/unistd.h>
44#include <sys/atomic.h> 44#include <sys/atomic.h>
45#include <sys/zfs_dir.h> 45#include <sys/zfs_dir.h>
46#include <sys/zfs_acl.h> 46#include <sys/zfs_acl.h>
47#include <sys/zfs_ioctl.h> 47#include <sys/zfs_ioctl.h>
48#include <sys/zfs_rlock.h> 48#include <sys/zfs_rlock.h>
49#include <sys/zfs_fuid.h> 49#include <sys/zfs_fuid.h>
50#include <sys/fs/zfs.h> 50#include <sys/fs/zfs.h>
51#include <sys/kidmap.h> 51#include <sys/kidmap.h>
52#endif /* _KERNEL */ 52#endif /* _KERNEL */
53 53
54#include <sys/dmu.h> 54#include <sys/dmu.h>
55#include <sys/refcount.h> 55#include <sys/refcount.h>
56#include <sys/stat.h> 56#include <sys/stat.h>
57#include <sys/zap.h> 57#include <sys/zap.h>
58#include <sys/zfs_znode.h> 58#include <sys/zfs_znode.h>
59 59
60#include "zfs_prop.h" 60#include "zfs_prop.h"
61 61
62#if defined(_KERNEL) && defined(__NetBSD__) 62#if defined(_KERNEL) && defined(__NetBSD__)
63#include <miscfs/specfs/specdev.h> 63#include <miscfs/specfs/specdev.h>
64static const struct genfs_ops zfs_genfsops = { 64static const struct genfs_ops zfs_genfsops = {
65 .gop_write = genfs_compat_gop_write, 65 .gop_write = genfs_compat_gop_write,
66}; 66};
67 67
68#endif 68#endif
69 69
70extern int (**zfs_vnodeop_p)(void *); 70extern int (**zfs_vnodeop_p)(void *);
71extern int (**zfs_fifoop_p)(void *); 71extern int (**zfs_fifoop_p)(void *);
72extern int (**zfs_specop_p)(void *); 72extern int (**zfs_specop_p)(void *);
73 73
74/* 74/*
75 * Define ZNODE_STATS to turn on statistic gathering. By default, it is only 75 * Define ZNODE_STATS to turn on statistic gathering. By default, it is only
76 * turned on when DEBUG is also defined. 76 * turned on when DEBUG is also defined.
77 */ 77 */
78#ifdef DEBUG 78#ifdef DEBUG
79#define ZNODE_STATS 79#define ZNODE_STATS
80#endif /* DEBUG */ 80#endif /* DEBUG */
81 81
82#ifdef ZNODE_STATS 82#ifdef ZNODE_STATS
83#define ZNODE_STAT_ADD(stat) ((stat)++) 83#define ZNODE_STAT_ADD(stat) ((stat)++)
84#else 84#else
85#define ZNODE_STAT_ADD(stat) /* nothing */ 85#define ZNODE_STAT_ADD(stat) /* nothing */
86#endif /* ZNODE_STATS */ 86#endif /* ZNODE_STATS */
87 87
88#define POINTER_IS_VALID(p) (!((uintptr_t)(p) & 0x3)) 88#define POINTER_IS_VALID(p) (!((uintptr_t)(p) & 0x3))
89#define POINTER_INVALIDATE(pp) (*(pp) = (void *)((uintptr_t)(*(pp)) | 0x1)) 89#define POINTER_INVALIDATE(pp) (*(pp) = (void *)((uintptr_t)(*(pp)) | 0x1))
90 90
91/* 91/*
92 * Functions needed for userland (ie: libzpool) are not put under 92 * Functions needed for userland (ie: libzpool) are not put under
93 * #ifdef_KERNEL; the rest of the functions have dependencies 93 * #ifdef_KERNEL; the rest of the functions have dependencies
94 * (such as VFS logic) that will not compile easily in userland. 94 * (such as VFS logic) that will not compile easily in userland.
95 */ 95 */
96#ifdef _KERNEL 96#ifdef _KERNEL
97/* 97/*
98 * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to 98 * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to
99 * be freed before it can be safely accessed. 99 * be freed before it can be safely accessed.
100 */ 100 */
101krwlock_t zfsvfs_lock; 101krwlock_t zfsvfs_lock;
102 102
103static kmem_cache_t *znode_cache = NULL; 103static kmem_cache_t *znode_cache = NULL;
104 104
105/*ARGSUSED*/ 105/*ARGSUSED*/
106static void 106static void
107znode_evict_error(dmu_buf_t *dbuf, void *user_ptr) 107znode_evict_error(dmu_buf_t *dbuf, void *user_ptr)
108{ 108{
109 /* 109 /*
110 * We should never drop all dbuf refs without first clearing 110 * We should never drop all dbuf refs without first clearing
111 * the eviction callback. 111 * the eviction callback.
112 */ 112 */
113 panic("evicting znode %p\n", user_ptr); 113 panic("evicting znode %p\n", user_ptr);
114} 114}
115 115
116/*ARGSUSED*/ 116/*ARGSUSED*/
117static int 117static int
118zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) 118zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
119{ 119{
120 znode_t *zp = arg; 120 znode_t *zp = arg;
121 121
122 ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); 122 ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
123 123
124 list_link_init(&zp->z_link_node); 124 list_link_init(&zp->z_link_node);
125 125
126 mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL); 126 mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
127 rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL); 127 rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
128 rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL); 128 rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL);
129 mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); 129 mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
130 130
131 mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL); 131 mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
132 avl_create(&zp->z_range_avl, zfs_range_compare, 132 avl_create(&zp->z_range_avl, zfs_range_compare,
133 sizeof (rl_t), offsetof(rl_t, r_node)); 133 sizeof (rl_t), offsetof(rl_t, r_node));
134 134
135 zp->z_dbuf = NULL; 135 zp->z_dbuf = NULL;
136 zp->z_dirlocks = NULL; 136 zp->z_dirlocks = NULL;
137 zp->z_acl_cached = NULL; 137 zp->z_acl_cached = NULL;
138 return (0); 138 return (0);
139} 139}
140 140
141/*ARGSUSED*/ 141/*ARGSUSED*/
142static void 142static void
143zfs_znode_cache_destructor(void *buf, void *arg) 143zfs_znode_cache_destructor(void *buf, void *arg)
144{ 144{
145 znode_t *zp = arg; 145 znode_t *zp = arg;
146 146
147 ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); 147 ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
148 ASSERT(ZTOV(zp) == NULL); 148 ASSERT(ZTOV(zp) == NULL);
149  149
150 ASSERT(!list_link_active(&zp->z_link_node)); 150 ASSERT(!list_link_active(&zp->z_link_node));
151 mutex_destroy(&zp->z_lock); 151 mutex_destroy(&zp->z_lock);
152 rw_destroy(&zp->z_parent_lock); 152 rw_destroy(&zp->z_parent_lock);
153 rw_destroy(&zp->z_name_lock); 153 rw_destroy(&zp->z_name_lock);
154 mutex_destroy(&zp->z_acl_lock); 154 mutex_destroy(&zp->z_acl_lock);
155 avl_destroy(&zp->z_range_avl); 155 avl_destroy(&zp->z_range_avl);
156 mutex_destroy(&zp->z_range_lock); 156 mutex_destroy(&zp->z_range_lock);
157 157
158 ASSERT(zp->z_dbuf == NULL); 158 ASSERT(zp->z_dbuf == NULL);
159 ASSERT(zp->z_dirlocks == NULL); 159 ASSERT(zp->z_dirlocks == NULL);
160 ASSERT(zp->z_acl_cached == NULL); 160 ASSERT(zp->z_acl_cached == NULL);
161} 161}
162 162
163#ifdef ZNODE_STATS 163#ifdef ZNODE_STATS
164static struct { 164static struct {
165 uint64_t zms_zfsvfs_invalid; 165 uint64_t zms_zfsvfs_invalid;
166 uint64_t zms_zfsvfs_recheck1; 166 uint64_t zms_zfsvfs_recheck1;
167 uint64_t zms_zfsvfs_unmounted; 167 uint64_t zms_zfsvfs_unmounted;
168 uint64_t zms_zfsvfs_recheck2; 168 uint64_t zms_zfsvfs_recheck2;
169 uint64_t zms_obj_held; 169 uint64_t zms_obj_held;
170 uint64_t zms_vnode_locked; 170 uint64_t zms_vnode_locked;
171 uint64_t zms_not_only_dnlc; 171 uint64_t zms_not_only_dnlc;
172} znode_move_stats; 172} znode_move_stats;
173#endif /* ZNODE_STATS */ 173#endif /* ZNODE_STATS */
174 174
175static void 175static void
176zfs_znode_move_impl(znode_t *ozp, znode_t *nzp) 176zfs_znode_move_impl(znode_t *ozp, znode_t *nzp)
177{ 177{
178 vnode_t *vp; 178 vnode_t *vp;
179 179
180 /* Copy fields. */ 180 /* Copy fields. */
181 nzp->z_zfsvfs = ozp->z_zfsvfs; 181 nzp->z_zfsvfs = ozp->z_zfsvfs;
182 182
183 /* Swap vnodes. */ 183 /* Swap vnodes. */
184 vp = nzp->z_vnode; 184 vp = nzp->z_vnode;
185 nzp->z_vnode = ozp->z_vnode; 185 nzp->z_vnode = ozp->z_vnode;
186 ozp->z_vnode = vp; /* let destructor free the overwritten vnode */ 186 ozp->z_vnode = vp; /* let destructor free the overwritten vnode */
187 ZTOV(ozp)->v_data = ozp; 187 ZTOV(ozp)->v_data = ozp;
188 ZTOV(nzp)->v_data = nzp; 188 ZTOV(nzp)->v_data = nzp;
189 189
190 nzp->z_id = ozp->z_id; 190 nzp->z_id = ozp->z_id;
191 ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */ 191 ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */
192 ASSERT(avl_numnodes(&ozp->z_range_avl) == 0); 192 ASSERT(avl_numnodes(&ozp->z_range_avl) == 0);
193 nzp->z_unlinked = ozp->z_unlinked; 193 nzp->z_unlinked = ozp->z_unlinked;
194 nzp->z_atime_dirty = ozp->z_atime_dirty; 194 nzp->z_atime_dirty = ozp->z_atime_dirty;
195 nzp->z_zn_prefetch = ozp->z_zn_prefetch; 195 nzp->z_zn_prefetch = ozp->z_zn_prefetch;
196 nzp->z_blksz = ozp->z_blksz; 196 nzp->z_blksz = ozp->z_blksz;
197 nzp->z_seq = ozp->z_seq; 197 nzp->z_seq = ozp->z_seq;
198 nzp->z_mapcnt = ozp->z_mapcnt; 198 nzp->z_mapcnt = ozp->z_mapcnt;
199 nzp->z_last_itx = ozp->z_last_itx; 199 nzp->z_last_itx = ozp->z_last_itx;
200 nzp->z_gen = ozp->z_gen; 200 nzp->z_gen = ozp->z_gen;
201 nzp->z_sync_cnt = ozp->z_sync_cnt; 201 nzp->z_sync_cnt = ozp->z_sync_cnt;
202 nzp->z_phys = ozp->z_phys; 202 nzp->z_phys = ozp->z_phys;
203 nzp->z_dbuf = ozp->z_dbuf; 203 nzp->z_dbuf = ozp->z_dbuf;
204 204
205 /* 205 /*
206 * Since this is just an idle znode and kmem is already dealing with 206 * Since this is just an idle znode and kmem is already dealing with
207 * memory pressure, release any cached ACL. 207 * memory pressure, release any cached ACL.
208 */ 208 */
209 if (ozp->z_acl_cached) { 209 if (ozp->z_acl_cached) {
210 zfs_acl_free(ozp->z_acl_cached); 210 zfs_acl_free(ozp->z_acl_cached);
211 ozp->z_acl_cached = NULL; 211 ozp->z_acl_cached = NULL;
212 } 212 }
213 213
214 /* Update back pointers. */ 214 /* Update back pointers. */
215 (void) dmu_buf_update_user(nzp->z_dbuf, ozp, nzp, &nzp->z_phys, 215 (void) dmu_buf_update_user(nzp->z_dbuf, ozp, nzp, &nzp->z_phys,
216 znode_evict_error); 216 znode_evict_error);
217 217
218 /* 218 /*
219 * Invalidate the original znode by clearing fields that provide a 219 * Invalidate the original znode by clearing fields that provide a
220 * pointer back to the znode. Set the low bit of the vfs pointer to 220 * pointer back to the znode. Set the low bit of the vfs pointer to
221 * ensure that zfs_znode_move() recognizes the znode as invalid in any 221 * ensure that zfs_znode_move() recognizes the znode as invalid in any
222 * subsequent callback. 222 * subsequent callback.
223 */ 223 */
224 ozp->z_dbuf = NULL; 224 ozp->z_dbuf = NULL;
225 POINTER_INVALIDATE(&ozp->z_zfsvfs); 225 POINTER_INVALIDATE(&ozp->z_zfsvfs);
226} 226}
227 227
228#ifndef __NetBSD__ 228#ifndef __NetBSD__
229/*ARGSUSED*/ 229/*ARGSUSED*/
230static kmem_cbrc_t 230static kmem_cbrc_t
231zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg) 231zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
232{ 232{
233 znode_t *ozp = buf, *nzp = newbuf; 233 znode_t *ozp = buf, *nzp = newbuf;
234 zfsvfs_t *zfsvfs; 234 zfsvfs_t *zfsvfs;
235 vnode_t *vp; 235 vnode_t *vp;
236 236
237 /* 237 /*
238 * The znode is on the file system's list of known znodes if the vfs 238 * The znode is on the file system's list of known znodes if the vfs
239 * pointer is valid. We set the low bit of the vfs pointer when freeing 239 * pointer is valid. We set the low bit of the vfs pointer when freeing
240 * the znode to invalidate it, and the memory patterns written by kmem 240 * the znode to invalidate it, and the memory patterns written by kmem
241 * (baddcafe and deadbeef) set at least one of the two low bits. A newly 241 * (baddcafe and deadbeef) set at least one of the two low bits. A newly
242 * created znode sets the vfs pointer last of all to indicate that the 242 * created znode sets the vfs pointer last of all to indicate that the
243 * znode is known and in a valid state to be moved by this function. 243 * znode is known and in a valid state to be moved by this function.
244 */ 244 */
245 zfsvfs = ozp->z_zfsvfs; 245 zfsvfs = ozp->z_zfsvfs;
246 if (!POINTER_IS_VALID(zfsvfs)) { 246 if (!POINTER_IS_VALID(zfsvfs)) {
247 ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid); 247 ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid);
248 return (KMEM_CBRC_DONT_KNOW); 248 return (KMEM_CBRC_DONT_KNOW);
249 } 249 }
250 250
251 /* 251 /*
252 * Close a small window in which it's possible that the filesystem could 252 * Close a small window in which it's possible that the filesystem could
253 * be unmounted and freed, and zfsvfs, though valid in the previous 253 * be unmounted and freed, and zfsvfs, though valid in the previous
254 * statement, could point to unrelated memory by the time we try to 254 * statement, could point to unrelated memory by the time we try to
255 * prevent the filesystem from being unmounted. 255 * prevent the filesystem from being unmounted.
256 */ 256 */
257 rw_enter(&zfsvfs_lock, RW_WRITER); 257 rw_enter(&zfsvfs_lock, RW_WRITER);
258 if (zfsvfs != ozp->z_zfsvfs) { 258 if (zfsvfs != ozp->z_zfsvfs) {
259 rw_exit(&zfsvfs_lock); 259 rw_exit(&zfsvfs_lock);
260 ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1); 260 ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1);
261 return (KMEM_CBRC_DONT_KNOW); 261 return (KMEM_CBRC_DONT_KNOW);
262 } 262 }
263 263
264 /* 264 /*
265 * If the znode is still valid, then so is the file system. We know that 265 * If the znode is still valid, then so is the file system. We know that
266 * no valid file system can be freed while we hold zfsvfs_lock, so we 266 * no valid file system can be freed while we hold zfsvfs_lock, so we
267 * can safely ensure that the filesystem is not and will not be 267 * can safely ensure that the filesystem is not and will not be
268 * unmounted. The next statement is equivalent to ZFS_ENTER(). 268 * unmounted. The next statement is equivalent to ZFS_ENTER().
269 */ 269 */
270 rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG); 270 rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
271 if (zfsvfs->z_unmounted) { 271 if (zfsvfs->z_unmounted) {
272 ZFS_EXIT(zfsvfs); 272 ZFS_EXIT(zfsvfs);
273 rw_exit(&zfsvfs_lock); 273 rw_exit(&zfsvfs_lock);
274 ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted); 274 ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
275 return (KMEM_CBRC_DONT_KNOW); 275 return (KMEM_CBRC_DONT_KNOW);
276 } 276 }
277 rw_exit(&zfsvfs_lock); 277 rw_exit(&zfsvfs_lock);
278 278
279 mutex_enter(&zfsvfs->z_znodes_lock); 279 mutex_enter(&zfsvfs->z_znodes_lock);
280 /* 280 /*
281 * Recheck the vfs pointer in case the znode was removed just before 281 * Recheck the vfs pointer in case the znode was removed just before
282 * acquiring the lock. 282 * acquiring the lock.
283 */ 283 */
284 if (zfsvfs != ozp->z_zfsvfs) { 284 if (zfsvfs != ozp->z_zfsvfs) {
285 mutex_exit(&zfsvfs->z_znodes_lock); 285 mutex_exit(&zfsvfs->z_znodes_lock);
286 ZFS_EXIT(zfsvfs); 286 ZFS_EXIT(zfsvfs);
287 ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2); 287 ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2);
288 return (KMEM_CBRC_DONT_KNOW); 288 return (KMEM_CBRC_DONT_KNOW);
289 } 289 }
290 290
291 /* 291 /*
292 * At this point we know that as long as we hold z_znodes_lock, the 292 * At this point we know that as long as we hold z_znodes_lock, the
293 * znode cannot be freed and fields within the znode can be safely 293 * znode cannot be freed and fields within the znode can be safely
294 * accessed. Now, prevent a race with zfs_zget(). 294 * accessed. Now, prevent a race with zfs_zget().
295 */ 295 */
296 if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) { 296 if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) {
297 mutex_exit(&zfsvfs->z_znodes_lock); 297 mutex_exit(&zfsvfs->z_znodes_lock);
298 ZFS_EXIT(zfsvfs); 298 ZFS_EXIT(zfsvfs);
299 ZNODE_STAT_ADD(znode_move_stats.zms_obj_held); 299 ZNODE_STAT_ADD(znode_move_stats.zms_obj_held);
300 return (KMEM_CBRC_LATER); 300 return (KMEM_CBRC_LATER);
301 } 301 }
302 302
303 vp = ZTOV(ozp); 303 vp = ZTOV(ozp);
304 if (mutex_tryenter(&vp->v_lock) == 0) { 304 if (mutex_tryenter(&vp->v_lock) == 0) {
305 ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); 305 ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
306 mutex_exit(&zfsvfs->z_znodes_lock); 306 mutex_exit(&zfsvfs->z_znodes_lock);
307 ZFS_EXIT(zfsvfs); 307 ZFS_EXIT(zfsvfs);
308 ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked); 308 ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked);
309 return (KMEM_CBRC_LATER); 309 return (KMEM_CBRC_LATER);
310 } 310 }
311 311
312 /* Only move znodes that are referenced _only_ by the DNLC. */ 312 /* Only move znodes that are referenced _only_ by the DNLC. */
313 if (vp->v_count != 1 || !vn_in_dnlc(vp)) { 313 if (vp->v_count != 1 || !vn_in_dnlc(vp)) {
314 mutex_exit(&vp->v_lock); 314 mutex_exit(&vp->v_lock);
315 ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); 315 ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
316 mutex_exit(&zfsvfs->z_znodes_lock); 316 mutex_exit(&zfsvfs->z_znodes_lock);
317 ZFS_EXIT(zfsvfs); 317 ZFS_EXIT(zfsvfs);
318 ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc); 318 ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc);
319 return (KMEM_CBRC_LATER); 319 return (KMEM_CBRC_LATER);
320 } 320 }
321 321
322 /* 322 /*
323 * The znode is known and in a valid state to move. We're holding the 323 * The znode is known and in a valid state to move. We're holding the
324 * locks needed to execute the critical section. 324 * locks needed to execute the critical section.
325 */ 325 */
326 zfs_znode_move_impl(ozp, nzp); 326 zfs_znode_move_impl(ozp, nzp);
327 mutex_exit(&vp->v_lock); 327 mutex_exit(&vp->v_lock);
328 ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); 328 ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
329 329
330 list_link_replace(&ozp->z_link_node, &nzp->z_link_node); 330 list_link_replace(&ozp->z_link_node, &nzp->z_link_node);
331 mutex_exit(&zfsvfs->z_znodes_lock); 331 mutex_exit(&zfsvfs->z_znodes_lock);
332 ZFS_EXIT(zfsvfs); 332 ZFS_EXIT(zfsvfs);
333 333
334 return (KMEM_CBRC_YES); 334 return (KMEM_CBRC_YES);
335} 335}
336#endif /* !__NetBSD__ */ 336#endif /* !__NetBSD__ */
337 337
338void 338void
339zfs_znode_init(void) 339zfs_znode_init(void)
340{ 340{
341 /* 341 /*
342 * Initialize zcache 342 * Initialize zcache
343 */ 343 */
344 rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL); 344 rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL);
345 ASSERT(znode_cache == NULL); 345 ASSERT(znode_cache == NULL);
346 znode_cache = kmem_cache_create("zfs_znode_cache", 346 znode_cache = kmem_cache_create("zfs_znode_cache",
347 sizeof (znode_t), 0, zfs_znode_cache_constructor, 347 sizeof (znode_t), 0, zfs_znode_cache_constructor,
348 zfs_znode_cache_destructor, NULL, NULL, NULL, 0); 348 zfs_znode_cache_destructor, NULL, NULL, NULL, 0);
349} 349}
350 350
351void 351void
352zfs_znode_fini(void) 352zfs_znode_fini(void)
353{ 353{
354 354
355 /* 355 /*
356 * Cleanup zcache 356 * Cleanup zcache
357 */ 357 */
358 if (znode_cache) 358 if (znode_cache)
359 kmem_cache_destroy(znode_cache); 359 kmem_cache_destroy(znode_cache);
360 znode_cache = NULL; 360 znode_cache = NULL;
361 rw_destroy(&zfsvfs_lock); 361 rw_destroy(&zfsvfs_lock);
362} 362}
363 363
364#ifndef __NetBSD__ 364#ifndef __NetBSD__
365struct vnodeops *zfs_dvnodeops; 365struct vnodeops *zfs_dvnodeops;
366struct vnodeops *zfs_fvnodeops; 366struct vnodeops *zfs_fvnodeops;
367struct vnodeops *zfs_symvnodeops; 367struct vnodeops *zfs_symvnodeops;
368struct vnodeops *zfs_xdvnodeops; 368struct vnodeops *zfs_xdvnodeops;
369struct vnodeops *zfs_evnodeops; 369struct vnodeops *zfs_evnodeops;
370struct vnodeops *zfs_sharevnodeops; 370struct vnodeops *zfs_sharevnodeops;
371#endif 371#endif
372 372
373void 373void
374zfs_remove_op_tables() 374zfs_remove_op_tables()
375{ 375{
376#ifndef __NetBSD__ 376#ifndef __NetBSD__
377 /* 377 /*
378 * Remove vfs ops 378 * Remove vfs ops
379 */ 379 */
380 ASSERT(zfsfstype); 380 ASSERT(zfsfstype);
381 (void) vfs_freevfsops_by_type(zfsfstype); 381 (void) vfs_freevfsops_by_type(zfsfstype);
382 zfsfstype = 0; 382 zfsfstype = 0;
383 383
384 /* 384 /*
385 * Remove vnode ops 385 * Remove vnode ops
386 */ 386 */
387 if (zfs_dvnodeops) 387 if (zfs_dvnodeops)
388 vn_freevnodeops(zfs_dvnodeops); 388 vn_freevnodeops(zfs_dvnodeops);
389 if (zfs_fvnodeops) 389 if (zfs_fvnodeops)
390 vn_freevnodeops(zfs_fvnodeops); 390 vn_freevnodeops(zfs_fvnodeops);
391 if (zfs_symvnodeops) 391 if (zfs_symvnodeops)
392 vn_freevnodeops(zfs_symvnodeops); 392 vn_freevnodeops(zfs_symvnodeops);
393 if (zfs_xdvnodeops) 393 if (zfs_xdvnodeops)
394 vn_freevnodeops(zfs_xdvnodeops); 394 vn_freevnodeops(zfs_xdvnodeops);
395 if (zfs_evnodeops) 395 if (zfs_evnodeops)
396 vn_freevnodeops(zfs_evnodeops); 396 vn_freevnodeops(zfs_evnodeops);
397 if (zfs_sharevnodeops) 397 if (zfs_sharevnodeops)
398 vn_freevnodeops(zfs_sharevnodeops); 398 vn_freevnodeops(zfs_sharevnodeops);
399 399
400 zfs_dvnodeops = NULL; 400 zfs_dvnodeops = NULL;
401 zfs_fvnodeops = NULL; 401 zfs_fvnodeops = NULL;
402 zfs_symvnodeops = NULL; 402 zfs_symvnodeops = NULL;
403 zfs_xdvnodeops = NULL; 403 zfs_xdvnodeops = NULL;
404 zfs_evnodeops = NULL; 404 zfs_evnodeops = NULL;
405 zfs_sharevnodeops = NULL; 405 zfs_sharevnodeops = NULL;
406#endif 406#endif
407} 407}
408 408
409#ifndef __NetBSD__ 409#ifndef __NetBSD__
410extern const fs_operation_def_t zfs_dvnodeops_template[]; 410extern const fs_operation_def_t zfs_dvnodeops_template[];
411extern const fs_operation_def_t zfs_fvnodeops_template[]; 411extern const fs_operation_def_t zfs_fvnodeops_template[];
412extern const fs_operation_def_t zfs_xdvnodeops_template[]; 412extern const fs_operation_def_t zfs_xdvnodeops_template[];
413extern const fs_operation_def_t zfs_symvnodeops_template[]; 413extern const fs_operation_def_t zfs_symvnodeops_template[];
414extern const fs_operation_def_t zfs_evnodeops_template[]; 414extern const fs_operation_def_t zfs_evnodeops_template[];
415extern const fs_operation_def_t zfs_sharevnodeops_template[]; 415extern const fs_operation_def_t zfs_sharevnodeops_template[];
416#endif 416#endif
417 417
418int 418int
419zfs_create_op_tables() 419zfs_create_op_tables()
420{ 420{
421#ifndef __NetBSD__ 421#ifndef __NetBSD__
422 int error; 422 int error;
423 423
424 /* 424 /*
425 * zfs_dvnodeops can be set if mod_remove() calls mod_installfs() 425 * zfs_dvnodeops can be set if mod_remove() calls mod_installfs()
 426 * due to a failure to remove the 2nd modlinkage (zfs_modldrv). 426 * due to a failure to remove the 2nd modlinkage (zfs_modldrv).
427 * In this case we just return as the ops vectors are already set up. 427 * In this case we just return as the ops vectors are already set up.
428 */ 428 */
429 if (zfs_dvnodeops) 429 if (zfs_dvnodeops)
430 return (0); 430 return (0);
431 431
432 error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template, 432 error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template,
433 &zfs_dvnodeops); 433 &zfs_dvnodeops);
434 if (error) 434 if (error)
435 return (error); 435 return (error);
436 436
437 error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template, 437 error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template,
438 &zfs_fvnodeops); 438 &zfs_fvnodeops);
439 if (error) 439 if (error)
440 return (error); 440 return (error);
441 441
442 error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template, 442 error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template,
443 &zfs_symvnodeops); 443 &zfs_symvnodeops);
444 if (error) 444 if (error)
445 return (error); 445 return (error);
446 446
447 error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template, 447 error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template,
448 &zfs_xdvnodeops); 448 &zfs_xdvnodeops);
449 if (error) 449 if (error)
450 return (error); 450 return (error);
451 451
452 error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template, 452 error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template,
453 &zfs_evnodeops); 453 &zfs_evnodeops);
454 if (error) 454 if (error)
455 return (error); 455 return (error);
456 456
457 error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template, 457 error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template,
458 &zfs_sharevnodeops); 458 &zfs_sharevnodeops);
459 459
460 return (error); 460 return (error);
461#endif 461#endif
462 return 0; 462 return 0;
463} 463}
464 464
465int 465int
466zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) 466zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
467{ 467{
468 zfs_acl_ids_t acl_ids; 468 zfs_acl_ids_t acl_ids;
469 vattr_t vattr; 469 vattr_t vattr;
470 znode_t *sharezp; 470 znode_t *sharezp;
471 vnode_t *vp; 471 vnode_t *vp;
472 znode_t *zp; 472 znode_t *zp;
473 int error; 473 int error;
474 474
475 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; 475 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
476 vattr.va_type = VDIR; 476 vattr.va_type = VDIR;
477 vattr.va_mode = S_IFDIR|0555; 477 vattr.va_mode = S_IFDIR|0555;
478 vattr.va_uid = crgetuid(kcred); 478 vattr.va_uid = crgetuid(kcred);
479 vattr.va_gid = crgetgid(kcred); 479 vattr.va_gid = crgetgid(kcred);
480 480
481 sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP); 481 sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
482 sharezp->z_unlinked = 0; 482 sharezp->z_unlinked = 0;
483 sharezp->z_atime_dirty = 0; 483 sharezp->z_atime_dirty = 0;
484 sharezp->z_zfsvfs = zfsvfs; 484 sharezp->z_zfsvfs = zfsvfs;
485 485
486 vp = ZTOV(sharezp); 486 vp = ZTOV(sharezp);
487 error = getnewvnode(VT_ZFS, zfsvfs->z_parent->z_vfs, 487 error = getnewvnode(VT_ZFS, zfsvfs->z_parent->z_vfs,
488 zfs_vnodeop_p, NULL, &sharezp->z_vnode); 488 zfs_vnodeop_p, NULL, &sharezp->z_vnode);
489 if (error) { 489 if (error) {
490 kmem_cache_free(znode_cache, sharezp); 490 kmem_cache_free(znode_cache, sharezp);
491 return error; 491 return error;
492 } 492 }
493 vp->v_type = VDIR; 493 vp->v_type = VDIR;
494 494
495 VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr, 495 VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
496 kcred, NULL, &acl_ids)); 496 kcred, NULL, &acl_ids));
497 zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, 497 zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE,
498 &zp, 0, &acl_ids); 498 &zp, 0, &acl_ids);
499 ASSERT3P(zp, ==, sharezp); 499 ASSERT3P(zp, ==, sharezp);
 500#ifndef __NetBSD__
500 ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */ 501 ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */
 502#endif
501 POINTER_INVALIDATE(&sharezp->z_zfsvfs); 503 POINTER_INVALIDATE(&sharezp->z_zfsvfs);
502 error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, 504 error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
503 ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx); 505 ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
504 zfsvfs->z_shares_dir = sharezp->z_id; 506 zfsvfs->z_shares_dir = sharezp->z_id;
505 507
506 zfs_acl_ids_free(&acl_ids); 508 zfs_acl_ids_free(&acl_ids);
507 ZTOV(sharezp)->v_count = 0; 509 ZTOV(sharezp)->v_count = 0;
508 dmu_buf_rele(sharezp->z_dbuf, NULL); 510 dmu_buf_rele(sharezp->z_dbuf, NULL);
509 sharezp->z_dbuf = NULL; 511 sharezp->z_dbuf = NULL;
510 kmem_cache_free(znode_cache, sharezp); 512 kmem_cache_free(znode_cache, sharezp);
511 513
512 return (error); 514 return (error);
513} 515}
514 516
515/* 517/*
516 * define a couple of values we need available 518 * define a couple of values we need available
517 * for both 64 and 32 bit environments. 519 * for both 64 and 32 bit environments.
518 */ 520 */
519#ifndef NBITSMINOR64 521#ifndef NBITSMINOR64
520#define NBITSMINOR64 32 522#define NBITSMINOR64 32
521#endif 523#endif
522#ifndef MAXMAJ64 524#ifndef MAXMAJ64
523#define MAXMAJ64 0xffffffffUL 525#define MAXMAJ64 0xffffffffUL
524#endif 526#endif
525#ifndef MAXMIN64 527#ifndef MAXMIN64
526#define MAXMIN64 0xffffffffUL 528#define MAXMIN64 0xffffffffUL
527#endif 529#endif
528 530
529/* 531/*
530 * Create special expldev for ZFS private use. 532 * Create special expldev for ZFS private use.
531 * Can't use standard expldev since it doesn't do 533 * Can't use standard expldev since it doesn't do
532 * what we want. The standard expldev() takes a 534 * what we want. The standard expldev() takes a
533 * dev32_t in LP64 and expands it to a long dev_t. 535 * dev32_t in LP64 and expands it to a long dev_t.
534 * We need an interface that takes a dev32_t in ILP32 536 * We need an interface that takes a dev32_t in ILP32
535 * and expands it to a long dev_t. 537 * and expands it to a long dev_t.
536 */ 538 */
537static uint64_t 539static uint64_t
538zfs_expldev(dev_t dev) 540zfs_expldev(dev_t dev)
539{ 541{
540 return ((uint64_t)major(dev) << NBITSMINOR64) | 542 return ((uint64_t)major(dev) << NBITSMINOR64) |
541 (minor_t)minor(dev); 543 (minor_t)minor(dev);
542} 544}
543 545
544/* 546/*
545 * Special cmpldev for ZFS private use. 547 * Special cmpldev for ZFS private use.
546 * Can't use standard cmpldev since it takes 548 * Can't use standard cmpldev since it takes
547 * a long dev_t and compresses it to dev32_t in 549 * a long dev_t and compresses it to dev32_t in
548 * LP64. We need to do a compaction of a long dev_t 550 * LP64. We need to do a compaction of a long dev_t
549 * to a dev32_t in ILP32. 551 * to a dev32_t in ILP32.
550 */ 552 */
551dev_t 553dev_t
552zfs_cmpldev(uint64_t dev) 554zfs_cmpldev(uint64_t dev)
553{ 555{
554 minor_t minor = (minor_t)dev & MAXMIN64; 556 minor_t minor = (minor_t)dev & MAXMIN64;
555 major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64; 557 major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64;
556 558
557 return makedev(minor, major); 559 return makedev(minor, major);
558} 560}
559 561
/*
 * Attach a znode to its DMU bonus buffer: record the dbuf in zp->z_dbuf,
 * register zp as the dbuf's user (immediate-eviction variant, so z_phys
 * tracks the bonus data), and mark the root vnode.  The caller must hold
 * the per-object ZFS_OBJ_MUTEX for zp->z_id.
 */
static void
zfs_znode_dmu_init(zfsvfs_t *zfsvfs, znode_t *zp, dmu_buf_t *db)
{
	znode_t *nzp;

	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs));
	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)));

	mutex_enter(&zp->z_lock);

	ASSERT(zp->z_dbuf == NULL);
	ASSERT(zp->z_acl_cached == NULL);
	zp->z_dbuf = db;
	/* nzp is any previously registered user of db; must not exist. */
	nzp = dmu_buf_set_user_ie(db, zp, &zp->z_phys, znode_evict_error);

	/*
	 * there should be no
	 * concurrent zgets on this object.
	 */
	if (nzp != NULL)
		panic("existing znode %p for dbuf %p", (void *)nzp, (void *)db);

	/*
	 * Slap on VROOT if we are the root znode
	 */
	if (zp->z_id == zfsvfs->z_root)
		ZTOV(zp)->v_flag |= VROOT;

	mutex_exit(&zp->z_lock);
	vn_exists(ZTOV(zp));
}
591 593
/*
 * Detach a znode from its DMU bonus buffer: clear z_dbuf, deregister
 * the znode as the dbuf's user, and drop the dbuf hold taken in
 * zfs_znode_dmu_init().  The assertion below documents the contexts in
 * which this is safe: per-object mutex held, znode already unlinked,
 * or the filesystem teardown lock write-held.
 */
void
zfs_znode_dmu_fini(znode_t *zp)
{
	dmu_buf_t *db = zp->z_dbuf;
	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
	    zp->z_unlinked ||
	    RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock));
	ASSERT(zp->z_dbuf != NULL);
	zp->z_dbuf = NULL;
	/* We must have been the dbuf's registered user. */
	VERIFY(zp == dmu_buf_update_user(db, zp, NULL, NULL, NULL));
	dmu_buf_rele(db, NULL);
}
604 606
/*
 * Construct a new znode/vnode and intialize.
 *
 * This does not do a call to dmu_set_user() that is
 * up to the caller to do, in case you don't want to
 * return the znode
 *
 * The returned znode holds the dbuf (via zfs_znode_dmu_init()), is on
 * the filesystem's z_all_znodes list, and carries a VFS_HOLD reference
 * that zfs_znode_free() releases.
 */

static znode_t *
zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz)
{
	znode_t *zp;
	vnode_t *vp;
	int error;

	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);

	/* getnewvnode() can fail transiently; warn and retry after a tick. */
	for (;;) {
		error = getnewvnode(VT_ZFS, zfsvfs->z_parent->z_vfs,
		    zfs_vnodeop_p, NULL, &zp->z_vnode);
		if (__predict_true(error == 0))
			break;
		printf("WARNING: zfs_znode_alloc: unable to get vnode, "
		    "error=%d\n", error);
		(void)kpause("zfsnewvn", false, hz, NULL);
	}

	ASSERT(zp->z_dirlocks == NULL);
	ASSERT(zp->z_dbuf == NULL);
	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));

	/*
	 * Defer setting z_zfsvfs until the znode is ready to be a candidate for
	 * the zfs_znode_move() callback.
	 */
	zp->z_phys = NULL;
	zp->z_unlinked = 0;
	zp->z_atime_dirty = 0;
	zp->z_mapcnt = 0;
	zp->z_last_itx = 0;
	zp->z_id = db->db_object;
	zp->z_blksz = blksz;
	zp->z_seq = 0x7A4653;
	zp->z_sync_cnt = 0;

	vp = ZTOV(zp);

	/* Attach to the bonus buffer; sets zp->z_phys. */
	zfs_znode_dmu_init(zfsvfs, zp, db);

	zp->z_gen = zp->z_phys->zp_gen;

	/* Finish initializing the vnode from the on-disk znode data. */
	vp->v_vfsp = zfsvfs->z_parent->z_vfs;
	vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode);
	vp->v_data = zp;
	genfs_node_init(vp, &zfs_genfsops);
	switch (vp->v_type) {
	case VDIR:
		zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
		break;
	case VBLK:
	case VCHR:
		/* XXX NetBSD vp->v_op = zfs_specop_p; */
		spec_node_init(vp, zfs_cmpldev(zp->z_phys->zp_rdev));
		break;
	case VFIFO:
		/* XXX NetBSD vp->v_op = zfs_fifoop_p; */
		break;
	}

	dprintf("zfs_znode_alloc znode %p -- vnode %p\n", zp, vp);
	dprintf("zfs_znode_alloc z_id %ld\n", zp->z_id);
	//cpu_Debugger();

	uvm_vnp_setsize(vp, zp->z_phys->zp_size);

	mutex_enter(&zfsvfs->z_znodes_lock);
	list_insert_tail(&zfsvfs->z_all_znodes, zp);
	membar_producer();
	/*
	 * Everything else must be valid before assigning z_zfsvfs makes the
	 * znode eligible for zfs_znode_move().
	 */
	zp->z_zfsvfs = zfsvfs;
	mutex_exit(&zfsvfs->z_znodes_lock);

	/* Released in zfs_znode_free(). */
	VFS_HOLD(zfsvfs->z_vfs);
	return (zp);
}
693 695
/*
 * Create a new DMU object to hold a zfs znode.
 *
 *	IN:	dzp	- parent directory for new znode
 *		vap	- file attributes for new znode
 *		tx	- dmu transaction id for zap operations
 *		cr	- credentials of caller
 *		flag	- flags:
 *			  IS_ROOT_NODE	- new object will be root
 *			  IS_XATTR	- new object is an attribute
 *		bonuslen - length of bonus buffer
 *		setaclp  - File/Dir initial ACL
 *		fuidp	 - Tracks fuid allocation.
 *
 *	OUT:	zpp	- allocated znode
 *
 */
void
zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
    uint_t flag, znode_t **zpp, int bonuslen, zfs_acl_ids_t *acl_ids)
{
	dmu_buf_t *db;
	znode_phys_t *pzp;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	timestruc_t now;
	uint64_t gen, obj;
	int err;

	ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));

	/* During ZIL replay the object number and times come from the log. */
	if (zfsvfs->z_replay) {
		obj = vap->va_nodeid;
		now = vap->va_ctime;		/* see zfs_replay_create() */
		gen = vap->va_nblocks;		/* ditto */
	} else {
		obj = 0;
		gethrestime(&now);
		gen = dmu_tx_get_txg(tx);
	}

	/*
	 * Create a new DMU object.
	 */
	/*
	 * There's currently no mechanism for pre-reading the blocks that will
	 * be to needed allocate a new object, so we accept the small chance
	 * that there will be an i/o error and we will fail one of the
	 * assertions below.
	 */
	if (vap->va_type == VDIR) {
		/* Directories are ZAP objects; files are plain objects. */
		if (zfsvfs->z_replay) {
			err = zap_create_claim_norm(zfsvfs->z_os, obj,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
			ASSERT3U(err, ==, 0);
		} else {
			obj = zap_create_norm(zfsvfs->z_os,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
		}
	} else {
		if (zfsvfs->z_replay) {
			err = dmu_object_claim(zfsvfs->z_os, obj,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
			ASSERT3U(err, ==, 0);
		} else {
			obj = dmu_object_alloc(zfsvfs->z_os,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
		}
	}

	/* Serialize against zfs_zget() on the new object. */
	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
	VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, obj, NULL, &db));
	dmu_buf_will_dirty(db, tx);

	/*
	 * Initialize the znode physical data to zero.
	 */
	ASSERT(db->db_size >= sizeof (znode_phys_t));
	bzero(db->db_data, db->db_size);
	pzp = db->db_data;

	/*
	 * If this is the root, fix up the half-initialized parent pointer
	 * to reference the just-allocated physical data area.
	 */
	if (flag & IS_ROOT_NODE) {
		dzp->z_dbuf = db;
		dzp->z_phys = pzp;
		dzp->z_id = obj;
	}

	/*
	 * If parent is an xattr, so am I.
	 */
	if (dzp->z_phys->zp_flags & ZFS_XATTR)
		flag |= IS_XATTR;

	if (vap->va_type == VBLK || vap->va_type == VCHR) {
		pzp->zp_rdev = zfs_expldev(vap->va_rdev);
	}

	if (zfsvfs->z_use_fuids)
		pzp->zp_flags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;

	if (vap->va_type == VDIR) {
		pzp->zp_size = 2;		/* contents ("." and "..") */
		pzp->zp_links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
	}

	pzp->zp_parent = dzp->z_id;
	if (flag & IS_XATTR)
		pzp->zp_flags |= ZFS_XATTR;

	pzp->zp_gen = gen;

	ZFS_TIME_ENCODE(&now, pzp->zp_crtime);
	ZFS_TIME_ENCODE(&now, pzp->zp_ctime);

	/* atime/mtime come from the caller if supplied, else "now". */
	if (vap->va_mask & AT_ATIME) {
		ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime);
	} else {
		ZFS_TIME_ENCODE(&now, pzp->zp_atime);
	}

	if (vap->va_mask & AT_MTIME) {
		ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime);
	} else {
		ZFS_TIME_ENCODE(&now, pzp->zp_mtime);
	}
	pzp->zp_uid = acl_ids->z_fuid;
	pzp->zp_gid = acl_ids->z_fgid;
	pzp->zp_mode = acl_ids->z_mode;
	if (!(flag & IS_ROOT_NODE)) {
		*zpp = zfs_znode_alloc(zfsvfs, db, 0);
	} else {
		/*
		 * If we are creating the root node, the "parent" we
		 * passed in is the znode for the root.
		 */
		*zpp = dzp;
	}
	VERIFY(0 == zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
	if (vap->va_mask & AT_XVATTR)
		zfs_xvattr_set(*zpp, (xvattr_t *)vap);

	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
}
844 846
/*
 * Apply each optional attribute requested in xvap to the znode's
 * on-disk flags/fields, and mark the attribute as processed with
 * XVA_SET_RTN so the caller can see which requests were honored.
 */
void
zfs_xvattr_set(znode_t *zp, xvattr_t *xvap)
{
	xoptattr_t *xoap;

	xoap = xva_getxoptattr(xvap);
	ASSERT(xoap);

	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
		ZFS_TIME_ENCODE(&xoap->xoa_createtime, zp->z_phys->zp_crtime);
		XVA_SET_RTN(xvap, XAT_CREATETIME);
	}
	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly);
		XVA_SET_RTN(xvap, XAT_READONLY);
	}
	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden);
		XVA_SET_RTN(xvap, XAT_HIDDEN);
	}
	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system);
		XVA_SET_RTN(xvap, XAT_SYSTEM);
	}
	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive);
		XVA_SET_RTN(xvap, XAT_ARCHIVE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable);
		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink);
		XVA_SET_RTN(xvap, XAT_NOUNLINK);
	}
	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly);
		XVA_SET_RTN(xvap, XAT_APPENDONLY);
	}
	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump);
		XVA_SET_RTN(xvap, XAT_NODUMP);
	}
	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque);
		XVA_SET_RTN(xvap, XAT_OPAQUE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
		    xoap->xoa_av_quarantined);
		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
	}
	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified);
		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
	}
	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
		/* Scanstamp lives in the bonus buffer just past znode_phys_t. */
		(void) memcpy(zp->z_phys + 1, xoap->xoa_av_scanstamp,
		    sizeof (xoap->xoa_av_scanstamp));
		zp->z_phys->zp_flags |= ZFS_BONUS_SCANSTAMP;
		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
	}
	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse);
		XVA_SET_RTN(xvap, XAT_REPARSE);
	}
}
913 915
/*
 * Look up the znode for object obj_num, constructing the in-core
 * znode/vnode from the on-disk data if one does not already exist.
 *
 * Returns 0 with *zpp set on success; ENOENT if the object is unlinked
 * or its creation is not yet complete (zp_gen == 0); EINVAL if the
 * object's bonus buffer does not hold a znode; or the dmu_bonus_hold()
 * error.  May retry internally while a vnode is being reclaimed.
 */
int
zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
{
	dmu_object_info_t doi;
	dmu_buf_t *db;
	znode_t *zp;
	vnode_t *vp;
	int err, first = 1;

	*zpp = NULL;
again:
	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);

	err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (err);
	}

	/* Sanity-check that the bonus buffer really contains a znode. */
	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    doi.doi_bonus_size < sizeof (znode_phys_t)) {
		dmu_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (EINVAL);
	}

	zp = dmu_buf_get_user(db);
	if (zp != NULL) {
		mutex_enter(&zp->z_lock);

		/*
		 * Since we do immediate eviction of the z_dbuf, we
		 * should never find a dbuf with a znode that doesn't
		 * know about the dbuf.
		 */
		ASSERT3P(zp->z_dbuf, ==, db);
		ASSERT3U(zp->z_id, ==, obj_num);
		if (zp->z_unlinked) {
			err = ENOENT;
		} else {
			if ((vp = ZTOV(zp)) != NULL) {
				mutex_enter(vp->v_interlock);
				mutex_exit(&zp->z_lock);
				/* Take a vnode reference; retry on failure. */
				if (vget(vp, 0) != 0) {
					dmu_buf_rele(db, NULL);
					mutex_exit(vp->v_interlock);
					goto again;
				}
				mutex_enter(&zp->z_lock);
			} else {
				if (first) {
					ZFS_LOG(1, "dying znode detected (zp=%p)", zp);
					first = 0;
				}
				/*
				 * znode is dying so we can't reuse it, we must
				 * wait until destruction is completed.
				 */
				dmu_buf_rele(db, NULL);
				mutex_exit(&zp->z_lock);
				ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
				kpause("zcollide", 0, 1, NULL);
				goto again;
			}
			*zpp = zp;
			err = 0;
		}

		dmu_buf_rele(db, NULL);
		mutex_exit(&zp->z_lock);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (err);
	}

	/*
	 * Not found create new znode/vnode
	 * but only if file exists.
	 *
	 * There is a small window where zfs_vget() could
	 * find this object while a file create is still in
	 * progress. Since a gen number can never be zero
	 * we will check that to determine if its an allocated
	 * file.
	 */

	if (((znode_phys_t *)db->db_data)->zp_gen != 0) {
		zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size);
		*zpp = zp;
		err = 0;
	} else {
		dmu_buf_rele(db, NULL);
		err = ENOENT;
	}
	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
	return (err);
}
1011 1013
/*
 * Re-attach an existing in-core znode to its on-disk object state.
 *
 * Returns 0 on success; EINVAL if the object's bonus buffer no longer
 * holds a znode; EIO if the on-disk generation number does not match
 * zp->z_gen (i.e. the object now belongs to a different file); or the
 * dmu_bonus_hold() error.
 */
int
zfs_rezget(znode_t *zp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	dmu_object_info_t doi;
	dmu_buf_t *db;
	uint64_t obj_num = zp->z_id;
	int err;

	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);

	err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (err);
	}

	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    doi.doi_bonus_size < sizeof (znode_phys_t)) {
		dmu_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (EINVAL);
	}

	/* The object must still be the same file this znode remembers. */
	if (((znode_phys_t *)db->db_data)->zp_gen != zp->z_gen) {
		dmu_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (EIO);
	}

	/* Any cached ACL may be stale; drop it before re-attaching. */
	mutex_enter(&zp->z_acl_lock);
	if (zp->z_acl_cached) {
		zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = NULL;
	}
	mutex_exit(&zp->z_acl_lock);

	zfs_znode_dmu_init(zfsvfs, zp, db);
	zp->z_unlinked = (zp->z_phys->zp_links == 0);
	zp->z_blksz = doi.doi_data_block_size;

	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);

	return (0);
}
1058 1060
1059void 1061void
1060zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) 1062zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
1061{ 1063{
1062 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1064 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1063 objset_t *os = zfsvfs->z_os; 1065 objset_t *os = zfsvfs->z_os;
1064 uint64_t obj = zp->z_id; 1066 uint64_t obj = zp->z_id;
1065 uint64_t acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj; 1067 uint64_t acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj;
1066 1068
1067 ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); 1069 ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
1068 if (acl_obj) 1070 if (acl_obj)
1069 VERIFY(0 == dmu_object_free(os, acl_obj, tx)); 1071 VERIFY(0 == dmu_object_free(os, acl_obj, tx));
1070 VERIFY(0 == dmu_object_free(os, obj, tx)); 1072 VERIFY(0 == dmu_object_free(os, obj, tx));
1071 zfs_znode_dmu_fini(zp); 1073 zfs_znode_dmu_fini(zp);
1072 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); 1074 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
1073 zfs_znode_free(zp); 1075 zfs_znode_free(zp);
1074} 1076}
1075 1077
1076void 1078void
1077zfs_zinactive(znode_t *zp) 1079zfs_zinactive(znode_t *zp)
1078{ 1080{
1079 vnode_t *vp = ZTOV(zp); 1081 vnode_t *vp = ZTOV(zp);
1080 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1082 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1081 uint64_t z_id = zp->z_id; 1083 uint64_t z_id = zp->z_id;
1082 1084
1083 ASSERT(zp->z_dbuf && zp->z_phys); 1085 ASSERT(zp->z_dbuf && zp->z_phys);
1084 1086
1085 /* 1087 /*
1086 * Don't allow a zfs_zget() while were trying to release this znode 1088 * Don't allow a zfs_zget() while were trying to release this znode
1087 */ 1089 */
1088 ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); 1090 ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id);
1089 1091
1090 mutex_enter(&zp->z_lock); 1092 mutex_enter(&zp->z_lock);
1091 /* 1093 /*
1092 * If this was the last reference to a file with no links, 1094 * If this was the last reference to a file with no links,
1093 * remove the file from the file system. 1095 * remove the file from the file system.
1094 */ 1096 */
1095 if (zp->z_unlinked) { 1097 if (zp->z_unlinked) {
1096 mutex_exit(&zp->z_lock); 1098 mutex_exit(&zp->z_lock);
1097 ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 1099 ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
1098 zfs_rmnode(zp); 1100 zfs_rmnode(zp);
1099 return; 1101 return;
1100 } 1102 }
1101 1103
1102 mutex_exit(&zp->z_lock); 1104 mutex_exit(&zp->z_lock);
1103 zfs_znode_dmu_fini(zp); 1105 zfs_znode_dmu_fini(zp);
1104 ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 1106 ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
1105 zfs_znode_free(zp); 1107 zfs_znode_free(zp);
1106} 1108}
1107 1109
/*
 * Free an in-core znode: tear down its vnode linkage, remove it from
 * the per-filesystem znode list, release any cached ACL, return the
 * znode to its cache, and drop the VFS_HOLD taken in
 * zfs_znode_alloc().
 */
void
zfs_znode_free(znode_t *zp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	struct vnode *vp = ZTOV(zp);

	/* XXX Not all callers are from VOP_RECLAIM. What to do? */
	KASSERT(vp != NULL);
	mutex_enter(vp->v_interlock);		/* XXX Necessary? */
	genfs_node_destroy(vp);
	vp->v_data = NULL;
	mutex_exit(vp->v_interlock);

	dprintf("destroying znode %p\n", zp);
	//cpu_Debugger();
	/* Unhook from z_all_znodes; POINTER_INVALIDATE marks zp as dead. */
	mutex_enter(&zfsvfs->z_znodes_lock);
	POINTER_INVALIDATE(&zp->z_zfsvfs);
	list_remove(&zfsvfs->z_all_znodes, zp);
	mutex_exit(&zfsvfs->z_znodes_lock);

	if (zp->z_acl_cached) {
		zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = NULL;
	}

	kmem_cache_free(znode_cache, zp);

	/* Matches the VFS_HOLD in zfs_znode_alloc(). */
	VFS_RELE(zfsvfs->z_vfs);
}
1137 1139
/*
 * Stamp the current time into the znode fields selected by flag
 * (AT_ATIME, AT_MTIME, AT_CTIME).  Caller must hold zp->z_lock.  If a
 * transaction is supplied, the znode's dbuf is dirtied so the new
 * times reach disk; otherwise only z_atime_dirty is recorded for a
 * later push.
 */
void
zfs_time_stamper_locked(znode_t *zp, uint_t flag, dmu_tx_t *tx)
{
	timestruc_t now;

	ASSERT(MUTEX_HELD(&zp->z_lock));

	gethrestime(&now);

	if (tx) {
		dmu_buf_will_dirty(zp->z_dbuf, tx);
		zp->z_atime_dirty = 0;
		zp->z_seq++;
	} else {
		zp->z_atime_dirty = 1;
	}

	if (flag & AT_ATIME)
		ZFS_TIME_ENCODE(&now, zp->z_phys->zp_atime);

	if (flag & AT_MTIME) {
		ZFS_TIME_ENCODE(&now, zp->z_phys->zp_mtime);
		/* Content changed: set DOS-style ARCHIVE/AV_MODIFIED bits. */
		if (zp->z_zfsvfs->z_use_fuids)
			zp->z_phys->zp_flags |= (ZFS_ARCHIVE | ZFS_AV_MODIFIED);
	}

	if (flag & AT_CTIME) {
		ZFS_TIME_ENCODE(&now, zp->z_phys->zp_ctime);
		if (zp->z_zfsvfs->z_use_fuids)
			zp->z_phys->zp_flags |= ZFS_ARCHIVE;
	}
}
1170 1172
1171/* 1173/*
1172 * Update the requested znode timestamps with the current time. 1174 * Update the requested znode timestamps with the current time.
1173 * If we are in a transaction, then go ahead and mark the znode 1175 * If we are in a transaction, then go ahead and mark the znode
1174 * dirty in the transaction so the timestamps will go to disk. 1176 * dirty in the transaction so the timestamps will go to disk.
1175 * Otherwise, we will get pushed next time the znode is updated 1177 * Otherwise, we will get pushed next time the znode is updated
1176 * in a transaction, or when this znode eventually goes inactive. 1178 * in a transaction, or when this znode eventually goes inactive.
1177 * 1179 *
1178 * Why is this OK? 1180 * Why is this OK?
1179 * 1 - Only the ACCESS time is ever updated outside of a transaction. 1181 * 1 - Only the ACCESS time is ever updated outside of a transaction.
1180 * 2 - Multiple consecutive updates will be collapsed into a single 1182 * 2 - Multiple consecutive updates will be collapsed into a single
1181 * znode update by the transaction grouping semantics of the DMU. 1183 * znode update by the transaction grouping semantics of the DMU.
1182 */ 1184 */
/*
 * Locked wrapper around zfs_time_stamper_locked(): acquires z_lock for
 * the duration of the timestamp update.  See the block comment above
 * for why updating outside a transaction is acceptable.
 */
void
zfs_time_stamper(znode_t *zp, uint_t flag, dmu_tx_t *tx)
{
	mutex_enter(&zp->z_lock);
	zfs_time_stamper_locked(zp, flag, tx);
	mutex_exit(&zp->z_lock);
}
1190 1192
1191/* 1193/*
1192 * Grow the block size for a file. 1194 * Grow the block size for a file.
1193 * 1195 *
 * IN:	zp	- znode of file whose block size is to be grown.
1195 * size - requested block size 1197 * size - requested block size
1196 * tx - open transaction. 1198 * tx - open transaction.
1197 * 1199 *
1198 * NOTE: this function assumes that the znode is write locked. 1200 * NOTE: this function assumes that the znode is write locked.
1199 */ 1201 */
1200void 1202void
1201zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) 1203zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
1202{ 1204{
1203 int error; 1205 int error;
1204 u_longlong_t dummy; 1206 u_longlong_t dummy;
1205 1207
1206 if (size <= zp->z_blksz) 1208 if (size <= zp->z_blksz)
1207 return; 1209 return;
1208 /* 1210 /*
1209 * If the file size is already greater than the current blocksize, 1211 * If the file size is already greater than the current blocksize,
1210 * we will not grow. If there is more than one block in a file, 1212 * we will not grow. If there is more than one block in a file,
1211 * the blocksize cannot change. 1213 * the blocksize cannot change.
1212 */ 1214 */
1213 if (zp->z_blksz && zp->z_phys->zp_size > zp->z_blksz) 1215 if (zp->z_blksz && zp->z_phys->zp_size > zp->z_blksz)
1214 return; 1216 return;
1215 1217
1216 error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id, 1218 error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id,
1217 size, 0, tx); 1219 size, 0, tx);
1218 if (error == ENOTSUP) 1220 if (error == ENOTSUP)
1219 return; 1221 return;
1220 ASSERT3U(error, ==, 0); 1222 ASSERT3U(error, ==, 0);
1221 1223
1222 /* What blocksize did we actually get? */ 1224 /* What blocksize did we actually get? */
1223 dmu_object_size_from_db(zp->z_dbuf, &zp->z_blksz, &dummy); 1225 dmu_object_size_from_db(zp->z_dbuf, &zp->z_blksz, &dummy);
1224} 1226}
1225 1227
1226/* 1228/*
1227 * Increase the file length 1229 * Increase the file length
1228 * 1230 *
 * IN:	zp	- znode of file whose length is to be increased.
1230 * end - new end-of-file 1232 * end - new end-of-file
1231 * 1233 *
1232 * RETURN: 0 if success 1234 * RETURN: 0 if success
1233 * error code if failure 1235 * error code if failure
1234 */ 1236 */
static int
zfs_extend(znode_t *zp, uint64_t end)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	dmu_tx_t *tx;
	rl_t *rl;
	uint64_t newblksz;
	int error;

	/*
	 * We will change zp_size, lock the whole file.
	 */
	rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);

	/*
	 * Nothing to do if file already at desired length.
	 */
	if (end <= zp->z_phys->zp_size) {
		zfs_range_unlock(rl);
		return (0);
	}
top:
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, zp->z_id);
	if (end > zp->z_blksz &&
	    (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
		/*
		 * We are growing the file past the current block size.
		 * Pick the new block size: capped at SPA_MAXBLOCKSIZE for
		 * an already-oversized non-power-of-2 block, otherwise at
		 * the dataset's configured maximum.
		 */
		if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) {
			ASSERT(!ISP2(zp->z_blksz));
			newblksz = MIN(end, SPA_MAXBLOCKSIZE);
		} else {
			newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz);
		}
		dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
	} else {
		/* Block size is unchanged; nothing extra to hold. */
		newblksz = 0;
	}

	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		if (error == ERESTART) {
			/* txg full: wait it out and retry with a new tx. */
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		zfs_range_unlock(rl);
		return (error);
	}
	dmu_buf_will_dirty(zp->z_dbuf, tx);

	if (newblksz)
		zfs_grow_blocksize(zp, newblksz, tx);

	/* The new EOF is recorded inside the transaction. */
	zp->z_phys->zp_size = end;

	zfs_range_unlock(rl);

	dmu_tx_commit(tx);

	/* Tell UVM about the new vnode size (NetBSD). */
	uvm_vnp_setsize(ZTOV(zp), end);

	return (0);
}
1301 1303
1302/* 1304/*
1303 * Free space in a file. 1305 * Free space in a file.
1304 * 1306 *
1305 * IN: zp - znode of file to free data in. 1307 * IN: zp - znode of file to free data in.
1306 * off - start of section to free. 1308 * off - start of section to free.
1307 * len - length of section to free. 1309 * len - length of section to free.
1308 * 1310 *
1309 * RETURN: 0 if success 1311 * RETURN: 0 if success
1310 * error code if failure 1312 * error code if failure
1311 */ 1313 */
1312static int 1314static int
1313zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) 1315zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
1314{ 1316{
1315 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1317 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1316 rl_t *rl; 1318 rl_t *rl;
1317 int error; 1319 int error;
1318 1320
1319 /* 1321 /*
1320 * Lock the range being freed. 1322 * Lock the range being freed.
1321 */ 1323 */
1322 rl = zfs_range_lock(zp, off, len, RL_WRITER); 1324 rl = zfs_range_lock(zp, off, len, RL_WRITER);
1323 1325
1324 /* 1326 /*
1325 * Nothing to do if file already at desired length. 1327 * Nothing to do if file already at desired length.
1326 */ 1328 */
1327 if (off >= zp->z_phys->zp_size) { 1329 if (off >= zp->z_phys->zp_size) {
1328 zfs_range_unlock(rl); 1330 zfs_range_unlock(rl);
1329 return (0); 1331 return (0);
1330 } 1332 }
1331 1333
1332 if (off + len > zp->z_phys->zp_size) 1334 if (off + len > zp->z_phys->zp_size)
1333 len = zp->z_phys->zp_size - off; 1335 len = zp->z_phys->zp_size - off;
1334 1336
1335 error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); 1337 error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
1336 1338
1337 if (error == 0) { 1339 if (error == 0) {
1338 /* 1340 /*
1339 * In NetBSD we cannot free block in the middle of a file, 1341 * In NetBSD we cannot free block in the middle of a file,
1340 * but only at the end of a file. 1342 * but only at the end of a file.
1341 */ 1343 */
1342 uvm_vnp_setsize(ZTOV(zp), off); 1344 uvm_vnp_setsize(ZTOV(zp), off);
1343 } 1345 }
1344 1346
1345 zfs_range_unlock(rl); 1347 zfs_range_unlock(rl);
1346 1348
1347 return (error); 1349 return (error);
1348} 1350}
1349 1351
1350/* 1352/*
1351 * Truncate a file 1353 * Truncate a file
1352 * 1354 *
1353 * IN: zp - znode of file to free data in. 1355 * IN: zp - znode of file to free data in.
1354 * end - new end-of-file. 1356 * end - new end-of-file.
1355 * 1357 *
1356 * RETURN: 0 if success 1358 * RETURN: 0 if success
1357 * error code if failure 1359 * error code if failure
1358 */ 1360 */
static int
zfs_trunc(znode_t *zp, uint64_t end)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	vnode_t *vp = ZTOV(zp);
	dmu_tx_t *tx;
	rl_t *rl;
	int error;

	/*
	 * We will change zp_size, lock the whole file.
	 */
	rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);

	/*
	 * Nothing to do if file already at desired length.
	 */
	if (end >= zp->z_phys->zp_size) {
		zfs_range_unlock(rl);
		return (0);
	}

	/* Free everything from the new EOF onward (-1 == to end of file). */
	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, -1);
	if (error) {
		zfs_range_unlock(rl);
		return (error);
	}
top:
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, zp->z_id);
	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		if (error == ERESTART) {
			/* txg full: wait it out and retry with a new tx. */
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		zfs_range_unlock(rl);
		return (error);
	}
	dmu_buf_will_dirty(zp->z_dbuf, tx);

	/* Record the new EOF inside the transaction. */
	zp->z_phys->zp_size = end;

	dmu_tx_commit(tx);

	zfs_range_unlock(rl);

	/*
	 * Clear any mapped pages in the truncated region.  This has to
	 * happen outside of the transaction to avoid the possibility of
	 * a deadlock with someone trying to push a page that we are
	 * about to invalidate.
	 */

	uvm_vnp_setsize(vp, end);

	return (0);
}
1419 1421
1420/* 1422/*
1421 * Free space in a file 1423 * Free space in a file
1422 * 1424 *
1423 * IN: zp - znode of file to free data in. 1425 * IN: zp - znode of file to free data in.
1424 * off - start of range 1426 * off - start of range
1425 * len - end of range (0 => EOF) 1427 * len - end of range (0 => EOF)
1426 * flag - current file open mode flags. 1428 * flag - current file open mode flags.
1427 * log - TRUE if this action should be logged 1429 * log - TRUE if this action should be logged
1428 * 1430 *
1429 * RETURN: 0 if success 1431 * RETURN: 0 if success
1430 * error code if failure 1432 * error code if failure
1431 */ 1433 */
1432int 1434int
1433zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) 1435zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
1434{ 1436{
1435 vnode_t *vp = ZTOV(zp); 1437 vnode_t *vp = ZTOV(zp);
1436 dmu_tx_t *tx; 1438 dmu_tx_t *tx;
1437 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1439 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1438 zilog_t *zilog = zfsvfs->z_log; 1440 zilog_t *zilog = zfsvfs->z_log;
1439 int error; 1441 int error;
1440 1442
1441 if (off > zp->z_phys->zp_size) { 1443 if (off > zp->z_phys->zp_size) {
1442 error = zfs_extend(zp, off+len); 1444 error = zfs_extend(zp, off+len);
1443 if (error == 0 && log) 1445 if (error == 0 && log)
1444 goto log; 1446 goto log;
1445 else 1447 else
1446 return (error); 1448 return (error);
1447 } 1449 }
1448 1450
1449 if (len == 0) { 1451 if (len == 0) {
1450 error = zfs_trunc(zp, off); 1452 error = zfs_trunc(zp, off);
1451 } else { 1453 } else {
1452 if ((error = zfs_free_range(zp, off, len)) == 0 && 1454 if ((error = zfs_free_range(zp, off, len)) == 0 &&
1453 off + len > zp->z_phys->zp_size) 1455 off + len > zp->z_phys->zp_size)
1454 error = zfs_extend(zp, off+len); 1456 error = zfs_extend(zp, off+len);
1455 } 1457 }
1456 if (error || !log) 1458 if (error || !log)
1457 return (error); 1459 return (error);
1458log: 1460log:
1459 tx = dmu_tx_create(zfsvfs->z_os); 1461 tx = dmu_tx_create(zfsvfs->z_os);
1460 dmu_tx_hold_bonus(tx, zp->z_id); 1462 dmu_tx_hold_bonus(tx, zp->z_id);
1461 error = dmu_tx_assign(tx, TXG_NOWAIT); 1463 error = dmu_tx_assign(tx, TXG_NOWAIT);
1462 if (error) { 1464 if (error) {
1463 if (error == ERESTART) { 1465 if (error == ERESTART) {
1464 dmu_tx_wait(tx); 1466 dmu_tx_wait(tx);
1465 dmu_tx_abort(tx); 1467 dmu_tx_abort(tx);
1466 goto log; 1468 goto log;
1467 } 1469 }
1468 dmu_tx_abort(tx); 1470 dmu_tx_abort(tx);
1469 return (error); 1471 return (error);
1470 } 1472 }
1471 1473
1472 zfs_time_stamper(zp, CONTENT_MODIFIED, tx); 1474 zfs_time_stamper(zp, CONTENT_MODIFIED, tx);
1473 zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); 1475 zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
1474 1476
1475 dmu_tx_commit(tx); 1477 dmu_tx_commit(tx);
1476 return (0); 1478 return (0);
1477} 1479}
1478 1480
1479void 1481void
1480zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) 1482zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
1481{ 1483{
1482 zfsvfs_t zfsvfs; 1484 zfsvfs_t zfsvfs;
1483 uint64_t moid, obj, version; 1485 uint64_t moid, obj, version;
1484 uint64_t sense = ZFS_CASE_SENSITIVE; 1486 uint64_t sense = ZFS_CASE_SENSITIVE;
1485 uint64_t norm = 0; 1487 uint64_t norm = 0;
1486 nvpair_t *elem; 1488 nvpair_t *elem;
1487 int error; 1489 int error;
1488 int i; 1490 int i;
1489 znode_t *rootzp = NULL; 1491 znode_t *rootzp = NULL;
1490 vnode_t *vp; 1492 vnode_t *vp;
1491 vattr_t vattr; 1493 vattr_t vattr;
1492 znode_t *zp; 1494 znode_t *zp;
1493 zfs_acl_ids_t acl_ids; 1495 zfs_acl_ids_t acl_ids;
1494 1496
1495 /* 1497 /*
1496 * First attempt to create master node. 1498 * First attempt to create master node.
1497 */ 1499 */
1498 /* 1500 /*
1499 * In an empty objset, there are no blocks to read and thus 1501 * In an empty objset, there are no blocks to read and thus