| @@ -1,1499 +1,1501 @@ | | | @@ -1,1499 +1,1501 @@ |
1 | /* | | 1 | /* |
2 | * CDDL HEADER START | | 2 | * CDDL HEADER START |
3 | * | | 3 | * |
4 | * The contents of this file are subject to the terms of the | | 4 | * The contents of this file are subject to the terms of the |
5 | * Common Development and Distribution License (the "License"). | | 5 | * Common Development and Distribution License (the "License"). |
6 | * You may not use this file except in compliance with the License. | | 6 | * You may not use this file except in compliance with the License. |
7 | * | | 7 | * |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | | 8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
9 | * or http://www.opensolaris.org/os/licensing. | | 9 | * or http://www.opensolaris.org/os/licensing. |
10 | * See the License for the specific language governing permissions | | 10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. | | 11 | * and limitations under the License. |
12 | * | | 12 | * |
13 | * When distributing Covered Code, include this CDDL HEADER in each | | 13 | * When distributing Covered Code, include this CDDL HEADER in each |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | | 14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
15 | * If applicable, add the following below this CDDL HEADER, with the | | 15 | * If applicable, add the following below this CDDL HEADER, with the |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | | 16 | * fields enclosed by brackets "[]" replaced with your own identifying |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | | 17 | * information: Portions Copyright [yyyy] [name of copyright owner] |
18 | * | | 18 | * |
19 | * CDDL HEADER END | | 19 | * CDDL HEADER END |
20 | */ | | 20 | */ |
21 | /* | | 21 | /* |
22 | * Copyright 2009 Sun Microsystems, Inc. All rights reserved. | | 22 | * Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
23 | * Use is subject to license terms. | | 23 | * Use is subject to license terms. |
24 | */ | | 24 | */ |
25 | | | 25 | |
26 | /* Portions Copyright 2007 Jeremy Teo */ | | 26 | /* Portions Copyright 2007 Jeremy Teo */ |
27 | | | 27 | |
28 | #ifdef _KERNEL | | 28 | #ifdef _KERNEL |
29 | #include <sys/types.h> | | 29 | #include <sys/types.h> |
30 | #include <sys/param.h> | | 30 | #include <sys/param.h> |
31 | #include <sys/time.h> | | 31 | #include <sys/time.h> |
32 | #include <sys/systm.h> | | 32 | #include <sys/systm.h> |
33 | #include <sys/sysmacros.h> | | 33 | #include <sys/sysmacros.h> |
34 | #include <sys/resource.h> | | 34 | #include <sys/resource.h> |
35 | #include <sys/mntent.h> | | 35 | #include <sys/mntent.h> |
36 | #include <sys/u8_textprep.h> | | 36 | #include <sys/u8_textprep.h> |
37 | #include <sys/dsl_dataset.h> | | 37 | #include <sys/dsl_dataset.h> |
38 | #include <sys/vfs.h> | | 38 | #include <sys/vfs.h> |
39 | #include <sys/vnode.h> | | 39 | #include <sys/vnode.h> |
40 | #include <sys/file.h> | | 40 | #include <sys/file.h> |
41 | #include <sys/kmem.h> | | 41 | #include <sys/kmem.h> |
42 | #include <sys/errno.h> | | 42 | #include <sys/errno.h> |
43 | #include <sys/unistd.h> | | 43 | #include <sys/unistd.h> |
44 | #include <sys/atomic.h> | | 44 | #include <sys/atomic.h> |
45 | #include <sys/zfs_dir.h> | | 45 | #include <sys/zfs_dir.h> |
46 | #include <sys/zfs_acl.h> | | 46 | #include <sys/zfs_acl.h> |
47 | #include <sys/zfs_ioctl.h> | | 47 | #include <sys/zfs_ioctl.h> |
48 | #include <sys/zfs_rlock.h> | | 48 | #include <sys/zfs_rlock.h> |
49 | #include <sys/zfs_fuid.h> | | 49 | #include <sys/zfs_fuid.h> |
50 | #include <sys/fs/zfs.h> | | 50 | #include <sys/fs/zfs.h> |
51 | #include <sys/kidmap.h> | | 51 | #include <sys/kidmap.h> |
52 | #endif /* _KERNEL */ | | 52 | #endif /* _KERNEL */ |
53 | | | 53 | |
54 | #include <sys/dmu.h> | | 54 | #include <sys/dmu.h> |
55 | #include <sys/refcount.h> | | 55 | #include <sys/refcount.h> |
56 | #include <sys/stat.h> | | 56 | #include <sys/stat.h> |
57 | #include <sys/zap.h> | | 57 | #include <sys/zap.h> |
58 | #include <sys/zfs_znode.h> | | 58 | #include <sys/zfs_znode.h> |
59 | | | 59 | |
60 | #include "zfs_prop.h" | | 60 | #include "zfs_prop.h" |
61 | | | 61 | |
62 | #if defined(_KERNEL) && defined(__NetBSD__) | | 62 | #if defined(_KERNEL) && defined(__NetBSD__) |
63 | #include <miscfs/specfs/specdev.h> | | 63 | #include <miscfs/specfs/specdev.h> |
64 | static const struct genfs_ops zfs_genfsops = { | | 64 | static const struct genfs_ops zfs_genfsops = { |
65 | .gop_write = genfs_compat_gop_write, | | 65 | .gop_write = genfs_compat_gop_write, |
66 | }; | | 66 | }; |
67 | | | 67 | |
68 | #endif | | 68 | #endif |
69 | | | 69 | |
70 | extern int (**zfs_vnodeop_p)(void *); | | 70 | extern int (**zfs_vnodeop_p)(void *); |
71 | extern int (**zfs_fifoop_p)(void *); | | 71 | extern int (**zfs_fifoop_p)(void *); |
72 | extern int (**zfs_specop_p)(void *); | | 72 | extern int (**zfs_specop_p)(void *); |
73 | | | 73 | |
74 | /* | | 74 | /* |
75 | * Define ZNODE_STATS to turn on statistic gathering. By default, it is only | | 75 | * Define ZNODE_STATS to turn on statistic gathering. By default, it is only |
76 | * turned on when DEBUG is also defined. | | 76 | * turned on when DEBUG is also defined. |
77 | */ | | 77 | */ |
78 | #ifdef DEBUG | | 78 | #ifdef DEBUG |
79 | #define ZNODE_STATS | | 79 | #define ZNODE_STATS |
80 | #endif /* DEBUG */ | | 80 | #endif /* DEBUG */ |
81 | | | 81 | |
82 | #ifdef ZNODE_STATS | | 82 | #ifdef ZNODE_STATS |
83 | #define ZNODE_STAT_ADD(stat) ((stat)++) | | 83 | #define ZNODE_STAT_ADD(stat) ((stat)++) |
84 | #else | | 84 | #else |
85 | #define ZNODE_STAT_ADD(stat) /* nothing */ | | 85 | #define ZNODE_STAT_ADD(stat) /* nothing */ |
86 | #endif /* ZNODE_STATS */ | | 86 | #endif /* ZNODE_STATS */ |
87 | | | 87 | |
88 | #define POINTER_IS_VALID(p) (!((uintptr_t)(p) & 0x3)) | | 88 | #define POINTER_IS_VALID(p) (!((uintptr_t)(p) & 0x3)) |
89 | #define POINTER_INVALIDATE(pp) (*(pp) = (void *)((uintptr_t)(*(pp)) | 0x1)) | | 89 | #define POINTER_INVALIDATE(pp) (*(pp) = (void *)((uintptr_t)(*(pp)) | 0x1)) |
90 | | | 90 | |
91 | /* | | 91 | /* |
92 | * Functions needed for userland (i.e., libzpool) are not put under | | 92 | * Functions needed for userland (i.e., libzpool) are not put under |
93 | * #ifdef _KERNEL; the rest of the functions have dependencies | | 93 | * #ifdef _KERNEL; the rest of the functions have dependencies |
94 | * (such as VFS logic) that will not compile easily in userland. | | 94 | * (such as VFS logic) that will not compile easily in userland. |
95 | */ | | 95 | */ |
96 | #ifdef _KERNEL | | 96 | #ifdef _KERNEL |
97 | /* | | 97 | /* |
98 | * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to | | 98 | * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to |
99 | * be freed before it can be safely accessed. | | 99 | * be freed before it can be safely accessed. |
100 | */ | | 100 | */ |
101 | krwlock_t zfsvfs_lock; | | 101 | krwlock_t zfsvfs_lock; |
102 | | | 102 | |
103 | static kmem_cache_t *znode_cache = NULL; | | 103 | static kmem_cache_t *znode_cache = NULL; |
104 | | | 104 | |
105 | /*ARGSUSED*/ | | 105 | /*ARGSUSED*/ |
106 | static void | | 106 | static void |
107 | znode_evict_error(dmu_buf_t *dbuf, void *user_ptr) | | 107 | znode_evict_error(dmu_buf_t *dbuf, void *user_ptr) |
108 | { | | 108 | { |
109 | /* | | 109 | /* |
110 | * We should never drop all dbuf refs without first clearing | | 110 | * We should never drop all dbuf refs without first clearing |
111 | * the eviction callback. | | 111 | * the eviction callback. |
112 | */ | | 112 | */ |
113 | panic("evicting znode %p\n", user_ptr); | | 113 | panic("evicting znode %p\n", user_ptr); |
114 | } | | 114 | } |
115 | | | 115 | |
116 | /*ARGSUSED*/ | | 116 | /*ARGSUSED*/ |
117 | static int | | 117 | static int |
118 | zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) | | 118 | zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) |
119 | { | | 119 | { |
120 | znode_t *zp = arg; | | 120 | znode_t *zp = arg; |
121 | | | 121 | |
122 | ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); | | 122 | ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); |
123 | | | 123 | |
124 | list_link_init(&zp->z_link_node); | | 124 | list_link_init(&zp->z_link_node); |
125 | | | 125 | |
126 | mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL); | | 126 | mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL); |
127 | rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL); | | 127 | rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL); |
128 | rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL); | | 128 | rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL); |
129 | mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); | | 129 | mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); |
130 | | | 130 | |
131 | mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL); | | 131 | mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL); |
132 | avl_create(&zp->z_range_avl, zfs_range_compare, | | 132 | avl_create(&zp->z_range_avl, zfs_range_compare, |
133 | sizeof (rl_t), offsetof(rl_t, r_node)); | | 133 | sizeof (rl_t), offsetof(rl_t, r_node)); |
134 | | | 134 | |
135 | zp->z_dbuf = NULL; | | 135 | zp->z_dbuf = NULL; |
136 | zp->z_dirlocks = NULL; | | 136 | zp->z_dirlocks = NULL; |
137 | zp->z_acl_cached = NULL; | | 137 | zp->z_acl_cached = NULL; |
138 | return (0); | | 138 | return (0); |
139 | } | | 139 | } |
140 | | | 140 | |
141 | /*ARGSUSED*/ | | 141 | /*ARGSUSED*/ |
142 | static void | | 142 | static void |
143 | zfs_znode_cache_destructor(void *buf, void *arg) | | 143 | zfs_znode_cache_destructor(void *buf, void *arg) |
144 | { | | 144 | { |
145 | znode_t *zp = arg; | | 145 | znode_t *zp = arg; |
146 | | | 146 | |
147 | ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); | | 147 | ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); |
148 | ASSERT(ZTOV(zp) == NULL); | | 148 | ASSERT(ZTOV(zp) == NULL); |
149 | | | 149 | |
150 | ASSERT(!list_link_active(&zp->z_link_node)); | | 150 | ASSERT(!list_link_active(&zp->z_link_node)); |
151 | mutex_destroy(&zp->z_lock); | | 151 | mutex_destroy(&zp->z_lock); |
152 | rw_destroy(&zp->z_parent_lock); | | 152 | rw_destroy(&zp->z_parent_lock); |
153 | rw_destroy(&zp->z_name_lock); | | 153 | rw_destroy(&zp->z_name_lock); |
154 | mutex_destroy(&zp->z_acl_lock); | | 154 | mutex_destroy(&zp->z_acl_lock); |
155 | avl_destroy(&zp->z_range_avl); | | 155 | avl_destroy(&zp->z_range_avl); |
156 | mutex_destroy(&zp->z_range_lock); | | 156 | mutex_destroy(&zp->z_range_lock); |
157 | | | 157 | |
158 | ASSERT(zp->z_dbuf == NULL); | | 158 | ASSERT(zp->z_dbuf == NULL); |
159 | ASSERT(zp->z_dirlocks == NULL); | | 159 | ASSERT(zp->z_dirlocks == NULL); |
160 | ASSERT(zp->z_acl_cached == NULL); | | 160 | ASSERT(zp->z_acl_cached == NULL); |
161 | } | | 161 | } |
162 | | | 162 | |
163 | #ifdef ZNODE_STATS | | 163 | #ifdef ZNODE_STATS |
164 | static struct { | | 164 | static struct { |
165 | uint64_t zms_zfsvfs_invalid; | | 165 | uint64_t zms_zfsvfs_invalid; |
166 | uint64_t zms_zfsvfs_recheck1; | | 166 | uint64_t zms_zfsvfs_recheck1; |
167 | uint64_t zms_zfsvfs_unmounted; | | 167 | uint64_t zms_zfsvfs_unmounted; |
168 | uint64_t zms_zfsvfs_recheck2; | | 168 | uint64_t zms_zfsvfs_recheck2; |
169 | uint64_t zms_obj_held; | | 169 | uint64_t zms_obj_held; |
170 | uint64_t zms_vnode_locked; | | 170 | uint64_t zms_vnode_locked; |
171 | uint64_t zms_not_only_dnlc; | | 171 | uint64_t zms_not_only_dnlc; |
172 | } znode_move_stats; | | 172 | } znode_move_stats; |
173 | #endif /* ZNODE_STATS */ | | 173 | #endif /* ZNODE_STATS */ |
174 | | | 174 | |
175 | static void | | 175 | static void |
176 | zfs_znode_move_impl(znode_t *ozp, znode_t *nzp) | | 176 | zfs_znode_move_impl(znode_t *ozp, znode_t *nzp) |
177 | { | | 177 | { |
178 | vnode_t *vp; | | 178 | vnode_t *vp; |
179 | | | 179 | |
180 | /* Copy fields. */ | | 180 | /* Copy fields. */ |
181 | nzp->z_zfsvfs = ozp->z_zfsvfs; | | 181 | nzp->z_zfsvfs = ozp->z_zfsvfs; |
182 | | | 182 | |
183 | /* Swap vnodes. */ | | 183 | /* Swap vnodes. */ |
184 | vp = nzp->z_vnode; | | 184 | vp = nzp->z_vnode; |
185 | nzp->z_vnode = ozp->z_vnode; | | 185 | nzp->z_vnode = ozp->z_vnode; |
186 | ozp->z_vnode = vp; /* let destructor free the overwritten vnode */ | | 186 | ozp->z_vnode = vp; /* let destructor free the overwritten vnode */ |
187 | ZTOV(ozp)->v_data = ozp; | | 187 | ZTOV(ozp)->v_data = ozp; |
188 | ZTOV(nzp)->v_data = nzp; | | 188 | ZTOV(nzp)->v_data = nzp; |
189 | | | 189 | |
190 | nzp->z_id = ozp->z_id; | | 190 | nzp->z_id = ozp->z_id; |
191 | ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */ | | 191 | ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */ |
192 | ASSERT(avl_numnodes(&ozp->z_range_avl) == 0); | | 192 | ASSERT(avl_numnodes(&ozp->z_range_avl) == 0); |
193 | nzp->z_unlinked = ozp->z_unlinked; | | 193 | nzp->z_unlinked = ozp->z_unlinked; |
194 | nzp->z_atime_dirty = ozp->z_atime_dirty; | | 194 | nzp->z_atime_dirty = ozp->z_atime_dirty; |
195 | nzp->z_zn_prefetch = ozp->z_zn_prefetch; | | 195 | nzp->z_zn_prefetch = ozp->z_zn_prefetch; |
196 | nzp->z_blksz = ozp->z_blksz; | | 196 | nzp->z_blksz = ozp->z_blksz; |
197 | nzp->z_seq = ozp->z_seq; | | 197 | nzp->z_seq = ozp->z_seq; |
198 | nzp->z_mapcnt = ozp->z_mapcnt; | | 198 | nzp->z_mapcnt = ozp->z_mapcnt; |
199 | nzp->z_last_itx = ozp->z_last_itx; | | 199 | nzp->z_last_itx = ozp->z_last_itx; |
200 | nzp->z_gen = ozp->z_gen; | | 200 | nzp->z_gen = ozp->z_gen; |
201 | nzp->z_sync_cnt = ozp->z_sync_cnt; | | 201 | nzp->z_sync_cnt = ozp->z_sync_cnt; |
202 | nzp->z_phys = ozp->z_phys; | | 202 | nzp->z_phys = ozp->z_phys; |
203 | nzp->z_dbuf = ozp->z_dbuf; | | 203 | nzp->z_dbuf = ozp->z_dbuf; |
204 | | | 204 | |
205 | /* | | 205 | /* |
206 | * Since this is just an idle znode and kmem is already dealing with | | 206 | * Since this is just an idle znode and kmem is already dealing with |
207 | * memory pressure, release any cached ACL. | | 207 | * memory pressure, release any cached ACL. |
208 | */ | | 208 | */ |
209 | if (ozp->z_acl_cached) { | | 209 | if (ozp->z_acl_cached) { |
210 | zfs_acl_free(ozp->z_acl_cached); | | 210 | zfs_acl_free(ozp->z_acl_cached); |
211 | ozp->z_acl_cached = NULL; | | 211 | ozp->z_acl_cached = NULL; |
212 | } | | 212 | } |
213 | | | 213 | |
214 | /* Update back pointers. */ | | 214 | /* Update back pointers. */ |
215 | (void) dmu_buf_update_user(nzp->z_dbuf, ozp, nzp, &nzp->z_phys, | | 215 | (void) dmu_buf_update_user(nzp->z_dbuf, ozp, nzp, &nzp->z_phys, |
216 | znode_evict_error); | | 216 | znode_evict_error); |
217 | | | 217 | |
218 | /* | | 218 | /* |
219 | * Invalidate the original znode by clearing fields that provide a | | 219 | * Invalidate the original znode by clearing fields that provide a |
220 | * pointer back to the znode. Set the low bit of the vfs pointer to | | 220 | * pointer back to the znode. Set the low bit of the vfs pointer to |
221 | * ensure that zfs_znode_move() recognizes the znode as invalid in any | | 221 | * ensure that zfs_znode_move() recognizes the znode as invalid in any |
222 | * subsequent callback. | | 222 | * subsequent callback. |
223 | */ | | 223 | */ |
224 | ozp->z_dbuf = NULL; | | 224 | ozp->z_dbuf = NULL; |
225 | POINTER_INVALIDATE(&ozp->z_zfsvfs); | | 225 | POINTER_INVALIDATE(&ozp->z_zfsvfs); |
226 | } | | 226 | } |
227 | | | 227 | |
228 | #ifndef __NetBSD__ | | 228 | #ifndef __NetBSD__ |
229 | /*ARGSUSED*/ | | 229 | /*ARGSUSED*/ |
230 | static kmem_cbrc_t | | 230 | static kmem_cbrc_t |
231 | zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg) | | 231 | zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg) |
232 | { | | 232 | { |
233 | znode_t *ozp = buf, *nzp = newbuf; | | 233 | znode_t *ozp = buf, *nzp = newbuf; |
234 | zfsvfs_t *zfsvfs; | | 234 | zfsvfs_t *zfsvfs; |
235 | vnode_t *vp; | | 235 | vnode_t *vp; |
236 | | | 236 | |
237 | /* | | 237 | /* |
238 | * The znode is on the file system's list of known znodes if the vfs | | 238 | * The znode is on the file system's list of known znodes if the vfs |
239 | * pointer is valid. We set the low bit of the vfs pointer when freeing | | 239 | * pointer is valid. We set the low bit of the vfs pointer when freeing |
240 | * the znode to invalidate it, and the memory patterns written by kmem | | 240 | * the znode to invalidate it, and the memory patterns written by kmem |
241 | * (baddcafe and deadbeef) set at least one of the two low bits. A newly | | 241 | * (baddcafe and deadbeef) set at least one of the two low bits. A newly |
242 | * created znode sets the vfs pointer last of all to indicate that the | | 242 | * created znode sets the vfs pointer last of all to indicate that the |
243 | * znode is known and in a valid state to be moved by this function. | | 243 | * znode is known and in a valid state to be moved by this function. |
244 | */ | | 244 | */ |
245 | zfsvfs = ozp->z_zfsvfs; | | 245 | zfsvfs = ozp->z_zfsvfs; |
246 | if (!POINTER_IS_VALID(zfsvfs)) { | | 246 | if (!POINTER_IS_VALID(zfsvfs)) { |
247 | ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid); | | 247 | ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid); |
248 | return (KMEM_CBRC_DONT_KNOW); | | 248 | return (KMEM_CBRC_DONT_KNOW); |
249 | } | | 249 | } |
250 | | | 250 | |
251 | /* | | 251 | /* |
252 | * Close a small window in which it's possible that the filesystem could | | 252 | * Close a small window in which it's possible that the filesystem could |
253 | * be unmounted and freed, and zfsvfs, though valid in the previous | | 253 | * be unmounted and freed, and zfsvfs, though valid in the previous |
254 | * statement, could point to unrelated memory by the time we try to | | 254 | * statement, could point to unrelated memory by the time we try to |
255 | * prevent the filesystem from being unmounted. | | 255 | * prevent the filesystem from being unmounted. |
256 | */ | | 256 | */ |
257 | rw_enter(&zfsvfs_lock, RW_WRITER); | | 257 | rw_enter(&zfsvfs_lock, RW_WRITER); |
258 | if (zfsvfs != ozp->z_zfsvfs) { | | 258 | if (zfsvfs != ozp->z_zfsvfs) { |
259 | rw_exit(&zfsvfs_lock); | | 259 | rw_exit(&zfsvfs_lock); |
260 | ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1); | | 260 | ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1); |
261 | return (KMEM_CBRC_DONT_KNOW); | | 261 | return (KMEM_CBRC_DONT_KNOW); |
262 | } | | 262 | } |
263 | | | 263 | |
264 | /* | | 264 | /* |
265 | * If the znode is still valid, then so is the file system. We know that | | 265 | * If the znode is still valid, then so is the file system. We know that |
266 | * no valid file system can be freed while we hold zfsvfs_lock, so we | | 266 | * no valid file system can be freed while we hold zfsvfs_lock, so we |
267 | * can safely ensure that the filesystem is not and will not be | | 267 | * can safely ensure that the filesystem is not and will not be |
268 | * unmounted. The next statement is equivalent to ZFS_ENTER(). | | 268 | * unmounted. The next statement is equivalent to ZFS_ENTER(). |
269 | */ | | 269 | */ |
270 | rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG); | | 270 | rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG); |
271 | if (zfsvfs->z_unmounted) { | | 271 | if (zfsvfs->z_unmounted) { |
272 | ZFS_EXIT(zfsvfs); | | 272 | ZFS_EXIT(zfsvfs); |
273 | rw_exit(&zfsvfs_lock); | | 273 | rw_exit(&zfsvfs_lock); |
274 | ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted); | | 274 | ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted); |
275 | return (KMEM_CBRC_DONT_KNOW); | | 275 | return (KMEM_CBRC_DONT_KNOW); |
276 | } | | 276 | } |
277 | rw_exit(&zfsvfs_lock); | | 277 | rw_exit(&zfsvfs_lock); |
278 | | | 278 | |
279 | mutex_enter(&zfsvfs->z_znodes_lock); | | 279 | mutex_enter(&zfsvfs->z_znodes_lock); |
280 | /* | | 280 | /* |
281 | * Recheck the vfs pointer in case the znode was removed just before | | 281 | * Recheck the vfs pointer in case the znode was removed just before |
282 | * acquiring the lock. | | 282 | * acquiring the lock. |
283 | */ | | 283 | */ |
284 | if (zfsvfs != ozp->z_zfsvfs) { | | 284 | if (zfsvfs != ozp->z_zfsvfs) { |
285 | mutex_exit(&zfsvfs->z_znodes_lock); | | 285 | mutex_exit(&zfsvfs->z_znodes_lock); |
286 | ZFS_EXIT(zfsvfs); | | 286 | ZFS_EXIT(zfsvfs); |
287 | ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2); | | 287 | ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2); |
288 | return (KMEM_CBRC_DONT_KNOW); | | 288 | return (KMEM_CBRC_DONT_KNOW); |
289 | } | | 289 | } |
290 | | | 290 | |
291 | /* | | 291 | /* |
292 | * At this point we know that as long as we hold z_znodes_lock, the | | 292 | * At this point we know that as long as we hold z_znodes_lock, the |
293 | * znode cannot be freed and fields within the znode can be safely | | 293 | * znode cannot be freed and fields within the znode can be safely |
294 | * accessed. Now, prevent a race with zfs_zget(). | | 294 | * accessed. Now, prevent a race with zfs_zget(). |
295 | */ | | 295 | */ |
296 | if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) { | | 296 | if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) { |
297 | mutex_exit(&zfsvfs->z_znodes_lock); | | 297 | mutex_exit(&zfsvfs->z_znodes_lock); |
298 | ZFS_EXIT(zfsvfs); | | 298 | ZFS_EXIT(zfsvfs); |
299 | ZNODE_STAT_ADD(znode_move_stats.zms_obj_held); | | 299 | ZNODE_STAT_ADD(znode_move_stats.zms_obj_held); |
300 | return (KMEM_CBRC_LATER); | | 300 | return (KMEM_CBRC_LATER); |
301 | } | | 301 | } |
302 | | | 302 | |
303 | vp = ZTOV(ozp); | | 303 | vp = ZTOV(ozp); |
304 | if (mutex_tryenter(&vp->v_lock) == 0) { | | 304 | if (mutex_tryenter(&vp->v_lock) == 0) { |
305 | ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); | | 305 | ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); |
306 | mutex_exit(&zfsvfs->z_znodes_lock); | | 306 | mutex_exit(&zfsvfs->z_znodes_lock); |
307 | ZFS_EXIT(zfsvfs); | | 307 | ZFS_EXIT(zfsvfs); |
308 | ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked); | | 308 | ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked); |
309 | return (KMEM_CBRC_LATER); | | 309 | return (KMEM_CBRC_LATER); |
310 | } | | 310 | } |
311 | | | 311 | |
312 | /* Only move znodes that are referenced _only_ by the DNLC. */ | | 312 | /* Only move znodes that are referenced _only_ by the DNLC. */ |
313 | if (vp->v_count != 1 || !vn_in_dnlc(vp)) { | | 313 | if (vp->v_count != 1 || !vn_in_dnlc(vp)) { |
314 | mutex_exit(&vp->v_lock); | | 314 | mutex_exit(&vp->v_lock); |
315 | ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); | | 315 | ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); |
316 | mutex_exit(&zfsvfs->z_znodes_lock); | | 316 | mutex_exit(&zfsvfs->z_znodes_lock); |
317 | ZFS_EXIT(zfsvfs); | | 317 | ZFS_EXIT(zfsvfs); |
318 | ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc); | | 318 | ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc); |
319 | return (KMEM_CBRC_LATER); | | 319 | return (KMEM_CBRC_LATER); |
320 | } | | 320 | } |
321 | | | 321 | |
322 | /* | | 322 | /* |
323 | * The znode is known and in a valid state to move. We're holding the | | 323 | * The znode is known and in a valid state to move. We're holding the |
324 | * locks needed to execute the critical section. | | 324 | * locks needed to execute the critical section. |
325 | */ | | 325 | */ |
326 | zfs_znode_move_impl(ozp, nzp); | | 326 | zfs_znode_move_impl(ozp, nzp); |
327 | mutex_exit(&vp->v_lock); | | 327 | mutex_exit(&vp->v_lock); |
328 | ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); | | 328 | ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); |
329 | | | 329 | |
330 | list_link_replace(&ozp->z_link_node, &nzp->z_link_node); | | 330 | list_link_replace(&ozp->z_link_node, &nzp->z_link_node); |
331 | mutex_exit(&zfsvfs->z_znodes_lock); | | 331 | mutex_exit(&zfsvfs->z_znodes_lock); |
332 | ZFS_EXIT(zfsvfs); | | 332 | ZFS_EXIT(zfsvfs); |
333 | | | 333 | |
334 | return (KMEM_CBRC_YES); | | 334 | return (KMEM_CBRC_YES); |
335 | } | | 335 | } |
336 | #endif /* !__NetBSD__ */ | | 336 | #endif /* !__NetBSD__ */ |
337 | | | 337 | |
338 | void | | 338 | void |
339 | zfs_znode_init(void) | | 339 | zfs_znode_init(void) |
340 | { | | 340 | { |
341 | /* | | 341 | /* |
342 | * Initialize zcache | | 342 | * Initialize zcache |
343 | */ | | 343 | */ |
344 | rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL); | | 344 | rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL); |
345 | ASSERT(znode_cache == NULL); | | 345 | ASSERT(znode_cache == NULL); |
346 | znode_cache = kmem_cache_create("zfs_znode_cache", | | 346 | znode_cache = kmem_cache_create("zfs_znode_cache", |
347 | sizeof (znode_t), 0, zfs_znode_cache_constructor, | | 347 | sizeof (znode_t), 0, zfs_znode_cache_constructor, |
348 | zfs_znode_cache_destructor, NULL, NULL, NULL, 0); | | 348 | zfs_znode_cache_destructor, NULL, NULL, NULL, 0); |
349 | } | | 349 | } |
350 | | | 350 | |
351 | void | | 351 | void |
352 | zfs_znode_fini(void) | | 352 | zfs_znode_fini(void) |
353 | { | | 353 | { |
354 | | | 354 | |
355 | /* | | 355 | /* |
356 | * Cleanup zcache | | 356 | * Cleanup zcache |
357 | */ | | 357 | */ |
358 | if (znode_cache) | | 358 | if (znode_cache) |
359 | kmem_cache_destroy(znode_cache); | | 359 | kmem_cache_destroy(znode_cache); |
360 | znode_cache = NULL; | | 360 | znode_cache = NULL; |
361 | rw_destroy(&zfsvfs_lock); | | 361 | rw_destroy(&zfsvfs_lock); |
362 | } | | 362 | } |
363 | | | 363 | |
364 | #ifndef __NetBSD__ | | 364 | #ifndef __NetBSD__ |
365 | struct vnodeops *zfs_dvnodeops; | | 365 | struct vnodeops *zfs_dvnodeops; |
366 | struct vnodeops *zfs_fvnodeops; | | 366 | struct vnodeops *zfs_fvnodeops; |
367 | struct vnodeops *zfs_symvnodeops; | | 367 | struct vnodeops *zfs_symvnodeops; |
368 | struct vnodeops *zfs_xdvnodeops; | | 368 | struct vnodeops *zfs_xdvnodeops; |
369 | struct vnodeops *zfs_evnodeops; | | 369 | struct vnodeops *zfs_evnodeops; |
370 | struct vnodeops *zfs_sharevnodeops; | | 370 | struct vnodeops *zfs_sharevnodeops; |
371 | #endif | | 371 | #endif |
372 | | | 372 | |
373 | void | | 373 | void |
374 | zfs_remove_op_tables() | | 374 | zfs_remove_op_tables() |
375 | { | | 375 | { |
376 | #ifndef __NetBSD__ | | 376 | #ifndef __NetBSD__ |
377 | /* | | 377 | /* |
378 | * Remove vfs ops | | 378 | * Remove vfs ops |
379 | */ | | 379 | */ |
380 | ASSERT(zfsfstype); | | 380 | ASSERT(zfsfstype); |
381 | (void) vfs_freevfsops_by_type(zfsfstype); | | 381 | (void) vfs_freevfsops_by_type(zfsfstype); |
382 | zfsfstype = 0; | | 382 | zfsfstype = 0; |
383 | | | 383 | |
384 | /* | | 384 | /* |
385 | * Remove vnode ops | | 385 | * Remove vnode ops |
386 | */ | | 386 | */ |
387 | if (zfs_dvnodeops) | | 387 | if (zfs_dvnodeops) |
388 | vn_freevnodeops(zfs_dvnodeops); | | 388 | vn_freevnodeops(zfs_dvnodeops); |
389 | if (zfs_fvnodeops) | | 389 | if (zfs_fvnodeops) |
390 | vn_freevnodeops(zfs_fvnodeops); | | 390 | vn_freevnodeops(zfs_fvnodeops); |
391 | if (zfs_symvnodeops) | | 391 | if (zfs_symvnodeops) |
392 | vn_freevnodeops(zfs_symvnodeops); | | 392 | vn_freevnodeops(zfs_symvnodeops); |
393 | if (zfs_xdvnodeops) | | 393 | if (zfs_xdvnodeops) |
394 | vn_freevnodeops(zfs_xdvnodeops); | | 394 | vn_freevnodeops(zfs_xdvnodeops); |
395 | if (zfs_evnodeops) | | 395 | if (zfs_evnodeops) |
396 | vn_freevnodeops(zfs_evnodeops); | | 396 | vn_freevnodeops(zfs_evnodeops); |
397 | if (zfs_sharevnodeops) | | 397 | if (zfs_sharevnodeops) |
398 | vn_freevnodeops(zfs_sharevnodeops); | | 398 | vn_freevnodeops(zfs_sharevnodeops); |
399 | | | 399 | |
400 | zfs_dvnodeops = NULL; | | 400 | zfs_dvnodeops = NULL; |
401 | zfs_fvnodeops = NULL; | | 401 | zfs_fvnodeops = NULL; |
402 | zfs_symvnodeops = NULL; | | 402 | zfs_symvnodeops = NULL; |
403 | zfs_xdvnodeops = NULL; | | 403 | zfs_xdvnodeops = NULL; |
404 | zfs_evnodeops = NULL; | | 404 | zfs_evnodeops = NULL; |
405 | zfs_sharevnodeops = NULL; | | 405 | zfs_sharevnodeops = NULL; |
406 | #endif | | 406 | #endif |
407 | } | | 407 | } |
408 | | | 408 | |
409 | #ifndef __NetBSD__ | | 409 | #ifndef __NetBSD__ |
410 | extern const fs_operation_def_t zfs_dvnodeops_template[]; | | 410 | extern const fs_operation_def_t zfs_dvnodeops_template[]; |
411 | extern const fs_operation_def_t zfs_fvnodeops_template[]; | | 411 | extern const fs_operation_def_t zfs_fvnodeops_template[]; |
412 | extern const fs_operation_def_t zfs_xdvnodeops_template[]; | | 412 | extern const fs_operation_def_t zfs_xdvnodeops_template[]; |
413 | extern const fs_operation_def_t zfs_symvnodeops_template[]; | | 413 | extern const fs_operation_def_t zfs_symvnodeops_template[]; |
414 | extern const fs_operation_def_t zfs_evnodeops_template[]; | | 414 | extern const fs_operation_def_t zfs_evnodeops_template[]; |
415 | extern const fs_operation_def_t zfs_sharevnodeops_template[]; | | 415 | extern const fs_operation_def_t zfs_sharevnodeops_template[]; |
416 | #endif | | 416 | #endif |
417 | | | 417 | |
418 | int | | 418 | int |
419 | zfs_create_op_tables() | | 419 | zfs_create_op_tables() |
420 | { | | 420 | { |
421 | #ifndef __NetBSD__ | | 421 | #ifndef __NetBSD__ |
422 | int error; | | 422 | int error; |
423 | | | 423 | |
424 | /* | | 424 | /* |
425 | * zfs_dvnodeops can be set if mod_remove() calls mod_installfs() | | 425 | * zfs_dvnodeops can be set if mod_remove() calls mod_installfs() |
426 | * due to a failure to remove the 2nd modlinkage (zfs_modldrv). | | 426 | * due to a failure to remove the 2nd modlinkage (zfs_modldrv). |
427 | * In this case we just return as the ops vectors are already set up. | | 427 | * In this case we just return as the ops vectors are already set up. |
428 | */ | | 428 | */ |
429 | if (zfs_dvnodeops) | | 429 | if (zfs_dvnodeops) |
430 | return (0); | | 430 | return (0); |
431 | | | 431 | |
432 | error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template, | | 432 | error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template, |
433 | &zfs_dvnodeops); | | 433 | &zfs_dvnodeops); |
434 | if (error) | | 434 | if (error) |
435 | return (error); | | 435 | return (error); |
436 | | | 436 | |
437 | error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template, | | 437 | error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template, |
438 | &zfs_fvnodeops); | | 438 | &zfs_fvnodeops); |
439 | if (error) | | 439 | if (error) |
440 | return (error); | | 440 | return (error); |
441 | | | 441 | |
442 | error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template, | | 442 | error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template, |
443 | &zfs_symvnodeops); | | 443 | &zfs_symvnodeops); |
444 | if (error) | | 444 | if (error) |
445 | return (error); | | 445 | return (error); |
446 | | | 446 | |
447 | error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template, | | 447 | error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template, |
448 | &zfs_xdvnodeops); | | 448 | &zfs_xdvnodeops); |
449 | if (error) | | 449 | if (error) |
450 | return (error); | | 450 | return (error); |
451 | | | 451 | |
452 | error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template, | | 452 | error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template, |
453 | &zfs_evnodeops); | | 453 | &zfs_evnodeops); |
454 | if (error) | | 454 | if (error) |
455 | return (error); | | 455 | return (error); |
456 | | | 456 | |
457 | error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template, | | 457 | error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template, |
458 | &zfs_sharevnodeops); | | 458 | &zfs_sharevnodeops); |
459 | | | 459 | |
460 | return (error); | | 460 | return (error); |
461 | #endif | | 461 | #endif |
462 | return 0; | | 462 | return 0; |
463 | } | | 463 | } |
464 | | | 464 | |
465 | int | | 465 | int |
466 | zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) | | 466 | zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) |
467 | { | | 467 | { |
468 | zfs_acl_ids_t acl_ids; | | 468 | zfs_acl_ids_t acl_ids; |
469 | vattr_t vattr; | | 469 | vattr_t vattr; |
470 | znode_t *sharezp; | | 470 | znode_t *sharezp; |
471 | vnode_t *vp; | | 471 | vnode_t *vp; |
472 | znode_t *zp; | | 472 | znode_t *zp; |
473 | int error; | | 473 | int error; |
474 | | | 474 | |
475 | vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; | | 475 | vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; |
476 | vattr.va_type = VDIR; | | 476 | vattr.va_type = VDIR; |
477 | vattr.va_mode = S_IFDIR|0555; | | 477 | vattr.va_mode = S_IFDIR|0555; |
478 | vattr.va_uid = crgetuid(kcred); | | 478 | vattr.va_uid = crgetuid(kcred); |
479 | vattr.va_gid = crgetgid(kcred); | | 479 | vattr.va_gid = crgetgid(kcred); |
480 | | | 480 | |
481 | sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP); | | 481 | sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP); |
482 | sharezp->z_unlinked = 0; | | 482 | sharezp->z_unlinked = 0; |
483 | sharezp->z_atime_dirty = 0; | | 483 | sharezp->z_atime_dirty = 0; |
484 | sharezp->z_zfsvfs = zfsvfs; | | 484 | sharezp->z_zfsvfs = zfsvfs; |
485 | | | 485 | |
486 | vp = ZTOV(sharezp); | | 486 | vp = ZTOV(sharezp); |
487 | error = getnewvnode(VT_ZFS, zfsvfs->z_parent->z_vfs, | | 487 | error = getnewvnode(VT_ZFS, zfsvfs->z_parent->z_vfs, |
488 | zfs_vnodeop_p, NULL, &sharezp->z_vnode); | | 488 | zfs_vnodeop_p, NULL, &sharezp->z_vnode); |
489 | if (error) { | | 489 | if (error) { |
490 | kmem_cache_free(znode_cache, sharezp); | | 490 | kmem_cache_free(znode_cache, sharezp); |
491 | return error; | | 491 | return error; |
492 | } | | 492 | } |
493 | vp->v_type = VDIR; | | 493 | vp->v_type = VDIR; |
494 | | | 494 | |
495 | VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr, | | 495 | VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr, |
496 | kcred, NULL, &acl_ids)); | | 496 | kcred, NULL, &acl_ids)); |
497 | zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, | | 497 | zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, |
498 | &zp, 0, &acl_ids); | | 498 | &zp, 0, &acl_ids); |
499 | ASSERT3P(zp, ==, sharezp); | | 499 | ASSERT3P(zp, ==, sharezp); |
| | | 500 | #ifndef __NetBSD__ |
500 | ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */ | | 501 | ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */ |
| | | 502 | #endif |
501 | POINTER_INVALIDATE(&sharezp->z_zfsvfs); | | 503 | POINTER_INVALIDATE(&sharezp->z_zfsvfs); |
502 | error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, | | 504 | error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, |
503 | ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx); | | 505 | ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx); |
504 | zfsvfs->z_shares_dir = sharezp->z_id; | | 506 | zfsvfs->z_shares_dir = sharezp->z_id; |
505 | | | 507 | |
506 | zfs_acl_ids_free(&acl_ids); | | 508 | zfs_acl_ids_free(&acl_ids); |
507 | ZTOV(sharezp)->v_count = 0; | | 509 | ZTOV(sharezp)->v_count = 0; |
508 | dmu_buf_rele(sharezp->z_dbuf, NULL); | | 510 | dmu_buf_rele(sharezp->z_dbuf, NULL); |
509 | sharezp->z_dbuf = NULL; | | 511 | sharezp->z_dbuf = NULL; |
510 | kmem_cache_free(znode_cache, sharezp); | | 512 | kmem_cache_free(znode_cache, sharezp); |
511 | | | 513 | |
512 | return (error); | | 514 | return (error); |
513 | } | | 515 | } |
514 | | | 516 | |
/*
 * Minor-field width and major/minor masks for the 64-bit device number
 * layout.  They are supplied here, unless a header already defines them,
 * so that the same values are available in both 32-bit and 64-bit
 * environments.
 */
#if !defined(NBITSMINOR64)
#define	NBITSMINOR64	32
#endif
#if !defined(MAXMAJ64)
#define	MAXMAJ64	0xffffffffUL
#endif
#if !defined(MAXMIN64)
#define	MAXMIN64	0xffffffffUL
#endif
528 | | | 530 | |
529 | /* | | 531 | /* |
530 | * Create special expldev for ZFS private use. | | 532 | * Create special expldev for ZFS private use. |
531 | * Can't use standard expldev since it doesn't do | | 533 | * Can't use standard expldev since it doesn't do |
532 | * what we want. The standard expldev() takes a | | 534 | * what we want. The standard expldev() takes a |
533 | * dev32_t in LP64 and expands it to a long dev_t. | | 535 | * dev32_t in LP64 and expands it to a long dev_t. |
534 | * We need an interface that takes a dev32_t in ILP32 | | 536 | * We need an interface that takes a dev32_t in ILP32 |
535 | * and expands it to a long dev_t. | | 537 | * and expands it to a long dev_t. |
536 | */ | | 538 | */ |
537 | static uint64_t | | 539 | static uint64_t |
538 | zfs_expldev(dev_t dev) | | 540 | zfs_expldev(dev_t dev) |
539 | { | | 541 | { |
540 | return ((uint64_t)major(dev) << NBITSMINOR64) | | | 542 | return ((uint64_t)major(dev) << NBITSMINOR64) | |
541 | (minor_t)minor(dev); | | 543 | (minor_t)minor(dev); |
542 | } | | 544 | } |
543 | | | 545 | |
544 | /* | | 546 | /* |
545 | * Special cmpldev for ZFS private use. | | 547 | * Special cmpldev for ZFS private use. |
546 | * Can't use standard cmpldev since it takes | | 548 | * Can't use standard cmpldev since it takes |
547 | * a long dev_t and compresses it to dev32_t in | | 549 | * a long dev_t and compresses it to dev32_t in |
548 | * LP64. We need to do a compaction of a long dev_t | | 550 | * LP64. We need to do a compaction of a long dev_t |
549 | * to a dev32_t in ILP32. | | 551 | * to a dev32_t in ILP32. |
550 | */ | | 552 | */ |
551 | dev_t | | 553 | dev_t |
552 | zfs_cmpldev(uint64_t dev) | | 554 | zfs_cmpldev(uint64_t dev) |
553 | { | | 555 | { |
554 | minor_t minor = (minor_t)dev & MAXMIN64; | | 556 | minor_t minor = (minor_t)dev & MAXMIN64; |
555 | major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64; | | 557 | major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64; |
556 | | | 558 | |
557 | return makedev(minor, major); | | 559 | return makedev(minor, major); |
558 | } | | 560 | } |
559 | | | 561 | |
560 | static void | | 562 | static void |
561 | zfs_znode_dmu_init(zfsvfs_t *zfsvfs, znode_t *zp, dmu_buf_t *db) | | 563 | zfs_znode_dmu_init(zfsvfs_t *zfsvfs, znode_t *zp, dmu_buf_t *db) |
562 | { | | 564 | { |
563 | znode_t *nzp; | | 565 | znode_t *nzp; |
564 | | | 566 | |
565 | ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs)); | | 567 | ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs)); |
566 | ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id))); | | 568 | ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id))); |
567 | | | 569 | |
568 | mutex_enter(&zp->z_lock); | | 570 | mutex_enter(&zp->z_lock); |
569 | | | 571 | |
570 | ASSERT(zp->z_dbuf == NULL); | | 572 | ASSERT(zp->z_dbuf == NULL); |
571 | ASSERT(zp->z_acl_cached == NULL); | | 573 | ASSERT(zp->z_acl_cached == NULL); |
572 | zp->z_dbuf = db; | | 574 | zp->z_dbuf = db; |
573 | nzp = dmu_buf_set_user_ie(db, zp, &zp->z_phys, znode_evict_error); | | 575 | nzp = dmu_buf_set_user_ie(db, zp, &zp->z_phys, znode_evict_error); |
574 | | | 576 | |
575 | /* | | 577 | /* |
576 | * there should be no | | 578 | * there should be no |
577 | * concurrent zgets on this object. | | 579 | * concurrent zgets on this object. |
578 | */ | | 580 | */ |
579 | if (nzp != NULL) | | 581 | if (nzp != NULL) |
580 | panic("existing znode %p for dbuf %p", (void *)nzp, (void *)db); | | 582 | panic("existing znode %p for dbuf %p", (void *)nzp, (void *)db); |
581 | | | 583 | |
582 | /* | | 584 | /* |
583 | * Slap on VROOT if we are the root znode | | 585 | * Slap on VROOT if we are the root znode |
584 | */ | | 586 | */ |
585 | if (zp->z_id == zfsvfs->z_root) | | 587 | if (zp->z_id == zfsvfs->z_root) |
586 | ZTOV(zp)->v_flag |= VROOT; | | 588 | ZTOV(zp)->v_flag |= VROOT; |
587 | | | 589 | |
588 | mutex_exit(&zp->z_lock); | | 590 | mutex_exit(&zp->z_lock); |
589 | vn_exists(ZTOV(zp)); | | 591 | vn_exists(ZTOV(zp)); |
590 | } | | 592 | } |
591 | | | 593 | |
592 | void | | 594 | void |
593 | zfs_znode_dmu_fini(znode_t *zp) | | 595 | zfs_znode_dmu_fini(znode_t *zp) |
594 | { | | 596 | { |
595 | dmu_buf_t *db = zp->z_dbuf; | | 597 | dmu_buf_t *db = zp->z_dbuf; |
596 | ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) || | | 598 | ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) || |
597 | zp->z_unlinked || | | 599 | zp->z_unlinked || |
598 | RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock)); | | 600 | RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock)); |
599 | ASSERT(zp->z_dbuf != NULL); | | 601 | ASSERT(zp->z_dbuf != NULL); |
600 | zp->z_dbuf = NULL; | | 602 | zp->z_dbuf = NULL; |
601 | VERIFY(zp == dmu_buf_update_user(db, zp, NULL, NULL, NULL)); | | 603 | VERIFY(zp == dmu_buf_update_user(db, zp, NULL, NULL, NULL)); |
602 | dmu_buf_rele(db, NULL); | | 604 | dmu_buf_rele(db, NULL); |
603 | } | | 605 | } |
604 | | | 606 | |
605 | /* | | 607 | /* |
606 | * Construct a new znode/vnode and intialize. | | 608 | * Construct a new znode/vnode and intialize. |
607 | * | | 609 | * |
608 | * This does not do a call to dmu_set_user() that is | | 610 | * This does not do a call to dmu_set_user() that is |
609 | * up to the caller to do, in case you don't want to | | 611 | * up to the caller to do, in case you don't want to |
610 | * return the znode | | 612 | * return the znode |
611 | */ | | 613 | */ |
612 | | | 614 | |
613 | static znode_t * | | 615 | static znode_t * |
614 | zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz) | | 616 | zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz) |
615 | { | | 617 | { |
616 | znode_t *zp; | | 618 | znode_t *zp; |
617 | vnode_t *vp; | | 619 | vnode_t *vp; |
618 | int error; | | 620 | int error; |
619 | | | 621 | |
620 | zp = kmem_cache_alloc(znode_cache, KM_SLEEP); | | 622 | zp = kmem_cache_alloc(znode_cache, KM_SLEEP); |
621 | | | 623 | |
622 | for (;;) { | | 624 | for (;;) { |
623 | error = getnewvnode(VT_ZFS, zfsvfs->z_parent->z_vfs, | | 625 | error = getnewvnode(VT_ZFS, zfsvfs->z_parent->z_vfs, |
624 | zfs_vnodeop_p, NULL, &zp->z_vnode); | | 626 | zfs_vnodeop_p, NULL, &zp->z_vnode); |
625 | if (__predict_true(error == 0)) | | 627 | if (__predict_true(error == 0)) |
626 | break; | | 628 | break; |
627 | printf("WARNING: zfs_znode_alloc: unable to get vnode, " | | 629 | printf("WARNING: zfs_znode_alloc: unable to get vnode, " |
628 | "error=%d\n", error); | | 630 | "error=%d\n", error); |
629 | (void)kpause("zfsnewvn", false, hz, NULL); | | 631 | (void)kpause("zfsnewvn", false, hz, NULL); |
630 | } | | 632 | } |
631 | | | 633 | |
632 | ASSERT(zp->z_dirlocks == NULL); | | 634 | ASSERT(zp->z_dirlocks == NULL); |
633 | ASSERT(zp->z_dbuf == NULL); | | 635 | ASSERT(zp->z_dbuf == NULL); |
634 | ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); | | 636 | ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); |
635 | | | 637 | |
636 | /* | | 638 | /* |
637 | * Defer setting z_zfsvfs until the znode is ready to be a candidate for | | 639 | * Defer setting z_zfsvfs until the znode is ready to be a candidate for |
638 | * the zfs_znode_move() callback. | | 640 | * the zfs_znode_move() callback. |
639 | */ | | 641 | */ |
640 | zp->z_phys = NULL; | | 642 | zp->z_phys = NULL; |
641 | zp->z_unlinked = 0; | | 643 | zp->z_unlinked = 0; |
642 | zp->z_atime_dirty = 0; | | 644 | zp->z_atime_dirty = 0; |
643 | zp->z_mapcnt = 0; | | 645 | zp->z_mapcnt = 0; |
644 | zp->z_last_itx = 0; | | 646 | zp->z_last_itx = 0; |
645 | zp->z_id = db->db_object; | | 647 | zp->z_id = db->db_object; |
646 | zp->z_blksz = blksz; | | 648 | zp->z_blksz = blksz; |
647 | zp->z_seq = 0x7A4653; | | 649 | zp->z_seq = 0x7A4653; |
648 | zp->z_sync_cnt = 0; | | 650 | zp->z_sync_cnt = 0; |
649 | | | 651 | |
650 | vp = ZTOV(zp); | | 652 | vp = ZTOV(zp); |
651 | | | 653 | |
652 | zfs_znode_dmu_init(zfsvfs, zp, db); | | 654 | zfs_znode_dmu_init(zfsvfs, zp, db); |
653 | | | 655 | |
654 | zp->z_gen = zp->z_phys->zp_gen; | | 656 | zp->z_gen = zp->z_phys->zp_gen; |
655 | | | 657 | |
656 | vp->v_vfsp = zfsvfs->z_parent->z_vfs; | | 658 | vp->v_vfsp = zfsvfs->z_parent->z_vfs; |
657 | vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode); | | 659 | vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode); |
658 | vp->v_data = zp; | | 660 | vp->v_data = zp; |
659 | genfs_node_init(vp, &zfs_genfsops); | | 661 | genfs_node_init(vp, &zfs_genfsops); |
660 | switch (vp->v_type) { | | 662 | switch (vp->v_type) { |
661 | case VDIR: | | 663 | case VDIR: |
662 | zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */ | | 664 | zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */ |
663 | break; | | 665 | break; |
664 | case VBLK: | | 666 | case VBLK: |
665 | case VCHR: | | 667 | case VCHR: |
666 | /* XXX NetBSD vp->v_op = zfs_specop_p; */ | | 668 | /* XXX NetBSD vp->v_op = zfs_specop_p; */ |
667 | spec_node_init(vp, zfs_cmpldev(zp->z_phys->zp_rdev)); | | 669 | spec_node_init(vp, zfs_cmpldev(zp->z_phys->zp_rdev)); |
668 | break; | | 670 | break; |
669 | case VFIFO: | | 671 | case VFIFO: |
670 | /* XXX NetBSD vp->v_op = zfs_fifoop_p; */ | | 672 | /* XXX NetBSD vp->v_op = zfs_fifoop_p; */ |
671 | break; | | 673 | break; |
672 | } | | 674 | } |
673 | | | 675 | |
674 | dprintf("zfs_znode_alloc znode %p -- vnode %p\n", zp, vp); | | 676 | dprintf("zfs_znode_alloc znode %p -- vnode %p\n", zp, vp); |
675 | dprintf("zfs_znode_alloc z_id %ld\n", zp->z_id); | | 677 | dprintf("zfs_znode_alloc z_id %ld\n", zp->z_id); |
676 | //cpu_Debugger(); | | 678 | //cpu_Debugger(); |
677 | | | 679 | |
678 | uvm_vnp_setsize(vp, zp->z_phys->zp_size); | | 680 | uvm_vnp_setsize(vp, zp->z_phys->zp_size); |
679 | | | 681 | |
680 | mutex_enter(&zfsvfs->z_znodes_lock); | | 682 | mutex_enter(&zfsvfs->z_znodes_lock); |
681 | list_insert_tail(&zfsvfs->z_all_znodes, zp); | | 683 | list_insert_tail(&zfsvfs->z_all_znodes, zp); |
682 | membar_producer(); | | 684 | membar_producer(); |
683 | /* | | 685 | /* |
684 | * Everything else must be valid before assigning z_zfsvfs makes the | | 686 | * Everything else must be valid before assigning z_zfsvfs makes the |
685 | * znode eligible for zfs_znode_move(). | | 687 | * znode eligible for zfs_znode_move(). |
686 | */ | | 688 | */ |
687 | zp->z_zfsvfs = zfsvfs; | | 689 | zp->z_zfsvfs = zfsvfs; |
688 | mutex_exit(&zfsvfs->z_znodes_lock); | | 690 | mutex_exit(&zfsvfs->z_znodes_lock); |
689 | | | 691 | |
690 | VFS_HOLD(zfsvfs->z_vfs); | | 692 | VFS_HOLD(zfsvfs->z_vfs); |
691 | return (zp); | | 693 | return (zp); |
692 | } | | 694 | } |
693 | | | 695 | |
694 | /* | | 696 | /* |
695 | * Create a new DMU object to hold a zfs znode. | | 697 | * Create a new DMU object to hold a zfs znode. |
696 | * | | 698 | * |
697 | * IN: dzp - parent directory for new znode | | 699 | * IN: dzp - parent directory for new znode |
698 | * vap - file attributes for new znode | | 700 | * vap - file attributes for new znode |
699 | * tx - dmu transaction id for zap operations | | 701 | * tx - dmu transaction id for zap operations |
700 | * cr - credentials of caller | | 702 | * cr - credentials of caller |
701 | * flag - flags: | | 703 | * flag - flags: |
702 | * IS_ROOT_NODE - new object will be root | | 704 | * IS_ROOT_NODE - new object will be root |
703 | * IS_XATTR - new object is an attribute | | 705 | * IS_XATTR - new object is an attribute |
704 | * bonuslen - length of bonus buffer | | 706 | * bonuslen - length of bonus buffer |
705 | * setaclp - File/Dir initial ACL | | 707 | * setaclp - File/Dir initial ACL |
706 | * fuidp - Tracks fuid allocation. | | 708 | * fuidp - Tracks fuid allocation. |
707 | * | | 709 | * |
708 | * OUT: zpp - allocated znode | | 710 | * OUT: zpp - allocated znode |
709 | * | | 711 | * |
710 | */ | | 712 | */ |
711 | void | | 713 | void |
712 | zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, | | 714 | zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, |
713 | uint_t flag, znode_t **zpp, int bonuslen, zfs_acl_ids_t *acl_ids) | | 715 | uint_t flag, znode_t **zpp, int bonuslen, zfs_acl_ids_t *acl_ids) |
714 | { | | 716 | { |
715 | dmu_buf_t *db; | | 717 | dmu_buf_t *db; |
716 | znode_phys_t *pzp; | | 718 | znode_phys_t *pzp; |
717 | zfsvfs_t *zfsvfs = dzp->z_zfsvfs; | | 719 | zfsvfs_t *zfsvfs = dzp->z_zfsvfs; |
718 | timestruc_t now; | | 720 | timestruc_t now; |
719 | uint64_t gen, obj; | | 721 | uint64_t gen, obj; |
720 | int err; | | 722 | int err; |
721 | | | 723 | |
722 | ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); | | 724 | ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); |
723 | | | 725 | |
724 | if (zfsvfs->z_replay) { | | 726 | if (zfsvfs->z_replay) { |
725 | obj = vap->va_nodeid; | | 727 | obj = vap->va_nodeid; |
726 | now = vap->va_ctime; /* see zfs_replay_create() */ | | 728 | now = vap->va_ctime; /* see zfs_replay_create() */ |
727 | gen = vap->va_nblocks; /* ditto */ | | 729 | gen = vap->va_nblocks; /* ditto */ |
728 | } else { | | 730 | } else { |
729 | obj = 0; | | 731 | obj = 0; |
730 | gethrestime(&now); | | 732 | gethrestime(&now); |
731 | gen = dmu_tx_get_txg(tx); | | 733 | gen = dmu_tx_get_txg(tx); |
732 | } | | 734 | } |
733 | | | 735 | |
734 | /* | | 736 | /* |
735 | * Create a new DMU object. | | 737 | * Create a new DMU object. |
736 | */ | | 738 | */ |
737 | /* | | 739 | /* |
738 | * There's currently no mechanism for pre-reading the blocks that will | | 740 | * There's currently no mechanism for pre-reading the blocks that will |
739 | * be to needed allocate a new object, so we accept the small chance | | 741 | * be to needed allocate a new object, so we accept the small chance |
740 | * that there will be an i/o error and we will fail one of the | | 742 | * that there will be an i/o error and we will fail one of the |
741 | * assertions below. | | 743 | * assertions below. |
742 | */ | | 744 | */ |
743 | if (vap->va_type == VDIR) { | | 745 | if (vap->va_type == VDIR) { |
744 | if (zfsvfs->z_replay) { | | 746 | if (zfsvfs->z_replay) { |
745 | err = zap_create_claim_norm(zfsvfs->z_os, obj, | | 747 | err = zap_create_claim_norm(zfsvfs->z_os, obj, |
746 | zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, | | 748 | zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, |
747 | DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); | | 749 | DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); |
748 | ASSERT3U(err, ==, 0); | | 750 | ASSERT3U(err, ==, 0); |
749 | } else { | | 751 | } else { |
750 | obj = zap_create_norm(zfsvfs->z_os, | | 752 | obj = zap_create_norm(zfsvfs->z_os, |
751 | zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, | | 753 | zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, |
752 | DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); | | 754 | DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); |
753 | } | | 755 | } |
754 | } else { | | 756 | } else { |
755 | if (zfsvfs->z_replay) { | | 757 | if (zfsvfs->z_replay) { |
756 | err = dmu_object_claim(zfsvfs->z_os, obj, | | 758 | err = dmu_object_claim(zfsvfs->z_os, obj, |
757 | DMU_OT_PLAIN_FILE_CONTENTS, 0, | | 759 | DMU_OT_PLAIN_FILE_CONTENTS, 0, |
758 | DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); | | 760 | DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); |
759 | ASSERT3U(err, ==, 0); | | 761 | ASSERT3U(err, ==, 0); |
760 | } else { | | 762 | } else { |
761 | obj = dmu_object_alloc(zfsvfs->z_os, | | 763 | obj = dmu_object_alloc(zfsvfs->z_os, |
762 | DMU_OT_PLAIN_FILE_CONTENTS, 0, | | 764 | DMU_OT_PLAIN_FILE_CONTENTS, 0, |
763 | DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); | | 765 | DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); |
764 | } | | 766 | } |
765 | } | | 767 | } |
766 | | | 768 | |
767 | ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); | | 769 | ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); |
768 | VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, obj, NULL, &db)); | | 770 | VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, obj, NULL, &db)); |
769 | dmu_buf_will_dirty(db, tx); | | 771 | dmu_buf_will_dirty(db, tx); |
770 | | | 772 | |
771 | /* | | 773 | /* |
772 | * Initialize the znode physical data to zero. | | 774 | * Initialize the znode physical data to zero. |
773 | */ | | 775 | */ |
774 | ASSERT(db->db_size >= sizeof (znode_phys_t)); | | 776 | ASSERT(db->db_size >= sizeof (znode_phys_t)); |
775 | bzero(db->db_data, db->db_size); | | 777 | bzero(db->db_data, db->db_size); |
776 | pzp = db->db_data; | | 778 | pzp = db->db_data; |
777 | | | 779 | |
778 | /* | | 780 | /* |
779 | * If this is the root, fix up the half-initialized parent pointer | | 781 | * If this is the root, fix up the half-initialized parent pointer |
780 | * to reference the just-allocated physical data area. | | 782 | * to reference the just-allocated physical data area. |
781 | */ | | 783 | */ |
782 | if (flag & IS_ROOT_NODE) { | | 784 | if (flag & IS_ROOT_NODE) { |
783 | dzp->z_dbuf = db; | | 785 | dzp->z_dbuf = db; |
784 | dzp->z_phys = pzp; | | 786 | dzp->z_phys = pzp; |
785 | dzp->z_id = obj; | | 787 | dzp->z_id = obj; |
786 | } | | 788 | } |
787 | | | 789 | |
788 | /* | | 790 | /* |
789 | * If parent is an xattr, so am I. | | 791 | * If parent is an xattr, so am I. |
790 | */ | | 792 | */ |
791 | if (dzp->z_phys->zp_flags & ZFS_XATTR) | | 793 | if (dzp->z_phys->zp_flags & ZFS_XATTR) |
792 | flag |= IS_XATTR; | | 794 | flag |= IS_XATTR; |
793 | | | 795 | |
794 | if (vap->va_type == VBLK || vap->va_type == VCHR) { | | 796 | if (vap->va_type == VBLK || vap->va_type == VCHR) { |
795 | pzp->zp_rdev = zfs_expldev(vap->va_rdev); | | 797 | pzp->zp_rdev = zfs_expldev(vap->va_rdev); |
796 | } | | 798 | } |
797 | | | 799 | |
798 | if (zfsvfs->z_use_fuids) | | 800 | if (zfsvfs->z_use_fuids) |
799 | pzp->zp_flags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; | | 801 | pzp->zp_flags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; |
800 | | | 802 | |
801 | if (vap->va_type == VDIR) { | | 803 | if (vap->va_type == VDIR) { |
802 | pzp->zp_size = 2; /* contents ("." and "..") */ | | 804 | pzp->zp_size = 2; /* contents ("." and "..") */ |
803 | pzp->zp_links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1; | | 805 | pzp->zp_links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1; |
804 | } | | 806 | } |
805 | | | 807 | |
806 | pzp->zp_parent = dzp->z_id; | | 808 | pzp->zp_parent = dzp->z_id; |
807 | if (flag & IS_XATTR) | | 809 | if (flag & IS_XATTR) |
808 | pzp->zp_flags |= ZFS_XATTR; | | 810 | pzp->zp_flags |= ZFS_XATTR; |
809 | | | 811 | |
810 | pzp->zp_gen = gen; | | 812 | pzp->zp_gen = gen; |
811 | | | 813 | |
812 | ZFS_TIME_ENCODE(&now, pzp->zp_crtime); | | 814 | ZFS_TIME_ENCODE(&now, pzp->zp_crtime); |
813 | ZFS_TIME_ENCODE(&now, pzp->zp_ctime); | | 815 | ZFS_TIME_ENCODE(&now, pzp->zp_ctime); |
814 | | | 816 | |
815 | if (vap->va_mask & AT_ATIME) { | | 817 | if (vap->va_mask & AT_ATIME) { |
816 | ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime); | | 818 | ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime); |
817 | } else { | | 819 | } else { |
818 | ZFS_TIME_ENCODE(&now, pzp->zp_atime); | | 820 | ZFS_TIME_ENCODE(&now, pzp->zp_atime); |
819 | } | | 821 | } |
820 | | | 822 | |
821 | if (vap->va_mask & AT_MTIME) { | | 823 | if (vap->va_mask & AT_MTIME) { |
822 | ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime); | | 824 | ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime); |
823 | } else { | | 825 | } else { |
824 | ZFS_TIME_ENCODE(&now, pzp->zp_mtime); | | 826 | ZFS_TIME_ENCODE(&now, pzp->zp_mtime); |
825 | } | | 827 | } |
826 | pzp->zp_uid = acl_ids->z_fuid; | | 828 | pzp->zp_uid = acl_ids->z_fuid; |
827 | pzp->zp_gid = acl_ids->z_fgid; | | 829 | pzp->zp_gid = acl_ids->z_fgid; |
828 | pzp->zp_mode = acl_ids->z_mode; | | 830 | pzp->zp_mode = acl_ids->z_mode; |
829 | if (!(flag & IS_ROOT_NODE)) { | | 831 | if (!(flag & IS_ROOT_NODE)) { |
830 | *zpp = zfs_znode_alloc(zfsvfs, db, 0); | | 832 | *zpp = zfs_znode_alloc(zfsvfs, db, 0); |
831 | } else { | | 833 | } else { |
832 | /* | | 834 | /* |
833 | * If we are creating the root node, the "parent" we | | 835 | * If we are creating the root node, the "parent" we |
834 | * passed in is the znode for the root. | | 836 | * passed in is the znode for the root. |
835 | */ | | 837 | */ |
836 | *zpp = dzp; | | 838 | *zpp = dzp; |
837 | } | | 839 | } |
838 | VERIFY(0 == zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx)); | | 840 | VERIFY(0 == zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx)); |
839 | if (vap->va_mask & AT_XVATTR) | | 841 | if (vap->va_mask & AT_XVATTR) |
840 | zfs_xvattr_set(*zpp, (xvattr_t *)vap); | | 842 | zfs_xvattr_set(*zpp, (xvattr_t *)vap); |
841 | | | 843 | |
842 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); | | 844 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); |
843 | } | | 845 | } |
844 | | | 846 | |
845 | void | | 847 | void |
846 | zfs_xvattr_set(znode_t *zp, xvattr_t *xvap) | | 848 | zfs_xvattr_set(znode_t *zp, xvattr_t *xvap) |
847 | { | | 849 | { |
848 | xoptattr_t *xoap; | | 850 | xoptattr_t *xoap; |
849 | | | 851 | |
850 | xoap = xva_getxoptattr(xvap); | | 852 | xoap = xva_getxoptattr(xvap); |
851 | ASSERT(xoap); | | 853 | ASSERT(xoap); |
852 | | | 854 | |
853 | if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { | | 855 | if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { |
854 | ZFS_TIME_ENCODE(&xoap->xoa_createtime, zp->z_phys->zp_crtime); | | 856 | ZFS_TIME_ENCODE(&xoap->xoa_createtime, zp->z_phys->zp_crtime); |
855 | XVA_SET_RTN(xvap, XAT_CREATETIME); | | 857 | XVA_SET_RTN(xvap, XAT_CREATETIME); |
856 | } | | 858 | } |
857 | if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { | | 859 | if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { |
858 | ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly); | | 860 | ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly); |
859 | XVA_SET_RTN(xvap, XAT_READONLY); | | 861 | XVA_SET_RTN(xvap, XAT_READONLY); |
860 | } | | 862 | } |
861 | if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { | | 863 | if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { |
862 | ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden); | | 864 | ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden); |
863 | XVA_SET_RTN(xvap, XAT_HIDDEN); | | 865 | XVA_SET_RTN(xvap, XAT_HIDDEN); |
864 | } | | 866 | } |
865 | if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { | | 867 | if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { |
866 | ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system); | | 868 | ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system); |
867 | XVA_SET_RTN(xvap, XAT_SYSTEM); | | 869 | XVA_SET_RTN(xvap, XAT_SYSTEM); |
868 | } | | 870 | } |
869 | if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { | | 871 | if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { |
870 | ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive); | | 872 | ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive); |
871 | XVA_SET_RTN(xvap, XAT_ARCHIVE); | | 873 | XVA_SET_RTN(xvap, XAT_ARCHIVE); |
872 | } | | 874 | } |
873 | if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { | | 875 | if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { |
874 | ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable); | | 876 | ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable); |
875 | XVA_SET_RTN(xvap, XAT_IMMUTABLE); | | 877 | XVA_SET_RTN(xvap, XAT_IMMUTABLE); |
876 | } | | 878 | } |
877 | if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { | | 879 | if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { |
878 | ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink); | | 880 | ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink); |
879 | XVA_SET_RTN(xvap, XAT_NOUNLINK); | | 881 | XVA_SET_RTN(xvap, XAT_NOUNLINK); |
880 | } | | 882 | } |
881 | if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { | | 883 | if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { |
882 | ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly); | | 884 | ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly); |
883 | XVA_SET_RTN(xvap, XAT_APPENDONLY); | | 885 | XVA_SET_RTN(xvap, XAT_APPENDONLY); |
884 | } | | 886 | } |
885 | if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { | | 887 | if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { |
886 | ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump); | | 888 | ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump); |
887 | XVA_SET_RTN(xvap, XAT_NODUMP); | | 889 | XVA_SET_RTN(xvap, XAT_NODUMP); |
888 | } | | 890 | } |
889 | if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { | | 891 | if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { |
890 | ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque); | | 892 | ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque); |
891 | XVA_SET_RTN(xvap, XAT_OPAQUE); | | 893 | XVA_SET_RTN(xvap, XAT_OPAQUE); |
892 | } | | 894 | } |
893 | if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { | | 895 | if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { |
894 | ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED, | | 896 | ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED, |
895 | xoap->xoa_av_quarantined); | | 897 | xoap->xoa_av_quarantined); |
896 | XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); | | 898 | XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); |
897 | } | | 899 | } |
898 | if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { | | 900 | if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { |
899 | ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified); | | 901 | ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified); |
900 | XVA_SET_RTN(xvap, XAT_AV_MODIFIED); | | 902 | XVA_SET_RTN(xvap, XAT_AV_MODIFIED); |
901 | } | | 903 | } |
902 | if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { | | 904 | if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { |
903 | (void) memcpy(zp->z_phys + 1, xoap->xoa_av_scanstamp, | | 905 | (void) memcpy(zp->z_phys + 1, xoap->xoa_av_scanstamp, |
904 | sizeof (xoap->xoa_av_scanstamp)); | | 906 | sizeof (xoap->xoa_av_scanstamp)); |
905 | zp->z_phys->zp_flags |= ZFS_BONUS_SCANSTAMP; | | 907 | zp->z_phys->zp_flags |= ZFS_BONUS_SCANSTAMP; |
906 | XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); | | 908 | XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); |
907 | } | | 909 | } |
908 | if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { | | 910 | if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { |
909 | ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse); | | 911 | ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse); |
910 | XVA_SET_RTN(xvap, XAT_REPARSE); | | 912 | XVA_SET_RTN(xvap, XAT_REPARSE); |
911 | } | | 913 | } |
912 | } | | 914 | } |
913 | | | 915 | |
914 | int | | 916 | int |
915 | zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) | | 917 | zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) |
916 | { | | 918 | { |
917 | dmu_object_info_t doi; | | 919 | dmu_object_info_t doi; |
918 | dmu_buf_t *db; | | 920 | dmu_buf_t *db; |
919 | znode_t *zp; | | 921 | znode_t *zp; |
920 | vnode_t *vp; | | 922 | vnode_t *vp; |
921 | int err, first = 1; | | 923 | int err, first = 1; |
922 | | | 924 | |
923 | *zpp = NULL; | | 925 | *zpp = NULL; |
924 | again: | | 926 | again: |
925 | ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); | | 927 | ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); |
926 | | | 928 | |
927 | err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db); | | 929 | err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db); |
928 | if (err) { | | 930 | if (err) { |
929 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); | | 931 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); |
930 | return (err); | | 932 | return (err); |
931 | } | | 933 | } |
932 | | | 934 | |
933 | dmu_object_info_from_db(db, &doi); | | 935 | dmu_object_info_from_db(db, &doi); |
934 | if (doi.doi_bonus_type != DMU_OT_ZNODE || | | 936 | if (doi.doi_bonus_type != DMU_OT_ZNODE || |
935 | doi.doi_bonus_size < sizeof (znode_phys_t)) { | | 937 | doi.doi_bonus_size < sizeof (znode_phys_t)) { |
936 | dmu_buf_rele(db, NULL); | | 938 | dmu_buf_rele(db, NULL); |
937 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); | | 939 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); |
938 | return (EINVAL); | | 940 | return (EINVAL); |
939 | } | | 941 | } |
940 | | | 942 | |
941 | zp = dmu_buf_get_user(db); | | 943 | zp = dmu_buf_get_user(db); |
942 | if (zp != NULL) { | | 944 | if (zp != NULL) { |
943 | mutex_enter(&zp->z_lock); | | 945 | mutex_enter(&zp->z_lock); |
944 | | | 946 | |
945 | /* | | 947 | /* |
946 | * Since we do immediate eviction of the z_dbuf, we | | 948 | * Since we do immediate eviction of the z_dbuf, we |
947 | * should never find a dbuf with a znode that doesn't | | 949 | * should never find a dbuf with a znode that doesn't |
948 | * know about the dbuf. | | 950 | * know about the dbuf. |
949 | */ | | 951 | */ |
950 | ASSERT3P(zp->z_dbuf, ==, db); | | 952 | ASSERT3P(zp->z_dbuf, ==, db); |
951 | ASSERT3U(zp->z_id, ==, obj_num); | | 953 | ASSERT3U(zp->z_id, ==, obj_num); |
952 | if (zp->z_unlinked) { | | 954 | if (zp->z_unlinked) { |
953 | err = ENOENT; | | 955 | err = ENOENT; |
954 | } else { | | 956 | } else { |
955 | if ((vp = ZTOV(zp)) != NULL) { | | 957 | if ((vp = ZTOV(zp)) != NULL) { |
956 | mutex_enter(vp->v_interlock); | | 958 | mutex_enter(vp->v_interlock); |
957 | mutex_exit(&zp->z_lock); | | 959 | mutex_exit(&zp->z_lock); |
958 | if (vget(vp, 0) != 0) { | | 960 | if (vget(vp, 0) != 0) { |
959 | dmu_buf_rele(db, NULL); | | 961 | dmu_buf_rele(db, NULL); |
960 | mutex_exit(vp->v_interlock); | | 962 | mutex_exit(vp->v_interlock); |
961 | goto again; | | 963 | goto again; |
962 | } | | 964 | } |
963 | mutex_enter(&zp->z_lock); | | 965 | mutex_enter(&zp->z_lock); |
964 | } else { | | 966 | } else { |
965 | if (first) { | | 967 | if (first) { |
966 | ZFS_LOG(1, "dying znode detected (zp=%p)", zp); | | 968 | ZFS_LOG(1, "dying znode detected (zp=%p)", zp); |
967 | first = 0; | | 969 | first = 0; |
968 | } | | 970 | } |
969 | /* | | 971 | /* |
970 | * znode is dying so we can't reuse it, we must | | 972 | * znode is dying so we can't reuse it, we must |
971 | * wait until destruction is completed. | | 973 | * wait until destruction is completed. |
972 | */ | | 974 | */ |
973 | dmu_buf_rele(db, NULL); | | 975 | dmu_buf_rele(db, NULL); |
974 | mutex_exit(&zp->z_lock); | | 976 | mutex_exit(&zp->z_lock); |
975 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); | | 977 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); |
976 | kpause("zcollide", 0, 1, NULL); | | 978 | kpause("zcollide", 0, 1, NULL); |
977 | goto again; | | 979 | goto again; |
978 | } | | 980 | } |
979 | *zpp = zp; | | 981 | *zpp = zp; |
980 | err = 0; | | 982 | err = 0; |
981 | } | | 983 | } |
982 | | | 984 | |
983 | dmu_buf_rele(db, NULL); | | 985 | dmu_buf_rele(db, NULL); |
984 | mutex_exit(&zp->z_lock); | | 986 | mutex_exit(&zp->z_lock); |
985 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); | | 987 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); |
986 | return (err); | | 988 | return (err); |
987 | } | | 989 | } |
988 | | | 990 | |
989 | /* | | 991 | /* |
990 | * Not found create new znode/vnode | | 992 | * Not found create new znode/vnode |
991 | * but only if file exists. | | 993 | * but only if file exists. |
992 | * | | 994 | * |
993 | * There is a small window where zfs_vget() could | | 995 | * There is a small window where zfs_vget() could |
994 | * find this object while a file create is still in | | 996 | * find this object while a file create is still in |
995 | * progress. Since a gen number can never be zero | | 997 | * progress. Since a gen number can never be zero |
996 | * we will check that to determine if its an allocated | | 998 | * we will check that to determine if its an allocated |
997 | * file. | | 999 | * file. |
998 | */ | | 1000 | */ |
999 | | | 1001 | |
1000 | if (((znode_phys_t *)db->db_data)->zp_gen != 0) { | | 1002 | if (((znode_phys_t *)db->db_data)->zp_gen != 0) { |
1001 | zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size); | | 1003 | zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size); |
1002 | *zpp = zp; | | 1004 | *zpp = zp; |
1003 | err = 0; | | 1005 | err = 0; |
1004 | } else { | | 1006 | } else { |
1005 | dmu_buf_rele(db, NULL); | | 1007 | dmu_buf_rele(db, NULL); |
1006 | err = ENOENT; | | 1008 | err = ENOENT; |
1007 | } | | 1009 | } |
1008 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); | | 1010 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); |
1009 | return (err); | | 1011 | return (err); |
1010 | } | | 1012 | } |
1011 | | | 1013 | |
1012 | int | | 1014 | int |
1013 | zfs_rezget(znode_t *zp) | | 1015 | zfs_rezget(znode_t *zp) |
1014 | { | | 1016 | { |
1015 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | | 1017 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; |
1016 | dmu_object_info_t doi; | | 1018 | dmu_object_info_t doi; |
1017 | dmu_buf_t *db; | | 1019 | dmu_buf_t *db; |
1018 | uint64_t obj_num = zp->z_id; | | 1020 | uint64_t obj_num = zp->z_id; |
1019 | int err; | | 1021 | int err; |
1020 | | | 1022 | |
1021 | ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); | | 1023 | ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); |
1022 | | | 1024 | |
1023 | err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db); | | 1025 | err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db); |
1024 | if (err) { | | 1026 | if (err) { |
1025 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); | | 1027 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); |
1026 | return (err); | | 1028 | return (err); |
1027 | } | | 1029 | } |
1028 | | | 1030 | |
1029 | dmu_object_info_from_db(db, &doi); | | 1031 | dmu_object_info_from_db(db, &doi); |
1030 | if (doi.doi_bonus_type != DMU_OT_ZNODE || | | 1032 | if (doi.doi_bonus_type != DMU_OT_ZNODE || |
1031 | doi.doi_bonus_size < sizeof (znode_phys_t)) { | | 1033 | doi.doi_bonus_size < sizeof (znode_phys_t)) { |
1032 | dmu_buf_rele(db, NULL); | | 1034 | dmu_buf_rele(db, NULL); |
1033 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); | | 1035 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); |
1034 | return (EINVAL); | | 1036 | return (EINVAL); |
1035 | } | | 1037 | } |
1036 | | | 1038 | |
1037 | if (((znode_phys_t *)db->db_data)->zp_gen != zp->z_gen) { | | 1039 | if (((znode_phys_t *)db->db_data)->zp_gen != zp->z_gen) { |
1038 | dmu_buf_rele(db, NULL); | | 1040 | dmu_buf_rele(db, NULL); |
1039 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); | | 1041 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); |
1040 | return (EIO); | | 1042 | return (EIO); |
1041 | } | | 1043 | } |
1042 | | | 1044 | |
1043 | mutex_enter(&zp->z_acl_lock); | | 1045 | mutex_enter(&zp->z_acl_lock); |
1044 | if (zp->z_acl_cached) { | | 1046 | if (zp->z_acl_cached) { |
1045 | zfs_acl_free(zp->z_acl_cached); | | 1047 | zfs_acl_free(zp->z_acl_cached); |
1046 | zp->z_acl_cached = NULL; | | 1048 | zp->z_acl_cached = NULL; |
1047 | } | | 1049 | } |
1048 | mutex_exit(&zp->z_acl_lock); | | 1050 | mutex_exit(&zp->z_acl_lock); |
1049 | | | 1051 | |
1050 | zfs_znode_dmu_init(zfsvfs, zp, db); | | 1052 | zfs_znode_dmu_init(zfsvfs, zp, db); |
1051 | zp->z_unlinked = (zp->z_phys->zp_links == 0); | | 1053 | zp->z_unlinked = (zp->z_phys->zp_links == 0); |
1052 | zp->z_blksz = doi.doi_data_block_size; | | 1054 | zp->z_blksz = doi.doi_data_block_size; |
1053 | | | 1055 | |
1054 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); | | 1056 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); |
1055 | | | 1057 | |
1056 | return (0); | | 1058 | return (0); |
1057 | } | | 1059 | } |
1058 | | | 1060 | |
1059 | void | | 1061 | void |
1060 | zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) | | 1062 | zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) |
1061 | { | | 1063 | { |
1062 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | | 1064 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; |
1063 | objset_t *os = zfsvfs->z_os; | | 1065 | objset_t *os = zfsvfs->z_os; |
1064 | uint64_t obj = zp->z_id; | | 1066 | uint64_t obj = zp->z_id; |
1065 | uint64_t acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj; | | 1067 | uint64_t acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj; |
1066 | | | 1068 | |
1067 | ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); | | 1069 | ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); |
1068 | if (acl_obj) | | 1070 | if (acl_obj) |
1069 | VERIFY(0 == dmu_object_free(os, acl_obj, tx)); | | 1071 | VERIFY(0 == dmu_object_free(os, acl_obj, tx)); |
1070 | VERIFY(0 == dmu_object_free(os, obj, tx)); | | 1072 | VERIFY(0 == dmu_object_free(os, obj, tx)); |
1071 | zfs_znode_dmu_fini(zp); | | 1073 | zfs_znode_dmu_fini(zp); |
1072 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); | | 1074 | ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); |
1073 | zfs_znode_free(zp); | | 1075 | zfs_znode_free(zp); |
1074 | } | | 1076 | } |
1075 | | | 1077 | |
1076 | void | | 1078 | void |
1077 | zfs_zinactive(znode_t *zp) | | 1079 | zfs_zinactive(znode_t *zp) |
1078 | { | | 1080 | { |
1079 | vnode_t *vp = ZTOV(zp); | | 1081 | vnode_t *vp = ZTOV(zp); |
1080 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | | 1082 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; |
1081 | uint64_t z_id = zp->z_id; | | 1083 | uint64_t z_id = zp->z_id; |
1082 | | | 1084 | |
1083 | ASSERT(zp->z_dbuf && zp->z_phys); | | 1085 | ASSERT(zp->z_dbuf && zp->z_phys); |
1084 | | | 1086 | |
1085 | /* | | 1087 | /* |
1086 | * Don't allow a zfs_zget() while were trying to release this znode | | 1088 | * Don't allow a zfs_zget() while were trying to release this znode |
1087 | */ | | 1089 | */ |
1088 | ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); | | 1090 | ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); |
1089 | | | 1091 | |
1090 | mutex_enter(&zp->z_lock); | | 1092 | mutex_enter(&zp->z_lock); |
1091 | /* | | 1093 | /* |
1092 | * If this was the last reference to a file with no links, | | 1094 | * If this was the last reference to a file with no links, |
1093 | * remove the file from the file system. | | 1095 | * remove the file from the file system. |
1094 | */ | | 1096 | */ |
1095 | if (zp->z_unlinked) { | | 1097 | if (zp->z_unlinked) { |
1096 | mutex_exit(&zp->z_lock); | | 1098 | mutex_exit(&zp->z_lock); |
1097 | ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); | | 1099 | ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); |
1098 | zfs_rmnode(zp); | | 1100 | zfs_rmnode(zp); |
1099 | return; | | 1101 | return; |
1100 | } | | 1102 | } |
1101 | | | 1103 | |
1102 | mutex_exit(&zp->z_lock); | | 1104 | mutex_exit(&zp->z_lock); |
1103 | zfs_znode_dmu_fini(zp); | | 1105 | zfs_znode_dmu_fini(zp); |
1104 | ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); | | 1106 | ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); |
1105 | zfs_znode_free(zp); | | 1107 | zfs_znode_free(zp); |
1106 | } | | 1108 | } |
1107 | | | 1109 | |
1108 | void | | 1110 | void |
1109 | zfs_znode_free(znode_t *zp) | | 1111 | zfs_znode_free(znode_t *zp) |
1110 | { | | 1112 | { |
1111 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | | 1113 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; |
1112 | struct vnode *vp = ZTOV(zp); | | 1114 | struct vnode *vp = ZTOV(zp); |
1113 | | | 1115 | |
1114 | /* XXX Not all callers are from VOP_RECLAIM. What to do? */ | | 1116 | /* XXX Not all callers are from VOP_RECLAIM. What to do? */ |
1115 | KASSERT(vp != NULL); | | 1117 | KASSERT(vp != NULL); |
1116 | mutex_enter(vp->v_interlock); /* XXX Necessary? */ | | 1118 | mutex_enter(vp->v_interlock); /* XXX Necessary? */ |
1117 | genfs_node_destroy(vp); | | 1119 | genfs_node_destroy(vp); |
1118 | vp->v_data = NULL; | | 1120 | vp->v_data = NULL; |
1119 | mutex_exit(vp->v_interlock); | | 1121 | mutex_exit(vp->v_interlock); |
1120 | | | 1122 | |
1121 | dprintf("destroying znode %p\n", zp); | | 1123 | dprintf("destroying znode %p\n", zp); |
1122 | //cpu_Debugger(); | | 1124 | //cpu_Debugger(); |
1123 | mutex_enter(&zfsvfs->z_znodes_lock); | | 1125 | mutex_enter(&zfsvfs->z_znodes_lock); |
1124 | POINTER_INVALIDATE(&zp->z_zfsvfs); | | 1126 | POINTER_INVALIDATE(&zp->z_zfsvfs); |
1125 | list_remove(&zfsvfs->z_all_znodes, zp); | | 1127 | list_remove(&zfsvfs->z_all_znodes, zp); |
1126 | mutex_exit(&zfsvfs->z_znodes_lock); | | 1128 | mutex_exit(&zfsvfs->z_znodes_lock); |
1127 | | | 1129 | |
1128 | if (zp->z_acl_cached) { | | 1130 | if (zp->z_acl_cached) { |
1129 | zfs_acl_free(zp->z_acl_cached); | | 1131 | zfs_acl_free(zp->z_acl_cached); |
1130 | zp->z_acl_cached = NULL; | | 1132 | zp->z_acl_cached = NULL; |
1131 | } | | 1133 | } |
1132 | | | 1134 | |
1133 | kmem_cache_free(znode_cache, zp); | | 1135 | kmem_cache_free(znode_cache, zp); |
1134 | | | 1136 | |
1135 | VFS_RELE(zfsvfs->z_vfs); | | 1137 | VFS_RELE(zfsvfs->z_vfs); |
1136 | } | | 1138 | } |
1137 | | | 1139 | |
1138 | void | | 1140 | void |
1139 | zfs_time_stamper_locked(znode_t *zp, uint_t flag, dmu_tx_t *tx) | | 1141 | zfs_time_stamper_locked(znode_t *zp, uint_t flag, dmu_tx_t *tx) |
1140 | { | | 1142 | { |
1141 | timestruc_t now; | | 1143 | timestruc_t now; |
1142 | | | 1144 | |
1143 | ASSERT(MUTEX_HELD(&zp->z_lock)); | | 1145 | ASSERT(MUTEX_HELD(&zp->z_lock)); |
1144 | | | 1146 | |
1145 | gethrestime(&now); | | 1147 | gethrestime(&now); |
1146 | | | 1148 | |
1147 | if (tx) { | | 1149 | if (tx) { |
1148 | dmu_buf_will_dirty(zp->z_dbuf, tx); | | 1150 | dmu_buf_will_dirty(zp->z_dbuf, tx); |
1149 | zp->z_atime_dirty = 0; | | 1151 | zp->z_atime_dirty = 0; |
1150 | zp->z_seq++; | | 1152 | zp->z_seq++; |
1151 | } else { | | 1153 | } else { |
1152 | zp->z_atime_dirty = 1; | | 1154 | zp->z_atime_dirty = 1; |
1153 | } | | 1155 | } |
1154 | | | 1156 | |
1155 | if (flag & AT_ATIME) | | 1157 | if (flag & AT_ATIME) |
1156 | ZFS_TIME_ENCODE(&now, zp->z_phys->zp_atime); | | 1158 | ZFS_TIME_ENCODE(&now, zp->z_phys->zp_atime); |
1157 | | | 1159 | |
1158 | if (flag & AT_MTIME) { | | 1160 | if (flag & AT_MTIME) { |
1159 | ZFS_TIME_ENCODE(&now, zp->z_phys->zp_mtime); | | 1161 | ZFS_TIME_ENCODE(&now, zp->z_phys->zp_mtime); |
1160 | if (zp->z_zfsvfs->z_use_fuids) | | 1162 | if (zp->z_zfsvfs->z_use_fuids) |
1161 | zp->z_phys->zp_flags |= (ZFS_ARCHIVE | ZFS_AV_MODIFIED); | | 1163 | zp->z_phys->zp_flags |= (ZFS_ARCHIVE | ZFS_AV_MODIFIED); |
1162 | } | | 1164 | } |
1163 | | | 1165 | |
1164 | if (flag & AT_CTIME) { | | 1166 | if (flag & AT_CTIME) { |
1165 | ZFS_TIME_ENCODE(&now, zp->z_phys->zp_ctime); | | 1167 | ZFS_TIME_ENCODE(&now, zp->z_phys->zp_ctime); |
1166 | if (zp->z_zfsvfs->z_use_fuids) | | 1168 | if (zp->z_zfsvfs->z_use_fuids) |
1167 | zp->z_phys->zp_flags |= ZFS_ARCHIVE; | | 1169 | zp->z_phys->zp_flags |= ZFS_ARCHIVE; |
1168 | } | | 1170 | } |
1169 | } | | 1171 | } |
1170 | | | 1172 | |
1171 | /* | | 1173 | /* |
1172 | * Update the requested znode timestamps with the current time. | | 1174 | * Update the requested znode timestamps with the current time. |
1173 | * If we are in a transaction, then go ahead and mark the znode | | 1175 | * If we are in a transaction, then go ahead and mark the znode |
1174 | * dirty in the transaction so the timestamps will go to disk. | | 1176 | * dirty in the transaction so the timestamps will go to disk. |
1175 | * Otherwise, we will get pushed next time the znode is updated | | 1177 | * Otherwise, we will get pushed next time the znode is updated |
1176 | * in a transaction, or when this znode eventually goes inactive. | | 1178 | * in a transaction, or when this znode eventually goes inactive. |
1177 | * | | 1179 | * |
1178 | * Why is this OK? | | 1180 | * Why is this OK? |
1179 | * 1 - Only the ACCESS time is ever updated outside of a transaction. | | 1181 | * 1 - Only the ACCESS time is ever updated outside of a transaction. |
1180 | * 2 - Multiple consecutive updates will be collapsed into a single | | 1182 | * 2 - Multiple consecutive updates will be collapsed into a single |
1181 | * znode update by the transaction grouping semantics of the DMU. | | 1183 | * znode update by the transaction grouping semantics of the DMU. |
1182 | */ | | 1184 | */ |
1183 | void | | 1185 | void |
1184 | zfs_time_stamper(znode_t *zp, uint_t flag, dmu_tx_t *tx) | | 1186 | zfs_time_stamper(znode_t *zp, uint_t flag, dmu_tx_t *tx) |
1185 | { | | 1187 | { |
1186 | mutex_enter(&zp->z_lock); | | 1188 | mutex_enter(&zp->z_lock); |
1187 | zfs_time_stamper_locked(zp, flag, tx); | | 1189 | zfs_time_stamper_locked(zp, flag, tx); |
1188 | mutex_exit(&zp->z_lock); | | 1190 | mutex_exit(&zp->z_lock); |
1189 | } | | 1191 | } |
1190 | | | 1192 | |
1191 | /* | | 1193 | /* |
1192 | * Grow the block size for a file. | | 1194 | * Grow the block size for a file. |
1193 | * | | 1195 | * |
1194 | * IN: zp - znode of file to free data in. | | 1196 | * IN: zp - znode of file to free data in. |
1195 | * size - requested block size | | 1197 | * size - requested block size |
1196 | * tx - open transaction. | | 1198 | * tx - open transaction. |
1197 | * | | 1199 | * |
1198 | * NOTE: this function assumes that the znode is write locked. | | 1200 | * NOTE: this function assumes that the znode is write locked. |
1199 | */ | | 1201 | */ |
1200 | void | | 1202 | void |
1201 | zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) | | 1203 | zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) |
1202 | { | | 1204 | { |
1203 | int error; | | 1205 | int error; |
1204 | u_longlong_t dummy; | | 1206 | u_longlong_t dummy; |
1205 | | | 1207 | |
1206 | if (size <= zp->z_blksz) | | 1208 | if (size <= zp->z_blksz) |
1207 | return; | | 1209 | return; |
1208 | /* | | 1210 | /* |
1209 | * If the file size is already greater than the current blocksize, | | 1211 | * If the file size is already greater than the current blocksize, |
1210 | * we will not grow. If there is more than one block in a file, | | 1212 | * we will not grow. If there is more than one block in a file, |
1211 | * the blocksize cannot change. | | 1213 | * the blocksize cannot change. |
1212 | */ | | 1214 | */ |
1213 | if (zp->z_blksz && zp->z_phys->zp_size > zp->z_blksz) | | 1215 | if (zp->z_blksz && zp->z_phys->zp_size > zp->z_blksz) |
1214 | return; | | 1216 | return; |
1215 | | | 1217 | |
1216 | error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id, | | 1218 | error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id, |
1217 | size, 0, tx); | | 1219 | size, 0, tx); |
1218 | if (error == ENOTSUP) | | 1220 | if (error == ENOTSUP) |
1219 | return; | | 1221 | return; |
1220 | ASSERT3U(error, ==, 0); | | 1222 | ASSERT3U(error, ==, 0); |
1221 | | | 1223 | |
1222 | /* What blocksize did we actually get? */ | | 1224 | /* What blocksize did we actually get? */ |
1223 | dmu_object_size_from_db(zp->z_dbuf, &zp->z_blksz, &dummy); | | 1225 | dmu_object_size_from_db(zp->z_dbuf, &zp->z_blksz, &dummy); |
1224 | } | | 1226 | } |
1225 | | | 1227 | |
1226 | /* | | 1228 | /* |
1227 | * Increase the file length | | 1229 | * Increase the file length |
1228 | * | | 1230 | * |
1229 | * IN: zp - znode of file to free data in. | | 1231 | * IN: zp - znode of file to free data in. |
1230 | * end - new end-of-file | | 1232 | * end - new end-of-file |
1231 | * | | 1233 | * |
1232 | * RETURN: 0 if success | | 1234 | * RETURN: 0 if success |
1233 | * error code if failure | | 1235 | * error code if failure |
1234 | */ | | 1236 | */ |
1235 | static int | | 1237 | static int |
1236 | zfs_extend(znode_t *zp, uint64_t end) | | 1238 | zfs_extend(znode_t *zp, uint64_t end) |
1237 | { | | 1239 | { |
1238 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | | 1240 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; |
1239 | dmu_tx_t *tx; | | 1241 | dmu_tx_t *tx; |
1240 | rl_t *rl; | | 1242 | rl_t *rl; |
1241 | uint64_t newblksz; | | 1243 | uint64_t newblksz; |
1242 | int error; | | 1244 | int error; |
1243 | | | 1245 | |
1244 | /* | | 1246 | /* |
1245 | * We will change zp_size, lock the whole file. | | 1247 | * We will change zp_size, lock the whole file. |
1246 | */ | | 1248 | */ |
1247 | rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); | | 1249 | rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); |
1248 | | | 1250 | |
1249 | /* | | 1251 | /* |
1250 | * Nothing to do if file already at desired length. | | 1252 | * Nothing to do if file already at desired length. |
1251 | */ | | 1253 | */ |
1252 | if (end <= zp->z_phys->zp_size) { | | 1254 | if (end <= zp->z_phys->zp_size) { |
1253 | zfs_range_unlock(rl); | | 1255 | zfs_range_unlock(rl); |
1254 | return (0); | | 1256 | return (0); |
1255 | } | | 1257 | } |
1256 | top: | | 1258 | top: |
1257 | tx = dmu_tx_create(zfsvfs->z_os); | | 1259 | tx = dmu_tx_create(zfsvfs->z_os); |
1258 | dmu_tx_hold_bonus(tx, zp->z_id); | | 1260 | dmu_tx_hold_bonus(tx, zp->z_id); |
1259 | if (end > zp->z_blksz && | | 1261 | if (end > zp->z_blksz && |
1260 | (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { | | 1262 | (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { |
1261 | /* | | 1263 | /* |
1262 | * We are growing the file past the current block size. | | 1264 | * We are growing the file past the current block size. |
1263 | */ | | 1265 | */ |
1264 | if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) { | | 1266 | if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) { |
1265 | ASSERT(!ISP2(zp->z_blksz)); | | 1267 | ASSERT(!ISP2(zp->z_blksz)); |
1266 | newblksz = MIN(end, SPA_MAXBLOCKSIZE); | | 1268 | newblksz = MIN(end, SPA_MAXBLOCKSIZE); |
1267 | } else { | | 1269 | } else { |
1268 | newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz); | | 1270 | newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz); |
1269 | } | | 1271 | } |
1270 | dmu_tx_hold_write(tx, zp->z_id, 0, newblksz); | | 1272 | dmu_tx_hold_write(tx, zp->z_id, 0, newblksz); |
1271 | } else { | | 1273 | } else { |
1272 | newblksz = 0; | | 1274 | newblksz = 0; |
1273 | } | | 1275 | } |
1274 | | | 1276 | |
1275 | error = dmu_tx_assign(tx, TXG_NOWAIT); | | 1277 | error = dmu_tx_assign(tx, TXG_NOWAIT); |
1276 | if (error) { | | 1278 | if (error) { |
1277 | if (error == ERESTART) { | | 1279 | if (error == ERESTART) { |
1278 | dmu_tx_wait(tx); | | 1280 | dmu_tx_wait(tx); |
1279 | dmu_tx_abort(tx); | | 1281 | dmu_tx_abort(tx); |
1280 | goto top; | | 1282 | goto top; |
1281 | } | | 1283 | } |
1282 | dmu_tx_abort(tx); | | 1284 | dmu_tx_abort(tx); |
1283 | zfs_range_unlock(rl); | | 1285 | zfs_range_unlock(rl); |
1284 | return (error); | | 1286 | return (error); |
1285 | } | | 1287 | } |
1286 | dmu_buf_will_dirty(zp->z_dbuf, tx); | | 1288 | dmu_buf_will_dirty(zp->z_dbuf, tx); |
1287 | | | 1289 | |
1288 | if (newblksz) | | 1290 | if (newblksz) |
1289 | zfs_grow_blocksize(zp, newblksz, tx); | | 1291 | zfs_grow_blocksize(zp, newblksz, tx); |
1290 | | | 1292 | |
1291 | zp->z_phys->zp_size = end; | | 1293 | zp->z_phys->zp_size = end; |
1292 | | | 1294 | |
1293 | zfs_range_unlock(rl); | | 1295 | zfs_range_unlock(rl); |
1294 | | | 1296 | |
1295 | dmu_tx_commit(tx); | | 1297 | dmu_tx_commit(tx); |
1296 | | | 1298 | |
1297 | uvm_vnp_setsize(ZTOV(zp), end); | | 1299 | uvm_vnp_setsize(ZTOV(zp), end); |
1298 | | | 1300 | |
1299 | return (0); | | 1301 | return (0); |
1300 | } | | 1302 | } |
1301 | | | 1303 | |
1302 | /* | | 1304 | /* |
1303 | * Free space in a file. | | 1305 | * Free space in a file. |
1304 | * | | 1306 | * |
1305 | * IN: zp - znode of file to free data in. | | 1307 | * IN: zp - znode of file to free data in. |
1306 | * off - start of section to free. | | 1308 | * off - start of section to free. |
1307 | * len - length of section to free. | | 1309 | * len - length of section to free. |
1308 | * | | 1310 | * |
1309 | * RETURN: 0 if success | | 1311 | * RETURN: 0 if success |
1310 | * error code if failure | | 1312 | * error code if failure |
1311 | */ | | 1313 | */ |
1312 | static int | | 1314 | static int |
1313 | zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) | | 1315 | zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) |
1314 | { | | 1316 | { |
1315 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | | 1317 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; |
1316 | rl_t *rl; | | 1318 | rl_t *rl; |
1317 | int error; | | 1319 | int error; |
1318 | | | 1320 | |
1319 | /* | | 1321 | /* |
1320 | * Lock the range being freed. | | 1322 | * Lock the range being freed. |
1321 | */ | | 1323 | */ |
1322 | rl = zfs_range_lock(zp, off, len, RL_WRITER); | | 1324 | rl = zfs_range_lock(zp, off, len, RL_WRITER); |
1323 | | | 1325 | |
1324 | /* | | 1326 | /* |
1325 | * Nothing to do if file already at desired length. | | 1327 | * Nothing to do if file already at desired length. |
1326 | */ | | 1328 | */ |
1327 | if (off >= zp->z_phys->zp_size) { | | 1329 | if (off >= zp->z_phys->zp_size) { |
1328 | zfs_range_unlock(rl); | | 1330 | zfs_range_unlock(rl); |
1329 | return (0); | | 1331 | return (0); |
1330 | } | | 1332 | } |
1331 | | | 1333 | |
1332 | if (off + len > zp->z_phys->zp_size) | | 1334 | if (off + len > zp->z_phys->zp_size) |
1333 | len = zp->z_phys->zp_size - off; | | 1335 | len = zp->z_phys->zp_size - off; |
1334 | | | 1336 | |
1335 | error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); | | 1337 | error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); |
1336 | | | 1338 | |
1337 | if (error == 0) { | | 1339 | if (error == 0) { |
1338 | /* | | 1340 | /* |
1339 | * In NetBSD we cannot free block in the middle of a file, | | 1341 | * In NetBSD we cannot free block in the middle of a file, |
1340 | * but only at the end of a file. | | 1342 | * but only at the end of a file. |
1341 | */ | | 1343 | */ |
1342 | uvm_vnp_setsize(ZTOV(zp), off); | | 1344 | uvm_vnp_setsize(ZTOV(zp), off); |
1343 | } | | 1345 | } |
1344 | | | 1346 | |
1345 | zfs_range_unlock(rl); | | 1347 | zfs_range_unlock(rl); |
1346 | | | 1348 | |
1347 | return (error); | | 1349 | return (error); |
1348 | } | | 1350 | } |
1349 | | | 1351 | |
1350 | /* | | 1352 | /* |
1351 | * Truncate a file | | 1353 | * Truncate a file |
1352 | * | | 1354 | * |
1353 | * IN: zp - znode of file to free data in. | | 1355 | * IN: zp - znode of file to free data in. |
1354 | * end - new end-of-file. | | 1356 | * end - new end-of-file. |
1355 | * | | 1357 | * |
1356 | * RETURN: 0 if success | | 1358 | * RETURN: 0 if success |
1357 | * error code if failure | | 1359 | * error code if failure |
1358 | */ | | 1360 | */ |
1359 | static int | | 1361 | static int |
1360 | zfs_trunc(znode_t *zp, uint64_t end) | | 1362 | zfs_trunc(znode_t *zp, uint64_t end) |
1361 | { | | 1363 | { |
1362 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | | 1364 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; |
1363 | vnode_t *vp = ZTOV(zp); | | 1365 | vnode_t *vp = ZTOV(zp); |
1364 | dmu_tx_t *tx; | | 1366 | dmu_tx_t *tx; |
1365 | rl_t *rl; | | 1367 | rl_t *rl; |
1366 | int error; | | 1368 | int error; |
1367 | | | 1369 | |
1368 | /* | | 1370 | /* |
1369 | * We will change zp_size, lock the whole file. | | 1371 | * We will change zp_size, lock the whole file. |
1370 | */ | | 1372 | */ |
1371 | rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); | | 1373 | rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); |
1372 | | | 1374 | |
1373 | /* | | 1375 | /* |
1374 | * Nothing to do if file already at desired length. | | 1376 | * Nothing to do if file already at desired length. |
1375 | */ | | 1377 | */ |
1376 | if (end >= zp->z_phys->zp_size) { | | 1378 | if (end >= zp->z_phys->zp_size) { |
1377 | zfs_range_unlock(rl); | | 1379 | zfs_range_unlock(rl); |
1378 | return (0); | | 1380 | return (0); |
1379 | } | | 1381 | } |
1380 | | | 1382 | |
1381 | error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, -1); | | 1383 | error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, -1); |
1382 | if (error) { | | 1384 | if (error) { |
1383 | zfs_range_unlock(rl); | | 1385 | zfs_range_unlock(rl); |
1384 | return (error); | | 1386 | return (error); |
1385 | } | | 1387 | } |
1386 | top: | | 1388 | top: |
1387 | tx = dmu_tx_create(zfsvfs->z_os); | | 1389 | tx = dmu_tx_create(zfsvfs->z_os); |
1388 | dmu_tx_hold_bonus(tx, zp->z_id); | | 1390 | dmu_tx_hold_bonus(tx, zp->z_id); |
1389 | error = dmu_tx_assign(tx, TXG_NOWAIT); | | 1391 | error = dmu_tx_assign(tx, TXG_NOWAIT); |
1390 | if (error) { | | 1392 | if (error) { |
1391 | if (error == ERESTART) { | | 1393 | if (error == ERESTART) { |
1392 | dmu_tx_wait(tx); | | 1394 | dmu_tx_wait(tx); |
1393 | dmu_tx_abort(tx); | | 1395 | dmu_tx_abort(tx); |
1394 | goto top; | | 1396 | goto top; |
1395 | } | | 1397 | } |
1396 | dmu_tx_abort(tx); | | 1398 | dmu_tx_abort(tx); |
1397 | zfs_range_unlock(rl); | | 1399 | zfs_range_unlock(rl); |
1398 | return (error); | | 1400 | return (error); |
1399 | } | | 1401 | } |
1400 | dmu_buf_will_dirty(zp->z_dbuf, tx); | | 1402 | dmu_buf_will_dirty(zp->z_dbuf, tx); |
1401 | | | 1403 | |
1402 | zp->z_phys->zp_size = end; | | 1404 | zp->z_phys->zp_size = end; |
1403 | | | 1405 | |
1404 | dmu_tx_commit(tx); | | 1406 | dmu_tx_commit(tx); |
1405 | | | 1407 | |
1406 | zfs_range_unlock(rl); | | 1408 | zfs_range_unlock(rl); |
1407 | | | 1409 | |
1408 | /* | | 1410 | /* |
1409 | * Clear any mapped pages in the truncated region. This has to | | 1411 | * Clear any mapped pages in the truncated region. This has to |
1410 | * happen outside of the transaction to avoid the possibility of | | 1412 | * happen outside of the transaction to avoid the possibility of |
1411 | * a deadlock with someone trying to push a page that we are | | 1413 | * a deadlock with someone trying to push a page that we are |
1412 | * about to invalidate. | | 1414 | * about to invalidate. |
1413 | */ | | 1415 | */ |
1414 | | | 1416 | |
1415 | uvm_vnp_setsize(vp, end); | | 1417 | uvm_vnp_setsize(vp, end); |
1416 | | | 1418 | |
1417 | return (0); | | 1419 | return (0); |
1418 | } | | 1420 | } |
1419 | | | 1421 | |
1420 | /* | | 1422 | /* |
1421 | * Free space in a file | | 1423 | * Free space in a file |
1422 | * | | 1424 | * |
1423 | * IN: zp - znode of file to free data in. | | 1425 | * IN: zp - znode of file to free data in. |
1424 | * off - start of range | | 1426 | * off - start of range |
1425 | * len - end of range (0 => EOF) | | 1427 | * len - end of range (0 => EOF) |
1426 | * flag - current file open mode flags. | | 1428 | * flag - current file open mode flags. |
1427 | * log - TRUE if this action should be logged | | 1429 | * log - TRUE if this action should be logged |
1428 | * | | 1430 | * |
1429 | * RETURN: 0 if success | | 1431 | * RETURN: 0 if success |
1430 | * error code if failure | | 1432 | * error code if failure |
1431 | */ | | 1433 | */ |
1432 | int | | 1434 | int |
1433 | zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) | | 1435 | zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) |
1434 | { | | 1436 | { |
1435 | vnode_t *vp = ZTOV(zp); | | 1437 | vnode_t *vp = ZTOV(zp); |
1436 | dmu_tx_t *tx; | | 1438 | dmu_tx_t *tx; |
1437 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | | 1439 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; |
1438 | zilog_t *zilog = zfsvfs->z_log; | | 1440 | zilog_t *zilog = zfsvfs->z_log; |
1439 | int error; | | 1441 | int error; |
1440 | | | 1442 | |
1441 | if (off > zp->z_phys->zp_size) { | | 1443 | if (off > zp->z_phys->zp_size) { |
1442 | error = zfs_extend(zp, off+len); | | 1444 | error = zfs_extend(zp, off+len); |
1443 | if (error == 0 && log) | | 1445 | if (error == 0 && log) |
1444 | goto log; | | 1446 | goto log; |
1445 | else | | 1447 | else |
1446 | return (error); | | 1448 | return (error); |
1447 | } | | 1449 | } |
1448 | | | 1450 | |
1449 | if (len == 0) { | | 1451 | if (len == 0) { |
1450 | error = zfs_trunc(zp, off); | | 1452 | error = zfs_trunc(zp, off); |
1451 | } else { | | 1453 | } else { |
1452 | if ((error = zfs_free_range(zp, off, len)) == 0 && | | 1454 | if ((error = zfs_free_range(zp, off, len)) == 0 && |
1453 | off + len > zp->z_phys->zp_size) | | 1455 | off + len > zp->z_phys->zp_size) |
1454 | error = zfs_extend(zp, off+len); | | 1456 | error = zfs_extend(zp, off+len); |
1455 | } | | 1457 | } |
1456 | if (error || !log) | | 1458 | if (error || !log) |
1457 | return (error); | | 1459 | return (error); |
1458 | log: | | 1460 | log: |
1459 | tx = dmu_tx_create(zfsvfs->z_os); | | 1461 | tx = dmu_tx_create(zfsvfs->z_os); |
1460 | dmu_tx_hold_bonus(tx, zp->z_id); | | 1462 | dmu_tx_hold_bonus(tx, zp->z_id); |
1461 | error = dmu_tx_assign(tx, TXG_NOWAIT); | | 1463 | error = dmu_tx_assign(tx, TXG_NOWAIT); |
1462 | if (error) { | | 1464 | if (error) { |
1463 | if (error == ERESTART) { | | 1465 | if (error == ERESTART) { |
1464 | dmu_tx_wait(tx); | | 1466 | dmu_tx_wait(tx); |
1465 | dmu_tx_abort(tx); | | 1467 | dmu_tx_abort(tx); |
1466 | goto log; | | 1468 | goto log; |
1467 | } | | 1469 | } |
1468 | dmu_tx_abort(tx); | | 1470 | dmu_tx_abort(tx); |
1469 | return (error); | | 1471 | return (error); |
1470 | } | | 1472 | } |
1471 | | | 1473 | |
1472 | zfs_time_stamper(zp, CONTENT_MODIFIED, tx); | | 1474 | zfs_time_stamper(zp, CONTENT_MODIFIED, tx); |
1473 | zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); | | 1475 | zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); |
1474 | | | 1476 | |
1475 | dmu_tx_commit(tx); | | 1477 | dmu_tx_commit(tx); |
1476 | return (0); | | 1478 | return (0); |
1477 | } | | 1479 | } |
1478 | | | 1480 | |
1479 | void | | 1481 | void |
1480 | zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) | | 1482 | zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) |
1481 | { | | 1483 | { |
1482 | zfsvfs_t zfsvfs; | | 1484 | zfsvfs_t zfsvfs; |
1483 | uint64_t moid, obj, version; | | 1485 | uint64_t moid, obj, version; |
1484 | uint64_t sense = ZFS_CASE_SENSITIVE; | | 1486 | uint64_t sense = ZFS_CASE_SENSITIVE; |
1485 | uint64_t norm = 0; | | 1487 | uint64_t norm = 0; |
1486 | nvpair_t *elem; | | 1488 | nvpair_t *elem; |
1487 | int error; | | 1489 | int error; |
1488 | int i; | | 1490 | int i; |
1489 | znode_t *rootzp = NULL; | | 1491 | znode_t *rootzp = NULL; |
1490 | vnode_t *vp; | | 1492 | vnode_t *vp; |
1491 | vattr_t vattr; | | 1493 | vattr_t vattr; |
1492 | znode_t *zp; | | 1494 | znode_t *zp; |
1493 | zfs_acl_ids_t acl_ids; | | 1495 | zfs_acl_ids_t acl_ids; |
1494 | | | 1496 | |
1495 | /* | | 1497 | /* |
1496 | * First attempt to create master node. | | 1498 | * First attempt to create master node. |
1497 | */ | | 1499 | */ |
1498 | /* | | 1500 | /* |
1499 | * In an empty objset, there are no blocks to read and thus | | 1501 | * In an empty objset, there are no blocks to read and thus |