Tue Apr 15 09:50:45 2014 UTC ()
Fix a deadlock where one thread exits, enters fstrans_lwp_dtor()
and wants fstrans_lock.  This thread holds the proc_lock.
Another thread holds fstrans_lock and runs pserialize_perform().
As the first thread holds the proc_lock, timeouts are blocked and
the second thread blocks forever in kpause().

Change fstrans_lwp_dtor() to invalidate, but not free its info
structs.  No need to take fstrans_lock.

Change fstrans_get_lwp_info() to reuse invalidated info before
trying to allocate a new one.


(hannken)
diff -r1.29 -r1.30 src/sys/kern/vfs_trans.c

cvs diff -r1.29 -r1.30 src/sys/kern/vfs_trans.c (expand / switch to unified diff)

--- src/sys/kern/vfs_trans.c 2013/11/23 13:35:36 1.29
+++ src/sys/kern/vfs_trans.c 2014/04/15 09:50:45 1.30
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: vfs_trans.c,v 1.29 2013/11/23 13:35:36 christos Exp $ */ 1/* $NetBSD: vfs_trans.c,v 1.30 2014/04/15 09:50:45 hannken Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2007 The NetBSD Foundation, Inc. 4 * Copyright (c) 2007 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Juergen Hannken-Illjes. 8 * by Juergen Hannken-Illjes.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
@@ -20,27 +20,27 @@ @@ -20,27 +20,27 @@
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32#include <sys/cdefs.h> 32#include <sys/cdefs.h>
33__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.29 2013/11/23 13:35:36 christos Exp $"); 33__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.30 2014/04/15 09:50:45 hannken Exp $");
34 34
35/* 35/*
36 * File system transaction operations. 36 * File system transaction operations.
37 */ 37 */
38 38
39#include "opt_ddb.h" 39#include "opt_ddb.h"
40 40
41#include <sys/param.h> 41#include <sys/param.h>
42#include <sys/systm.h> 42#include <sys/systm.h>
43#include <sys/atomic.h> 43#include <sys/atomic.h>
44#include <sys/buf.h> 44#include <sys/buf.h>
45#include <sys/kmem.h> 45#include <sys/kmem.h>
46#include <sys/mount.h> 46#include <sys/mount.h>
@@ -72,27 +72,26 @@ struct fstrans_mount_info { @@ -72,27 +72,26 @@ struct fstrans_mount_info {
72 unsigned int fmi_ref_cnt; 72 unsigned int fmi_ref_cnt;
73 bool fmi_cow_change; 73 bool fmi_cow_change;
74 LIST_HEAD(, fscow_handler) fmi_cow_handler; 74 LIST_HEAD(, fscow_handler) fmi_cow_handler;
75}; 75};
76 76
77static specificdata_key_t lwp_data_key; /* Our specific data key. */ 77static specificdata_key_t lwp_data_key; /* Our specific data key. */
78static kmutex_t vfs_suspend_lock; /* Serialize suspensions. */ 78static kmutex_t vfs_suspend_lock; /* Serialize suspensions. */
79static kmutex_t fstrans_lock; /* Fstrans big lock. */ 79static kmutex_t fstrans_lock; /* Fstrans big lock. */
80static kcondvar_t fstrans_state_cv; /* Fstrans or cow state changed. */ 80static kcondvar_t fstrans_state_cv; /* Fstrans or cow state changed. */
81static kcondvar_t fstrans_count_cv; /* Fstrans or cow count changed. */ 81static kcondvar_t fstrans_count_cv; /* Fstrans or cow count changed. */
82static pserialize_t fstrans_psz; /* Pserialize state. */ 82static pserialize_t fstrans_psz; /* Pserialize state. */
83static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head; 83static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head;
84 /* List of all fstrans_lwp_info. */ 84 /* List of all fstrans_lwp_info. */
85static pool_cache_t fstrans_cache; /* Pool of struct fstrans_lwp_info. */ 
86 85
87static void fstrans_lwp_dtor(void *); 86static void fstrans_lwp_dtor(void *);
88static void fstrans_mount_dtor(struct mount *); 87static void fstrans_mount_dtor(struct mount *);
89static struct fstrans_lwp_info *fstrans_get_lwp_info(struct mount *, bool); 88static struct fstrans_lwp_info *fstrans_get_lwp_info(struct mount *, bool);
90static bool grant_lock(const enum fstrans_state, const enum fstrans_lock_type); 89static bool grant_lock(const enum fstrans_state, const enum fstrans_lock_type);
91static bool state_change_done(const struct mount *); 90static bool state_change_done(const struct mount *);
92static bool cow_state_change_done(const struct mount *); 91static bool cow_state_change_done(const struct mount *);
93static void cow_change_enter(const struct mount *); 92static void cow_change_enter(const struct mount *);
94static void cow_change_done(const struct mount *); 93static void cow_change_done(const struct mount *);
95 94
96/* 95/*
97 * Initialize. 96 * Initialize.
98 */ 97 */
@@ -100,49 +99,46 @@ void @@ -100,49 +99,46 @@ void
100fstrans_init(void) 99fstrans_init(void)
101{ 100{
102 int error __diagused; 101 int error __diagused;
103 102
104 error = lwp_specific_key_create(&lwp_data_key, fstrans_lwp_dtor); 103 error = lwp_specific_key_create(&lwp_data_key, fstrans_lwp_dtor);
105 KASSERT(error == 0); 104 KASSERT(error == 0);
106 105
107 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE); 106 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
108 mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE); 107 mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE);
109 cv_init(&fstrans_state_cv, "fstchg"); 108 cv_init(&fstrans_state_cv, "fstchg");
110 cv_init(&fstrans_count_cv, "fstcnt"); 109 cv_init(&fstrans_count_cv, "fstcnt");
111 fstrans_psz = pserialize_create(); 110 fstrans_psz = pserialize_create();
112 LIST_INIT(&fstrans_fli_head); 111 LIST_INIT(&fstrans_fli_head);
113 fstrans_cache = pool_cache_init(sizeof(struct fstrans_lwp_info), 0, 0, 
114 0, "fstrans", NULL, IPL_NONE, NULL, NULL, NULL); 
115} 112}
116 113
117/* 114/*
118 * Deallocate lwp state. 115 * Deallocate lwp state.
119 */ 116 */
120static void 117static void
121fstrans_lwp_dtor(void *arg) 118fstrans_lwp_dtor(void *arg)
122{ 119{
123 struct fstrans_lwp_info *fli, *fli_next; 120 struct fstrans_lwp_info *fli, *fli_next;
124 121
125 mutex_enter(&fstrans_lock); 
126 for (fli = arg; fli; fli = fli_next) { 122 for (fli = arg; fli; fli = fli_next) {
127 KASSERT(fli->fli_trans_cnt == 0); 123 KASSERT(fli->fli_trans_cnt == 0);
128 KASSERT(fli->fli_cow_cnt == 0); 124 KASSERT(fli->fli_cow_cnt == 0);
129 if (fli->fli_mount != NULL) 125 if (fli->fli_mount != NULL)
130 fstrans_mount_dtor(fli->fli_mount); 126 fstrans_mount_dtor(fli->fli_mount);
131 fli_next = fli->fli_succ; 127 fli_next = fli->fli_succ;
132 LIST_REMOVE(fli, fli_list); 128 fli->fli_mount = NULL;
133 pool_cache_put(fstrans_cache, fli); 129 membar_sync();
 130 fli->fli_self = NULL;
134 } 131 }
135 mutex_exit(&fstrans_lock); 
136} 132}
137 133
138/* 134/*
139 * Dereference mount state. 135 * Dereference mount state.
140 */ 136 */
141static void 137static void
142fstrans_mount_dtor(struct mount *mp) 138fstrans_mount_dtor(struct mount *mp)
143{ 139{
144 struct fstrans_mount_info *fmi; 140 struct fstrans_mount_info *fmi;
145 141
146 fmi = mp->mnt_transinfo; 142 fmi = mp->mnt_transinfo;
147 if (atomic_dec_uint_nv(&fmi->fmi_ref_cnt) > 0) 143 if (atomic_dec_uint_nv(&fmi->fmi_ref_cnt) > 0)
148 return; 144 return;
@@ -227,27 +223,41 @@ fstrans_get_lwp_info(struct mount *mp, b @@ -227,27 +223,41 @@ fstrans_get_lwp_info(struct mount *mp, b
227 return NULL; 223 return NULL;
228 224
229 /* 225 /*
230 * Try to reuse a cleared entry or allocate a new one. 226 * Try to reuse a cleared entry or allocate a new one.
231 */ 227 */
232 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) { 228 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
233 if (fli->fli_mount == NULL) { 229 if (fli->fli_mount == NULL) {
234 KASSERT(fli->fli_trans_cnt == 0); 230 KASSERT(fli->fli_trans_cnt == 0);
235 KASSERT(fli->fli_cow_cnt == 0); 231 KASSERT(fli->fli_cow_cnt == 0);
236 break; 232 break;
237 } 233 }
238 } 234 }
239 if (fli == NULL) { 235 if (fli == NULL) {
240 fli = pool_cache_get(fstrans_cache, PR_WAITOK); 236 mutex_enter(&fstrans_lock);
 237 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
 238 if (fli->fli_self == NULL) {
 239 KASSERT(fli->fli_trans_cnt == 0);
 240 KASSERT(fli->fli_cow_cnt == 0);
 241 fli->fli_self = curlwp;
 242 fli->fli_succ = lwp_getspecific(lwp_data_key);
 243 lwp_setspecific(lwp_data_key, fli);
 244 break;
 245 }
 246 }
 247 mutex_exit(&fstrans_lock);
 248 }
 249 if (fli == NULL) {
 250 fli = kmem_alloc(sizeof(*fli), KM_SLEEP);
241 mutex_enter(&fstrans_lock); 251 mutex_enter(&fstrans_lock);
242 memset(fli, 0, sizeof(*fli)); 252 memset(fli, 0, sizeof(*fli));
243 fli->fli_self = curlwp; 253 fli->fli_self = curlwp;
244 LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list); 254 LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list);
245 mutex_exit(&fstrans_lock); 255 mutex_exit(&fstrans_lock);
246 fli->fli_succ = lwp_getspecific(lwp_data_key); 256 fli->fli_succ = lwp_getspecific(lwp_data_key);
247 lwp_setspecific(lwp_data_key, fli); 257 lwp_setspecific(lwp_data_key, fli);
248 } 258 }
249 259
250 /* 260 /*
251 * Attach the entry to the mount. 261 * Attach the entry to the mount.
252 */ 262 */
253 fmi = mp->mnt_transinfo; 263 fmi = mp->mnt_transinfo;