Sat Dec 3 22:41:40 2011 UTC
hypervisor_unmask_event(): check/update evtchn_pending_sel not just for
  the current CPU, but for every CPU that may accept this event.
xen/xenevt.c: more use of atomic ops and locks where appropriate, and some
  other SMP fixes. Handle all events on the primary CPU (may be revisited
  later). Set/clear ci_evtmask[] for watched events.

This should fix the problems on dom0 kernels reported by jym@


(bouyer)
diff -r1.17 -r1.18 src/sys/arch/xen/x86/hypervisor_machdep.c
diff -r1.38 -r1.39 src/sys/arch/xen/xen/xenevt.c
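
In short: hypervisor_unmask_event() previously marked a pending event only in
the calling CPU's vcpu_info, so an event accepted by another CPU could sit
undelivered until something else kicked that CPU. It now walks all CPUs and
kicks each one whose ci_evtmask[] accepts the event. A condensed sketch of the
new unmask logic, distilled from the hypervisor_machdep.c diff below (sketch
only; the panic() on IPI failure is elided here):

    /* Sketch only; see the real diff below for the committed code. */
    xen_atomic_clear_bit(&s->evtchn_mask[0], ev);
    if (!xen_atomic_test_bit(&s->evtchn_pending[0], ev))
        return;                 /* nothing pending to resend */
    for (CPU_INFO_FOREACH(cii, ci)) {
        if (!xen_atomic_test_bit(&ci->ci_evtmask[0], ev))
            continue;           /* this CPU doesn't take this event */
        vci = ci->ci_vcpu;
        if (!xen_atomic_test_and_set_bit(&vci->evtchn_pending_sel,
            ev >> LONG_SHIFT))
            xen_atomic_set_bit(&vci->evtchn_upcall_pending, 0);
        if (!vci->evtchn_upcall_mask) {
            if (ci == curcpu())
                hypervisor_force_callback();
            else
                xen_send_ipi(ci, XEN_IPI_HVCB);
        }
    }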

cvs diff -r1.17 -r1.18 src/sys/arch/xen/x86/hypervisor_machdep.c

--- src/sys/arch/xen/x86/hypervisor_machdep.c 2011/11/19 17:13:39 1.17
+++ src/sys/arch/xen/x86/hypervisor_machdep.c 2011/12/03 22:41:40 1.18
@@ -1,14 +1,14 @@
-/* $NetBSD: hypervisor_machdep.c,v 1.17 2011/11/19 17:13:39 cherry Exp $ */
+/* $NetBSD: hypervisor_machdep.c,v 1.18 2011/12/03 22:41:40 bouyer Exp $ */
 
 /*
  *
  * Copyright (c) 2004 Christian Limpach.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
@@ -44,27 +44,27 @@
  * all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
 
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: hypervisor_machdep.c,v 1.17 2011/11/19 17:13:39 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: hypervisor_machdep.c,v 1.18 2011/12/03 22:41:40 bouyer Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kmem.h>
 
 #include <uvm/uvm_extern.h>
 
 #include <machine/vmparam.h>
 #include <machine/pmap.h>
 
 #include <xen/xen.h>
 #include <xen/hypervisor.h>
 #include <xen/evtchn.h>
@@ -295,54 +295,74 @@ hypervisor_send_event(struct cpu_info *c
 #endif
 
     xen_atomic_set_bit(&s->evtchn_pending[0], ev);
     xen_atomic_set_bit(&vci->evtchn_pending_sel,
         ev >> LONG_SHIFT);
 
     xen_atomic_set_bit(&vci->evtchn_upcall_pending, 0);
 
     xen_atomic_clear_bit(&s->evtchn_mask[0], ev);
 
     if (__predict_true(ci == curcpu())) {
         hypervisor_force_callback();
     } else {
-        if (xen_send_ipi(ci, XEN_IPI_HVCB)) {
-            panic("xen_send_ipi(cpu%d, XEN_IPI_HVCB) failed\n", (int) ci->ci_cpuid);
+        if (__predict_false(xen_send_ipi(ci, XEN_IPI_HVCB))) {
+            panic("xen_send_ipi(cpu%d, XEN_IPI_HVCB) failed\n",
+                (int) ci->ci_cpuid);
         }
     }
 }
 
 void
 hypervisor_unmask_event(unsigned int ev)
 {
     volatile shared_info_t *s = HYPERVISOR_shared_info;
-    volatile struct vcpu_info *vci = curcpu()->ci_vcpu;
+    CPU_INFO_ITERATOR cii;
+    struct cpu_info *ci;
+    volatile struct vcpu_info *vci;
 
 #ifdef PORT_DEBUG
     if (ev == PORT_DEBUG)
         printf("hypervisor_unmask_event %d\n", ev);
 #endif
 
     xen_atomic_clear_bit(&s->evtchn_mask[0], ev);
     /*
      * The following is basically the equivalent of
      * 'hw_resend_irq'. Just like a real IO-APIC we 'lose the
      * interrupt edge' if the channel is masked.
      */
-    if (xen_atomic_test_bit(&s->evtchn_pending[0], ev) &&
-        !xen_atomic_test_and_set_bit(&vci->evtchn_pending_sel, ev>>LONG_SHIFT)) {
-        xen_atomic_set_bit(&vci->evtchn_upcall_pending, 0);
-        if (!vci->evtchn_upcall_mask)
-            hypervisor_force_callback();
+    if (!xen_atomic_test_bit(&s->evtchn_pending[0], ev))
+        return;
+
+    for (CPU_INFO_FOREACH(cii, ci)) {
+        if (!xen_atomic_test_bit(&ci->ci_evtmask[0], ev))
+            continue;
+        vci = ci->ci_vcpu;
+        if (!xen_atomic_test_and_set_bit(&vci->evtchn_pending_sel,
+            ev>>LONG_SHIFT))
+            xen_atomic_set_bit(&vci->evtchn_upcall_pending, 0);
+        if (!vci->evtchn_upcall_mask) {
+            if (__predict_true(ci == curcpu())) {
+                hypervisor_force_callback();
+            } else {
+                if (__predict_false(
+                    xen_send_ipi(ci, XEN_IPI_HVCB))) {
+                    panic("xen_send_ipi(cpu%d, "
+                        "XEN_IPI_HVCB) failed\n",
+                        (int) ci->ci_cpuid);
+                }
+            }
+        }
     }
 }
 
 void
 hypervisor_mask_event(unsigned int ev)
 {
     volatile shared_info_t *s = HYPERVISOR_shared_info;
 #ifdef PORT_DEBUG
     if (ev == PORT_DEBUG)
         printf("hypervisor_mask_event %d\n", ev);
 #endif
 
     xen_atomic_set_bit(&s->evtchn_mask[0], ev);
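
The xenevt.c rework below follows the same SMP direction: devevent[] and the
pending queue are now consistently guarded by devevent_lock, per-device state
by d->lock, and a device closed while still sitting on devevent_pending is
only marked XENEVT_F_FREE; the softint (xenevt_notify()) performs the actual
free. A condensed sketch of that close-side handshake, taken from the diff
that follows (sketch only, not the literal committed code):

    /* Sketch only; condensed from the xenevt.c diff below. */
    static int
    xenevt_fclose(struct file *fp)
    {
        struct xenevt_d *d = fp->f_data;

        mutex_enter(&devevent_lock);
        mutex_enter(&d->lock);
        if (d->pending) {
            /* still queued for xenevt_notify(): defer the free */
            d->flags |= XENEVT_F_FREE;
            mutex_exit(&d->lock);
        } else {
            xenevt_free(d);     /* also drops d->lock */
        }
        mutex_exit(&devevent_lock);
        fp->f_data = NULL;
        return (0);
    }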

cvs diff -r1.38 -r1.39 src/sys/arch/xen/xen/xenevt.c

--- src/sys/arch/xen/xen/xenevt.c 2011/08/11 17:59:00 1.38
+++ src/sys/arch/xen/xen/xenevt.c 2011/12/03 22:41:40 1.39
@@ -1,14 +1,14 @@
-/* $NetBSD: xenevt.c,v 1.38 2011/08/11 17:59:00 cherry Exp $ */
+/* $NetBSD: xenevt.c,v 1.39 2011/12/03 22:41:40 bouyer Exp $ */
 
 /*
  * Copyright (c) 2005 Manuel Bouyer.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
@@ -16,27 +16,27 @@
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xenevt.c,v 1.38 2011/08/11 17:59:00 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xenevt.c,v 1.39 2011/12/03 22:41:40 bouyer Exp $");
 
 #include "opt_xen.h"
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/systm.h>
 #include <sys/device.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/poll.h>
 #include <sys/select.h>
 #include <sys/proc.h>
@@ -102,39 +102,41 @@ const struct cdevsw xenevt_cdevsw = {
 
 #define BYTES_PER_PORT (sizeof(evtchn_port_t) / sizeof(uint8_t))
 
 struct xenevt_d {
     kmutex_t lock;
     kcondvar_t cv;
     STAILQ_ENTRY(xenevt_d) pendingq;
     bool pending;
     evtchn_port_t ring[2048];
     u_int ring_read; /* pointer of the reader */
     u_int ring_write; /* pointer of the writer */
     u_int flags;
 #define XENEVT_F_OVERFLOW 0x01 /* ring overflow */
+#define XENEVT_F_FREE 0x02 /* free entry */
     struct selinfo sel; /* used by poll */
+    struct cpu_info *ci; /* prefered CPU for events for this device */
 };
 
 /* event -> user device mapping */
 static struct xenevt_d *devevent[NR_EVENT_CHANNELS];
 
 /* pending events */
 static void *devevent_sih;
 static kmutex_t devevent_lock;
 static STAILQ_HEAD(, xenevt_d) devevent_pending;
 
-static void xenevt_donotify(struct xenevt_d *);
 static void xenevt_record(struct xenevt_d *, evtchn_port_t);
+static void xenevt_free(struct xenevt_d *);
 
 /* pending events */
 long xenevt_ev1;
 long xenevt_ev2[NR_EVENT_CHANNELS];
 static int xenevt_processevt(void *);
 
 /* called at boot time */
 void
 xenevtattach(int n)
 {
     struct intrhand *ih;
     int s;
     int level = IPL_HIGH;
@@ -150,46 +152,46 @@ xenevtattach(int n)
     memset(devevent, 0, sizeof(devevent));
     xenevt_ev1 = 0;
     memset(xenevt_ev2, 0, sizeof(xenevt_ev2));
 
     /* register a handler at splhigh, so that spllower() will call us */
     ih = malloc(sizeof (struct intrhand), M_DEVBUF,
         M_WAITOK|M_ZERO);
     if (ih == NULL)
         panic("can't allocate xenevt interrupt source");
     ih->ih_level = level;
     ih->ih_fun = ih->ih_realfun = xenevt_processevt;
     ih->ih_arg = ih->ih_realarg = NULL;
     ih->ih_ipl_next = NULL;
-    ih->ih_cpu = curcpu();
+    ih->ih_cpu = &cpu_info_primary;
 #ifdef MULTIPROCESSOR
     if (!mpsafe) {
         ih->ih_fun = intr_biglock_wrapper;
         ih->ih_arg = ih;
     }
 #endif /* MULTIPROCESSOR */
 
     s = splhigh();
     event_set_iplhandler(ih->ih_cpu, ih, level);
     splx(s);
 }
 
 /* register pending event - always called with interrupt disabled */
 void
 xenevt_setipending(int l1, int l2)
 {
-    xenevt_ev1 |= 1UL << l1;
-    xenevt_ev2[l1] |= 1UL << l2;
-    curcpu()/*XXX*/->ci_ipending |= 1 << IPL_HIGH;
+    atomic_or_ulong(&xenevt_ev1, 1UL << l1);
+    atomic_or_ulong(&xenevt_ev2[l1], 1UL << l2);
+    atomic_or_32(&cpu_info_primary.ci_ipending, 1 << IPL_HIGH);
 }
 
 /* process pending events */
 static int
 xenevt_processevt(void *v)
 {
     long l1, l2;
     int l1i, l2i;
     int port;
 
     l1 = xen_atomic_xchg(&xenevt_ev1, 0);
     while ((l1i = xen_ffs(l1)) != 0) {
         l1i--;
@@ -203,109 +205,108 @@ xenevt_processevt(void *v)
     }
     }
 
     return 0;
 }
 
 
 /* event callback, called at splhigh() */
 void
 xenevt_event(int port)
 {
     struct xenevt_d *d;
 
+    mutex_enter(&devevent_lock);
     d = devevent[port];
     if (d != NULL) {
         xenevt_record(d, port);
 
-        if (d->pending) {
+        if (d->pending == false) {
+            STAILQ_INSERT_TAIL(&devevent_pending, d, pendingq);
+            d->pending = true;
+            mutex_exit(&devevent_lock);
+            softint_schedule(devevent_sih);
             return;
         }
-
-        mutex_enter(&devevent_lock);
-        STAILQ_INSERT_TAIL(&devevent_pending, d, pendingq);
-        d->pending = true;
-        mutex_exit(&devevent_lock);
-
-        softint_schedule(devevent_sih);
     }
+    mutex_exit(&devevent_lock);
 }
 
 void
 xenevt_notify(void)
 {
     struct xenevt_d *d;
 
     for (;;) {
         mutex_enter(&devevent_lock);
         d = STAILQ_FIRST(&devevent_pending);
         if (d == NULL) {
             mutex_exit(&devevent_lock);
             break;
         }
         STAILQ_REMOVE_HEAD(&devevent_pending, pendingq);
         d->pending = false;
-        mutex_exit(&devevent_lock);
-
-        xenevt_donotify(d);
+        mutex_enter(&d->lock);
+        if (d->flags & XENEVT_F_FREE) {
+            xenevt_free(d);
+            mutex_exit(&devevent_lock);
+        } else {
+            mutex_exit(&devevent_lock);
+            selnotify(&d->sel, 0, 1);
+            cv_broadcast(&d->cv);
+            mutex_exit(&d->lock);
+        }
     }
 }
 
 static void
-xenevt_donotify(struct xenevt_d *d)
-{
-
-    mutex_enter(&d->lock);
-    selnotify(&d->sel, 0, 1);
-    cv_broadcast(&d->cv);
-    mutex_exit(&d->lock);
-}
-
-static void
 xenevt_record(struct xenevt_d *d, evtchn_port_t port)
 {
 
     /*
      * This algorithm overflows for one less slot than available.
      * Not really an issue, and the correct algorithm would be more
      * complex
     */
 
+    mutex_enter(&d->lock);
     if (d->ring_read ==
         ((d->ring_write + 1) & XENEVT_RING_MASK)) {
         d->flags |= XENEVT_F_OVERFLOW;
         printf("xenevt_event: ring overflow port %d\n", port);
     } else {
         d->ring[d->ring_write] = port;
         d->ring_write = (d->ring_write + 1) & XENEVT_RING_MASK;
     }
+    mutex_exit(&d->lock);
 }
 
 /* open the xenevt device; this is where we clone */
 int
 xenevtopen(dev_t dev, int flags, int mode, struct lwp *l)
 {
     struct xenevt_d *d;
     struct file *fp;
     int fd, error;
 
     switch(minor(dev)) {
     case DEV_EVT:
         /* falloc() will use the descriptor for us. */
         if ((error = fd_allocfile(&fp, &fd)) != 0)
             return error;
 
         d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_ZERO);
-        mutex_init(&d->lock, MUTEX_DEFAULT, IPL_SOFTSERIAL);
+        d->ci = &cpu_info_primary;
+        mutex_init(&d->lock, MUTEX_DEFAULT, IPL_HIGH);
         cv_init(&d->cv, "xenevt");
         selinit(&d->sel);
         return fd_clone(fp, fd, flags, &xenevt_fileops, d);
     case DEV_XSD:
         /* no clone for /dev/xsd_kva */
         return (0);
     default:
         break;
     }
     return ENODEV;
 }
 
 /* read from device: only for /dev/xsd_kva, xenevt is done though fread */
@@ -340,54 +341,73 @@ xenevtread(dev_t dev, struct uio *uio, i
 paddr_t
 xenevtmmap(dev_t dev, off_t off, int prot)
 {
     if (minor(dev) == DEV_XSD) {
         /* only one page, so off is always 0 */
         if (off != 0)
             return -1;
         return x86_btop(
             xpmap_mtop((paddr_t)xen_start_info.store_mfn << PAGE_SHIFT));
     }
     return -1;
 }
 
-static int
-xenevt_fclose(struct file *fp)
+static void
+xenevt_free(struct xenevt_d *d)
 {
-    struct xenevt_d *d = fp->f_data;
     int i;
+    KASSERT(mutex_owned(&devevent_lock));
+    KASSERT(mutex_owned(&d->lock));
 
     for (i = 0; i < NR_EVENT_CHANNELS; i++ ) {
         if (devevent[i] == d) {
             evtchn_op_t op = { .cmd = 0 };
             int error;
 
             hypervisor_mask_event(i);
+            xen_atomic_clear_bit(&d->ci->ci_evtmask[0], i);
             devevent[i] = NULL;
 
             op.cmd = EVTCHNOP_close;
             op.u.close.port = i;
             if ((error = HYPERVISOR_event_channel_op(&op))) {
                 printf("xenevt_fclose: error %d from "
                     "hypervisor\n", -error);
             }
         }
     }
+    mutex_exit(&d->lock);
     seldestroy(&d->sel);
     cv_destroy(&d->cv);
     mutex_destroy(&d->lock);
-    fp->f_data = NULL;
     free(d, M_DEVBUF);
+}
 
+static int
+xenevt_fclose(struct file *fp)
+{
+    struct xenevt_d *d = fp->f_data;
+
+    mutex_enter(&devevent_lock);
+    mutex_enter(&d->lock);
+    if (d->pending) {
+        d->flags |= XENEVT_F_FREE;
+        mutex_exit(&d->lock);
+    } else {
+        xenevt_free(d);
+    }
+
+    mutex_exit(&devevent_lock);
+    fp->f_data = NULL;
     return (0);
 }
 
 static int
 xenevt_fread(struct file *fp, off_t *offp, struct uio *uio,
     kauth_cred_t cred, int flags)
 {
     struct xenevt_d *d = fp->f_data;
     int error, ring_read, ring_write;
     size_t len, uio_len;
 
     error = 0;
     mutex_enter(&d->lock);
@@ -457,116 +477,141 @@ xenevt_fwrite(struct file *fp, off_t *of
     int i, nentries, error;
 
     if (uio->uio_resid == 0)
         return (0);
     nentries = uio->uio_resid / sizeof(uint16_t);
     if (nentries > NR_EVENT_CHANNELS)
         return EMSGSIZE;
     chans = kmem_alloc(nentries * sizeof(uint16_t), KM_SLEEP);
     if (chans == NULL)
         return ENOMEM;
     error = uiomove(chans, uio->uio_resid, uio);
     if (error)
         goto out;
+    mutex_enter(&devevent_lock);
     for (i = 0; i < nentries; i++) {
         if (chans[i] < NR_EVENT_CHANNELS &&
             devevent[chans[i]] == d) {
             hypervisor_unmask_event(chans[i]);
         }
     }
+    mutex_exit(&devevent_lock);
 out:
     kmem_free(chans, nentries * sizeof(uint16_t));
     return 0;
 }
 
 static int
 xenevt_fioctl(struct file *fp, u_long cmd, void *addr)
 {
     struct xenevt_d *d = fp->f_data;
     evtchn_op_t op = { .cmd = 0 };
     int error;
 
     switch(cmd) {
     case EVTCHN_RESET:
     case IOCTL_EVTCHN_RESET:
+        mutex_enter(&d->lock);
         d->ring_read = d->ring_write = 0;
         d->flags = 0;
+        mutex_exit(&d->lock);
         break;
     case IOCTL_EVTCHN_BIND_VIRQ:
     {
         struct ioctl_evtchn_bind_virq *bind_virq = addr;
         op.cmd = EVTCHNOP_bind_virq;
         op.u.bind_virq.virq = bind_virq->virq;
         op.u.bind_virq.vcpu = 0;
         if ((error = HYPERVISOR_event_channel_op(&op))) {
             printf("IOCTL_EVTCHN_BIND_VIRQ failed: virq %d error %d\n", bind_virq->virq, error);
             return -error;
         }
         bind_virq->port = op.u.bind_virq.port;
+        mutex_enter(&devevent_lock);
+        KASSERT(devevent[bind_virq->port] == NULL);
         devevent[bind_virq->port] = d;
+        mutex_exit(&devevent_lock);
+        xen_atomic_set_bit(&d->ci->ci_evtmask[0], bind_virq->port);
         hypervisor_unmask_event(bind_virq->port);
         break;
     }
     case IOCTL_EVTCHN_BIND_INTERDOMAIN:
     {
         struct ioctl_evtchn_bind_interdomain *bind_intd = addr;
         op.cmd = EVTCHNOP_bind_interdomain;
         op.u.bind_interdomain.remote_dom = bind_intd->remote_domain;
         op.u.bind_interdomain.remote_port = bind_intd->remote_port;
         if ((error = HYPERVISOR_event_channel_op(&op)))
             return -error;
         bind_intd->port = op.u.bind_interdomain.local_port;
+        mutex_enter(&devevent_lock);
+        KASSERT(devevent[bind_intd->port] == NULL);
         devevent[bind_intd->port] = d;
+        mutex_exit(&devevent_lock);
+        xen_atomic_set_bit(&d->ci->ci_evtmask[0], bind_intd->port);
         hypervisor_unmask_event(bind_intd->port);
         break;
     }
     case IOCTL_EVTCHN_BIND_UNBOUND_PORT:
     {
         struct ioctl_evtchn_bind_unbound_port *bind_unbound = addr;
         op.cmd = EVTCHNOP_alloc_unbound;
         op.u.alloc_unbound.dom = DOMID_SELF;
         op.u.alloc_unbound.remote_dom = bind_unbound->remote_domain;
         if ((error = HYPERVISOR_event_channel_op(&op)))
             return -error;
         bind_unbound->port = op.u.alloc_unbound.port;
+        mutex_enter(&devevent_lock);
+        KASSERT(devevent[bind_unbound->port] == NULL);
         devevent[bind_unbound->port] = d;
+        mutex_exit(&devevent_lock);
+        xen_atomic_set_bit(&d->ci->ci_evtmask[0], bind_unbound->port);
         hypervisor_unmask_event(bind_unbound->port);
         break;
     }
     case IOCTL_EVTCHN_UNBIND:
     {
         struct ioctl_evtchn_unbind *unbind = addr;
 
         if (unbind->port > NR_EVENT_CHANNELS)
             return EINVAL;
-        if (devevent[unbind->port] != d)
+        mutex_enter(&devevent_lock);
+        if (devevent[unbind->port] != d) {
+            mutex_exit(&devevent_lock);
             return ENOTCONN;
+        }
         devevent[unbind->port] = NULL;
+        mutex_exit(&devevent_lock);
         hypervisor_mask_event(unbind->port);
+        xen_atomic_clear_bit(&d->ci->ci_evtmask[0], unbind->port);
         op.cmd = EVTCHNOP_close;
         op.u.close.port = unbind->port;
         if ((error = HYPERVISOR_event_channel_op(&op)))
             return -error;
         break;
     }
     case IOCTL_EVTCHN_NOTIFY:
     {
         struct ioctl_evtchn_notify *notify = addr;
 
         if (notify->port > NR_EVENT_CHANNELS)
             return EINVAL;
-        if (devevent[notify->port] != d)
+        mutex_enter(&devevent_lock);
+        if (devevent[notify->port] != d) {
+            mutex_exit(&devevent_lock);
             return ENOTCONN;
+        }
         hypervisor_notify_via_evtchn(notify->port);
+        mutex_exit(&devevent_lock);
         break;
     }
     case FIONBIO:
         break;
     default:
         return EINVAL;
     }
     return 0;
 }
 
 /*
  * Support for poll() system call
  *