Sat Jul 30 06:19:02 2011 UTC
Correct the sizes passed to uvm_km_free(9) in error paths.


(uebayasi)
diff -r1.61 -r1.62 src/sys/kern/sysv_msg.c
diff -r1.87 -r1.88 src/sys/kern/sysv_sem.c
diff -r1.120 -r1.121 src/sys/kern/sysv_shm.c
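
What goes wrong: each of these files computes a byte count sz, passes round_page(sz) to uvm_km_alloc(9), but keeps the unrounded sz around and later hands it to uvm_km_free(9) in the error paths. uvm_km_free(9) expects the same size that was given to uvm_km_alloc(9), so the two sides of the alloc/free pair disagreed. The fix rounds sz once, before the allocation, and reuses that value everywhere, including the free of the old pool at the end of msgrealloc(). A minimal sketch of the corrected idiom follows; it is illustrative only, with a hypothetical size computation and error condition, not the literal kernel code:

	/*
	 * Sketch of the corrected pattern: uvm_km_free(9) must receive
	 * the same size that was given to uvm_km_alloc(9), so round the
	 * size once and keep the result.
	 */
	vaddr_t v;
	vsize_t sz;

	sz = nitems * sizeof(struct item);	/* hypothetical size */
	sz = round_page(sz);			/* round ONCE, keep the result */
	v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
	if (v == 0)
		return ENOMEM;
	if (error_condition) {			/* hypothetical error path */
		/* Free with the SAME sz the allocation used. */
		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
		return EBUSY;
	}

Before this change the error paths freed with the unrounded sz while the mapping had been created with round_page(sz); rounding up front makes the allocation and every free agree by construction.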

cvs diff -r1.61 -r1.62 src/sys/kern/sysv_msg.c

--- src/sys/kern/sysv_msg.c 2009/01/28 00:59:03 1.61
+++ src/sys/kern/sysv_msg.c 2011/07/30 06:19:02 1.62
@@ -1,1278 +1,1279 @@
-/* $NetBSD: sysv_msg.c,v 1.61 2009/01/28 00:59:03 njoly Exp $ */
+/* $NetBSD: sysv_msg.c,v 1.62 2011/07/30 06:19:02 uebayasi Exp $ */
 
 /*-
  * Copyright (c) 1999, 2006, 2007 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
  * NASA Ames Research Center, and by Andrew Doran.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * Implementation of SVID messages
  *
  * Author: Daniel Boulet
  *
  * Copyright 1993 Daniel Boulet and RTMX Inc.
  *
  * This system call was implemented by Daniel Boulet under contract from RTMX.
  *
  * Redistribution and use in source forms, with and without modification,
  * are permitted provided that this entire comment appears intact.
  *
  * Redistribution in binary form may occur without any restrictions.
  * Obviously, it would be nice if you gave credit where credit is due
  * but requiring it would be too onerous.
  *
  * This software is provided ``AS IS'' without any warranties of any kind.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sysv_msg.c,v 1.61 2009/01/28 00:59:03 njoly Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sysv_msg.c,v 1.62 2011/07/30 06:19:02 uebayasi Exp $");
 
 #define SYSVMSG
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/msg.h>
 #include <sys/sysctl.h>
 #include <sys/mount.h>		/* XXX for <sys/syscallargs.h> */
 #include <sys/syscallargs.h>
 #include <sys/kauth.h>
 
 #define MSG_DEBUG
 #undef MSG_DEBUG_OK
 
 #ifdef MSG_DEBUG_OK
 #define MSG_PRINTF(a)	printf a
 #else
 #define MSG_PRINTF(a)
 #endif
 
 static int	nfree_msgmaps;		/* # of free map entries */
 static short	free_msgmaps;	/* head of linked list of free map entries */
 static struct __msg *free_msghdrs;	/* list of free msg headers */
 static char	*msgpool;		/* MSGMAX byte long msg buffer pool */
 static struct msgmap *msgmaps;		/* MSGSEG msgmap structures */
 static struct __msg *msghdrs;		/* MSGTQL msg headers */
 
 kmsq_t	*msqs;				/* MSGMNI msqid_ds struct's */
 kmutex_t msgmutex;			/* subsystem lock */
 
 static u_int	msg_waiters = 0;	/* total number of msgrcv waiters */
 static bool	msg_realloc_state;
 static kcondvar_t msg_realloc_cv;
 
 static void msg_freehdr(struct __msg *);
 
 void
 msginit(void)
 {
 	int i, sz;
 	vaddr_t v;
 
 	/*
 	 * msginfo.msgssz should be a power of two for efficiency reasons.
 	 * It is also pretty silly if msginfo.msgssz is less than 8
 	 * or greater than about 256 so ...
 	 */
 
 	i = 8;
 	while (i < 1024 && i != msginfo.msgssz)
 		i <<= 1;
 	if (i != msginfo.msgssz) {
 		panic("msginfo.msgssz = %d, not a small power of 2",
 		    msginfo.msgssz);
 	}
 
 	if (msginfo.msgseg > 32767) {
 		panic("msginfo.msgseg = %d > 32767", msginfo.msgseg);
 	}
 
 	/* Allocate the wired memory for our structures */
 	sz = ALIGN(msginfo.msgmax) +
 	    ALIGN(msginfo.msgseg * sizeof(struct msgmap)) +
 	    ALIGN(msginfo.msgtql * sizeof(struct __msg)) +
 	    ALIGN(msginfo.msgmni * sizeof(kmsq_t));
-	v = uvm_km_alloc(kernel_map, round_page(sz), 0,
-	    UVM_KMF_WIRED|UVM_KMF_ZERO);
+	sz = round_page(sz);
+	v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
 	if (v == 0)
 		panic("sysv_msg: cannot allocate memory");
 	msgpool = (void *)v;
 	msgmaps = (void *)((uintptr_t)msgpool + ALIGN(msginfo.msgmax));
 	msghdrs = (void *)((uintptr_t)msgmaps +
 	    ALIGN(msginfo.msgseg * sizeof(struct msgmap)));
 	msqs = (void *)((uintptr_t)msghdrs +
 	    ALIGN(msginfo.msgtql * sizeof(struct __msg)));
 
 	for (i = 0; i < (msginfo.msgseg - 1); i++)
 		msgmaps[i].next = i + 1;
 	msgmaps[msginfo.msgseg - 1].next = -1;
 
 	free_msgmaps = 0;
 	nfree_msgmaps = msginfo.msgseg;
 
 	for (i = 0; i < (msginfo.msgtql - 1); i++) {
 		msghdrs[i].msg_type = 0;
 		msghdrs[i].msg_next = &msghdrs[i + 1];
 	}
 	i = msginfo.msgtql - 1;
 	msghdrs[i].msg_type = 0;
 	msghdrs[i].msg_next = NULL;
 	free_msghdrs = &msghdrs[0];
 
 	for (i = 0; i < msginfo.msgmni; i++) {
 		cv_init(&msqs[i].msq_cv, "msgwait");
 		/* Implies entry is available */
 		msqs[i].msq_u.msg_qbytes = 0;
 		/* Reset to a known value */
 		msqs[i].msq_u.msg_perm._seq = 0;
 	}
 
 	mutex_init(&msgmutex, MUTEX_DEFAULT, IPL_NONE);
 	cv_init(&msg_realloc_cv, "msgrealc");
 	msg_realloc_state = false;
 }
 
 static int
 msgrealloc(int newmsgmni, int newmsgseg)
 {
 	struct msgmap *new_msgmaps;
 	struct __msg *new_msghdrs, *new_free_msghdrs;
 	char *old_msgpool, *new_msgpool;
 	kmsq_t *new_msqs;
 	vaddr_t v;
 	int i, sz, msqid, newmsgmax, new_nfree_msgmaps;
 	short new_free_msgmaps;
 
 	if (newmsgmni < 1 || newmsgseg < 1)
 		return EINVAL;
 
 	/* Allocate the wired memory for our structures */
 	newmsgmax = msginfo.msgssz * newmsgseg;
 	sz = ALIGN(newmsgmax) +
 	    ALIGN(newmsgseg * sizeof(struct msgmap)) +
 	    ALIGN(msginfo.msgtql * sizeof(struct __msg)) +
 	    ALIGN(newmsgmni * sizeof(kmsq_t));
-	v = uvm_km_alloc(kernel_map, round_page(sz), 0,
-	    UVM_KMF_WIRED|UVM_KMF_ZERO);
+	sz = round_page(sz);
+	v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
 	if (v == 0)
 		return ENOMEM;
 
 	mutex_enter(&msgmutex);
 	if (msg_realloc_state) {
 		mutex_exit(&msgmutex);
 		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
 		return EBUSY;
 	}
 	msg_realloc_state = true;
 	if (msg_waiters) {
 		/*
 		 * Mark reallocation state, wake-up all waiters,
 		 * and wait while they will all exit.
 		 */
 		for (i = 0; i < msginfo.msgmni; i++)
 			cv_broadcast(&msqs[i].msq_cv);
 		while (msg_waiters)
 			cv_wait(&msg_realloc_cv, &msgmutex);
 	}
 	old_msgpool = msgpool;
 
 	/* We cannot reallocate less memory than we use */
 	i = 0;
 	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
 		struct msqid_ds *mptr;
 		kmsq_t *msq;
 
 		msq = &msqs[msqid];
 		mptr = &msq->msq_u;
 		if (mptr->msg_qbytes || (mptr->msg_perm.mode & MSG_LOCKED))
 			i = msqid;
 	}
 	if (i >= newmsgmni || (msginfo.msgseg - nfree_msgmaps) > newmsgseg) {
 		mutex_exit(&msgmutex);
 		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
 		return EBUSY;
 	}
 
 	new_msgpool = (void *)v;
 	new_msgmaps = (void *)((uintptr_t)new_msgpool + ALIGN(newmsgmax));
 	new_msghdrs = (void *)((uintptr_t)new_msgmaps +
 	    ALIGN(newmsgseg * sizeof(struct msgmap)));
 	new_msqs = (void *)((uintptr_t)new_msghdrs +
 	    ALIGN(msginfo.msgtql * sizeof(struct __msg)));
 
 	/* Initialize the structures */
 	for (i = 0; i < (newmsgseg - 1); i++)
 		new_msgmaps[i].next = i + 1;
 	new_msgmaps[newmsgseg - 1].next = -1;
 	new_free_msgmaps = 0;
 	new_nfree_msgmaps = newmsgseg;
 
 	for (i = 0; i < (msginfo.msgtql - 1); i++) {
 		new_msghdrs[i].msg_type = 0;
 		new_msghdrs[i].msg_next = &new_msghdrs[i + 1];
 	}
 	i = msginfo.msgtql - 1;
 	new_msghdrs[i].msg_type = 0;
 	new_msghdrs[i].msg_next = NULL;
 	new_free_msghdrs = &new_msghdrs[0];
 
 	for (i = 0; i < newmsgmni; i++) {
 		new_msqs[i].msq_u.msg_qbytes = 0;
 		new_msqs[i].msq_u.msg_perm._seq = 0;
 		cv_init(&new_msqs[i].msq_cv, "msgwait");
 	}
 
 	/*
 	 * Copy all message queue identifiers, mesage headers and buffer
 	 * pools to the new memory location.
 	 */
 	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
 		struct __msg *nmsghdr, *msghdr, *pmsghdr;
 		struct msqid_ds *nmptr, *mptr;
 		kmsq_t *nmsq, *msq;
 
 		msq = &msqs[msqid];
 		mptr = &msq->msq_u;
 
 		if (mptr->msg_qbytes == 0 &&
 		    (mptr->msg_perm.mode & MSG_LOCKED) == 0)
 			continue;
 
 		nmsq = &new_msqs[msqid];
 		nmptr = &nmsq->msq_u;
 		memcpy(nmptr, mptr, sizeof(struct msqid_ds));
 
 		/*
 		 * Go through the message headers, and and copy each
 		 * one by taking the new ones, and thus defragmenting.
 		 */
 		nmsghdr = pmsghdr = NULL;
 		msghdr = mptr->_msg_first;
 		while (msghdr) {
 			short nnext = 0, next;
 			u_short msgsz, segcnt;
 
 			/* Take an entry from the new list of free msghdrs */
 			nmsghdr = new_free_msghdrs;
 			KASSERT(nmsghdr != NULL);
 			new_free_msghdrs = nmsghdr->msg_next;
 
 			nmsghdr->msg_next = NULL;
 			if (pmsghdr) {
 				pmsghdr->msg_next = nmsghdr;
 			} else {
 				nmptr->_msg_first = nmsghdr;
 				pmsghdr = nmsghdr;
 			}
 			nmsghdr->msg_ts = msghdr->msg_ts;
 			nmsghdr->msg_spot = -1;
 
 			/* Compute the amount of segments and reserve them */
 			msgsz = msghdr->msg_ts;
 			segcnt = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
 			if (segcnt == 0)
 				continue;
 			while (segcnt--) {
 				nnext = new_free_msgmaps;
 				new_free_msgmaps = new_msgmaps[nnext].next;
 				new_nfree_msgmaps--;
 				new_msgmaps[nnext].next = nmsghdr->msg_spot;
 				nmsghdr->msg_spot = nnext;
 			}
 
 			/* Copy all segments */
 			KASSERT(nnext == nmsghdr->msg_spot);
 			next = msghdr->msg_spot;
 			while (msgsz > 0) {
 				size_t tlen;
 
 				if (msgsz >= msginfo.msgssz) {
 					tlen = msginfo.msgssz;
 					msgsz -= msginfo.msgssz;
 				} else {
 					tlen = msgsz;
 					msgsz = 0;
 				}
 
 				/* Copy the message buffer */
 				memcpy(&new_msgpool[nnext * msginfo.msgssz],
 				    &msgpool[next * msginfo.msgssz], tlen);
 
 				/* Next entry of the map */
 				nnext = msgmaps[nnext].next;
 				next = msgmaps[next].next;
 			}
 
 			/* Next message header */
 			msghdr = msghdr->msg_next;
 		}
 		nmptr->_msg_last = nmsghdr;
 	}
 	KASSERT((msginfo.msgseg - nfree_msgmaps) ==
 	    (newmsgseg - new_nfree_msgmaps));
 
 	sz = ALIGN(msginfo.msgmax) +
 	    ALIGN(msginfo.msgseg * sizeof(struct msgmap)) +
 	    ALIGN(msginfo.msgtql * sizeof(struct __msg)) +
 	    ALIGN(msginfo.msgmni * sizeof(kmsq_t));
+	sz = round_page(sz);
 
 	for (i = 0; i < msginfo.msgmni; i++)
 		cv_destroy(&msqs[i].msq_cv);
 
 	/* Set the pointers and update the new values */
 	msgpool = new_msgpool;
 	msgmaps = new_msgmaps;
 	msghdrs = new_msghdrs;
 	msqs = new_msqs;
 
 	free_msghdrs = new_free_msghdrs;
 	free_msgmaps = new_free_msgmaps;
 	nfree_msgmaps = new_nfree_msgmaps;
 	msginfo.msgmni = newmsgmni;
 	msginfo.msgseg = newmsgseg;
 	msginfo.msgmax = newmsgmax;
 
 	/* Reallocation completed - notify all waiters, if any */
 	msg_realloc_state = false;
 	cv_broadcast(&msg_realloc_cv);
 	mutex_exit(&msgmutex);
 
 	uvm_km_free(kernel_map, (vaddr_t)old_msgpool, sz, UVM_KMF_WIRED);
 	return 0;
 }
 
 static void
 msg_freehdr(struct __msg *msghdr)
 {
 
 	KASSERT(mutex_owned(&msgmutex));
 
 	while (msghdr->msg_ts > 0) {
 		short next;
 		KASSERT(msghdr->msg_spot >= 0);
 		KASSERT(msghdr->msg_spot < msginfo.msgseg);
 
 		next = msgmaps[msghdr->msg_spot].next;
 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
 		free_msgmaps = msghdr->msg_spot;
 		nfree_msgmaps++;
 		msghdr->msg_spot = next;
 		if (msghdr->msg_ts >= msginfo.msgssz)
 			msghdr->msg_ts -= msginfo.msgssz;
 		else
 			msghdr->msg_ts = 0;
 	}
 	KASSERT(msghdr->msg_spot == -1);
 	msghdr->msg_next = free_msghdrs;
 	free_msghdrs = msghdr;
 }
 
 int
 sys___msgctl50(struct lwp *l, const struct sys___msgctl50_args *uap,
     register_t *retval)
 {
 	/* {
 		syscallarg(int) msqid;
 		syscallarg(int) cmd;
 		syscallarg(struct msqid_ds *) buf;
 	} */
 	struct msqid_ds msqbuf;
 	int cmd, error;
 
 	cmd = SCARG(uap, cmd);
 
 	if (cmd == IPC_SET) {
 		error = copyin(SCARG(uap, buf), &msqbuf, sizeof(msqbuf));
 		if (error)
 			return (error);
 	}
 
 	error = msgctl1(l, SCARG(uap, msqid), cmd,
 	    (cmd == IPC_SET || cmd == IPC_STAT) ? &msqbuf : NULL);
 
 	if (error == 0 && cmd == IPC_STAT)
 		error = copyout(&msqbuf, SCARG(uap, buf), sizeof(msqbuf));
 
 	return (error);
 }
 
 int
 msgctl1(struct lwp *l, int msqid, int cmd, struct msqid_ds *msqbuf)
 {
 	kauth_cred_t cred = l->l_cred;
 	struct msqid_ds *msqptr;
 	kmsq_t *msq;
 	int error = 0, ix;
 
 	MSG_PRINTF(("call to msgctl1(%d, %d)\n", msqid, cmd));
 
 	ix = IPCID_TO_IX(msqid);
 
 	mutex_enter(&msgmutex);
 
 	if (ix < 0 || ix >= msginfo.msgmni) {
 		MSG_PRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", ix,
 		    msginfo.msgmni));
 		error = EINVAL;
 		goto unlock;
 	}
 
 	msq = &msqs[ix];
 	msqptr = &msq->msq_u;
 
 	if (msqptr->msg_qbytes == 0) {
 		MSG_PRINTF(("no such msqid\n"));
 		error = EINVAL;
 		goto unlock;
 	}
 	if (msqptr->msg_perm._seq != IPCID_TO_SEQ(msqid)) {
 		MSG_PRINTF(("wrong sequence number\n"));
 		error = EINVAL;
 		goto unlock;
 	}
 
 	switch (cmd) {
 	case IPC_RMID:
 	{
 		struct __msg *msghdr;
 		if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_M)) != 0)
 			break;
 		/* Free the message headers */
 		msghdr = msqptr->_msg_first;
 		while (msghdr != NULL) {
 			struct __msg *msghdr_tmp;
 
 			/* Free the segments of each message */
 			msqptr->_msg_cbytes -= msghdr->msg_ts;
 			msqptr->msg_qnum--;
 			msghdr_tmp = msghdr;
 			msghdr = msghdr->msg_next;
 			msg_freehdr(msghdr_tmp);
 		}
 		KASSERT(msqptr->_msg_cbytes == 0);
 		KASSERT(msqptr->msg_qnum == 0);
 
 		/* Mark it as free */
 		msqptr->msg_qbytes = 0;
 		cv_broadcast(&msq->msq_cv);
 	}
 		break;
 
 	case IPC_SET:
 		if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_M)))
 			break;
 		if (msqbuf->msg_qbytes > msqptr->msg_qbytes &&
 		    kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER,
 		    NULL) != 0) {
 			error = EPERM;
 			break;
 		}
 		if (msqbuf->msg_qbytes > msginfo.msgmnb) {
 			MSG_PRINTF(("can't increase msg_qbytes beyond %d "
 			    "(truncating)\n", msginfo.msgmnb));
 			/* silently restrict qbytes to system limit */
 			msqbuf->msg_qbytes = msginfo.msgmnb;
 		}
 		if (msqbuf->msg_qbytes == 0) {
 			MSG_PRINTF(("can't reduce msg_qbytes to 0\n"));
 			error = EINVAL;		/* XXX non-standard errno! */
 			break;
 		}
 		msqptr->msg_perm.uid = msqbuf->msg_perm.uid;
 		msqptr->msg_perm.gid = msqbuf->msg_perm.gid;
 		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
 		    (msqbuf->msg_perm.mode & 0777);
 		msqptr->msg_qbytes = msqbuf->msg_qbytes;
 		msqptr->msg_ctime = time_second;
 		break;
 
 	case IPC_STAT:
 		if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_R))) {
 			MSG_PRINTF(("requester doesn't have read access\n"));
 			break;
 		}
 		memcpy(msqbuf, msqptr, sizeof(struct msqid_ds));
 		break;
 
 	default:
 		MSG_PRINTF(("invalid command %d\n", cmd));
 		error = EINVAL;
 		break;
 	}
 
 unlock:
 	mutex_exit(&msgmutex);
 	return (error);
 }
 
 int
 sys_msgget(struct lwp *l, const struct sys_msgget_args *uap, register_t *retval)
 {
 	/* {
 		syscallarg(key_t) key;
 		syscallarg(int) msgflg;
 	} */
 	int msqid, error = 0;
 	int key = SCARG(uap, key);
 	int msgflg = SCARG(uap, msgflg);
 	kauth_cred_t cred = l->l_cred;
 	struct msqid_ds *msqptr = NULL;
 	kmsq_t *msq;
 
 	mutex_enter(&msgmutex);
 
 	MSG_PRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
 
 	if (key != IPC_PRIVATE) {
 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
 			msq = &msqs[msqid];
 			msqptr = &msq->msq_u;
 			if (msqptr->msg_qbytes != 0 &&
 			    msqptr->msg_perm._key == key)
 				break;
 		}
 		if (msqid < msginfo.msgmni) {
 			MSG_PRINTF(("found public key\n"));
 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
 				MSG_PRINTF(("not exclusive\n"));
 				error = EEXIST;
 				goto unlock;
 			}
 			if ((error = ipcperm(cred, &msqptr->msg_perm,
 			    msgflg & 0700 ))) {
 				MSG_PRINTF(("requester doesn't have 0%o access\n",
 				    msgflg & 0700));
 				goto unlock;
 			}
 			goto found;
 		}
 	}
 
 	MSG_PRINTF(("need to allocate the msqid_ds\n"));
 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
 			/*
 			 * Look for an unallocated and unlocked msqid_ds.
 			 * msqid_ds's can be locked by msgsnd or msgrcv while
 			 * they are copying the message in/out. We can't
 			 * re-use the entry until they release it.
 			 */
 			msq = &msqs[msqid];
 			msqptr = &msq->msq_u;
 			if (msqptr->msg_qbytes == 0 &&
 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
 				break;
 		}
 		if (msqid == msginfo.msgmni) {
 			MSG_PRINTF(("no more msqid_ds's available\n"));
 			error = ENOSPC;
 			goto unlock;
 		}
 		MSG_PRINTF(("msqid %d is available\n", msqid));
 		msqptr->msg_perm._key = key;
 		msqptr->msg_perm.cuid = kauth_cred_geteuid(cred);
 		msqptr->msg_perm.uid = kauth_cred_geteuid(cred);
 		msqptr->msg_perm.cgid = kauth_cred_getegid(cred);
 		msqptr->msg_perm.gid = kauth_cred_getegid(cred);
 		msqptr->msg_perm.mode = (msgflg & 0777);
 		/* Make sure that the returned msqid is unique */
 		msqptr->msg_perm._seq++;
 		msqptr->_msg_first = NULL;
 		msqptr->_msg_last = NULL;
 		msqptr->_msg_cbytes = 0;
 		msqptr->msg_qnum = 0;
 		msqptr->msg_qbytes = msginfo.msgmnb;
 		msqptr->msg_lspid = 0;
 		msqptr->msg_lrpid = 0;
 		msqptr->msg_stime = 0;
 		msqptr->msg_rtime = 0;
 		msqptr->msg_ctime = time_second;
 	} else {
 		MSG_PRINTF(("didn't find it and wasn't asked to create it\n"));
 		error = ENOENT;
 		goto unlock;
 	}
 
 found:
 	/* Construct the unique msqid */
 	*retval = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
 
 unlock:
 	mutex_exit(&msgmutex);
 	return (error);
 }
 
 int
 sys_msgsnd(struct lwp *l, const struct sys_msgsnd_args *uap, register_t *retval)
 {
 	/* {
 		syscallarg(int) msqid;
 		syscallarg(const void *) msgp;
 		syscallarg(size_t) msgsz;
 		syscallarg(int) msgflg;
 	} */
 
 	return msgsnd1(l, SCARG(uap, msqid), SCARG(uap, msgp),
 	    SCARG(uap, msgsz), SCARG(uap, msgflg), sizeof(long), copyin);
 }
 
 int
 msgsnd1(struct lwp *l, int msqidr, const char *user_msgp, size_t msgsz,
     int msgflg, size_t typesz, copyin_t fetch_type)
 {
 	int segs_needed, error = 0, msqid;
 	kauth_cred_t cred = l->l_cred;
 	struct msqid_ds *msqptr;
 	struct __msg *msghdr;
 	kmsq_t *msq;
 	short next;
 
 	MSG_PRINTF(("call to msgsnd(%d, %p, %lld, %d)\n", msqid, user_msgp,
 	    (long long)msgsz, msgflg));
 
 	if ((ssize_t)msgsz < 0)
 		return EINVAL;
 
 restart:
 	msqid = IPCID_TO_IX(msqidr);
 
 	mutex_enter(&msgmutex);
 	/* In case of reallocation, we will wait for completion */
 	while (__predict_false(msg_realloc_state))
 		cv_wait(&msg_realloc_cv, &msgmutex);
 
 	if (msqid < 0 || msqid >= msginfo.msgmni) {
 		MSG_PRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
 		    msginfo.msgmni));
 		error = EINVAL;
 		goto unlock;
 	}
 
 	msq = &msqs[msqid];
 	msqptr = &msq->msq_u;
 
 	if (msqptr->msg_qbytes == 0) {
 		MSG_PRINTF(("no such message queue id\n"));
 		error = EINVAL;
 		goto unlock;
 	}
 	if (msqptr->msg_perm._seq != IPCID_TO_SEQ(msqidr)) {
 		MSG_PRINTF(("wrong sequence number\n"));
 		error = EINVAL;
 		goto unlock;
 	}
 
 	if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_W))) {
 		MSG_PRINTF(("requester doesn't have write access\n"));
 		goto unlock;
 	}
 
 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
 	MSG_PRINTF(("msgsz=%lld, msgssz=%d, segs_needed=%d\n",
 	    (long long)msgsz, msginfo.msgssz, segs_needed));
 	for (;;) {
 		int need_more_resources = 0;
 
 		/*
 		 * check msgsz [cannot be negative since it is unsigned]
 		 * (inside this loop in case msg_qbytes changes while we sleep)
 		 */
 
 		if (msgsz > msqptr->msg_qbytes) {
 			MSG_PRINTF(("msgsz > msqptr->msg_qbytes\n"));
 			error = EINVAL;
 			goto unlock;
 		}
 
 		if (msqptr->msg_perm.mode & MSG_LOCKED) {
 			MSG_PRINTF(("msqid is locked\n"));
 			need_more_resources = 1;
 		}
 		if (msgsz + msqptr->_msg_cbytes > msqptr->msg_qbytes) {
 			MSG_PRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
 			need_more_resources = 1;
 		}
 		if (segs_needed > nfree_msgmaps) {
 			MSG_PRINTF(("segs_needed > nfree_msgmaps\n"));
 			need_more_resources = 1;
 		}
 		if (free_msghdrs == NULL) {
 			MSG_PRINTF(("no more msghdrs\n"));
 			need_more_resources = 1;
 		}
 
 		if (need_more_resources) {
 			int we_own_it;
 
 			if ((msgflg & IPC_NOWAIT) != 0) {
 				MSG_PRINTF(("need more resources but caller "
 				    "doesn't want to wait\n"));
 				error = EAGAIN;
 				goto unlock;
 			}
 
 			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
 				MSG_PRINTF(("we don't own the msqid_ds\n"));
 				we_own_it = 0;
 			} else {
 				/* Force later arrivals to wait for our
 				   request */
 				MSG_PRINTF(("we own the msqid_ds\n"));
 				msqptr->msg_perm.mode |= MSG_LOCKED;
 				we_own_it = 1;
 			}
 
 			msg_waiters++;
 			MSG_PRINTF(("goodnight\n"));
 			error = cv_wait_sig(&msq->msq_cv, &msgmutex);
 			MSG_PRINTF(("good morning, error=%d\n", error));
 			msg_waiters--;
 
 			if (we_own_it)
 				msqptr->msg_perm.mode &= ~MSG_LOCKED;
 
 			/*
 			 * In case of such state, notify reallocator and
 			 * restart the call.
 			 */
 			if (msg_realloc_state) {
 				cv_broadcast(&msg_realloc_cv);
 				mutex_exit(&msgmutex);
 				goto restart;
 			}
 
 			if (error != 0) {
 				MSG_PRINTF(("msgsnd: interrupted system "
 				    "call\n"));
 				error = EINTR;
 				goto unlock;
 			}
 
 			/*
 			 * Make sure that the msq queue still exists
 			 */
 
 			if (msqptr->msg_qbytes == 0) {
 				MSG_PRINTF(("msqid deleted\n"));
 				error = EIDRM;
 				goto unlock;
 			}
 		} else {
 			MSG_PRINTF(("got all the resources that we need\n"));
 			break;
 		}
 	}
 
 	/*
 	 * We have the resources that we need.
 	 * Make sure!
 	 */
 
 	KASSERT((msqptr->msg_perm.mode & MSG_LOCKED) == 0);
 	KASSERT(segs_needed <= nfree_msgmaps);
 	KASSERT(msgsz + msqptr->_msg_cbytes <= msqptr->msg_qbytes);
 	KASSERT(free_msghdrs != NULL);
 
 	/*
 	 * Re-lock the msqid_ds in case we page-fault when copying in the
 	 * message
 	 */
 
 	KASSERT((msqptr->msg_perm.mode & MSG_LOCKED) == 0);
 	msqptr->msg_perm.mode |= MSG_LOCKED;
 
 	/*
 	 * Allocate a message header
 	 */
 
 	msghdr = free_msghdrs;
 	free_msghdrs = msghdr->msg_next;
 	msghdr->msg_spot = -1;
 	msghdr->msg_ts = msgsz;
 
 	/*
 	 * Allocate space for the message
 	 */
 
 	while (segs_needed > 0) {
 		KASSERT(nfree_msgmaps > 0);
 		KASSERT(free_msgmaps != -1);
 		KASSERT(free_msgmaps < msginfo.msgseg);
 
 		next = free_msgmaps;
 		MSG_PRINTF(("allocating segment %d to message\n", next));
 		free_msgmaps = msgmaps[next].next;
 		nfree_msgmaps--;
 		msgmaps[next].next = msghdr->msg_spot;
 		msghdr->msg_spot = next;
 		segs_needed--;
 	}
 
 	/*
 	 * Copy in the message type
 	 */
 	mutex_exit(&msgmutex);
 	error = (*fetch_type)(user_msgp, &msghdr->msg_type, typesz);
 	mutex_enter(&msgmutex);
 	if (error != 0) {
 		MSG_PRINTF(("error %d copying the message type\n", error));
 		msg_freehdr(msghdr);
 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
 		cv_broadcast(&msq->msq_cv);
 		goto unlock;
 	}
 	user_msgp += typesz;
 
 	/*
 	 * Validate the message type
 	 */
 
 	if (msghdr->msg_type < 1) {
 		msg_freehdr(msghdr);
 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
 		cv_broadcast(&msq->msq_cv);
 		MSG_PRINTF(("mtype (%ld) < 1\n", msghdr->msg_type));
 		error = EINVAL;
 		goto unlock;
 	}
 
 	/*
 	 * Copy in the message body
 	 */
 
 	next = msghdr->msg_spot;
 	while (msgsz > 0) {
 		size_t tlen;
 		KASSERT(next > -1);
 		KASSERT(next < msginfo.msgseg);
 
 		if (msgsz > msginfo.msgssz)
 			tlen = msginfo.msgssz;
 		else
 			tlen = msgsz;
 		mutex_exit(&msgmutex);
 		error = copyin(user_msgp, &msgpool[next * msginfo.msgssz], tlen);
 		mutex_enter(&msgmutex);
 		if (error != 0) {
 			MSG_PRINTF(("error %d copying in message segment\n",
 			    error));
 			msg_freehdr(msghdr);
 			msqptr->msg_perm.mode &= ~MSG_LOCKED;
 			cv_broadcast(&msq->msq_cv);
 			goto unlock;
 		}
 		msgsz -= tlen;
 		user_msgp += tlen;
 		next = msgmaps[next].next;
 	}
 	KASSERT(next == -1);
 
 	/*
 	 * We've got the message. Unlock the msqid_ds.
 	 */
 
 	msqptr->msg_perm.mode &= ~MSG_LOCKED;
 
 	/*
 	 * Make sure that the msqid_ds is still allocated.
 	 */
 
 	if (msqptr->msg_qbytes == 0) {
 		msg_freehdr(msghdr);
 		cv_broadcast(&msq->msq_cv);
 		error = EIDRM;
 		goto unlock;
 	}
 
 	/*
 	 * Put the message into the queue
 	 */
 
 	if (msqptr->_msg_first == NULL) {
 		msqptr->_msg_first = msghdr;
 		msqptr->_msg_last = msghdr;
 	} else {
 		msqptr->_msg_last->msg_next = msghdr;
 		msqptr->_msg_last = msghdr;
 	}
 	msqptr->_msg_last->msg_next = NULL;
 
 	msqptr->_msg_cbytes += msghdr->msg_ts;
 	msqptr->msg_qnum++;
 	msqptr->msg_lspid = l->l_proc->p_pid;
 	msqptr->msg_stime = time_second;
 
 	cv_broadcast(&msq->msq_cv);
 
 unlock:
 	mutex_exit(&msgmutex);
 	return error;
 }
 
 int
 sys_msgrcv(struct lwp *l, const struct sys_msgrcv_args *uap, register_t *retval)
 {
 	/* {
 		syscallarg(int) msqid;
 		syscallarg(void *) msgp;
 		syscallarg(size_t) msgsz;
 		syscallarg(long) msgtyp;
 		syscallarg(int) msgflg;
 	} */
 
 	return msgrcv1(l, SCARG(uap, msqid), SCARG(uap, msgp),
 	    SCARG(uap, msgsz), SCARG(uap, msgtyp), SCARG(uap, msgflg),
 	    sizeof(long), copyout, retval);
 }
 
 int
 msgrcv1(struct lwp *l, int msqidr, char *user_msgp, size_t msgsz, long msgtyp,
     int msgflg, size_t typesz, copyout_t put_type, register_t *retval)
 {
 	size_t len;
 	kauth_cred_t cred = l->l_cred;
 	struct msqid_ds *msqptr;
 	struct __msg *msghdr;
 	int error = 0, msqid;
 	kmsq_t *msq;
 	short next;
 
 	MSG_PRINTF(("call to msgrcv(%d, %p, %lld, %ld, %d)\n", msqid,
 	    user_msgp, (long long)msgsz, msgtyp, msgflg));
 
 	if ((ssize_t)msgsz < 0)
 		return EINVAL;
 
 restart:
 	msqid = IPCID_TO_IX(msqidr);
 
 	mutex_enter(&msgmutex);
 	/* In case of reallocation, we will wait for completion */
 	while (__predict_false(msg_realloc_state))
 		cv_wait(&msg_realloc_cv, &msgmutex);
 
 	if (msqid < 0 || msqid >= msginfo.msgmni) {
 		MSG_PRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
 		    msginfo.msgmni));
 		error = EINVAL;
 		goto unlock;
 	}
 
 	msq = &msqs[msqid];
 	msqptr = &msq->msq_u;
 
 	if (msqptr->msg_qbytes == 0) {
 		MSG_PRINTF(("no such message queue id\n"));
 		error = EINVAL;
 		goto unlock;
 	}
 	if (msqptr->msg_perm._seq != IPCID_TO_SEQ(msqidr)) {
 		MSG_PRINTF(("wrong sequence number\n"));
 		error = EINVAL;
 		goto unlock;
 	}
 
 	if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_R))) {
 		MSG_PRINTF(("requester doesn't have read access\n"));
 		goto unlock;
 	}
 
 	msghdr = NULL;
 	while (msghdr == NULL) {
 		if (msgtyp == 0) {
 			msghdr = msqptr->_msg_first;
 			if (msghdr != NULL) {
 				if (msgsz < msghdr->msg_ts &&
 				    (msgflg & MSG_NOERROR) == 0) {
 					MSG_PRINTF(("first msg on the queue "
 					    "is too big (want %lld, got %d)\n",
 					    (long long)msgsz, msghdr->msg_ts));
 					error = E2BIG;
 					goto unlock;
 				}
 				if (msqptr->_msg_first == msqptr->_msg_last) {
 					msqptr->_msg_first = NULL;
 					msqptr->_msg_last = NULL;
 				} else {
 					msqptr->_msg_first = msghdr->msg_next;
 					KASSERT(msqptr->_msg_first != NULL);
 				}
 			}
 		} else {
 			struct __msg *previous;
 			struct __msg **prev;
 
 			for (previous = NULL, prev = &msqptr->_msg_first;
 			     (msghdr = *prev) != NULL;
 			     previous = msghdr, prev = &msghdr->msg_next) {
 				/*
 				 * Is this message's type an exact match or is
 				 * this message's type less than or equal to
 				 * the absolute value of a negative msgtyp?
 				 * Note that the second half of this test can
 				 * NEVER be true if msgtyp is positive since
 				 * msg_type is always positive!
 				 */
 
 				if (msgtyp != msghdr->msg_type &&
 				    msghdr->msg_type > -msgtyp)
 					continue;
 
 				MSG_PRINTF(("found message type %ld, requested %ld\n",
 				    msghdr->msg_type, msgtyp));
 				if (msgsz < msghdr->msg_ts &&
 				    (msgflg & MSG_NOERROR) == 0) {
 					MSG_PRINTF(("requested message on the queue "
 					    "is too big (want %lld, got %d)\n",
 					    (long long)msgsz, msghdr->msg_ts));
 					error = E2BIG;
 					goto unlock;
 				}
 				*prev = msghdr->msg_next;
 				if (msghdr != msqptr->_msg_last)
 					break;
 				if (previous == NULL) {
 					KASSERT(prev == &msqptr->_msg_first);
 					msqptr->_msg_first = NULL;
 					msqptr->_msg_last = NULL;
 				} else {
 					KASSERT(prev != &msqptr->_msg_first);
 					msqptr->_msg_last = previous;
 				}
 				break;
 			}
 		}
 
 		/*
 		 * We've either extracted the msghdr for the appropriate
 		 * message or there isn't one.
 		 * If there is one then bail out of this loop.
 		 */
 		if (msghdr != NULL)
 			break;
 
 		/*
 		 * Hmph! No message found. Does the user want to wait?
 		 */
 
 		if ((msgflg & IPC_NOWAIT) != 0) {
 			MSG_PRINTF(("no appropriate message found (msgtyp=%ld)\n",
 			    msgtyp));
 			error = ENOMSG;
 			goto unlock;
1087 } 1088 }
1088 1089
1089 /* 1090 /*
1090 * Wait for something to happen 1091 * Wait for something to happen
1091 */ 1092 */
1092 1093
1093 msg_waiters++; 1094 msg_waiters++;
1094 MSG_PRINTF(("msgrcv: goodnight\n")); 1095 MSG_PRINTF(("msgrcv: goodnight\n"));
1095 error = cv_wait_sig(&msq->msq_cv, &msgmutex); 1096 error = cv_wait_sig(&msq->msq_cv, &msgmutex);
1096 MSG_PRINTF(("msgrcv: good morning (error=%d)\n", error)); 1097 MSG_PRINTF(("msgrcv: good morning (error=%d)\n", error));
1097 msg_waiters--; 1098 msg_waiters--;
1098 1099
1099 /* 1100 /*
 1100 * If reallocation started while we slept, notify 1101 * If reallocation started while we slept, notify
 1101 * the reallocator and restart the call. 1102 * the reallocator and restart the call.
1102 */ 1103 */
1103 if (msg_realloc_state) { 1104 if (msg_realloc_state) {
1104 cv_broadcast(&msg_realloc_cv); 1105 cv_broadcast(&msg_realloc_cv);
1105 mutex_exit(&msgmutex); 1106 mutex_exit(&msgmutex);
1106 goto restart; 1107 goto restart;
1107 } 1108 }
1108 1109
1109 if (error != 0) { 1110 if (error != 0) {
 1110 MSG_PRINTF(("msgrcv: interrupted system call\n")); 1111 MSG_PRINTF(("msgrcv: interrupted system call\n"));
1111 error = EINTR; 1112 error = EINTR;
1112 goto unlock; 1113 goto unlock;
1113 } 1114 }
1114 1115
1115 /* 1116 /*
1116 * Make sure that the msq queue still exists 1117 * Make sure that the msq queue still exists
1117 */ 1118 */
1118 1119
1119 if (msqptr->msg_qbytes == 0 || 1120 if (msqptr->msg_qbytes == 0 ||
1120 msqptr->msg_perm._seq != IPCID_TO_SEQ(msqidr)) { 1121 msqptr->msg_perm._seq != IPCID_TO_SEQ(msqidr)) {
1121 MSG_PRINTF(("msqid deleted\n")); 1122 MSG_PRINTF(("msqid deleted\n"));
1122 error = EIDRM; 1123 error = EIDRM;
1123 goto unlock; 1124 goto unlock;
1124 } 1125 }
1125 } 1126 }
1126 1127
1127 /* 1128 /*
1128 * Return the message to the user. 1129 * Return the message to the user.
1129 * 1130 *
1130 * First, do the bookkeeping (before we risk being interrupted). 1131 * First, do the bookkeeping (before we risk being interrupted).
1131 */ 1132 */
1132 1133
1133 msqptr->_msg_cbytes -= msghdr->msg_ts; 1134 msqptr->_msg_cbytes -= msghdr->msg_ts;
1134 msqptr->msg_qnum--; 1135 msqptr->msg_qnum--;
1135 msqptr->msg_lrpid = l->l_proc->p_pid; 1136 msqptr->msg_lrpid = l->l_proc->p_pid;
1136 msqptr->msg_rtime = time_second; 1137 msqptr->msg_rtime = time_second;
1137 1138
1138 /* 1139 /*
1139 * Make msgsz the actual amount that we'll be returning. 1140 * Make msgsz the actual amount that we'll be returning.
1140 * Note that this effectively truncates the message if it is too long 1141 * Note that this effectively truncates the message if it is too long
1141 * (since msgsz is never increased). 1142 * (since msgsz is never increased).
1142 */ 1143 */
1143 1144
1144 MSG_PRINTF(("found a message, msgsz=%lld, msg_ts=%d\n", 1145 MSG_PRINTF(("found a message, msgsz=%lld, msg_ts=%d\n",
1145 (long long)msgsz, msghdr->msg_ts)); 1146 (long long)msgsz, msghdr->msg_ts));
1146 if (msgsz > msghdr->msg_ts) 1147 if (msgsz > msghdr->msg_ts)
1147 msgsz = msghdr->msg_ts; 1148 msgsz = msghdr->msg_ts;
1148 1149
1149 /* 1150 /*
1150 * Return the type to the user. 1151 * Return the type to the user.
1151 */ 1152 */
1152 mutex_exit(&msgmutex); 1153 mutex_exit(&msgmutex);
1153 error = (*put_type)(&msghdr->msg_type, user_msgp, typesz); 1154 error = (*put_type)(&msghdr->msg_type, user_msgp, typesz);
1154 mutex_enter(&msgmutex); 1155 mutex_enter(&msgmutex);
1155 if (error != 0) { 1156 if (error != 0) {
1156 MSG_PRINTF(("error (%d) copying out message type\n", error)); 1157 MSG_PRINTF(("error (%d) copying out message type\n", error));
1157 msg_freehdr(msghdr); 1158 msg_freehdr(msghdr);
1158 cv_broadcast(&msq->msq_cv); 1159 cv_broadcast(&msq->msq_cv);
1159 goto unlock; 1160 goto unlock;
1160 } 1161 }
1161 user_msgp += typesz; 1162 user_msgp += typesz;
1162 1163
1163 /* 1164 /*
1164 * Return the segments to the user 1165 * Return the segments to the user
1165 */ 1166 */
1166 1167
1167 next = msghdr->msg_spot; 1168 next = msghdr->msg_spot;
1168 for (len = 0; len < msgsz; len += msginfo.msgssz) { 1169 for (len = 0; len < msgsz; len += msginfo.msgssz) {
1169 size_t tlen; 1170 size_t tlen;
1170 KASSERT(next > -1); 1171 KASSERT(next > -1);
1171 KASSERT(next < msginfo.msgseg); 1172 KASSERT(next < msginfo.msgseg);
1172 1173
1173 if (msgsz - len > msginfo.msgssz) 1174 if (msgsz - len > msginfo.msgssz)
1174 tlen = msginfo.msgssz; 1175 tlen = msginfo.msgssz;
1175 else 1176 else
1176 tlen = msgsz - len; 1177 tlen = msgsz - len;
1177 mutex_exit(&msgmutex); 1178 mutex_exit(&msgmutex);
1178 error = copyout(&msgpool[next * msginfo.msgssz], 1179 error = copyout(&msgpool[next * msginfo.msgssz],
1179 user_msgp, tlen); 1180 user_msgp, tlen);
1180 mutex_enter(&msgmutex); 1181 mutex_enter(&msgmutex);
1181 if (error != 0) { 1182 if (error != 0) {
1182 MSG_PRINTF(("error (%d) copying out message segment\n", 1183 MSG_PRINTF(("error (%d) copying out message segment\n",
1183 error)); 1184 error));
1184 msg_freehdr(msghdr); 1185 msg_freehdr(msghdr);
1185 cv_broadcast(&msq->msq_cv); 1186 cv_broadcast(&msq->msq_cv);
1186 goto unlock; 1187 goto unlock;
1187 } 1188 }
1188 user_msgp += tlen; 1189 user_msgp += tlen;
1189 next = msgmaps[next].next; 1190 next = msgmaps[next].next;
1190 } 1191 }
1191 1192
1192 /* 1193 /*
1193 * Done, return the actual number of bytes copied out. 1194 * Done, return the actual number of bytes copied out.
1194 */ 1195 */
1195 1196
1196 msg_freehdr(msghdr); 1197 msg_freehdr(msghdr);
1197 cv_broadcast(&msq->msq_cv); 1198 cv_broadcast(&msq->msq_cv);
1198 *retval = msgsz; 1199 *retval = msgsz;
1199 1200
1200unlock: 1201unlock:
1201 mutex_exit(&msgmutex); 1202 mutex_exit(&msgmutex);
1202 return error; 1203 return error;
1203} 1204}
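For context, the error contract implemented by the loop above is exactly what a userland caller of msgrcv(2) observes: ENOMSG when IPC_NOWAIT is set and nothing matches, E2BIG when the message exceeds msgsz without MSG_NOERROR, and EIDRM when the queue vanishes during a sleep. A minimal hypothetical caller, not part of this diff (the message layout is invented for illustration):

	/*
	 * Hypothetical userland sketch: exercises the msgrcv(2) paths
	 * above.  msgtyp 0 takes the first message on the queue;
	 * IPC_NOWAIT turns "no message" into ENOMSG instead of a sleep.
	 */
	#include <sys/types.h>
	#include <sys/ipc.h>
	#include <sys/msg.h>
	#include <errno.h>
	#include <stdio.h>

	struct mymsg {
		long mtype;		/* copied out first, via (*put_type)() */
		char mtext[64];		/* then copied out segment by segment */
	};

	static int
	drain_one(int msqid)
	{
		struct mymsg m;
		ssize_t n;

		n = msgrcv(msqid, &m, sizeof(m.mtext), 0, IPC_NOWAIT);
		if (n == -1) {
			if (errno == ENOMSG)
				printf("queue empty\n");
			return -1;
		}
		printf("got %zd bytes of type %ld\n", n, m.mtype);
		return 0;
	}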
1204 1205
1205/* 1206/*
1206 * Sysctl initialization and nodes. 1207 * Sysctl initialization and nodes.
1207 */ 1208 */
1208 1209
1209static int 1210static int
1210sysctl_ipc_msgmni(SYSCTLFN_ARGS) 1211sysctl_ipc_msgmni(SYSCTLFN_ARGS)
1211{ 1212{
1212 int newsize, error; 1213 int newsize, error;
1213 struct sysctlnode node; 1214 struct sysctlnode node;
1214 node = *rnode; 1215 node = *rnode;
1215 node.sysctl_data = &newsize; 1216 node.sysctl_data = &newsize;
1216 1217
1217 newsize = msginfo.msgmni; 1218 newsize = msginfo.msgmni;
1218 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1219 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1219 if (error || newp == NULL) 1220 if (error || newp == NULL)
1220 return error; 1221 return error;
1221 1222
1222 sysctl_unlock(); 1223 sysctl_unlock();
1223 error = msgrealloc(newsize, msginfo.msgseg); 1224 error = msgrealloc(newsize, msginfo.msgseg);
1224 sysctl_relock(); 1225 sysctl_relock();
1225 return error; 1226 return error;
1226} 1227}
1227 1228
1228static int 1229static int
1229sysctl_ipc_msgseg(SYSCTLFN_ARGS) 1230sysctl_ipc_msgseg(SYSCTLFN_ARGS)
1230{ 1231{
1231 int newsize, error; 1232 int newsize, error;
1232 struct sysctlnode node; 1233 struct sysctlnode node;
1233 node = *rnode; 1234 node = *rnode;
1234 node.sysctl_data = &newsize; 1235 node.sysctl_data = &newsize;
1235 1236
1236 newsize = msginfo.msgseg; 1237 newsize = msginfo.msgseg;
1237 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1238 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1238 if (error || newp == NULL) 1239 if (error || newp == NULL)
1239 return error; 1240 return error;
1240 1241
1241 sysctl_unlock(); 1242 sysctl_unlock();
1242 error = msgrealloc(msginfo.msgmni, newsize); 1243 error = msgrealloc(msginfo.msgmni, newsize);
1243 sysctl_relock(); 1244 sysctl_relock();
1244 return error; 1245 return error;
1245} 1246}
1246 1247
1247SYSCTL_SETUP(sysctl_ipc_msg_setup, "sysctl kern.ipc subtree setup") 1248SYSCTL_SETUP(sysctl_ipc_msg_setup, "sysctl kern.ipc subtree setup")
1248{ 1249{
1249 const struct sysctlnode *node = NULL; 1250 const struct sysctlnode *node = NULL;
1250 1251
1251 sysctl_createv(clog, 0, NULL, NULL, 1252 sysctl_createv(clog, 0, NULL, NULL,
1252 CTLFLAG_PERMANENT, 1253 CTLFLAG_PERMANENT,
1253 CTLTYPE_NODE, "kern", NULL, 1254 CTLTYPE_NODE, "kern", NULL,
1254 NULL, 0, NULL, 0, 1255 NULL, 0, NULL, 0,
1255 CTL_KERN, CTL_EOL); 1256 CTL_KERN, CTL_EOL);
1256 sysctl_createv(clog, 0, NULL, &node, 1257 sysctl_createv(clog, 0, NULL, &node,
1257 CTLFLAG_PERMANENT, 1258 CTLFLAG_PERMANENT,
1258 CTLTYPE_NODE, "ipc", 1259 CTLTYPE_NODE, "ipc",
1259 SYSCTL_DESCR("SysV IPC options"), 1260 SYSCTL_DESCR("SysV IPC options"),
1260 NULL, 0, NULL, 0, 1261 NULL, 0, NULL, 0,
1261 CTL_KERN, KERN_SYSVIPC, CTL_EOL); 1262 CTL_KERN, KERN_SYSVIPC, CTL_EOL);
1262 1263
1263 if (node == NULL) 1264 if (node == NULL)
1264 return; 1265 return;
1265 1266
1266 sysctl_createv(clog, 0, &node, NULL, 1267 sysctl_createv(clog, 0, &node, NULL,
1267 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1268 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1268 CTLTYPE_INT, "msgmni", 1269 CTLTYPE_INT, "msgmni",
1269 SYSCTL_DESCR("Max number of message queue identifiers"), 1270 SYSCTL_DESCR("Max number of message queue identifiers"),
1270 sysctl_ipc_msgmni, 0, &msginfo.msgmni, 0, 1271 sysctl_ipc_msgmni, 0, &msginfo.msgmni, 0,
1271 CTL_CREATE, CTL_EOL); 1272 CTL_CREATE, CTL_EOL);
1272 sysctl_createv(clog, 0, &node, NULL, 1273 sysctl_createv(clog, 0, &node, NULL,
1273 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1274 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1274 CTLTYPE_INT, "msgseg", 1275 CTLTYPE_INT, "msgseg",
 1275 SYSCTL_DESCR("Max number of message segments"), 1276 SYSCTL_DESCR("Max number of message segments"),
1276 sysctl_ipc_msgseg, 0, &msginfo.msgseg, 0, 1277 sysctl_ipc_msgseg, 0, &msginfo.msgseg, 0,
1277 CTL_CREATE, CTL_EOL); 1278 CTL_CREATE, CTL_EOL);
1278} 1279}
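Both read-write nodes above drop the sysctl lock and hand the new value to msgrealloc(), so these limits can be resized on a running system. A sketch of driving kern.ipc.msgmni from userland with sysctlbyname(3); the value 64 is an arbitrary example, not a recommendation:

	/*
	 * Sketch only: resize kern.ipc.msgmni at runtime.  The handler
	 * sysctl_ipc_msgmni() above releases the sysctl lock around its
	 * msgrealloc() call with the new value.
	 */
	#include <sys/param.h>
	#include <sys/sysctl.h>

	static int
	grow_msgmni(void)
	{
		int newmni = 64;	/* example value */

		return sysctlbyname("kern.ipc.msgmni", NULL, NULL,
		    &newmni, sizeof(newmni));
	}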

cvs diff -r1.87 -r1.88 src/sys/kern/sysv_sem.c

--- src/sys/kern/sysv_sem.c 2011/05/13 22:16:44 1.87
+++ src/sys/kern/sysv_sem.c 2011/07/30 06:19:02 1.88
@@ -1,1183 +1,1184 @@ @@ -1,1183 +1,1184 @@
1/* $NetBSD: sysv_sem.c,v 1.87 2011/05/13 22:16:44 rmind Exp $ */ 1/* $NetBSD: sysv_sem.c,v 1.88 2011/07/30 06:19:02 uebayasi Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc. 4 * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Andrew Doran. 9 * NASA Ames Research Center, and by Andrew Doran.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE. 30 * POSSIBILITY OF SUCH DAMAGE.
31 */ 31 */
32 32
33/* 33/*
34 * Implementation of SVID semaphores 34 * Implementation of SVID semaphores
35 * 35 *
36 * Author: Daniel Boulet 36 * Author: Daniel Boulet
37 * 37 *
38 * This software is provided ``AS IS'' without any warranties of any kind. 38 * This software is provided ``AS IS'' without any warranties of any kind.
39 */ 39 */
40 40
41#include <sys/cdefs.h> 41#include <sys/cdefs.h>
42__KERNEL_RCSID(0, "$NetBSD: sysv_sem.c,v 1.87 2011/05/13 22:16:44 rmind Exp $"); 42__KERNEL_RCSID(0, "$NetBSD: sysv_sem.c,v 1.88 2011/07/30 06:19:02 uebayasi Exp $");
43 43
44#define SYSVSEM 44#define SYSVSEM
45 45
46#include <sys/param.h> 46#include <sys/param.h>
47#include <sys/kernel.h> 47#include <sys/kernel.h>
48#include <sys/sem.h> 48#include <sys/sem.h>
49#include <sys/sysctl.h> 49#include <sys/sysctl.h>
50#include <sys/kmem.h> 50#include <sys/kmem.h>
51#include <sys/mount.h> /* XXX for <sys/syscallargs.h> */ 51#include <sys/mount.h> /* XXX for <sys/syscallargs.h> */
52#include <sys/syscallargs.h> 52#include <sys/syscallargs.h>
53#include <sys/kauth.h> 53#include <sys/kauth.h>
54 54
55/*  55/*
56 * Memory areas: 56 * Memory areas:
57 * 1st: Pool of semaphore identifiers 57 * 1st: Pool of semaphore identifiers
58 * 2nd: Semaphores 58 * 2nd: Semaphores
 58 * 3rd: Condition variables 59 * 3rd: Condition variables
60 * 4th: Undo structures 60 * 4th: Undo structures
61 */ 61 */
62struct semid_ds * sema __read_mostly; 62struct semid_ds * sema __read_mostly;
63static struct __sem * sem __read_mostly; 63static struct __sem * sem __read_mostly;
64static kcondvar_t * semcv __read_mostly; 64static kcondvar_t * semcv __read_mostly;
65static int * semu __read_mostly; 65static int * semu __read_mostly;
66 66
67static kmutex_t semlock __cacheline_aligned; 67static kmutex_t semlock __cacheline_aligned;
68static bool sem_realloc_state __read_mostly; 68static bool sem_realloc_state __read_mostly;
69static kcondvar_t sem_realloc_cv; 69static kcondvar_t sem_realloc_cv;
70 70
71/* 71/*
72 * List of active undo structures, total number of semaphores, 72 * List of active undo structures, total number of semaphores,
73 * and total number of semop waiters. 73 * and total number of semop waiters.
74 */ 74 */
75static struct sem_undo *semu_list __read_mostly; 75static struct sem_undo *semu_list __read_mostly;
76static u_int semtot __cacheline_aligned; 76static u_int semtot __cacheline_aligned;
77static u_int sem_waiters __cacheline_aligned; 77static u_int sem_waiters __cacheline_aligned;
78 78
79/* Macro to find a particular sem_undo vector */ 79/* Macro to find a particular sem_undo vector */
80#define SEMU(s, ix) ((struct sem_undo *)(((long)s) + ix * seminfo.semusz)) 80#define SEMU(s, ix) ((struct sem_undo *)(((long)s) + ix * seminfo.semusz))
81 81
82#ifdef SEM_DEBUG 82#ifdef SEM_DEBUG
83#define SEM_PRINTF(a) printf a 83#define SEM_PRINTF(a) printf a
84#else 84#else
85#define SEM_PRINTF(a) 85#define SEM_PRINTF(a)
86#endif 86#endif
87 87
88struct sem_undo *semu_alloc(struct proc *); 88struct sem_undo *semu_alloc(struct proc *);
89int semundo_adjust(struct proc *, struct sem_undo **, int, int, int); 89int semundo_adjust(struct proc *, struct sem_undo **, int, int, int);
90void semundo_clear(int, int); 90void semundo_clear(int, int);
91 91
92void 92void
93seminit(void) 93seminit(void)
94{ 94{
95 int i, sz; 95 int i, sz;
96 vaddr_t v; 96 vaddr_t v;
97 97
98 mutex_init(&semlock, MUTEX_DEFAULT, IPL_NONE); 98 mutex_init(&semlock, MUTEX_DEFAULT, IPL_NONE);
99 cv_init(&sem_realloc_cv, "semrealc"); 99 cv_init(&sem_realloc_cv, "semrealc");
100 sem_realloc_state = false; 100 sem_realloc_state = false;
101 semtot = 0; 101 semtot = 0;
102 sem_waiters = 0; 102 sem_waiters = 0;
103 103
104 /* Allocate the wired memory for our structures */ 104 /* Allocate the wired memory for our structures */
105 sz = ALIGN(seminfo.semmni * sizeof(struct semid_ds)) + 105 sz = ALIGN(seminfo.semmni * sizeof(struct semid_ds)) +
106 ALIGN(seminfo.semmns * sizeof(struct __sem)) + 106 ALIGN(seminfo.semmns * sizeof(struct __sem)) +
107 ALIGN(seminfo.semmni * sizeof(kcondvar_t)) + 107 ALIGN(seminfo.semmni * sizeof(kcondvar_t)) +
108 ALIGN(seminfo.semmnu * seminfo.semusz); 108 ALIGN(seminfo.semmnu * seminfo.semusz);
109 v = uvm_km_alloc(kernel_map, round_page(sz), 0, 109 sz = round_page(sz);
110 UVM_KMF_WIRED|UVM_KMF_ZERO); 110 v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
111 if (v == 0) 111 if (v == 0)
112 panic("sysv_sem: cannot allocate memory"); 112 panic("sysv_sem: cannot allocate memory");
113 sema = (void *)v; 113 sema = (void *)v;
114 sem = (void *)((uintptr_t)sema + 114 sem = (void *)((uintptr_t)sema +
115 ALIGN(seminfo.semmni * sizeof(struct semid_ds))); 115 ALIGN(seminfo.semmni * sizeof(struct semid_ds)));
116 semcv = (void *)((uintptr_t)sem + 116 semcv = (void *)((uintptr_t)sem +
117 ALIGN(seminfo.semmns * sizeof(struct __sem))); 117 ALIGN(seminfo.semmns * sizeof(struct __sem)));
118 semu = (void *)((uintptr_t)semcv + 118 semu = (void *)((uintptr_t)semcv +
119 ALIGN(seminfo.semmni * sizeof(kcondvar_t))); 119 ALIGN(seminfo.semmni * sizeof(kcondvar_t)));
120 120
121 for (i = 0; i < seminfo.semmni; i++) { 121 for (i = 0; i < seminfo.semmni; i++) {
122 sema[i]._sem_base = 0; 122 sema[i]._sem_base = 0;
123 sema[i].sem_perm.mode = 0; 123 sema[i].sem_perm.mode = 0;
124 cv_init(&semcv[i], "semwait"); 124 cv_init(&semcv[i], "semwait");
125 } 125 }
126 for (i = 0; i < seminfo.semmnu; i++) { 126 for (i = 0; i < seminfo.semmnu; i++) {
127 struct sem_undo *suptr = SEMU(semu, i); 127 struct sem_undo *suptr = SEMU(semu, i);
128 suptr->un_proc = NULL; 128 suptr->un_proc = NULL;
129 } 129 }
130 semu_list = NULL; 130 semu_list = NULL;
131 exithook_establish(semexit, NULL); 131 exithook_establish(semexit, NULL);
132} 132}
133 133
134static int 134static int
135semrealloc(int newsemmni, int newsemmns, int newsemmnu) 135semrealloc(int newsemmni, int newsemmns, int newsemmnu)
136{ 136{
137 struct semid_ds *new_sema, *old_sema; 137 struct semid_ds *new_sema, *old_sema;
138 struct __sem *new_sem; 138 struct __sem *new_sem;
139 struct sem_undo *new_semu_list, *suptr, *nsuptr; 139 struct sem_undo *new_semu_list, *suptr, *nsuptr;
140 int *new_semu; 140 int *new_semu;
141 kcondvar_t *new_semcv; 141 kcondvar_t *new_semcv;
142 vaddr_t v; 142 vaddr_t v;
143 int i, j, lsemid, nmnus, sz; 143 int i, j, lsemid, nmnus, sz;
144 144
145 if (newsemmni < 1 || newsemmns < 1 || newsemmnu < 1) 145 if (newsemmni < 1 || newsemmns < 1 || newsemmnu < 1)
146 return EINVAL; 146 return EINVAL;
147 147
148 /* Allocate the wired memory for our structures */ 148 /* Allocate the wired memory for our structures */
149 sz = ALIGN(newsemmni * sizeof(struct semid_ds)) + 149 sz = ALIGN(newsemmni * sizeof(struct semid_ds)) +
150 ALIGN(newsemmns * sizeof(struct __sem)) + 150 ALIGN(newsemmns * sizeof(struct __sem)) +
151 ALIGN(newsemmni * sizeof(kcondvar_t)) + 151 ALIGN(newsemmni * sizeof(kcondvar_t)) +
152 ALIGN(newsemmnu * seminfo.semusz); 152 ALIGN(newsemmnu * seminfo.semusz);
153 v = uvm_km_alloc(kernel_map, round_page(sz), 0, 153 sz = round_page(sz);
154 UVM_KMF_WIRED|UVM_KMF_ZERO); 154 v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
155 if (v == 0) 155 if (v == 0)
156 return ENOMEM; 156 return ENOMEM;
157 157
158 mutex_enter(&semlock); 158 mutex_enter(&semlock);
159 if (sem_realloc_state) { 159 if (sem_realloc_state) {
160 mutex_exit(&semlock); 160 mutex_exit(&semlock);
161 uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED); 161 uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
162 return EBUSY; 162 return EBUSY;
163 } 163 }
164 sem_realloc_state = true; 164 sem_realloc_state = true;
165 if (sem_waiters) { 165 if (sem_waiters) {
166 /* 166 /*
167 * Mark reallocation state, wake-up all waiters, 167 * Mark reallocation state, wake-up all waiters,
168 * and wait while they will all exit. 168 * and wait while they will all exit.
169 */ 169 */
170 for (i = 0; i < seminfo.semmni; i++) 170 for (i = 0; i < seminfo.semmni; i++)
171 cv_broadcast(&semcv[i]); 171 cv_broadcast(&semcv[i]);
172 while (sem_waiters) 172 while (sem_waiters)
173 cv_wait(&sem_realloc_cv, &semlock); 173 cv_wait(&sem_realloc_cv, &semlock);
174 } 174 }
175 old_sema = sema; 175 old_sema = sema;
176 176
 177 /* Find the index of the last used slot */ 178 /* Find the index of the last used slot */
178 lsemid = 0; 178 lsemid = 0;
179 for (i = 0; i < seminfo.semmni; i++) 179 for (i = 0; i < seminfo.semmni; i++)
180 if (sema[i].sem_perm.mode & SEM_ALLOC) 180 if (sema[i].sem_perm.mode & SEM_ALLOC)
181 lsemid = i; 181 lsemid = i;
182 182
183 /* Get the number of currently used undo structures */ 183 /* Get the number of currently used undo structures */
184 nmnus = 0; 184 nmnus = 0;
185 for (i = 0; i < seminfo.semmnu; i++) { 185 for (i = 0; i < seminfo.semmnu; i++) {
186 suptr = SEMU(semu, i); 186 suptr = SEMU(semu, i);
187 if (suptr->un_proc == NULL) 187 if (suptr->un_proc == NULL)
188 continue; 188 continue;
189 nmnus++; 189 nmnus++;
190 } 190 }
191 191
192 /* We cannot reallocate less memory than we use */ 192 /* We cannot reallocate less memory than we use */
193 if (lsemid >= newsemmni || semtot > newsemmns || nmnus > newsemmnu) { 193 if (lsemid >= newsemmni || semtot > newsemmns || nmnus > newsemmnu) {
194 mutex_exit(&semlock); 194 mutex_exit(&semlock);
195 uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED); 195 uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
196 return EBUSY; 196 return EBUSY;
197 } 197 }
198 198
199 new_sema = (void *)v; 199 new_sema = (void *)v;
200 new_sem = (void *)((uintptr_t)new_sema + 200 new_sem = (void *)((uintptr_t)new_sema +
201 ALIGN(newsemmni * sizeof(struct semid_ds))); 201 ALIGN(newsemmni * sizeof(struct semid_ds)));
202 new_semcv = (void *)((uintptr_t)new_sem + 202 new_semcv = (void *)((uintptr_t)new_sem +
203 ALIGN(newsemmns * sizeof(struct __sem))); 203 ALIGN(newsemmns * sizeof(struct __sem)));
204 new_semu = (void *)((uintptr_t)new_semcv + 204 new_semu = (void *)((uintptr_t)new_semcv +
205 ALIGN(newsemmni * sizeof(kcondvar_t))); 205 ALIGN(newsemmni * sizeof(kcondvar_t)));
206 206
207 /* Initialize all semaphore identifiers and condvars */ 207 /* Initialize all semaphore identifiers and condvars */
208 for (i = 0; i < newsemmni; i++) { 208 for (i = 0; i < newsemmni; i++) {
209 new_sema[i]._sem_base = 0; 209 new_sema[i]._sem_base = 0;
210 new_sema[i].sem_perm.mode = 0; 210 new_sema[i].sem_perm.mode = 0;
211 cv_init(&new_semcv[i], "semwait"); 211 cv_init(&new_semcv[i], "semwait");
212 } 212 }
213 for (i = 0; i < newsemmnu; i++) { 213 for (i = 0; i < newsemmnu; i++) {
214 nsuptr = SEMU(new_semu, i); 214 nsuptr = SEMU(new_semu, i);
215 nsuptr->un_proc = NULL; 215 nsuptr->un_proc = NULL;
216 } 216 }
217 217
218 /* 218 /*
219 * Copy all identifiers, semaphores and list of the 219 * Copy all identifiers, semaphores and list of the
220 * undo structures to the new memory allocation. 220 * undo structures to the new memory allocation.
221 */ 221 */
222 j = 0; 222 j = 0;
223 for (i = 0; i <= lsemid; i++) { 223 for (i = 0; i <= lsemid; i++) {
224 if ((sema[i].sem_perm.mode & SEM_ALLOC) == 0) 224 if ((sema[i].sem_perm.mode & SEM_ALLOC) == 0)
225 continue; 225 continue;
226 memcpy(&new_sema[i], &sema[i], sizeof(struct semid_ds)); 226 memcpy(&new_sema[i], &sema[i], sizeof(struct semid_ds));
227 new_sema[i]._sem_base = &new_sem[j]; 227 new_sema[i]._sem_base = &new_sem[j];
228 memcpy(new_sema[i]._sem_base, sema[i]._sem_base, 228 memcpy(new_sema[i]._sem_base, sema[i]._sem_base,
229 (sizeof(struct __sem) * sema[i].sem_nsems)); 229 (sizeof(struct __sem) * sema[i].sem_nsems));
230 j += sema[i].sem_nsems; 230 j += sema[i].sem_nsems;
231 } 231 }
232 KASSERT(j == semtot); 232 KASSERT(j == semtot);
233 233
234 j = 0; 234 j = 0;
235 new_semu_list = NULL; 235 new_semu_list = NULL;
236 for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next) { 236 for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next) {
237 KASSERT(j < newsemmnu); 237 KASSERT(j < newsemmnu);
238 nsuptr = SEMU(new_semu, j); 238 nsuptr = SEMU(new_semu, j);
239 memcpy(nsuptr, suptr, SEMUSZ); 239 memcpy(nsuptr, suptr, SEMUSZ);
240 nsuptr->un_next = new_semu_list; 240 nsuptr->un_next = new_semu_list;
241 new_semu_list = nsuptr; 241 new_semu_list = nsuptr;
242 j++; 242 j++;
243 } 243 }
244 244
245 for (i = 0; i < seminfo.semmni; i++) { 245 for (i = 0; i < seminfo.semmni; i++) {
246 KASSERT(cv_has_waiters(&semcv[i]) == false); 246 KASSERT(cv_has_waiters(&semcv[i]) == false);
247 cv_destroy(&semcv[i]); 247 cv_destroy(&semcv[i]);
248 } 248 }
249 249
250 sz = ALIGN(seminfo.semmni * sizeof(struct semid_ds)) + 250 sz = ALIGN(seminfo.semmni * sizeof(struct semid_ds)) +
251 ALIGN(seminfo.semmns * sizeof(struct __sem)) + 251 ALIGN(seminfo.semmns * sizeof(struct __sem)) +
252 ALIGN(seminfo.semmni * sizeof(kcondvar_t)) + 252 ALIGN(seminfo.semmni * sizeof(kcondvar_t)) +
253 ALIGN(seminfo.semmnu * seminfo.semusz); 253 ALIGN(seminfo.semmnu * seminfo.semusz);
 254 sz = round_page(sz);
254 255
255 /* Set the pointers and update the new values */ 256 /* Set the pointers and update the new values */
256 sema = new_sema; 257 sema = new_sema;
257 sem = new_sem; 258 sem = new_sem;
258 semcv = new_semcv; 259 semcv = new_semcv;
259 semu = new_semu; 260 semu = new_semu;
260 semu_list = new_semu_list; 261 semu_list = new_semu_list;
261 262
262 seminfo.semmni = newsemmni; 263 seminfo.semmni = newsemmni;
263 seminfo.semmns = newsemmns; 264 seminfo.semmns = newsemmns;
264 seminfo.semmnu = newsemmnu; 265 seminfo.semmnu = newsemmnu;
265 266
266 /* Reallocation completed - notify all waiters, if any */ 267 /* Reallocation completed - notify all waiters, if any */
267 sem_realloc_state = false; 268 sem_realloc_state = false;
268 cv_broadcast(&sem_realloc_cv); 269 cv_broadcast(&sem_realloc_cv);
269 mutex_exit(&semlock); 270 mutex_exit(&semlock);
270 271
271 uvm_km_free(kernel_map, (vaddr_t)old_sema, sz, UVM_KMF_WIRED); 272 uvm_km_free(kernel_map, (vaddr_t)old_sema, sz, UVM_KMF_WIRED);
272 return 0; 273 return 0;
273} 274}
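The changed hunks in seminit() and semrealloc() above share one discipline: round the arena size to a page boundary once, up front, and pass that same rounded size to every uvm_km_free() call, including the EBUSY error paths and the release of the old arena. A distilled sketch of the pattern in kernel context (not a literal excerpt; the function name is invented):

	/*
	 * Distilled sketch: uvm_km_free(9) must be given the same size
	 * that uvm_km_alloc(9) actually mapped, so the size is rounded
	 * once and the rounded value is reused on every release path,
	 * error returns included.
	 */
	#include <sys/param.h>
	#include <uvm/uvm_extern.h>

	static int
	arena_cycle(size_t nbytes)
	{
		vaddr_t va;
		vsize_t sz;

		sz = round_page(nbytes);	/* rounded once, up front */
		va = uvm_km_alloc(kernel_map, sz, 0,
		    UVM_KMF_WIRED | UVM_KMF_ZERO);
		if (va == 0)
			return ENOMEM;

		/* ... populate the arena ... */

		uvm_km_free(kernel_map, va, sz, UVM_KMF_WIRED); /* same sz */
		return 0;
	}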
274 275
275/* 276/*
276 * Placebo. 277 * Placebo.
277 */ 278 */
278 279
279int 280int
280sys_semconfig(struct lwp *l, const struct sys_semconfig_args *uap, register_t *retval) 281sys_semconfig(struct lwp *l, const struct sys_semconfig_args *uap, register_t *retval)
281{ 282{
282 283
283 *retval = 0; 284 *retval = 0;
284 return 0; 285 return 0;
285} 286}
286 287
287/* 288/*
288 * Allocate a new sem_undo structure for a process. 289 * Allocate a new sem_undo structure for a process.
289 * => Returns NULL on failure. 290 * => Returns NULL on failure.
290 */ 291 */
291struct sem_undo * 292struct sem_undo *
292semu_alloc(struct proc *p) 293semu_alloc(struct proc *p)
293{ 294{
294 struct sem_undo *suptr, **supptr; 295 struct sem_undo *suptr, **supptr;
295 bool attempted = false; 296 bool attempted = false;
296 int i; 297 int i;
297 298
298 KASSERT(mutex_owned(&semlock)); 299 KASSERT(mutex_owned(&semlock));
299again: 300again:
300 /* Look for a free structure. */ 301 /* Look for a free structure. */
301 for (i = 0; i < seminfo.semmnu; i++) { 302 for (i = 0; i < seminfo.semmnu; i++) {
302 suptr = SEMU(semu, i); 303 suptr = SEMU(semu, i);
303 if (suptr->un_proc == NULL) { 304 if (suptr->un_proc == NULL) {
304 /* Found. Fill it in and return. */ 305 /* Found. Fill it in and return. */
305 suptr->un_next = semu_list; 306 suptr->un_next = semu_list;
306 semu_list = suptr; 307 semu_list = suptr;
307 suptr->un_cnt = 0; 308 suptr->un_cnt = 0;
308 suptr->un_proc = p; 309 suptr->un_proc = p;
309 return suptr; 310 return suptr;
310 } 311 }
311 } 312 }
312 313
313 /* Not found. Attempt to free some structures. */ 314 /* Not found. Attempt to free some structures. */
314 if (!attempted) { 315 if (!attempted) {
315 bool freed = false; 316 bool freed = false;
316 317
317 attempted = true; 318 attempted = true;
318 supptr = &semu_list; 319 supptr = &semu_list;
319 while ((suptr = *supptr) != NULL) { 320 while ((suptr = *supptr) != NULL) {
320 if (suptr->un_cnt == 0) { 321 if (suptr->un_cnt == 0) {
321 suptr->un_proc = NULL; 322 suptr->un_proc = NULL;
322 *supptr = suptr->un_next; 323 *supptr = suptr->un_next;
323 freed = true; 324 freed = true;
324 } else { 325 } else {
325 supptr = &suptr->un_next; 326 supptr = &suptr->un_next;
326 } 327 }
327 } 328 }
328 if (freed) { 329 if (freed) {
329 goto again; 330 goto again;
330 } 331 }
331 } 332 }
332 return NULL; 333 return NULL;
333} 334}
334 335
335/* 336/*
336 * Adjust a particular entry for a particular proc 337 * Adjust a particular entry for a particular proc
337 */ 338 */
338 339
339int 340int
340semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid, int semnum, 341semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid, int semnum,
341 int adjval) 342 int adjval)
342{ 343{
343 struct sem_undo *suptr; 344 struct sem_undo *suptr;
344 struct undo *sunptr; 345 struct undo *sunptr;
345 int i; 346 int i;
346 347
347 KASSERT(mutex_owned(&semlock)); 348 KASSERT(mutex_owned(&semlock));
348 349
349 /* 350 /*
350 * Look for and remember the sem_undo if the caller doesn't 351 * Look for and remember the sem_undo if the caller doesn't
351 * provide it 352 * provide it
352 */ 353 */
353 354
354 suptr = *supptr; 355 suptr = *supptr;
355 if (suptr == NULL) { 356 if (suptr == NULL) {
356 for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next) 357 for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next)
357 if (suptr->un_proc == p) 358 if (suptr->un_proc == p)
358 break; 359 break;
359 360
360 if (suptr == NULL) { 361 if (suptr == NULL) {
361 suptr = semu_alloc(p); 362 suptr = semu_alloc(p);
362 if (suptr == NULL) 363 if (suptr == NULL)
363 return (ENOSPC); 364 return (ENOSPC);
364 } 365 }
365 *supptr = suptr; 366 *supptr = suptr;
366 } 367 }
367 368
368 /* 369 /*
369 * Look for the requested entry and adjust it (delete if 370 * Look for the requested entry and adjust it (delete if
370 * adjval becomes 0). 371 * adjval becomes 0).
371 */ 372 */
372 sunptr = &suptr->un_ent[0]; 373 sunptr = &suptr->un_ent[0];
373 for (i = 0; i < suptr->un_cnt; i++, sunptr++) { 374 for (i = 0; i < suptr->un_cnt; i++, sunptr++) {
374 if (sunptr->un_id != semid || sunptr->un_num != semnum) 375 if (sunptr->un_id != semid || sunptr->un_num != semnum)
375 continue; 376 continue;
376 sunptr->un_adjval += adjval; 377 sunptr->un_adjval += adjval;
377 if (sunptr->un_adjval == 0) { 378 if (sunptr->un_adjval == 0) {
378 suptr->un_cnt--; 379 suptr->un_cnt--;
379 if (i < suptr->un_cnt) 380 if (i < suptr->un_cnt)
380 suptr->un_ent[i] = 381 suptr->un_ent[i] =
381 suptr->un_ent[suptr->un_cnt]; 382 suptr->un_ent[suptr->un_cnt];
382 } 383 }
383 return (0); 384 return (0);
384 } 385 }
385 386
386 /* Didn't find the right entry - create it */ 387 /* Didn't find the right entry - create it */
387 if (suptr->un_cnt == SEMUME) 388 if (suptr->un_cnt == SEMUME)
388 return (EINVAL); 389 return (EINVAL);
389 390
390 sunptr = &suptr->un_ent[suptr->un_cnt]; 391 sunptr = &suptr->un_ent[suptr->un_cnt];
391 suptr->un_cnt++; 392 suptr->un_cnt++;
392 sunptr->un_adjval = adjval; 393 sunptr->un_adjval = adjval;
393 sunptr->un_id = semid; 394 sunptr->un_id = semid;
394 sunptr->un_num = semnum; 395 sunptr->un_num = semnum;
395 return (0); 396 return (0);
396} 397}
397 398
398void 399void
399semundo_clear(int semid, int semnum) 400semundo_clear(int semid, int semnum)
400{ 401{
401 struct sem_undo *suptr; 402 struct sem_undo *suptr;
402 struct undo *sunptr, *sunend; 403 struct undo *sunptr, *sunend;
403 404
404 KASSERT(mutex_owned(&semlock)); 405 KASSERT(mutex_owned(&semlock));
405 406
406 for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next) 407 for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next)
407 for (sunptr = &suptr->un_ent[0], 408 for (sunptr = &suptr->un_ent[0],
408 sunend = sunptr + suptr->un_cnt; sunptr < sunend;) { 409 sunend = sunptr + suptr->un_cnt; sunptr < sunend;) {
409 if (sunptr->un_id == semid) { 410 if (sunptr->un_id == semid) {
410 if (semnum == -1 || sunptr->un_num == semnum) { 411 if (semnum == -1 || sunptr->un_num == semnum) {
411 suptr->un_cnt--; 412 suptr->un_cnt--;
412 sunend--; 413 sunend--;
413 if (sunptr != sunend) 414 if (sunptr != sunend)
414 *sunptr = *sunend; 415 *sunptr = *sunend;
415 if (semnum != -1) 416 if (semnum != -1)
416 break; 417 break;
417 else 418 else
418 continue; 419 continue;
419 } 420 }
420 } 421 }
421 sunptr++; 422 sunptr++;
422 } 423 }
423} 424}
424 425
425int 426int
426sys_____semctl50(struct lwp *l, const struct sys_____semctl50_args *uap, 427sys_____semctl50(struct lwp *l, const struct sys_____semctl50_args *uap,
427 register_t *retval) 428 register_t *retval)
428{ 429{
429 /* { 430 /* {
430 syscallarg(int) semid; 431 syscallarg(int) semid;
431 syscallarg(int) semnum; 432 syscallarg(int) semnum;
432 syscallarg(int) cmd; 433 syscallarg(int) cmd;
433 syscallarg(union __semun *) arg; 434 syscallarg(union __semun *) arg;
434 } */ 435 } */
435 struct semid_ds sembuf; 436 struct semid_ds sembuf;
436 int cmd, error; 437 int cmd, error;
437 void *pass_arg; 438 void *pass_arg;
438 union __semun karg; 439 union __semun karg;
439 440
440 cmd = SCARG(uap, cmd); 441 cmd = SCARG(uap, cmd);
441 442
442 pass_arg = get_semctl_arg(cmd, &sembuf, &karg); 443 pass_arg = get_semctl_arg(cmd, &sembuf, &karg);
443 444
444 if (pass_arg) { 445 if (pass_arg) {
445 error = copyin(SCARG(uap, arg), &karg, sizeof(karg)); 446 error = copyin(SCARG(uap, arg), &karg, sizeof(karg));
446 if (error) 447 if (error)
447 return error; 448 return error;
448 if (cmd == IPC_SET) { 449 if (cmd == IPC_SET) {
449 error = copyin(karg.buf, &sembuf, sizeof(sembuf)); 450 error = copyin(karg.buf, &sembuf, sizeof(sembuf));
450 if (error) 451 if (error)
451 return (error); 452 return (error);
452 } 453 }
453 } 454 }
454 455
455 error = semctl1(l, SCARG(uap, semid), SCARG(uap, semnum), cmd, 456 error = semctl1(l, SCARG(uap, semid), SCARG(uap, semnum), cmd,
456 pass_arg, retval); 457 pass_arg, retval);
457 458
458 if (error == 0 && cmd == IPC_STAT) 459 if (error == 0 && cmd == IPC_STAT)
459 error = copyout(&sembuf, karg.buf, sizeof(sembuf)); 460 error = copyout(&sembuf, karg.buf, sizeof(sembuf));
460 461
461 return (error); 462 return (error);
462} 463}
463 464
464int 465int
465semctl1(struct lwp *l, int semid, int semnum, int cmd, void *v, 466semctl1(struct lwp *l, int semid, int semnum, int cmd, void *v,
466 register_t *retval) 467 register_t *retval)
467{ 468{
468 kauth_cred_t cred = l->l_cred; 469 kauth_cred_t cred = l->l_cred;
469 union __semun *arg = v; 470 union __semun *arg = v;
470 struct semid_ds *sembuf = v, *semaptr; 471 struct semid_ds *sembuf = v, *semaptr;
471 int i, error, ix; 472 int i, error, ix;
472 473
473 SEM_PRINTF(("call to semctl(%d, %d, %d, %p)\n", 474 SEM_PRINTF(("call to semctl(%d, %d, %d, %p)\n",
474 semid, semnum, cmd, v)); 475 semid, semnum, cmd, v));
475 476
476 mutex_enter(&semlock); 477 mutex_enter(&semlock);
477 478
478 ix = IPCID_TO_IX(semid); 479 ix = IPCID_TO_IX(semid);
479 if (ix < 0 || ix >= seminfo.semmni) { 480 if (ix < 0 || ix >= seminfo.semmni) {
480 mutex_exit(&semlock); 481 mutex_exit(&semlock);
481 return (EINVAL); 482 return (EINVAL);
482 } 483 }
483 484
484 semaptr = &sema[ix]; 485 semaptr = &sema[ix];
485 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 || 486 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
486 semaptr->sem_perm._seq != IPCID_TO_SEQ(semid)) { 487 semaptr->sem_perm._seq != IPCID_TO_SEQ(semid)) {
487 mutex_exit(&semlock); 488 mutex_exit(&semlock);
488 return (EINVAL); 489 return (EINVAL);
489 } 490 }
490 491
491 switch (cmd) { 492 switch (cmd) {
492 case IPC_RMID: 493 case IPC_RMID:
493 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M)) != 0) 494 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M)) != 0)
494 break; 495 break;
495 semaptr->sem_perm.cuid = kauth_cred_geteuid(cred); 496 semaptr->sem_perm.cuid = kauth_cred_geteuid(cred);
496 semaptr->sem_perm.uid = kauth_cred_geteuid(cred); 497 semaptr->sem_perm.uid = kauth_cred_geteuid(cred);
497 semtot -= semaptr->sem_nsems; 498 semtot -= semaptr->sem_nsems;
498 for (i = semaptr->_sem_base - sem; i < semtot; i++) 499 for (i = semaptr->_sem_base - sem; i < semtot; i++)
499 sem[i] = sem[i + semaptr->sem_nsems]; 500 sem[i] = sem[i + semaptr->sem_nsems];
500 for (i = 0; i < seminfo.semmni; i++) { 501 for (i = 0; i < seminfo.semmni; i++) {
501 if ((sema[i].sem_perm.mode & SEM_ALLOC) && 502 if ((sema[i].sem_perm.mode & SEM_ALLOC) &&
502 sema[i]._sem_base > semaptr->_sem_base) 503 sema[i]._sem_base > semaptr->_sem_base)
503 sema[i]._sem_base -= semaptr->sem_nsems; 504 sema[i]._sem_base -= semaptr->sem_nsems;
504 } 505 }
505 semaptr->sem_perm.mode = 0; 506 semaptr->sem_perm.mode = 0;
506 semundo_clear(ix, -1); 507 semundo_clear(ix, -1);
507 cv_broadcast(&semcv[ix]); 508 cv_broadcast(&semcv[ix]);
508 break; 509 break;
509 510
510 case IPC_SET: 511 case IPC_SET:
511 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M))) 512 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M)))
512 break; 513 break;
513 KASSERT(sembuf != NULL); 514 KASSERT(sembuf != NULL);
514 semaptr->sem_perm.uid = sembuf->sem_perm.uid; 515 semaptr->sem_perm.uid = sembuf->sem_perm.uid;
515 semaptr->sem_perm.gid = sembuf->sem_perm.gid; 516 semaptr->sem_perm.gid = sembuf->sem_perm.gid;
516 semaptr->sem_perm.mode = (semaptr->sem_perm.mode & ~0777) | 517 semaptr->sem_perm.mode = (semaptr->sem_perm.mode & ~0777) |
517 (sembuf->sem_perm.mode & 0777); 518 (sembuf->sem_perm.mode & 0777);
518 semaptr->sem_ctime = time_second; 519 semaptr->sem_ctime = time_second;
519 break; 520 break;
520 521
521 case IPC_STAT: 522 case IPC_STAT:
522 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R))) 523 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
523 break; 524 break;
524 KASSERT(sembuf != NULL); 525 KASSERT(sembuf != NULL);
525 memcpy(sembuf, semaptr, sizeof(struct semid_ds)); 526 memcpy(sembuf, semaptr, sizeof(struct semid_ds));
526 sembuf->sem_perm.mode &= 0777; 527 sembuf->sem_perm.mode &= 0777;
527 break; 528 break;
528 529
529 case GETNCNT: 530 case GETNCNT:
530 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R))) 531 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
531 break; 532 break;
532 if (semnum < 0 || semnum >= semaptr->sem_nsems) { 533 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
533 error = EINVAL; 534 error = EINVAL;
534 break; 535 break;
535 } 536 }
536 *retval = semaptr->_sem_base[semnum].semncnt; 537 *retval = semaptr->_sem_base[semnum].semncnt;
537 break; 538 break;
538 539
539 case GETPID: 540 case GETPID:
540 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R))) 541 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
541 break; 542 break;
542 if (semnum < 0 || semnum >= semaptr->sem_nsems) { 543 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
543 error = EINVAL; 544 error = EINVAL;
544 break; 545 break;
545 } 546 }
546 *retval = semaptr->_sem_base[semnum].sempid; 547 *retval = semaptr->_sem_base[semnum].sempid;
547 break; 548 break;
548 549
549 case GETVAL: 550 case GETVAL:
550 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R))) 551 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
551 break; 552 break;
552 if (semnum < 0 || semnum >= semaptr->sem_nsems) { 553 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
553 error = EINVAL; 554 error = EINVAL;
554 break; 555 break;
555 } 556 }
556 *retval = semaptr->_sem_base[semnum].semval; 557 *retval = semaptr->_sem_base[semnum].semval;
557 break; 558 break;
558 559
559 case GETALL: 560 case GETALL:
560 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R))) 561 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
561 break; 562 break;
562 KASSERT(arg != NULL); 563 KASSERT(arg != NULL);
563 for (i = 0; i < semaptr->sem_nsems; i++) { 564 for (i = 0; i < semaptr->sem_nsems; i++) {
564 error = copyout(&semaptr->_sem_base[i].semval, 565 error = copyout(&semaptr->_sem_base[i].semval,
565 &arg->array[i], sizeof(arg->array[i])); 566 &arg->array[i], sizeof(arg->array[i]));
566 if (error != 0) 567 if (error != 0)
567 break; 568 break;
568 } 569 }
569 break; 570 break;
570 571
571 case GETZCNT: 572 case GETZCNT:
572 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R))) 573 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
573 break; 574 break;
574 if (semnum < 0 || semnum >= semaptr->sem_nsems) { 575 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
575 error = EINVAL; 576 error = EINVAL;
576 break; 577 break;
577 } 578 }
578 *retval = semaptr->_sem_base[semnum].semzcnt; 579 *retval = semaptr->_sem_base[semnum].semzcnt;
579 break; 580 break;
580 581
581 case SETVAL: 582 case SETVAL:
582 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W))) 583 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
583 break; 584 break;
584 if (semnum < 0 || semnum >= semaptr->sem_nsems) { 585 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
585 error = EINVAL; 586 error = EINVAL;
586 break; 587 break;
587 } 588 }
588 KASSERT(arg != NULL); 589 KASSERT(arg != NULL);
589 if ((unsigned int)arg->val > seminfo.semvmx) { 590 if ((unsigned int)arg->val > seminfo.semvmx) {
590 error = ERANGE; 591 error = ERANGE;
591 break; 592 break;
592 } 593 }
593 semaptr->_sem_base[semnum].semval = arg->val; 594 semaptr->_sem_base[semnum].semval = arg->val;
594 semundo_clear(ix, semnum); 595 semundo_clear(ix, semnum);
595 cv_broadcast(&semcv[ix]); 596 cv_broadcast(&semcv[ix]);
596 break; 597 break;
597 598
598 case SETALL: 599 case SETALL:
599 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W))) 600 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
600 break; 601 break;
601 KASSERT(arg != NULL); 602 KASSERT(arg != NULL);
602 for (i = 0; i < semaptr->sem_nsems; i++) { 603 for (i = 0; i < semaptr->sem_nsems; i++) {
603 unsigned short semval; 604 unsigned short semval;
604 error = copyin(&arg->array[i], &semval, 605 error = copyin(&arg->array[i], &semval,
605 sizeof(arg->array[i])); 606 sizeof(arg->array[i]));
606 if (error != 0) 607 if (error != 0)
607 break; 608 break;
608 if ((unsigned int)semval > seminfo.semvmx) { 609 if ((unsigned int)semval > seminfo.semvmx) {
609 error = ERANGE; 610 error = ERANGE;
610 break; 611 break;
611 } 612 }
612 semaptr->_sem_base[i].semval = semval; 613 semaptr->_sem_base[i].semval = semval;
613 } 614 }
614 semundo_clear(ix, -1); 615 semundo_clear(ix, -1);
615 cv_broadcast(&semcv[ix]); 616 cv_broadcast(&semcv[ix]);
616 break; 617 break;
617 618
618 default: 619 default:
619 error = EINVAL; 620 error = EINVAL;
620 break; 621 break;
621 } 622 }
622 623
623 mutex_exit(&semlock); 624 mutex_exit(&semlock);
624 return (error); 625 return (error);
625} 626}
626 627
627int 628int
628sys_semget(struct lwp *l, const struct sys_semget_args *uap, register_t *retval) 629sys_semget(struct lwp *l, const struct sys_semget_args *uap, register_t *retval)
629{ 630{
630 /* { 631 /* {
631 syscallarg(key_t) key; 632 syscallarg(key_t) key;
632 syscallarg(int) nsems; 633 syscallarg(int) nsems;
633 syscallarg(int) semflg; 634 syscallarg(int) semflg;
634 } */ 635 } */
635 int semid, error = 0; 636 int semid, error = 0;
636 int key = SCARG(uap, key); 637 int key = SCARG(uap, key);
637 int nsems = SCARG(uap, nsems); 638 int nsems = SCARG(uap, nsems);
638 int semflg = SCARG(uap, semflg); 639 int semflg = SCARG(uap, semflg);
639 kauth_cred_t cred = l->l_cred; 640 kauth_cred_t cred = l->l_cred;
640 641
641 SEM_PRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg)); 642 SEM_PRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg));
642 643
643 mutex_enter(&semlock); 644 mutex_enter(&semlock);
644 645
645 if (key != IPC_PRIVATE) { 646 if (key != IPC_PRIVATE) {
646 for (semid = 0; semid < seminfo.semmni; semid++) { 647 for (semid = 0; semid < seminfo.semmni; semid++) {
647 if ((sema[semid].sem_perm.mode & SEM_ALLOC) && 648 if ((sema[semid].sem_perm.mode & SEM_ALLOC) &&
648 sema[semid].sem_perm._key == key) 649 sema[semid].sem_perm._key == key)
649 break; 650 break;
650 } 651 }
651 if (semid < seminfo.semmni) { 652 if (semid < seminfo.semmni) {
652 SEM_PRINTF(("found public key\n")); 653 SEM_PRINTF(("found public key\n"));
653 if ((error = ipcperm(cred, &sema[semid].sem_perm, 654 if ((error = ipcperm(cred, &sema[semid].sem_perm,
654 semflg & 0700))) 655 semflg & 0700)))
655 goto out; 656 goto out;
656 if (nsems > 0 && sema[semid].sem_nsems < nsems) { 657 if (nsems > 0 && sema[semid].sem_nsems < nsems) {
657 SEM_PRINTF(("too small\n")); 658 SEM_PRINTF(("too small\n"));
658 error = EINVAL; 659 error = EINVAL;
659 goto out; 660 goto out;
660 } 661 }
661 if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) { 662 if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) {
662 SEM_PRINTF(("not exclusive\n")); 663 SEM_PRINTF(("not exclusive\n"));
663 error = EEXIST; 664 error = EEXIST;
664 goto out; 665 goto out;
665 } 666 }
666 goto found; 667 goto found;
667 } 668 }
668 } 669 }
669 670
670 SEM_PRINTF(("need to allocate the semid_ds\n")); 671 SEM_PRINTF(("need to allocate the semid_ds\n"));
671 if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) { 672 if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) {
672 if (nsems <= 0 || nsems > seminfo.semmsl) { 673 if (nsems <= 0 || nsems > seminfo.semmsl) {
673 SEM_PRINTF(("nsems out of range (0<%d<=%d)\n", nsems, 674 SEM_PRINTF(("nsems out of range (0<%d<=%d)\n", nsems,
674 seminfo.semmsl)); 675 seminfo.semmsl));
675 error = EINVAL; 676 error = EINVAL;
676 goto out; 677 goto out;
677 } 678 }
678 if (nsems > seminfo.semmns - semtot) { 679 if (nsems > seminfo.semmns - semtot) {
679 SEM_PRINTF(("not enough semaphores left " 680 SEM_PRINTF(("not enough semaphores left "
680 "(need %d, got %d)\n", 681 "(need %d, got %d)\n",
681 nsems, seminfo.semmns - semtot)); 682 nsems, seminfo.semmns - semtot));
682 error = ENOSPC; 683 error = ENOSPC;
683 goto out; 684 goto out;
684 } 685 }
685 for (semid = 0; semid < seminfo.semmni; semid++) { 686 for (semid = 0; semid < seminfo.semmni; semid++) {
686 if ((sema[semid].sem_perm.mode & SEM_ALLOC) == 0) 687 if ((sema[semid].sem_perm.mode & SEM_ALLOC) == 0)
687 break; 688 break;
688 } 689 }
689 if (semid == seminfo.semmni) { 690 if (semid == seminfo.semmni) {
690 SEM_PRINTF(("no more semid_ds's available\n")); 691 SEM_PRINTF(("no more semid_ds's available\n"));
691 error = ENOSPC; 692 error = ENOSPC;
692 goto out; 693 goto out;
693 } 694 }
694 SEM_PRINTF(("semid %d is available\n", semid)); 695 SEM_PRINTF(("semid %d is available\n", semid));
695 sema[semid].sem_perm._key = key; 696 sema[semid].sem_perm._key = key;
696 sema[semid].sem_perm.cuid = kauth_cred_geteuid(cred); 697 sema[semid].sem_perm.cuid = kauth_cred_geteuid(cred);
697 sema[semid].sem_perm.uid = kauth_cred_geteuid(cred); 698 sema[semid].sem_perm.uid = kauth_cred_geteuid(cred);
698 sema[semid].sem_perm.cgid = kauth_cred_getegid(cred); 699 sema[semid].sem_perm.cgid = kauth_cred_getegid(cred);
699 sema[semid].sem_perm.gid = kauth_cred_getegid(cred); 700 sema[semid].sem_perm.gid = kauth_cred_getegid(cred);
700 sema[semid].sem_perm.mode = (semflg & 0777) | SEM_ALLOC; 701 sema[semid].sem_perm.mode = (semflg & 0777) | SEM_ALLOC;
701 sema[semid].sem_perm._seq = 702 sema[semid].sem_perm._seq =
702 (sema[semid].sem_perm._seq + 1) & 0x7fff; 703 (sema[semid].sem_perm._seq + 1) & 0x7fff;
703 sema[semid].sem_nsems = nsems; 704 sema[semid].sem_nsems = nsems;
704 sema[semid].sem_otime = 0; 705 sema[semid].sem_otime = 0;
705 sema[semid].sem_ctime = time_second; 706 sema[semid].sem_ctime = time_second;
706 sema[semid]._sem_base = &sem[semtot]; 707 sema[semid]._sem_base = &sem[semtot];
707 semtot += nsems; 708 semtot += nsems;
708 memset(sema[semid]._sem_base, 0, 709 memset(sema[semid]._sem_base, 0,
709 sizeof(sema[semid]._sem_base[0]) * nsems); 710 sizeof(sema[semid]._sem_base[0]) * nsems);
710 SEM_PRINTF(("sembase = %p, next = %p\n", sema[semid]._sem_base, 711 SEM_PRINTF(("sembase = %p, next = %p\n", sema[semid]._sem_base,
711 &sem[semtot])); 712 &sem[semtot]));
712 } else { 713 } else {
713 SEM_PRINTF(("didn't find it and wasn't asked to create it\n")); 714 SEM_PRINTF(("didn't find it and wasn't asked to create it\n"));
714 error = ENOENT; 715 error = ENOENT;
715 goto out; 716 goto out;
716 } 717 }
717 718
718 found: 719 found:
719 *retval = IXSEQ_TO_IPCID(semid, sema[semid].sem_perm); 720 *retval = IXSEQ_TO_IPCID(semid, sema[semid].sem_perm);
720 out: 721 out:
721 mutex_exit(&semlock); 722 mutex_exit(&semlock);
722 return (error); 723 return (error);
723} 724}
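Viewed from userland, the lookup-then-create logic above maps directly onto the usual semget(2) flag combinations: IPC_CREAT|IPC_EXCL on an existing key fails with EEXIST (the "not exclusive" branch), a plain lookup of a missing key fails with ENOENT, and an out-of-range nsems at create time fails with EINVAL. A hypothetical create-or-attach helper, for illustration only:

	/*
	 * Hypothetical helper: create a set of four semaphores, or
	 * attach to an existing one if the key is already in use.
	 */
	#include <sys/types.h>
	#include <sys/ipc.h>
	#include <sys/sem.h>
	#include <errno.h>

	static int
	make_semset(key_t key)
	{
		int semid;

		semid = semget(key, 4, IPC_CREAT | IPC_EXCL | 0600);
		if (semid == -1 && errno == EEXIST)
			semid = semget(key, 0, 0600);	/* attach instead */
		return semid;
	}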
724 725
725#define SMALL_SOPS 8 726#define SMALL_SOPS 8
726 727
727int 728int
728sys_semop(struct lwp *l, const struct sys_semop_args *uap, register_t *retval) 729sys_semop(struct lwp *l, const struct sys_semop_args *uap, register_t *retval)
729{ 730{
730 /* { 731 /* {
731 syscallarg(int) semid; 732 syscallarg(int) semid;
732 syscallarg(struct sembuf *) sops; 733 syscallarg(struct sembuf *) sops;
733 syscallarg(size_t) nsops; 734 syscallarg(size_t) nsops;
734 } */ 735 } */
735 struct proc *p = l->l_proc; 736 struct proc *p = l->l_proc;
736 int semid = SCARG(uap, semid), seq; 737 int semid = SCARG(uap, semid), seq;
737 size_t nsops = SCARG(uap, nsops); 738 size_t nsops = SCARG(uap, nsops);
738 struct sembuf small_sops[SMALL_SOPS]; 739 struct sembuf small_sops[SMALL_SOPS];
739 struct sembuf *sops; 740 struct sembuf *sops;
740 struct semid_ds *semaptr; 741 struct semid_ds *semaptr;
741 struct sembuf *sopptr = NULL; 742 struct sembuf *sopptr = NULL;
742 struct __sem *semptr = NULL; 743 struct __sem *semptr = NULL;
743 struct sem_undo *suptr = NULL; 744 struct sem_undo *suptr = NULL;
744 kauth_cred_t cred = l->l_cred; 745 kauth_cred_t cred = l->l_cred;
745 int i, error; 746 int i, error;
746 int do_wakeup, do_undos; 747 int do_wakeup, do_undos;
747 748
748 SEM_PRINTF(("call to semop(%d, %p, %zd)\n", semid, SCARG(uap,sops), nsops)); 749 SEM_PRINTF(("call to semop(%d, %p, %zd)\n", semid, SCARG(uap,sops), nsops));
749 750
750 if (__predict_false((p->p_flag & PK_SYSVSEM) == 0)) { 751 if (__predict_false((p->p_flag & PK_SYSVSEM) == 0)) {
751 mutex_enter(p->p_lock); 752 mutex_enter(p->p_lock);
752 p->p_flag |= PK_SYSVSEM; 753 p->p_flag |= PK_SYSVSEM;
753 mutex_exit(p->p_lock); 754 mutex_exit(p->p_lock);
754 } 755 }
755 756
756restart: 757restart:
757 if (nsops <= SMALL_SOPS) { 758 if (nsops <= SMALL_SOPS) {
758 sops = small_sops; 759 sops = small_sops;
759 } else if (nsops <= seminfo.semopm) { 760 } else if (nsops <= seminfo.semopm) {
760 sops = kmem_alloc(nsops * sizeof(*sops), KM_SLEEP); 761 sops = kmem_alloc(nsops * sizeof(*sops), KM_SLEEP);
761 } else { 762 } else {
762 SEM_PRINTF(("too many sops (max=%d, nsops=%zd)\n", 763 SEM_PRINTF(("too many sops (max=%d, nsops=%zd)\n",
763 seminfo.semopm, nsops)); 764 seminfo.semopm, nsops));
764 return (E2BIG); 765 return (E2BIG);
765 } 766 }
766 767
767 error = copyin(SCARG(uap, sops), sops, nsops * sizeof(sops[0])); 768 error = copyin(SCARG(uap, sops), sops, nsops * sizeof(sops[0]));
768 if (error) { 769 if (error) {
769 SEM_PRINTF(("error = %d from copyin(%p, %p, %zd)\n", error, 770 SEM_PRINTF(("error = %d from copyin(%p, %p, %zd)\n", error,
770 SCARG(uap, sops), &sops, nsops * sizeof(sops[0]))); 771 SCARG(uap, sops), &sops, nsops * sizeof(sops[0])));
771 if (sops != small_sops) 772 if (sops != small_sops)
772 kmem_free(sops, nsops * sizeof(*sops)); 773 kmem_free(sops, nsops * sizeof(*sops));
773 return error; 774 return error;
774 } 775 }
775 776
776 mutex_enter(&semlock); 777 mutex_enter(&semlock);
 777 /* If reallocation is in progress, wait for it to complete */ 778 /* If reallocation is in progress, wait for it to complete */
778 while (__predict_false(sem_realloc_state)) 779 while (__predict_false(sem_realloc_state))
779 cv_wait(&sem_realloc_cv, &semlock); 780 cv_wait(&sem_realloc_cv, &semlock);
780 781
781 semid = IPCID_TO_IX(semid); /* Convert back to zero origin */ 782 semid = IPCID_TO_IX(semid); /* Convert back to zero origin */
782 if (semid < 0 || semid >= seminfo.semmni) { 783 if (semid < 0 || semid >= seminfo.semmni) {
783 error = EINVAL; 784 error = EINVAL;
784 goto out; 785 goto out;
785 } 786 }
786 787
787 semaptr = &sema[semid]; 788 semaptr = &sema[semid];
788 seq = IPCID_TO_SEQ(SCARG(uap, semid)); 789 seq = IPCID_TO_SEQ(SCARG(uap, semid));
789 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 || 790 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
790 semaptr->sem_perm._seq != seq) { 791 semaptr->sem_perm._seq != seq) {
791 error = EINVAL; 792 error = EINVAL;
792 goto out; 793 goto out;
793 } 794 }
794 795
795 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W))) { 796 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W))) {
796 SEM_PRINTF(("error = %d from ipcperm\n", error)); 797 SEM_PRINTF(("error = %d from ipcperm\n", error));
797 goto out; 798 goto out;
798 } 799 }
799 800
800 for (i = 0; i < nsops; i++) 801 for (i = 0; i < nsops; i++)
801 if (sops[i].sem_num >= semaptr->sem_nsems) { 802 if (sops[i].sem_num >= semaptr->sem_nsems) {
802 error = EFBIG; 803 error = EFBIG;
803 goto out; 804 goto out;
804 } 805 }
805 806
806 /* 807 /*
807 * Loop trying to satisfy the vector of requests. 808 * Loop trying to satisfy the vector of requests.
808 * If we reach a point where we must wait, any requests already 809 * If we reach a point where we must wait, any requests already
809 * performed are rolled back and we go to sleep until some other 810 * performed are rolled back and we go to sleep until some other
810 * process wakes us up. At this point, we start all over again. 811 * process wakes us up. At this point, we start all over again.
811 * 812 *
812 * This ensures that from the perspective of other tasks, a set 813 * This ensures that from the perspective of other tasks, a set
813 * of requests is atomic (never partially satisfied). 814 * of requests is atomic (never partially satisfied).
814 */ 815 */
815 do_undos = 0; 816 do_undos = 0;
816 817
817 for (;;) { 818 for (;;) {
818 do_wakeup = 0; 819 do_wakeup = 0;
819 820
820 for (i = 0; i < nsops; i++) { 821 for (i = 0; i < nsops; i++) {
821 sopptr = &sops[i]; 822 sopptr = &sops[i];
822 semptr = &semaptr->_sem_base[sopptr->sem_num]; 823 semptr = &semaptr->_sem_base[sopptr->sem_num];
823 824
824 SEM_PRINTF(("semop: semaptr=%p, sem_base=%p, " 825 SEM_PRINTF(("semop: semaptr=%p, sem_base=%p, "
825 "semptr=%p, sem[%d]=%d : op=%d, flag=%s\n", 826 "semptr=%p, sem[%d]=%d : op=%d, flag=%s\n",
826 semaptr, semaptr->_sem_base, semptr, 827 semaptr, semaptr->_sem_base, semptr,
827 sopptr->sem_num, semptr->semval, sopptr->sem_op, 828 sopptr->sem_num, semptr->semval, sopptr->sem_op,
828 (sopptr->sem_flg & IPC_NOWAIT) ? 829 (sopptr->sem_flg & IPC_NOWAIT) ?
829 "nowait" : "wait")); 830 "nowait" : "wait"));
830 831
831 if (sopptr->sem_op < 0) { 832 if (sopptr->sem_op < 0) {
832 if ((int)(semptr->semval + 833 if ((int)(semptr->semval +
833 sopptr->sem_op) < 0) { 834 sopptr->sem_op) < 0) {
834 SEM_PRINTF(("semop: " 835 SEM_PRINTF(("semop: "
835 "can't do it now\n")); 836 "can't do it now\n"));
836 break; 837 break;
837 } else { 838 } else {
838 semptr->semval += sopptr->sem_op; 839 semptr->semval += sopptr->sem_op;
839 if (semptr->semval == 0 && 840 if (semptr->semval == 0 &&
840 semptr->semzcnt > 0) 841 semptr->semzcnt > 0)
841 do_wakeup = 1; 842 do_wakeup = 1;
842 } 843 }
843 if (sopptr->sem_flg & SEM_UNDO) 844 if (sopptr->sem_flg & SEM_UNDO)
844 do_undos = 1; 845 do_undos = 1;
845 } else if (sopptr->sem_op == 0) { 846 } else if (sopptr->sem_op == 0) {
846 if (semptr->semval > 0) { 847 if (semptr->semval > 0) {
847 SEM_PRINTF(("semop: not zero now\n")); 848 SEM_PRINTF(("semop: not zero now\n"));
848 break; 849 break;
849 } 850 }
850 } else { 851 } else {
851 if (semptr->semncnt > 0) 852 if (semptr->semncnt > 0)
852 do_wakeup = 1; 853 do_wakeup = 1;
853 semptr->semval += sopptr->sem_op; 854 semptr->semval += sopptr->sem_op;
854 if (sopptr->sem_flg & SEM_UNDO) 855 if (sopptr->sem_flg & SEM_UNDO)
855 do_undos = 1; 856 do_undos = 1;
856 } 857 }
857 } 858 }
858 859
859 /* 860 /*
860 * Did we get through the entire vector? 861 * Did we get through the entire vector?
861 */ 862 */
862 if (i >= nsops) 863 if (i >= nsops)
863 goto done; 864 goto done;
864 865
865 /* 866 /*
866 * No ... roll back anything that we've already done 867 * No ... roll back anything that we've already done
867 */ 868 */
868 SEM_PRINTF(("semop: rollback 0 through %d\n", i - 1)); 869 SEM_PRINTF(("semop: rollback 0 through %d\n", i - 1));
869 while (i-- > 0) 870 while (i-- > 0)
870 semaptr->_sem_base[sops[i].sem_num].semval -= 871 semaptr->_sem_base[sops[i].sem_num].semval -=
871 sops[i].sem_op; 872 sops[i].sem_op;
872 873
873 /* 874 /*
874 * If the request that we couldn't satisfy has the 875 * If the request that we couldn't satisfy has the
875 * NOWAIT flag set then return with EAGAIN. 876 * NOWAIT flag set then return with EAGAIN.
876 */ 877 */
877 if (sopptr->sem_flg & IPC_NOWAIT) { 878 if (sopptr->sem_flg & IPC_NOWAIT) {
878 error = EAGAIN; 879 error = EAGAIN;
879 goto out; 880 goto out;
880 } 881 }
881 882
882 if (sopptr->sem_op == 0) 883 if (sopptr->sem_op == 0)
883 semptr->semzcnt++; 884 semptr->semzcnt++;
884 else 885 else
885 semptr->semncnt++; 886 semptr->semncnt++;
886 887
887 sem_waiters++; 888 sem_waiters++;
888 SEM_PRINTF(("semop: good night!\n")); 889 SEM_PRINTF(("semop: good night!\n"));
889 error = cv_wait_sig(&semcv[semid], &semlock); 890 error = cv_wait_sig(&semcv[semid], &semlock);
890 SEM_PRINTF(("semop: good morning (error=%d)!\n", error)); 891 SEM_PRINTF(("semop: good morning (error=%d)!\n", error));
891 sem_waiters--; 892 sem_waiters--;
892 893
893 /* Notify reallocator, if it is waiting */ 894 /* Notify reallocator, if it is waiting */
894 cv_broadcast(&sem_realloc_cv); 895 cv_broadcast(&sem_realloc_cv);
895 896
896 /* 897 /*
897 * Make sure that the semaphore still exists 898 * Make sure that the semaphore still exists
898 */ 899 */
899 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 || 900 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
900 semaptr->sem_perm._seq != seq) { 901 semaptr->sem_perm._seq != seq) {
901 error = EIDRM; 902 error = EIDRM;
902 goto out; 903 goto out;
903 } 904 }
904 905
905 /* 906 /*
906 * The semaphore is still alive. Readjust the count of 907 * The semaphore is still alive. Readjust the count of
907 * waiting processes. 908 * waiting processes.
908 */ 909 */
909 semptr = &semaptr->_sem_base[sopptr->sem_num]; 910 semptr = &semaptr->_sem_base[sopptr->sem_num];
910 if (sopptr->sem_op == 0) 911 if (sopptr->sem_op == 0)
911 semptr->semzcnt--; 912 semptr->semzcnt--;
912 else 913 else
913 semptr->semncnt--; 914 semptr->semncnt--;
914 915
915 /* In case of reallocation, restart the call */ 916 /* In case of reallocation, restart the call */
916 if (sem_realloc_state) { 917 if (sem_realloc_state) {
917 mutex_exit(&semlock); 918 mutex_exit(&semlock);
918 goto restart; 919 goto restart;
919 } 920 }
920 921
921 /* Is it really morning, or was our sleep interrupted? */ 922 /* Is it really morning, or was our sleep interrupted? */
922 if (error != 0) { 923 if (error != 0) {
923 error = EINTR; 924 error = EINTR;
924 goto out; 925 goto out;
925 } 926 }
926 SEM_PRINTF(("semop: good morning!\n")); 927 SEM_PRINTF(("semop: good morning!\n"));
927 } 928 }
928 929
929done: 930done:
930 /* 931 /*
931 * Process any SEM_UNDO requests. 932 * Process any SEM_UNDO requests.
932 */ 933 */
933 if (do_undos) { 934 if (do_undos) {
934 for (i = 0; i < nsops; i++) { 935 for (i = 0; i < nsops; i++) {
935 /* 936 /*
936 * We only need to deal with SEM_UNDO for non-zero 937 * We only need to deal with SEM_UNDO for non-zero
937 * ops. 938 * ops.
938 */ 939 */
939 int adjval; 940 int adjval;
940 941
941 if ((sops[i].sem_flg & SEM_UNDO) == 0) 942 if ((sops[i].sem_flg & SEM_UNDO) == 0)
942 continue; 943 continue;
943 adjval = sops[i].sem_op; 944 adjval = sops[i].sem_op;
944 if (adjval == 0) 945 if (adjval == 0)
945 continue; 946 continue;
946 error = semundo_adjust(p, &suptr, semid, 947 error = semundo_adjust(p, &suptr, semid,
947 sops[i].sem_num, -adjval); 948 sops[i].sem_num, -adjval);
948 if (error == 0) 949 if (error == 0)
949 continue; 950 continue;
950 951
951 /* 952 /*
952 * Oh-Oh! We ran out of either sem_undo structures or undos. 953 * Oh-Oh! We ran out of either sem_undo structures or undos.
953 * Roll back the adjustments to this point and then 954 * Roll back the adjustments to this point and then
954 * roll back the semaphore ups and downs so we can return 955 * roll back the semaphore ups and downs so we can return
955 * with an error with all structures restored. We 956 * with an error with all structures restored. We
956 * roll back the undos in the exact reverse order that 957 * roll back the undos in the exact reverse order that
957 * we applied them. This guarantees that we won't run 958 * we applied them. This guarantees that we won't run
958 * out of space as we roll things back out. 959 * out of space as we roll things back out.
959 */ 960 */
960 while (i-- > 0) { 961 while (i-- > 0) {
961 if ((sops[i].sem_flg & SEM_UNDO) == 0) 962 if ((sops[i].sem_flg & SEM_UNDO) == 0)
962 continue; 963 continue;
963 adjval = sops[i].sem_op; 964 adjval = sops[i].sem_op;
964 if (adjval == 0) 965 if (adjval == 0)
965 continue; 966 continue;
966 if (semundo_adjust(p, &suptr, semid, 967 if (semundo_adjust(p, &suptr, semid,
967 sops[i].sem_num, adjval) != 0) 968 sops[i].sem_num, adjval) != 0)
968 panic("semop - can't undo undos"); 969 panic("semop - can't undo undos");
969 } 970 }
970 971
971 for (i = 0; i < nsops; i++) 972 for (i = 0; i < nsops; i++)
972 semaptr->_sem_base[sops[i].sem_num].semval -= 973 semaptr->_sem_base[sops[i].sem_num].semval -=
973 sops[i].sem_op; 974 sops[i].sem_op;
974 975
975 SEM_PRINTF(("error = %d from semundo_adjust\n", error)); 976 SEM_PRINTF(("error = %d from semundo_adjust\n", error));
976 goto out; 977 goto out;
977 } /* loop through the sops */ 978 } /* loop through the sops */
978 } /* if (do_undos) */ 979 } /* if (do_undos) */
979 980
980 /* We're definitely done - set the sempids */ 981 /* We're definitely done - set the sempids */
981 for (i = 0; i < nsops; i++) { 982 for (i = 0; i < nsops; i++) {
982 sopptr = &sops[i]; 983 sopptr = &sops[i];
983 semptr = &semaptr->_sem_base[sopptr->sem_num]; 984 semptr = &semaptr->_sem_base[sopptr->sem_num];
984 semptr->sempid = p->p_pid; 985 semptr->sempid = p->p_pid;
985 } 986 }
986 987
987 /* Update sem_otime */ 988 /* Update sem_otime */
988 semaptr->sem_otime = time_second; 989 semaptr->sem_otime = time_second;
989 990
990 /* Do a wakeup if any semaphore was up'd. */ 991 /* Do a wakeup if any semaphore was up'd. */
991 if (do_wakeup) { 992 if (do_wakeup) {
992 SEM_PRINTF(("semop: doing wakeup\n")); 993 SEM_PRINTF(("semop: doing wakeup\n"));
993 cv_broadcast(&semcv[semid]); 994 cv_broadcast(&semcv[semid]);
994 SEM_PRINTF(("semop: back from wakeup\n")); 995 SEM_PRINTF(("semop: back from wakeup\n"));
995 } 996 }
996 SEM_PRINTF(("semop: done\n")); 997 SEM_PRINTF(("semop: done\n"));
997 *retval = 0; 998 *retval = 0;
998 999
999 out: 1000 out:
1000 mutex_exit(&semlock); 1001 mutex_exit(&semlock);
1001 if (sops != small_sops) 1002 if (sops != small_sops)
1002 kmem_free(sops, nsops * sizeof(*sops)); 1003 kmem_free(sops, nsops * sizeof(*sops));
1003 return error; 1004 return error;
1004} 1005}
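
The all-or-nothing behaviour implemented by the loop above is what userland
relies on when it passes a vector of operations. A minimal sketch (the
semaphore id and the two-semaphore layout are assumptions for illustration):
both operations complete together, or the caller sleeps - never one without
the other.

	#include <sys/sem.h>
	#include <err.h>

	/*
	 * Atomically take semaphore 0 and release semaphore 1.  SEM_UNDO
	 * records an adjustment that semexit() applies if the process
	 * exits without reversing the operations itself.
	 */
	static void
	take_and_give(int semid)
	{
		struct sembuf ops[2] = {
			{ .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO },
			{ .sem_num = 1, .sem_op = +1, .sem_flg = SEM_UNDO },
		};

		if (semop(semid, ops, 2) == -1)
			err(1, "semop");	/* EAGAIN only with IPC_NOWAIT */
	}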
1005 1006
1006/* 1007/*
1007 * Go through the undo structures for this process and apply the 1008 * Go through the undo structures for this process and apply the
1008 * adjustments to semaphores. 1009 * adjustments to semaphores.
1009 */ 1010 */
1010/*ARGSUSED*/ 1011/*ARGSUSED*/
1011void 1012void
1012semexit(struct proc *p, void *v) 1013semexit(struct proc *p, void *v)
1013{ 1014{
1014 struct sem_undo *suptr; 1015 struct sem_undo *suptr;
1015 struct sem_undo **supptr; 1016 struct sem_undo **supptr;
1016 1017
1017 if ((p->p_flag & PK_SYSVSEM) == 0) 1018 if ((p->p_flag & PK_SYSVSEM) == 0)
1018 return; 1019 return;
1019 1020
1020 mutex_enter(&semlock); 1021 mutex_enter(&semlock);
1021 1022
1022 /* 1023 /*
1023 * Go through the chain of undo vectors looking for one 1024 * Go through the chain of undo vectors looking for one
1024 * associated with this process. 1025 * associated with this process.
1025 */ 1026 */
1026 1027
1027 for (supptr = &semu_list; (suptr = *supptr) != NULL; 1028 for (supptr = &semu_list; (suptr = *supptr) != NULL;
1028 supptr = &suptr->un_next) { 1029 supptr = &suptr->un_next) {
1029 if (suptr->un_proc == p) 1030 if (suptr->un_proc == p)
1030 break; 1031 break;
1031 } 1032 }
1032 1033
1033 /* 1034 /*
1034 * If there is no undo vector, skip to the end. 1035 * If there is no undo vector, skip to the end.
1035 */ 1036 */
1036 1037
1037 if (suptr == NULL) { 1038 if (suptr == NULL) {
1038 mutex_exit(&semlock); 1039 mutex_exit(&semlock);
1039 return; 1040 return;
1040 } 1041 }
1041 1042
1042 /* 1043 /*
1043 * We now have an undo vector for this process. 1044 * We now have an undo vector for this process.
1044 */ 1045 */
1045 1046
1046 SEM_PRINTF(("proc @%p has undo structure with %d entries\n", p, 1047 SEM_PRINTF(("proc @%p has undo structure with %d entries\n", p,
1047 suptr->un_cnt)); 1048 suptr->un_cnt));
1048 1049
1049 /* 1050 /*
1050 * If there are any active undo elements then process them. 1051 * If there are any active undo elements then process them.
1051 */ 1052 */
1052 if (suptr->un_cnt > 0) { 1053 if (suptr->un_cnt > 0) {
1053 int ix; 1054 int ix;
1054 1055
1055 for (ix = 0; ix < suptr->un_cnt; ix++) { 1056 for (ix = 0; ix < suptr->un_cnt; ix++) {
1056 int semid = suptr->un_ent[ix].un_id; 1057 int semid = suptr->un_ent[ix].un_id;
1057 int semnum = suptr->un_ent[ix].un_num; 1058 int semnum = suptr->un_ent[ix].un_num;
1058 int adjval = suptr->un_ent[ix].un_adjval; 1059 int adjval = suptr->un_ent[ix].un_adjval;
1059 struct semid_ds *semaptr; 1060 struct semid_ds *semaptr;
1060 1061
1061 semaptr = &sema[semid]; 1062 semaptr = &sema[semid];
1062 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0) 1063 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0)
1063 panic("semexit - semid not allocated"); 1064 panic("semexit - semid not allocated");
1064 if (semnum >= semaptr->sem_nsems) 1065 if (semnum >= semaptr->sem_nsems)
1065 panic("semexit - semnum out of range"); 1066 panic("semexit - semnum out of range");
1066 1067
1067 SEM_PRINTF(("semexit: %p id=%d num=%d(adj=%d) ; " 1068 SEM_PRINTF(("semexit: %p id=%d num=%d(adj=%d) ; "
1068 "sem=%d\n", 1069 "sem=%d\n",
1069 suptr->un_proc, suptr->un_ent[ix].un_id, 1070 suptr->un_proc, suptr->un_ent[ix].un_id,
1070 suptr->un_ent[ix].un_num, 1071 suptr->un_ent[ix].un_num,
1071 suptr->un_ent[ix].un_adjval, 1072 suptr->un_ent[ix].un_adjval,
1072 semaptr->_sem_base[semnum].semval)); 1073 semaptr->_sem_base[semnum].semval));
1073 1074
1074 if (adjval < 0 && 1075 if (adjval < 0 &&
1075 semaptr->_sem_base[semnum].semval < -adjval) 1076 semaptr->_sem_base[semnum].semval < -adjval)
1076 semaptr->_sem_base[semnum].semval = 0; 1077 semaptr->_sem_base[semnum].semval = 0;
1077 else 1078 else
1078 semaptr->_sem_base[semnum].semval += adjval; 1079 semaptr->_sem_base[semnum].semval += adjval;
1079 1080
1080 cv_broadcast(&semcv[semid]); 1081 cv_broadcast(&semcv[semid]);
1081 SEM_PRINTF(("semexit: back from wakeup\n")); 1082 SEM_PRINTF(("semexit: back from wakeup\n"));
1082 } 1083 }
1083 } 1084 }
1084 1085
1085 /* 1086 /*
1086 * Deallocate the undo vector. 1087 * Deallocate the undo vector.
1087 */ 1088 */
1088 SEM_PRINTF(("removing vector\n")); 1089 SEM_PRINTF(("removing vector\n"));
1089 suptr->un_proc = NULL; 1090 suptr->un_proc = NULL;
1090 *supptr = suptr->un_next; 1091 *supptr = suptr->un_next;
1091 mutex_exit(&semlock); 1092 mutex_exit(&semlock);
1092} 1093}
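
In practice the undo processing above means a SEM_UNDO increment is reverted
automatically at process exit. A small sketch under that assumption (the
semaphore id is illustrative); note the clamp above keeps semval at zero
rather than letting an oversized negative adjustment drive it below zero.

	#include <sys/sem.h>
	#include <unistd.h>

	/*
	 * Bump semaphore 0 with SEM_UNDO and exit without undoing it;
	 * semexit() applies the recorded -1 adjustment, so other
	 * processes see semval return to its previous value.
	 */
	static void
	bump_and_exit(int semid)
	{
		struct sembuf up = { .sem_num = 0, .sem_op = 1,
		    .sem_flg = SEM_UNDO };

		if (semop(semid, &up, 1) == 0)
			_exit(0);
	}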
1093 1094
1094/* 1095/*
1095 * Sysctl initialization and nodes. 1096 * Sysctl initialization and nodes.
1096 */ 1097 */
1097 1098
1098static int 1099static int
1099sysctl_ipc_semmni(SYSCTLFN_ARGS) 1100sysctl_ipc_semmni(SYSCTLFN_ARGS)
1100{ 1101{
1101 int newsize, error; 1102 int newsize, error;
1102 struct sysctlnode node; 1103 struct sysctlnode node;
1103 node = *rnode; 1104 node = *rnode;
1104 node.sysctl_data = &newsize; 1105 node.sysctl_data = &newsize;
1105 1106
1106 newsize = seminfo.semmni; 1107 newsize = seminfo.semmni;
1107 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1108 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1108 if (error || newp == NULL) 1109 if (error || newp == NULL)
1109 return error; 1110 return error;
1110 1111
1111 return semrealloc(newsize, seminfo.semmns, seminfo.semmnu); 1112 return semrealloc(newsize, seminfo.semmns, seminfo.semmnu);
1112} 1113}
1113 1114
1114static int 1115static int
1115sysctl_ipc_semmns(SYSCTLFN_ARGS) 1116sysctl_ipc_semmns(SYSCTLFN_ARGS)
1116{ 1117{
1117 int newsize, error; 1118 int newsize, error;
1118 struct sysctlnode node; 1119 struct sysctlnode node;
1119 node = *rnode; 1120 node = *rnode;
1120 node.sysctl_data = &newsize; 1121 node.sysctl_data = &newsize;
1121 1122
1122 newsize = seminfo.semmns; 1123 newsize = seminfo.semmns;
1123 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1124 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1124 if (error || newp == NULL) 1125 if (error || newp == NULL)
1125 return error; 1126 return error;
1126 1127
1127 return semrealloc(seminfo.semmni, newsize, seminfo.semmnu); 1128 return semrealloc(seminfo.semmni, newsize, seminfo.semmnu);
1128} 1129}
1129 1130
1130static int 1131static int
1131sysctl_ipc_semmnu(SYSCTLFN_ARGS) 1132sysctl_ipc_semmnu(SYSCTLFN_ARGS)
1132{ 1133{
1133 int newsize, error; 1134 int newsize, error;
1134 struct sysctlnode node; 1135 struct sysctlnode node;
1135 node = *rnode; 1136 node = *rnode;
1136 node.sysctl_data = &newsize; 1137 node.sysctl_data = &newsize;
1137 1138
1138 newsize = seminfo.semmnu; 1139 newsize = seminfo.semmnu;
1139 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1140 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1140 if (error || newp == NULL) 1141 if (error || newp == NULL)
1141 return error; 1142 return error;
1142 1143
1143 return semrealloc(seminfo.semmni, seminfo.semmns, newsize); 1144 return semrealloc(seminfo.semmni, seminfo.semmns, newsize);
1144} 1145}
1145 1146
1146SYSCTL_SETUP(sysctl_ipc_sem_setup, "sysctl kern.ipc subtree setup") 1147SYSCTL_SETUP(sysctl_ipc_sem_setup, "sysctl kern.ipc subtree setup")
1147{ 1148{
1148 const struct sysctlnode *node = NULL; 1149 const struct sysctlnode *node = NULL;
1149 1150
1150 sysctl_createv(clog, 0, NULL, NULL, 1151 sysctl_createv(clog, 0, NULL, NULL,
1151 CTLFLAG_PERMANENT, 1152 CTLFLAG_PERMANENT,
1152 CTLTYPE_NODE, "kern", NULL, 1153 CTLTYPE_NODE, "kern", NULL,
1153 NULL, 0, NULL, 0, 1154 NULL, 0, NULL, 0,
1154 CTL_KERN, CTL_EOL); 1155 CTL_KERN, CTL_EOL);
1155 sysctl_createv(clog, 0, NULL, &node, 1156 sysctl_createv(clog, 0, NULL, &node,
1156 CTLFLAG_PERMANENT, 1157 CTLFLAG_PERMANENT,
1157 CTLTYPE_NODE, "ipc", 1158 CTLTYPE_NODE, "ipc",
1158 SYSCTL_DESCR("SysV IPC options"), 1159 SYSCTL_DESCR("SysV IPC options"),
1159 NULL, 0, NULL, 0, 1160 NULL, 0, NULL, 0,
1160 CTL_KERN, KERN_SYSVIPC, CTL_EOL); 1161 CTL_KERN, KERN_SYSVIPC, CTL_EOL);
1161 1162
1162 if (node == NULL) 1163 if (node == NULL)
1163 return; 1164 return;
1164 1165
1165 sysctl_createv(clog, 0, &node, NULL, 1166 sysctl_createv(clog, 0, &node, NULL,
1166 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1167 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1167 CTLTYPE_INT, "semmni", 1168 CTLTYPE_INT, "semmni",
1168 SYSCTL_DESCR("Max number of semaphore identifiers"), 1169 SYSCTL_DESCR("Max number of semaphore identifiers"),
1169 sysctl_ipc_semmni, 0, &seminfo.semmni, 0, 1170 sysctl_ipc_semmni, 0, &seminfo.semmni, 0,
1170 CTL_CREATE, CTL_EOL); 1171 CTL_CREATE, CTL_EOL);
1171 sysctl_createv(clog, 0, &node, NULL, 1172 sysctl_createv(clog, 0, &node, NULL,
1172 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1173 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1173 CTLTYPE_INT, "semmns", 1174 CTLTYPE_INT, "semmns",
1174 SYSCTL_DESCR("Max number of semaphores in system"), 1175 SYSCTL_DESCR("Max number of semaphores in system"),
1175 sysctl_ipc_semmns, 0, &seminfo.semmns, 0, 1176 sysctl_ipc_semmns, 0, &seminfo.semmns, 0,
1176 CTL_CREATE, CTL_EOL); 1177 CTL_CREATE, CTL_EOL);
1177 sysctl_createv(clog, 0, &node, NULL, 1178 sysctl_createv(clog, 0, &node, NULL,
1178 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1179 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1179 CTLTYPE_INT, "semmnu", 1180 CTLTYPE_INT, "semmnu",
1180 SYSCTL_DESCR("Max number of undo structures in system"), 1181 SYSCTL_DESCR("Max number of undo structures in system"),
1181 sysctl_ipc_semmnu, 0, &seminfo.semmnu, 0, 1182 sysctl_ipc_semmnu, 0, &seminfo.semmnu, 0,
1182 CTL_CREATE, CTL_EOL); 1183 CTL_CREATE, CTL_EOL);
1183} 1184}
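
Writing to these read-write nodes goes through the handlers above and ends in
semrealloc(), so the limits can be raised at run time. A hedged userland
sketch using sysctlbyname(3) (the new value 128 is arbitrary):

	#include <sys/sysctl.h>
	#include <err.h>
	#include <stdio.h>

	static void
	grow_semmni(void)
	{
		int oval, nval = 128;
		size_t olen = sizeof(oval);

		/* Read the old limit and install the new one in one call. */
		if (sysctlbyname("kern.ipc.semmni", &oval, &olen,
		    &nval, sizeof(nval)) == -1)
			err(1, "sysctlbyname");
		printf("semmni: %d -> %d\n", oval, nval);
	}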

cvs diff -r1.120 -r1.121 src/sys/kern/sysv_shm.c

--- src/sys/kern/sysv_shm.c 2011/06/12 03:35:56 1.120
+++ src/sys/kern/sysv_shm.c 2011/07/30 06:19:02 1.121
@@ -1,1092 +1,1093 @@ @@ -1,1092 +1,1093 @@
1/* $NetBSD: sysv_shm.c,v 1.120 2011/06/12 03:35:56 rmind Exp $ */ 1/* $NetBSD: sysv_shm.c,v 1.121 2011/07/30 06:19:02 uebayasi Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc. 4 * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Mindaugas Rasiukevicius. 9 * NASA Ames Research Center, and by Mindaugas Rasiukevicius.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer. 15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright 16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the 17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution. 18 * documentation and/or other materials provided with the distribution.
19 * 19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE. 30 * POSSIBILITY OF SUCH DAMAGE.
31 */ 31 */
32 32
33/* 33/*
34 * Copyright (c) 1994 Adam Glass and Charles M. Hannum. All rights reserved. 34 * Copyright (c) 1994 Adam Glass and Charles M. Hannum. All rights reserved.
35 * 35 *
36 * Redistribution and use in source and binary forms, with or without 36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions 37 * modification, are permitted provided that the following conditions
38 * are met: 38 * are met:
39 * 1. Redistributions of source code must retain the above copyright 39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer. 40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright 41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the 42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution. 43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software 44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement: 45 * must display the following acknowledgement:
46 * This product includes software developed by Adam Glass and Charles M. 46 * This product includes software developed by Adam Glass and Charles M.
47 * Hannum. 47 * Hannum.
48 * 4. The names of the authors may not be used to endorse or promote products 48 * 4. The names of the authors may not be used to endorse or promote products
49 * derived from this software without specific prior written permission. 49 * derived from this software without specific prior written permission.
50 * 50 *
51 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 51 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
52 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 52 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
53 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 53 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
54 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, 54 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
55 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 55 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
56 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 56 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
57 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 57 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
58 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 58 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
59 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 59 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
60 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 60 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
61 */ 61 */
62 62
63#include <sys/cdefs.h> 63#include <sys/cdefs.h>
64__KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.120 2011/06/12 03:35:56 rmind Exp $"); 64__KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.121 2011/07/30 06:19:02 uebayasi Exp $");
65 65
66#define SYSVSHM 66#define SYSVSHM
67 67
68#include <sys/param.h> 68#include <sys/param.h>
69#include <sys/kernel.h> 69#include <sys/kernel.h>
70#include <sys/kmem.h> 70#include <sys/kmem.h>
71#include <sys/shm.h> 71#include <sys/shm.h>
72#include <sys/mutex.h> 72#include <sys/mutex.h>
73#include <sys/mman.h> 73#include <sys/mman.h>
74#include <sys/stat.h> 74#include <sys/stat.h>
75#include <sys/sysctl.h> 75#include <sys/sysctl.h>
76#include <sys/mount.h> /* XXX for <sys/syscallargs.h> */ 76#include <sys/mount.h> /* XXX for <sys/syscallargs.h> */
77#include <sys/syscallargs.h> 77#include <sys/syscallargs.h>
78#include <sys/queue.h> 78#include <sys/queue.h>
79#include <sys/kauth.h> 79#include <sys/kauth.h>
80 80
81#include <uvm/uvm_extern.h> 81#include <uvm/uvm_extern.h>
82#include <uvm/uvm_object.h> 82#include <uvm/uvm_object.h>
83 83
84struct shmmap_entry { 84struct shmmap_entry {
85 SLIST_ENTRY(shmmap_entry) next; 85 SLIST_ENTRY(shmmap_entry) next;
86 vaddr_t va; 86 vaddr_t va;
87 int shmid; 87 int shmid;
88}; 88};
89 89
90int shm_nused __cacheline_aligned; 90int shm_nused __cacheline_aligned;
91struct shmid_ds * shmsegs __read_mostly; 91struct shmid_ds * shmsegs __read_mostly;
92 92
93static kmutex_t shm_lock __cacheline_aligned; 93static kmutex_t shm_lock __cacheline_aligned;
94static kcondvar_t * shm_cv __cacheline_aligned; 94static kcondvar_t * shm_cv __cacheline_aligned;
95static int shm_last_free __cacheline_aligned; 95static int shm_last_free __cacheline_aligned;
96static size_t shm_committed __cacheline_aligned; 96static size_t shm_committed __cacheline_aligned;
97static int shm_use_phys __read_mostly; 97static int shm_use_phys __read_mostly;
98 98
99static kcondvar_t shm_realloc_cv; 99static kcondvar_t shm_realloc_cv;
100static bool shm_realloc_state; 100static bool shm_realloc_state;
101static u_int shm_realloc_disable; 101static u_int shm_realloc_disable;
102 102
103struct shmmap_state { 103struct shmmap_state {
104 unsigned int nitems; 104 unsigned int nitems;
105 unsigned int nrefs; 105 unsigned int nrefs;
106 SLIST_HEAD(, shmmap_entry) entries; 106 SLIST_HEAD(, shmmap_entry) entries;
107}; 107};
108 108
109#ifdef SHMDEBUG 109#ifdef SHMDEBUG
110#define SHMPRINTF(a) printf a 110#define SHMPRINTF(a) printf a
111#else 111#else
112#define SHMPRINTF(a) 112#define SHMPRINTF(a)
113#endif 113#endif
114 114
115static int shmrealloc(int); 115static int shmrealloc(int);
116 116
117/* 117/*
118 * Find the shared memory segment by the identifier. 118 * Find the shared memory segment by the identifier.
119 * => must be called with shm_lock held; 119 * => must be called with shm_lock held;
120 */ 120 */
121static struct shmid_ds * 121static struct shmid_ds *
122shm_find_segment_by_shmid(int shmid) 122shm_find_segment_by_shmid(int shmid)
123{ 123{
124 int segnum; 124 int segnum;
125 struct shmid_ds *shmseg; 125 struct shmid_ds *shmseg;
126 126
127 KASSERT(mutex_owned(&shm_lock)); 127 KASSERT(mutex_owned(&shm_lock));
128 128
129 segnum = IPCID_TO_IX(shmid); 129 segnum = IPCID_TO_IX(shmid);
130 if (segnum < 0 || segnum >= shminfo.shmmni) 130 if (segnum < 0 || segnum >= shminfo.shmmni)
131 return NULL; 131 return NULL;
132 shmseg = &shmsegs[segnum]; 132 shmseg = &shmsegs[segnum];
133 if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0) 133 if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0)
134 return NULL; 134 return NULL;
135 if ((shmseg->shm_perm.mode & 135 if ((shmseg->shm_perm.mode &
136 (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED) 136 (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED)
137 return NULL; 137 return NULL;
138 if (shmseg->shm_perm._seq != IPCID_TO_SEQ(shmid)) 138 if (shmseg->shm_perm._seq != IPCID_TO_SEQ(shmid))
139 return NULL; 139 return NULL;
140 140
141 return shmseg; 141 return shmseg;
142} 142}
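
The _seq comparison above is what defends against stale identifiers: an IPC
id packs a slot index in its low bits and a generation sequence above them
(see IPCID_TO_IX()/IPCID_TO_SEQ() in <sys/ipc.h>), and recycling a slot bumps
the sequence. A sketch of the idea, not the authoritative macro definitions:

	/*
	 * Illustrative decoding of an IPC id (assumed bit layout).  A
	 * stale id decodes to the right slot but the wrong sequence, so
	 * the lookup fails instead of naming the slot's new occupant.
	 */
	static void
	ipcid_decode(int id, int *ix, int *seq)
	{
		*ix = id & 0xffff;		/* slot index (assumption) */
		*seq = (id >> 16) & 0xffff;	/* generation (assumption) */
	}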
143 143
144/* 144/*
145 * Free memory segment. 145 * Free memory segment.
146 * => must be called with shm_lock held; 146 * => must be called with shm_lock held;
147 */ 147 */
148static void 148static void
149shm_free_segment(int segnum) 149shm_free_segment(int segnum)
150{ 150{
151 struct shmid_ds *shmseg; 151 struct shmid_ds *shmseg;
152 size_t size; 152 size_t size;
153 bool wanted; 153 bool wanted;
154 154
155 KASSERT(mutex_owned(&shm_lock)); 155 KASSERT(mutex_owned(&shm_lock));
156 156
157 shmseg = &shmsegs[segnum]; 157 shmseg = &shmsegs[segnum];
158 SHMPRINTF(("shm freeing key 0x%lx seq 0x%x\n", 158 SHMPRINTF(("shm freeing key 0x%lx seq 0x%x\n",
159 shmseg->shm_perm._key, shmseg->shm_perm._seq)); 159 shmseg->shm_perm._key, shmseg->shm_perm._seq));
160 160
161 size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; 161 size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
162 wanted = (shmseg->shm_perm.mode & SHMSEG_WANTED); 162 wanted = (shmseg->shm_perm.mode & SHMSEG_WANTED);
163 163
164 shmseg->_shm_internal = NULL; 164 shmseg->_shm_internal = NULL;
165 shm_committed -= btoc(size); 165 shm_committed -= btoc(size);
166 shm_nused--; 166 shm_nused--;
167 shmseg->shm_perm.mode = SHMSEG_FREE; 167 shmseg->shm_perm.mode = SHMSEG_FREE;
168 shm_last_free = segnum; 168 shm_last_free = segnum;
169 if (wanted == true) 169 if (wanted == true)
170 cv_broadcast(&shm_cv[segnum]); 170 cv_broadcast(&shm_cv[segnum]);
171} 171}
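
The size recomputed above uses the standard round-up-to-page idiom,
(len + PGOFSET) & ~PGOFSET. A one-function sketch assuming 4 KiB pages
(PGOFSET == 0xfff):

	#include <stddef.h>

	/* Round a byte count up to whole pages: 5000 -> 8192, 4096 -> 4096. */
	static size_t
	round_to_page(size_t len)
	{
		return (len + 0xfff) & ~(size_t)0xfff;
	}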
172 172
173/* 173/*
174 * Delete entry from the shm map. 174 * Delete entry from the shm map.
175 * => must be called with shm_lock held; 175 * => must be called with shm_lock held;
176 */ 176 */
177static struct uvm_object * 177static struct uvm_object *
178shm_delete_mapping(struct shmmap_state *shmmap_s, 178shm_delete_mapping(struct shmmap_state *shmmap_s,
179 struct shmmap_entry *shmmap_se) 179 struct shmmap_entry *shmmap_se)
180{ 180{
181 struct uvm_object *uobj = NULL; 181 struct uvm_object *uobj = NULL;
182 struct shmid_ds *shmseg; 182 struct shmid_ds *shmseg;
183 int segnum; 183 int segnum;
184 184
185 KASSERT(mutex_owned(&shm_lock)); 185 KASSERT(mutex_owned(&shm_lock));
186 186
187 segnum = IPCID_TO_IX(shmmap_se->shmid); 187 segnum = IPCID_TO_IX(shmmap_se->shmid);
188 shmseg = &shmsegs[segnum]; 188 shmseg = &shmsegs[segnum];
189 SLIST_REMOVE(&shmmap_s->entries, shmmap_se, shmmap_entry, next); 189 SLIST_REMOVE(&shmmap_s->entries, shmmap_se, shmmap_entry, next);
190 shmmap_s->nitems--; 190 shmmap_s->nitems--;
191 shmseg->shm_dtime = time_second; 191 shmseg->shm_dtime = time_second;
192 if ((--shmseg->shm_nattch <= 0) && 192 if ((--shmseg->shm_nattch <= 0) &&
193 (shmseg->shm_perm.mode & SHMSEG_REMOVED)) { 193 (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
194 uobj = shmseg->_shm_internal; 194 uobj = shmseg->_shm_internal;
195 shm_free_segment(segnum); 195 shm_free_segment(segnum);
196 } 196 }
197 197
198 return uobj; 198 return uobj;
199} 199}
200 200
201/* 201/*
202 * Get a non-shared shm map for that vmspace. Note, that memory 202 * Get a non-shared shm map for that vmspace. Note, that memory
203 * allocation might be performed with lock held. 203 * allocation might be performed with lock held.
204 */ 204 */
205static struct shmmap_state * 205static struct shmmap_state *
206shmmap_getprivate(struct proc *p) 206shmmap_getprivate(struct proc *p)
207{ 207{
208 struct shmmap_state *oshmmap_s, *shmmap_s; 208 struct shmmap_state *oshmmap_s, *shmmap_s;
209 struct shmmap_entry *oshmmap_se, *shmmap_se; 209 struct shmmap_entry *oshmmap_se, *shmmap_se;
210 210
211 KASSERT(mutex_owned(&shm_lock)); 211 KASSERT(mutex_owned(&shm_lock));
212 212
213 /* 1. A shm map with refcnt = 1, used by ourselves, thus return */ 213 /* 1. A shm map with refcnt = 1, used by ourselves, thus return */
214 oshmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm; 214 oshmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
215 if (oshmmap_s && oshmmap_s->nrefs == 1) 215 if (oshmmap_s && oshmmap_s->nrefs == 1)
216 return oshmmap_s; 216 return oshmmap_s;
217 217
218 /* 2. No shm map present - create a fresh one */ 218 /* 2. No shm map present - create a fresh one */
219 shmmap_s = kmem_zalloc(sizeof(struct shmmap_state), KM_SLEEP); 219 shmmap_s = kmem_zalloc(sizeof(struct shmmap_state), KM_SLEEP);
220 shmmap_s->nrefs = 1; 220 shmmap_s->nrefs = 1;
221 SLIST_INIT(&shmmap_s->entries); 221 SLIST_INIT(&shmmap_s->entries);
222 p->p_vmspace->vm_shm = (void *)shmmap_s; 222 p->p_vmspace->vm_shm = (void *)shmmap_s;
223 223
224 if (oshmmap_s == NULL) 224 if (oshmmap_s == NULL)
225 return shmmap_s; 225 return shmmap_s;
226 226
227 SHMPRINTF(("shmmap_getprivate: vm %p split (%d entries), was used by %d\n", 227 SHMPRINTF(("shmmap_getprivate: vm %p split (%d entries), was used by %d\n",
228 p->p_vmspace, oshmmap_s->nitems, oshmmap_s->nrefs)); 228 p->p_vmspace, oshmmap_s->nitems, oshmmap_s->nrefs));
229 229
230 /* 3. A shared shm map, copy to a fresh one and adjust refcounts */ 230 /* 3. A shared shm map, copy to a fresh one and adjust refcounts */
231 SLIST_FOREACH(oshmmap_se, &oshmmap_s->entries, next) { 231 SLIST_FOREACH(oshmmap_se, &oshmmap_s->entries, next) {
232 shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP); 232 shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP);
233 shmmap_se->va = oshmmap_se->va; 233 shmmap_se->va = oshmmap_se->va;
234 shmmap_se->shmid = oshmmap_se->shmid; 234 shmmap_se->shmid = oshmmap_se->shmid;
235 SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next); 235 SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
236 } 236 }
237 shmmap_s->nitems = oshmmap_s->nitems; 237 shmmap_s->nitems = oshmmap_s->nitems;
238 oshmmap_s->nrefs--; 238 oshmmap_s->nrefs--;
239 239
240 return shmmap_s; 240 return shmmap_s;
241} 241}
242 242
243/* 243/*
244 * Lock/unlock the memory. 244 * Lock/unlock the memory.
245 * => must be called with shm_lock held; 245 * => must be called with shm_lock held;
246 * => called from one place, thus, inline; 246 * => called from one place, thus, inline;
247 */ 247 */
248static inline int 248static inline int
249shm_memlock(struct lwp *l, struct shmid_ds *shmseg, int shmid, int cmd) 249shm_memlock(struct lwp *l, struct shmid_ds *shmseg, int shmid, int cmd)
250{ 250{
251 struct proc *p = l->l_proc; 251 struct proc *p = l->l_proc;
252 struct shmmap_entry *shmmap_se; 252 struct shmmap_entry *shmmap_se;
253 struct shmmap_state *shmmap_s; 253 struct shmmap_state *shmmap_s;
254 size_t size; 254 size_t size;
255 int error; 255 int error;
256 256
257 KASSERT(mutex_owned(&shm_lock)); 257 KASSERT(mutex_owned(&shm_lock));
258 shmmap_s = shmmap_getprivate(p); 258 shmmap_s = shmmap_getprivate(p);
259 259
260 /* Find our shared memory address by shmid */ 260 /* Find our shared memory address by shmid */
261 SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) { 261 SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
262 if (shmmap_se->shmid != shmid) 262 if (shmmap_se->shmid != shmid)
263 continue; 263 continue;
264 264
265 size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; 265 size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
266 266
267 if (cmd == SHM_LOCK && 267 if (cmd == SHM_LOCK &&
268 (shmseg->shm_perm.mode & SHMSEG_WIRED) == 0) { 268 (shmseg->shm_perm.mode & SHMSEG_WIRED) == 0) {
269 /* Wire the object and map, then tag it */ 269 /* Wire the object and map, then tag it */
270 error = uvm_obj_wirepages(shmseg->_shm_internal, 270 error = uvm_obj_wirepages(shmseg->_shm_internal,
271 0, size); 271 0, size);
272 if (error) 272 if (error)
273 return EIO; 273 return EIO;
274 error = uvm_map_pageable(&p->p_vmspace->vm_map, 274 error = uvm_map_pageable(&p->p_vmspace->vm_map,
275 shmmap_se->va, shmmap_se->va + size, false, 0); 275 shmmap_se->va, shmmap_se->va + size, false, 0);
276 if (error) { 276 if (error) {
277 uvm_obj_unwirepages(shmseg->_shm_internal, 277 uvm_obj_unwirepages(shmseg->_shm_internal,
278 0, size); 278 0, size);
279 if (error == EFAULT) 279 if (error == EFAULT)
280 error = ENOMEM; 280 error = ENOMEM;
281 return error; 281 return error;
282 } 282 }
283 shmseg->shm_perm.mode |= SHMSEG_WIRED; 283 shmseg->shm_perm.mode |= SHMSEG_WIRED;
284 284
285 } else if (cmd == SHM_UNLOCK && 285 } else if (cmd == SHM_UNLOCK &&
286 (shmseg->shm_perm.mode & SHMSEG_WIRED) != 0) { 286 (shmseg->shm_perm.mode & SHMSEG_WIRED) != 0) {
287 /* Unwire the object and map, then untag it */ 287 /* Unwire the object and map, then untag it */
288 uvm_obj_unwirepages(shmseg->_shm_internal, 0, size); 288 uvm_obj_unwirepages(shmseg->_shm_internal, 0, size);
289 error = uvm_map_pageable(&p->p_vmspace->vm_map, 289 error = uvm_map_pageable(&p->p_vmspace->vm_map,
290 shmmap_se->va, shmmap_se->va + size, true, 0); 290 shmmap_se->va, shmmap_se->va + size, true, 0);
291 if (error) 291 if (error)
292 return EIO; 292 return EIO;
293 shmseg->shm_perm.mode &= ~SHMSEG_WIRED; 293 shmseg->shm_perm.mode &= ~SHMSEG_WIRED;
294 } 294 }
295 } 295 }
296 296
297 return 0; 297 return 0;
298} 298}
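
This path is reached from shmctl1() below with cmd SHM_LOCK or SHM_UNLOCK.
A minimal userland sketch (superuser is required by the kauth check in
shmctl1(), and the segment id is assumed valid):

	#include <sys/shm.h>
	#include <err.h>

	/* Wire a segment's pages so they stay resident until SHM_UNLOCK. */
	static void
	lock_segment(int id)
	{
		if (shmctl(id, SHM_LOCK, NULL) == -1)
			err(1, "shmctl(SHM_LOCK)");
	}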
299 299
300/* 300/*
301 * Unmap shared memory. 301 * Unmap shared memory.
302 */ 302 */
303int 303int
304sys_shmdt(struct lwp *l, const struct sys_shmdt_args *uap, register_t *retval) 304sys_shmdt(struct lwp *l, const struct sys_shmdt_args *uap, register_t *retval)
305{ 305{
306 /* { 306 /* {
307 syscallarg(const void *) shmaddr; 307 syscallarg(const void *) shmaddr;
308 } */ 308 } */
309 struct proc *p = l->l_proc; 309 struct proc *p = l->l_proc;
310 struct shmmap_state *shmmap_s1, *shmmap_s; 310 struct shmmap_state *shmmap_s1, *shmmap_s;
311 struct shmmap_entry *shmmap_se; 311 struct shmmap_entry *shmmap_se;
312 struct uvm_object *uobj; 312 struct uvm_object *uobj;
313 struct shmid_ds *shmseg; 313 struct shmid_ds *shmseg;
314 size_t size; 314 size_t size;
315 315
316 mutex_enter(&shm_lock); 316 mutex_enter(&shm_lock);
317 /* In case of reallocation, we will wait for completion */ 317 /* In case of reallocation, we will wait for completion */
318 while (__predict_false(shm_realloc_state)) 318 while (__predict_false(shm_realloc_state))
319 cv_wait(&shm_realloc_cv, &shm_lock); 319 cv_wait(&shm_realloc_cv, &shm_lock);
320 320
321 shmmap_s1 = (struct shmmap_state *)p->p_vmspace->vm_shm; 321 shmmap_s1 = (struct shmmap_state *)p->p_vmspace->vm_shm;
322 if (shmmap_s1 == NULL) { 322 if (shmmap_s1 == NULL) {
323 mutex_exit(&shm_lock); 323 mutex_exit(&shm_lock);
324 return EINVAL; 324 return EINVAL;
325 } 325 }
326 326
327 /* Find the map entry */ 327 /* Find the map entry */
328 SLIST_FOREACH(shmmap_se, &shmmap_s1->entries, next) 328 SLIST_FOREACH(shmmap_se, &shmmap_s1->entries, next)
329 if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr)) 329 if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
330 break; 330 break;
331 if (shmmap_se == NULL) { 331 if (shmmap_se == NULL) {
332 mutex_exit(&shm_lock); 332 mutex_exit(&shm_lock);
333 return EINVAL; 333 return EINVAL;
334 } 334 }
335 335
336 shmmap_s = shmmap_getprivate(p); 336 shmmap_s = shmmap_getprivate(p);
337 if (shmmap_s != shmmap_s1) { 337 if (shmmap_s != shmmap_s1) {
338 /* Map has been copied, lookup entry in new map */ 338 /* Map has been copied, lookup entry in new map */
339 SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) 339 SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
340 if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr)) 340 if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
341 break; 341 break;
342 if (shmmap_se == NULL) { 342 if (shmmap_se == NULL) {
343 mutex_exit(&shm_lock); 343 mutex_exit(&shm_lock);
344 return EINVAL; 344 return EINVAL;
345 } 345 }
346 } 346 }
347 347
348 SHMPRINTF(("shmdt: vm %p: remove %d @%lx\n", 348 SHMPRINTF(("shmdt: vm %p: remove %d @%lx\n",
349 p->p_vmspace, shmmap_se->shmid, shmmap_se->va)); 349 p->p_vmspace, shmmap_se->shmid, shmmap_se->va));
350 350
351 /* Delete the entry from shm map */ 351 /* Delete the entry from shm map */
352 uobj = shm_delete_mapping(shmmap_s, shmmap_se); 352 uobj = shm_delete_mapping(shmmap_s, shmmap_se);
353 shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)]; 353 shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
354 size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; 354 size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
355 mutex_exit(&shm_lock); 355 mutex_exit(&shm_lock);
356 356
357 uvm_deallocate(&p->p_vmspace->vm_map, shmmap_se->va, size); 357 uvm_deallocate(&p->p_vmspace->vm_map, shmmap_se->va, size);
358 if (uobj != NULL) { 358 if (uobj != NULL) {
359 uao_detach(uobj); 359 uao_detach(uobj);
360 } 360 }
361 kmem_free(shmmap_se, sizeof(struct shmmap_entry)); 361 kmem_free(shmmap_se, sizeof(struct shmmap_entry));
362 362
363 return 0; 363 return 0;
364} 364}
365 365
366/* 366/*
367 * Map shared memory. 367 * Map shared memory.
368 */ 368 */
369int 369int
370sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval) 370sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval)
371{ 371{
372 /* { 372 /* {
373 syscallarg(int) shmid; 373 syscallarg(int) shmid;
374 syscallarg(const void *) shmaddr; 374 syscallarg(const void *) shmaddr;
375 syscallarg(int) shmflg; 375 syscallarg(int) shmflg;
376 } */ 376 } */
377 int error, flags = 0; 377 int error, flags = 0;
378 struct proc *p = l->l_proc; 378 struct proc *p = l->l_proc;
379 kauth_cred_t cred = l->l_cred; 379 kauth_cred_t cred = l->l_cred;
380 struct shmid_ds *shmseg; 380 struct shmid_ds *shmseg;
381 struct shmmap_state *shmmap_s; 381 struct shmmap_state *shmmap_s;
382 struct shmmap_entry *shmmap_se; 382 struct shmmap_entry *shmmap_se;
383 struct uvm_object *uobj; 383 struct uvm_object *uobj;
384 struct vmspace *vm; 384 struct vmspace *vm;
385 vaddr_t attach_va; 385 vaddr_t attach_va;
386 vm_prot_t prot; 386 vm_prot_t prot;
387 vsize_t size; 387 vsize_t size;
388 388
389 /* Allocate a new map entry and set it */ 389 /* Allocate a new map entry and set it */
390 shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP); 390 shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP);
391 shmmap_se->shmid = SCARG(uap, shmid); 391 shmmap_se->shmid = SCARG(uap, shmid);
392 392
393 mutex_enter(&shm_lock); 393 mutex_enter(&shm_lock);
394 /* In case of reallocation, we will wait for completion */ 394 /* In case of reallocation, we will wait for completion */
395 while (__predict_false(shm_realloc_state)) 395 while (__predict_false(shm_realloc_state))
396 cv_wait(&shm_realloc_cv, &shm_lock); 396 cv_wait(&shm_realloc_cv, &shm_lock);
397 397
398 shmseg = shm_find_segment_by_shmid(SCARG(uap, shmid)); 398 shmseg = shm_find_segment_by_shmid(SCARG(uap, shmid));
399 if (shmseg == NULL) { 399 if (shmseg == NULL) {
400 error = EINVAL; 400 error = EINVAL;
401 goto err; 401 goto err;
402 } 402 }
403 error = ipcperm(cred, &shmseg->shm_perm, 403 error = ipcperm(cred, &shmseg->shm_perm,
404 (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W); 404 (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
405 if (error) 405 if (error)
406 goto err; 406 goto err;
407 407
408 vm = p->p_vmspace; 408 vm = p->p_vmspace;
409 shmmap_s = (struct shmmap_state *)vm->vm_shm; 409 shmmap_s = (struct shmmap_state *)vm->vm_shm;
410 if (shmmap_s && shmmap_s->nitems >= shminfo.shmseg) { 410 if (shmmap_s && shmmap_s->nitems >= shminfo.shmseg) {
411 error = EMFILE; 411 error = EMFILE;
412 goto err; 412 goto err;
413 } 413 }
414 414
415 size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; 415 size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
416 prot = VM_PROT_READ; 416 prot = VM_PROT_READ;
417 if ((SCARG(uap, shmflg) & SHM_RDONLY) == 0) 417 if ((SCARG(uap, shmflg) & SHM_RDONLY) == 0)
418 prot |= VM_PROT_WRITE; 418 prot |= VM_PROT_WRITE;
419 if (SCARG(uap, shmaddr)) { 419 if (SCARG(uap, shmaddr)) {
420 flags |= UVM_FLAG_FIXED; 420 flags |= UVM_FLAG_FIXED;
421 if (SCARG(uap, shmflg) & SHM_RND) 421 if (SCARG(uap, shmflg) & SHM_RND)
422 attach_va = 422 attach_va =
423 (vaddr_t)SCARG(uap, shmaddr) & ~(SHMLBA-1); 423 (vaddr_t)SCARG(uap, shmaddr) & ~(SHMLBA-1);
424 else if (((vaddr_t)SCARG(uap, shmaddr) & (SHMLBA-1)) == 0) 424 else if (((vaddr_t)SCARG(uap, shmaddr) & (SHMLBA-1)) == 0)
425 attach_va = (vaddr_t)SCARG(uap, shmaddr); 425 attach_va = (vaddr_t)SCARG(uap, shmaddr);
426 else { 426 else {
427 error = EINVAL; 427 error = EINVAL;
428 goto err; 428 goto err;
429 } 429 }
430 } else { 430 } else {
431 /* This is just a hint to uvm_map() about where to put it. */ 431 /* This is just a hint to uvm_map() about where to put it. */
432 attach_va = p->p_emul->e_vm_default_addr(p, 432 attach_va = p->p_emul->e_vm_default_addr(p,
433 (vaddr_t)vm->vm_daddr, size); 433 (vaddr_t)vm->vm_daddr, size);
434 } 434 }
435 435
436 /* 436 /*
437 * Create a map entry, add it to the list and increase the counters. 437 * Create a map entry, add it to the list and increase the counters.
438 * The lock will be dropped before the mapping, disable reallocation. 438 * The lock will be dropped before the mapping, disable reallocation.
439 */ 439 */
440 shmmap_s = shmmap_getprivate(p); 440 shmmap_s = shmmap_getprivate(p);
441 SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next); 441 SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
442 shmmap_s->nitems++; 442 shmmap_s->nitems++;
443 shmseg->shm_lpid = p->p_pid; 443 shmseg->shm_lpid = p->p_pid;
444 shmseg->shm_nattch++; 444 shmseg->shm_nattch++;
445 shm_realloc_disable++; 445 shm_realloc_disable++;
446 mutex_exit(&shm_lock); 446 mutex_exit(&shm_lock);
447 447
448 /* 448 /*
449 * Add a reference to the memory object, map it to the 449 * Add a reference to the memory object, map it to the
450 * address space, and lock the memory, if needed. 450 * address space, and lock the memory, if needed.
451 */ 451 */
452 uobj = shmseg->_shm_internal; 452 uobj = shmseg->_shm_internal;
453 uao_reference(uobj); 453 uao_reference(uobj);
454 error = uvm_map(&vm->vm_map, &attach_va, size, uobj, 0, 0, 454 error = uvm_map(&vm->vm_map, &attach_va, size, uobj, 0, 0,
455 UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, flags)); 455 UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, flags));
456 if (error) 456 if (error)
457 goto err_detach; 457 goto err_detach;
458 if (shm_use_phys || (shmseg->shm_perm.mode & SHMSEG_WIRED)) { 458 if (shm_use_phys || (shmseg->shm_perm.mode & SHMSEG_WIRED)) {
459 error = uvm_map_pageable(&vm->vm_map, attach_va, 459 error = uvm_map_pageable(&vm->vm_map, attach_va,
460 attach_va + size, false, 0); 460 attach_va + size, false, 0);
461 if (error) { 461 if (error) {
462 if (error == EFAULT) 462 if (error == EFAULT)
463 error = ENOMEM; 463 error = ENOMEM;
464 uvm_deallocate(&vm->vm_map, attach_va, size); 464 uvm_deallocate(&vm->vm_map, attach_va, size);
465 goto err_detach; 465 goto err_detach;
466 } 466 }
467 } 467 }
468 468
469 /* Set the new address, and update the time */ 469 /* Set the new address, and update the time */
470 mutex_enter(&shm_lock); 470 mutex_enter(&shm_lock);
471 shmmap_se->va = attach_va; 471 shmmap_se->va = attach_va;
472 shmseg->shm_atime = time_second; 472 shmseg->shm_atime = time_second;
473 shm_realloc_disable--; 473 shm_realloc_disable--;
474 retval[0] = attach_va; 474 retval[0] = attach_va;
475 SHMPRINTF(("shmat: vm %p: add %d @%lx\n", 475 SHMPRINTF(("shmat: vm %p: add %d @%lx\n",
476 p->p_vmspace, shmmap_se->shmid, attach_va)); 476 p->p_vmspace, shmmap_se->shmid, attach_va));
477err: 477err:
478 cv_broadcast(&shm_realloc_cv); 478 cv_broadcast(&shm_realloc_cv);
479 mutex_exit(&shm_lock); 479 mutex_exit(&shm_lock);
480 if (error && shmmap_se) { 480 if (error && shmmap_se) {
481 kmem_free(shmmap_se, sizeof(struct shmmap_entry)); 481 kmem_free(shmmap_se, sizeof(struct shmmap_entry));
482 } 482 }
483 return error; 483 return error;
484 484
485err_detach: 485err_detach:
486 uao_detach(uobj); 486 uao_detach(uobj);
487 mutex_enter(&shm_lock); 487 mutex_enter(&shm_lock);
488 uobj = shm_delete_mapping(shmmap_s, shmmap_se); 488 uobj = shm_delete_mapping(shmmap_s, shmmap_se);
489 shm_realloc_disable--; 489 shm_realloc_disable--;
490 cv_broadcast(&shm_realloc_cv); 490 cv_broadcast(&shm_realloc_cv);
491 mutex_exit(&shm_lock); 491 mutex_exit(&shm_lock);
492 if (uobj != NULL) { 492 if (uobj != NULL) {
493 uao_detach(uobj); 493 uao_detach(uobj);
494 } 494 }
495 kmem_free(shmmap_se, sizeof(struct shmmap_entry)); 495 kmem_free(shmmap_se, sizeof(struct shmmap_entry));
496 return error; 496 return error;
497} 497}
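
A minimal userland round trip through sys_shmat()/sys_shmdt() (key, size and
mode are illustrative). Note that shmdt() must be handed exactly the address
shmat() returned, because the detach path looks the mapping up by va.

	#include <sys/ipc.h>
	#include <sys/shm.h>
	#include <string.h>
	#include <err.h>

	static void
	shm_round_trip(void)
	{
		int id = shmget(IPC_PRIVATE, 8192, IPC_CREAT | 0600);
		char *p;

		if (id == -1)
			err(1, "shmget");
		p = shmat(id, NULL, 0);		/* kernel picks attach_va */
		if (p == (void *)-1)
			err(1, "shmat");
		memcpy(p, "hello", 6);
		if (shmdt(p) == -1)		/* exact va required */
			err(1, "shmdt");
	}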
498 498
499/* 499/*
500 * Shared memory control operations. 500 * Shared memory control operations.
501 */ 501 */
502int 502int
503sys___shmctl50(struct lwp *l, const struct sys___shmctl50_args *uap, 503sys___shmctl50(struct lwp *l, const struct sys___shmctl50_args *uap,
504 register_t *retval) 504 register_t *retval)
505{ 505{
506 /* { 506 /* {
507 syscallarg(int) shmid; 507 syscallarg(int) shmid;
508 syscallarg(int) cmd; 508 syscallarg(int) cmd;
509 syscallarg(struct shmid_ds *) buf; 509 syscallarg(struct shmid_ds *) buf;
510 } */ 510 } */
511 struct shmid_ds shmbuf; 511 struct shmid_ds shmbuf;
512 int cmd, error; 512 int cmd, error;
513 513
514 cmd = SCARG(uap, cmd); 514 cmd = SCARG(uap, cmd);
515 if (cmd == IPC_SET) { 515 if (cmd == IPC_SET) {
516 error = copyin(SCARG(uap, buf), &shmbuf, sizeof(shmbuf)); 516 error = copyin(SCARG(uap, buf), &shmbuf, sizeof(shmbuf));
517 if (error) 517 if (error)
518 return error; 518 return error;
519 } 519 }
520 520
521 error = shmctl1(l, SCARG(uap, shmid), cmd, 521 error = shmctl1(l, SCARG(uap, shmid), cmd,
522 (cmd == IPC_SET || cmd == IPC_STAT) ? &shmbuf : NULL); 522 (cmd == IPC_SET || cmd == IPC_STAT) ? &shmbuf : NULL);
523 523
524 if (error == 0 && cmd == IPC_STAT) 524 if (error == 0 && cmd == IPC_STAT)
525 error = copyout(&shmbuf, SCARG(uap, buf), sizeof(shmbuf)); 525 error = copyout(&shmbuf, SCARG(uap, buf), sizeof(shmbuf));
526 526
527 return error; 527 return error;
528} 528}
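
Driving the control operations from userland looks like the sketch below
(the id is assumed to name a live segment). IPC_STAT copies the kernel's
shmid_ds out; IPC_RMID only marks the segment removed - it is freed when
the last attach goes away, as shmctl1() below shows.

	#include <sys/shm.h>
	#include <err.h>
	#include <stdio.h>

	static void
	stat_then_remove(int id)
	{
		struct shmid_ds ds;

		if (shmctl(id, IPC_STAT, &ds) == -1)
			err(1, "shmctl(IPC_STAT)");
		printf("segsz=%zu nattch=%u\n",
		    (size_t)ds.shm_segsz, (unsigned)ds.shm_nattch);
		if (shmctl(id, IPC_RMID, NULL) == -1)	/* deferred free */
			err(1, "shmctl(IPC_RMID)");
	}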
529 529
530int 530int
531shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf) 531shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf)
532{ 532{
533 struct uvm_object *uobj = NULL; 533 struct uvm_object *uobj = NULL;
534 kauth_cred_t cred = l->l_cred; 534 kauth_cred_t cred = l->l_cred;
535 struct shmid_ds *shmseg; 535 struct shmid_ds *shmseg;
536 int error = 0; 536 int error = 0;
537 537
538 mutex_enter(&shm_lock); 538 mutex_enter(&shm_lock);
539 /* In case of reallocation, we will wait for completion */ 539 /* In case of reallocation, we will wait for completion */
540 while (__predict_false(shm_realloc_state)) 540 while (__predict_false(shm_realloc_state))
541 cv_wait(&shm_realloc_cv, &shm_lock); 541 cv_wait(&shm_realloc_cv, &shm_lock);
542 542
543 shmseg = shm_find_segment_by_shmid(shmid); 543 shmseg = shm_find_segment_by_shmid(shmid);
544 if (shmseg == NULL) { 544 if (shmseg == NULL) {
545 mutex_exit(&shm_lock); 545 mutex_exit(&shm_lock);
546 return EINVAL; 546 return EINVAL;
547 } 547 }
548 548
549 switch (cmd) { 549 switch (cmd) {
550 case IPC_STAT: 550 case IPC_STAT:
551 if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_R)) != 0) 551 if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_R)) != 0)
552 break; 552 break;
553 memcpy(shmbuf, shmseg, sizeof(struct shmid_ds)); 553 memcpy(shmbuf, shmseg, sizeof(struct shmid_ds));
554 break; 554 break;
555 case IPC_SET: 555 case IPC_SET:
556 if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0) 556 if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
557 break; 557 break;
558 shmseg->shm_perm.uid = shmbuf->shm_perm.uid; 558 shmseg->shm_perm.uid = shmbuf->shm_perm.uid;
559 shmseg->shm_perm.gid = shmbuf->shm_perm.gid; 559 shmseg->shm_perm.gid = shmbuf->shm_perm.gid;
560 shmseg->shm_perm.mode = 560 shmseg->shm_perm.mode =
561 (shmseg->shm_perm.mode & ~ACCESSPERMS) | 561 (shmseg->shm_perm.mode & ~ACCESSPERMS) |
562 (shmbuf->shm_perm.mode & ACCESSPERMS); 562 (shmbuf->shm_perm.mode & ACCESSPERMS);
563 shmseg->shm_ctime = time_second; 563 shmseg->shm_ctime = time_second;
564 break; 564 break;
565 case IPC_RMID: 565 case IPC_RMID:
566 if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0) 566 if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
567 break; 567 break;
568 shmseg->shm_perm._key = IPC_PRIVATE; 568 shmseg->shm_perm._key = IPC_PRIVATE;
569 shmseg->shm_perm.mode |= SHMSEG_REMOVED; 569 shmseg->shm_perm.mode |= SHMSEG_REMOVED;
570 if (shmseg->shm_nattch <= 0) { 570 if (shmseg->shm_nattch <= 0) {
571 uobj = shmseg->_shm_internal; 571 uobj = shmseg->_shm_internal;
572 shm_free_segment(IPCID_TO_IX(shmid)); 572 shm_free_segment(IPCID_TO_IX(shmid));
573 } 573 }
574 break; 574 break;
575 case SHM_LOCK: 575 case SHM_LOCK:
576 case SHM_UNLOCK: 576 case SHM_UNLOCK:
577 if ((error = kauth_authorize_generic(cred, 577 if ((error = kauth_authorize_generic(cred,
578 KAUTH_GENERIC_ISSUSER, NULL)) != 0) 578 KAUTH_GENERIC_ISSUSER, NULL)) != 0)
579 break; 579 break;
580 error = shm_memlock(l, shmseg, shmid, cmd); 580 error = shm_memlock(l, shmseg, shmid, cmd);
581 break; 581 break;
582 default: 582 default:
583 error = EINVAL; 583 error = EINVAL;
584 } 584 }
585 585
586 mutex_exit(&shm_lock); 586 mutex_exit(&shm_lock);
587 if (uobj != NULL) 587 if (uobj != NULL)
588 uao_detach(uobj); 588 uao_detach(uobj);
589 return error; 589 return error;
590} 590}
591 591
592/* 592/*
593 * Try to take an already existing segment. 593 * Try to take an already existing segment.
594 * => must be called with shm_lock held; 594 * => must be called with shm_lock held;
595 * => called from one place, hence inline; 595 * => called from one place, hence inline;
596 */ 596 */
597static inline int 597static inline int
598shmget_existing(struct lwp *l, const struct sys_shmget_args *uap, int mode, 598shmget_existing(struct lwp *l, const struct sys_shmget_args *uap, int mode,
599 register_t *retval) 599 register_t *retval)
600{ 600{
601 struct shmid_ds *shmseg; 601 struct shmid_ds *shmseg;
602 kauth_cred_t cred = l->l_cred; 602 kauth_cred_t cred = l->l_cred;
603 int segnum, error; 603 int segnum, error;
604again: 604again:
605 KASSERT(mutex_owned(&shm_lock)); 605 KASSERT(mutex_owned(&shm_lock));
606 606
607 /* Find segment by key */ 607 /* Find segment by key */
608 for (segnum = 0; segnum < shminfo.shmmni; segnum++) 608 for (segnum = 0; segnum < shminfo.shmmni; segnum++)
609 if ((shmsegs[segnum].shm_perm.mode & SHMSEG_ALLOCATED) && 609 if ((shmsegs[segnum].shm_perm.mode & SHMSEG_ALLOCATED) &&
610 shmsegs[segnum].shm_perm._key == SCARG(uap, key)) 610 shmsegs[segnum].shm_perm._key == SCARG(uap, key))
611 break; 611 break;
612 if (segnum == shminfo.shmmni) { 612 if (segnum == shminfo.shmmni) {
613 /* Not found */ 613 /* Not found */
614 return -1; 614 return -1;
615 } 615 }
616 616
617 shmseg = &shmsegs[segnum]; 617 shmseg = &shmsegs[segnum];
618 if (shmseg->shm_perm.mode & SHMSEG_REMOVED) { 618 if (shmseg->shm_perm.mode & SHMSEG_REMOVED) {
619 /* 619 /*
620 * This segment is in the process of being allocated. Wait 620 * This segment is in the process of being allocated. Wait
621 * until it's done, and look the key up again (in case the 621 * until it's done, and look the key up again (in case the
622 * allocation failed or it was freed). 622 * allocation failed or it was freed).
623 */ 623 */
624 shmseg->shm_perm.mode |= SHMSEG_WANTED; 624 shmseg->shm_perm.mode |= SHMSEG_WANTED;
625 error = cv_wait_sig(&shm_cv[segnum], &shm_lock); 625 error = cv_wait_sig(&shm_cv[segnum], &shm_lock);
626 if (error) 626 if (error)
627 return error; 627 return error;
628 goto again; 628 goto again;
629 } 629 }
630 630
631 /* 631 /*
632 * First check the flags, to generate a useful error when a 632 * First check the flags, to generate a useful error when a
633 * segment already exists. 633 * segment already exists.
634 */ 634 */
635 if ((SCARG(uap, shmflg) & (IPC_CREAT | IPC_EXCL)) == 635 if ((SCARG(uap, shmflg) & (IPC_CREAT | IPC_EXCL)) ==
636 (IPC_CREAT | IPC_EXCL)) 636 (IPC_CREAT | IPC_EXCL))
637 return EEXIST; 637 return EEXIST;
638 638
639 /* Check the permission and segment size. */ 639 /* Check the permission and segment size. */
640 error = ipcperm(cred, &shmseg->shm_perm, mode); 640 error = ipcperm(cred, &shmseg->shm_perm, mode);
641 if (error) 641 if (error)
642 return error; 642 return error;
643 if (SCARG(uap, size) && SCARG(uap, size) > shmseg->shm_segsz) 643 if (SCARG(uap, size) && SCARG(uap, size) > shmseg->shm_segsz)
644 return EINVAL; 644 return EINVAL;
645 645
646 *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm); 646 *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
647 return 0; 647 return 0;
648} 648}
649 649
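shmget_existing() above resolves shmget(2) lookups on an existing key:
IPC_CREAT | IPC_EXCL fails with EEXIST when the key is already in use, and a
requested size larger than the segment's shm_segsz yields EINVAL. A hedged
userland sketch (not part of the diff) of the create-or-attach idiom this
supports; key, size and mode are arbitrary:

#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <err.h>
#include <errno.h>

static int
create_or_attach(key_t key, size_t size)
{
	int shmid;

	/* Exactly one caller wins the exclusive create... */
	shmid = shmget(key, size, IPC_CREAT | IPC_EXCL | 0600);
	if (shmid == -1 && errno == EEXIST) {
		/* ...the rest attach; size must not exceed shm_segsz. */
		shmid = shmget(key, size, 0600);
	}
	if (shmid == -1)
		err(1, "shmget");
	return shmid;
}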
650int 650int
651sys_shmget(struct lwp *l, const struct sys_shmget_args *uap, register_t *retval) 651sys_shmget(struct lwp *l, const struct sys_shmget_args *uap, register_t *retval)
652{ 652{
653 /* { 653 /* {
654 syscallarg(key_t) key; 654 syscallarg(key_t) key;
655 syscallarg(size_t) size; 655 syscallarg(size_t) size;
656 syscallarg(int) shmflg; 656 syscallarg(int) shmflg;
657 } */ 657 } */
658 struct shmid_ds *shmseg; 658 struct shmid_ds *shmseg;
659 kauth_cred_t cred = l->l_cred; 659 kauth_cred_t cred = l->l_cred;
660 key_t key = SCARG(uap, key); 660 key_t key = SCARG(uap, key);
661 size_t size; 661 size_t size;
662 int error, mode, segnum; 662 int error, mode, segnum;
663 bool lockmem; 663 bool lockmem;
664 664
665 mode = SCARG(uap, shmflg) & ACCESSPERMS; 665 mode = SCARG(uap, shmflg) & ACCESSPERMS;
666 if (SCARG(uap, shmflg) & _SHM_RMLINGER) 666 if (SCARG(uap, shmflg) & _SHM_RMLINGER)
667 mode |= SHMSEG_RMLINGER; 667 mode |= SHMSEG_RMLINGER;
668 668
669 SHMPRINTF(("shmget: key 0x%lx size 0x%zx shmflg 0x%x mode 0x%x\n", 669 SHMPRINTF(("shmget: key 0x%lx size 0x%zx shmflg 0x%x mode 0x%x\n",
670 SCARG(uap, key), SCARG(uap, size), SCARG(uap, shmflg), mode)); 670 SCARG(uap, key), SCARG(uap, size), SCARG(uap, shmflg), mode));
671 671
672 mutex_enter(&shm_lock); 672 mutex_enter(&shm_lock);
673 	/* If reallocation is in progress, wait for it to complete */ 673 	/* If reallocation is in progress, wait for it to complete */
674 while (__predict_false(shm_realloc_state)) 674 while (__predict_false(shm_realloc_state))
675 cv_wait(&shm_realloc_cv, &shm_lock); 675 cv_wait(&shm_realloc_cv, &shm_lock);
676 676
677 if (key != IPC_PRIVATE) { 677 if (key != IPC_PRIVATE) {
678 error = shmget_existing(l, uap, mode, retval); 678 error = shmget_existing(l, uap, mode, retval);
679 if (error != -1) { 679 if (error != -1) {
680 mutex_exit(&shm_lock); 680 mutex_exit(&shm_lock);
681 return error; 681 return error;
682 } 682 }
683 if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) { 683 if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) {
684 mutex_exit(&shm_lock); 684 mutex_exit(&shm_lock);
685 return ENOENT; 685 return ENOENT;
686 } 686 }
687 } 687 }
688 error = 0; 688 error = 0;
689 689
690 /* 690 /*
691 	 * Check the limits. 691 	 * Check the limits.
692 */ 692 */
693 size = SCARG(uap, size); 693 size = SCARG(uap, size);
694 if (size < shminfo.shmmin || size > shminfo.shmmax) { 694 if (size < shminfo.shmmin || size > shminfo.shmmax) {
695 mutex_exit(&shm_lock); 695 mutex_exit(&shm_lock);
696 return EINVAL; 696 return EINVAL;
697 } 697 }
698 if (shm_nused >= shminfo.shmmni) { 698 if (shm_nused >= shminfo.shmmni) {
699 mutex_exit(&shm_lock); 699 mutex_exit(&shm_lock);
700 return ENOSPC; 700 return ENOSPC;
701 } 701 }
702 size = (size + PGOFSET) & ~PGOFSET; 702 size = (size + PGOFSET) & ~PGOFSET;
703 if (shm_committed + btoc(size) > shminfo.shmall) { 703 if (shm_committed + btoc(size) > shminfo.shmall) {
704 mutex_exit(&shm_lock); 704 mutex_exit(&shm_lock);
705 return ENOMEM; 705 return ENOMEM;
706 } 706 }
707 707
708 /* Find the first available segment */ 708 /* Find the first available segment */
709 if (shm_last_free < 0) { 709 if (shm_last_free < 0) {
710 for (segnum = 0; segnum < shminfo.shmmni; segnum++) 710 for (segnum = 0; segnum < shminfo.shmmni; segnum++)
711 if (shmsegs[segnum].shm_perm.mode & SHMSEG_FREE) 711 if (shmsegs[segnum].shm_perm.mode & SHMSEG_FREE)
712 break; 712 break;
713 KASSERT(segnum < shminfo.shmmni); 713 KASSERT(segnum < shminfo.shmmni);
714 } else { 714 } else {
715 segnum = shm_last_free; 715 segnum = shm_last_free;
716 shm_last_free = -1; 716 shm_last_free = -1;
717 } 717 }
718 718
719 /* 719 /*
720 * Initialize the segment. 720 * Initialize the segment.
721 	 * We will drop the lock while allocating the memory, so mark the 721 	 * We will drop the lock while allocating the memory, so mark the
722 	 * segment present but removed, so that no other thread can take it. 722 	 * segment present but removed, so that no other thread can take it.
723 	 * Also, disable reallocation while the lock is dropped. 723 	 * Also, disable reallocation while the lock is dropped.
724 */ 724 */
725 shmseg = &shmsegs[segnum]; 725 shmseg = &shmsegs[segnum];
726 shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED; 726 shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
727 shm_committed += btoc(size); 727 shm_committed += btoc(size);
728 shm_nused++; 728 shm_nused++;
729 lockmem = shm_use_phys; 729 lockmem = shm_use_phys;
730 shm_realloc_disable++; 730 shm_realloc_disable++;
731 mutex_exit(&shm_lock); 731 mutex_exit(&shm_lock);
732 732
733 /* Allocate the memory object and lock it if needed */ 733 /* Allocate the memory object and lock it if needed */
734 shmseg->_shm_internal = uao_create(size, 0); 734 shmseg->_shm_internal = uao_create(size, 0);
735 if (lockmem) { 735 if (lockmem) {
736 		/* Wire the pages and tag them */ 736 		/* Wire the pages and tag them */
737 error = uvm_obj_wirepages(shmseg->_shm_internal, 0, size); 737 error = uvm_obj_wirepages(shmseg->_shm_internal, 0, size);
738 if (error) { 738 if (error) {
739 uao_detach(shmseg->_shm_internal); 739 uao_detach(shmseg->_shm_internal);
740 mutex_enter(&shm_lock); 740 mutex_enter(&shm_lock);
741 shm_free_segment(segnum); 741 shm_free_segment(segnum);
742 shm_realloc_disable--; 742 shm_realloc_disable--;
743 mutex_exit(&shm_lock); 743 mutex_exit(&shm_lock);
744 return error; 744 return error;
745 } 745 }
746 } 746 }
747 747
748 /* 748 /*
749 	 * Note that while the segment is marked, there is no need to hold the 749 	 * Note that while the segment is marked, there is no need to hold the
750 	 * lock while setting it (except for shm_perm.mode). 750 	 * lock while setting it (except for shm_perm.mode).
751 */ 751 */
752 shmseg->shm_perm._key = SCARG(uap, key); 752 shmseg->shm_perm._key = SCARG(uap, key);
753 shmseg->shm_perm._seq = (shmseg->shm_perm._seq + 1) & 0x7fff; 753 shmseg->shm_perm._seq = (shmseg->shm_perm._seq + 1) & 0x7fff;
754 *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm); 754 *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
755 755
756 shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_geteuid(cred); 756 shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_geteuid(cred);
757 shmseg->shm_perm.cgid = shmseg->shm_perm.gid = kauth_cred_getegid(cred); 757 shmseg->shm_perm.cgid = shmseg->shm_perm.gid = kauth_cred_getegid(cred);
758 shmseg->shm_segsz = SCARG(uap, size); 758 shmseg->shm_segsz = SCARG(uap, size);
759 shmseg->shm_cpid = l->l_proc->p_pid; 759 shmseg->shm_cpid = l->l_proc->p_pid;
760 shmseg->shm_lpid = shmseg->shm_nattch = 0; 760 shmseg->shm_lpid = shmseg->shm_nattch = 0;
761 shmseg->shm_atime = shmseg->shm_dtime = 0; 761 shmseg->shm_atime = shmseg->shm_dtime = 0;
762 shmseg->shm_ctime = time_second; 762 shmseg->shm_ctime = time_second;
763 763
764 /* 764 /*
765 * Segment is initialized. 765 * Segment is initialized.
766 * Enter the lock, mark as allocated, and notify waiters (if any). 766 * Enter the lock, mark as allocated, and notify waiters (if any).
767 	 * Also, clear the reallocation state. 767 	 * Also, clear the reallocation state.
768 */ 768 */
769 mutex_enter(&shm_lock); 769 mutex_enter(&shm_lock);
770 shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) | 770 shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
771 (mode & (ACCESSPERMS | SHMSEG_RMLINGER)) | 771 (mode & (ACCESSPERMS | SHMSEG_RMLINGER)) |
772 SHMSEG_ALLOCATED | (lockmem ? SHMSEG_WIRED : 0); 772 SHMSEG_ALLOCATED | (lockmem ? SHMSEG_WIRED : 0);
773 if (shmseg->shm_perm.mode & SHMSEG_WANTED) { 773 if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
774 shmseg->shm_perm.mode &= ~SHMSEG_WANTED; 774 shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
775 cv_broadcast(&shm_cv[segnum]); 775 cv_broadcast(&shm_cv[segnum]);
776 } 776 }
777 shm_realloc_disable--; 777 shm_realloc_disable--;
778 cv_broadcast(&shm_realloc_cv); 778 cv_broadcast(&shm_realloc_cv);
779 mutex_exit(&shm_lock); 779 mutex_exit(&shm_lock);
780 780
781 return error; 781 return error;
782} 782}
783 783
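sys_shmget() above rounds the requested size up to a whole number of pages
with (size + PGOFSET) & ~PGOFSET, where PGOFSET is PAGE_SIZE - 1, before
charging the segment (via btoc()) against the shmall limit. A self-contained
sketch (not part of the diff) of that rounding, assuming a 4 KB page size
purely for illustration:

#include <assert.h>
#include <stddef.h>

#define PAGE_SIZE	4096UL		/* assumed for the example */
#define PGOFSET		(PAGE_SIZE - 1)

static size_t
round_to_page(size_t size)
{
	/* Add the page offset mask, then clear the offset bits. */
	return (size + PGOFSET) & ~PGOFSET;
}

int
main(void)
{
	assert(round_to_page(1) == 4096);
	assert(round_to_page(4096) == 4096);
	assert(round_to_page(4097) == 8192);
	return 0;
}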
784void 784void
785shmfork(struct vmspace *vm1, struct vmspace *vm2) 785shmfork(struct vmspace *vm1, struct vmspace *vm2)
786{ 786{
787 struct shmmap_state *shmmap_s; 787 struct shmmap_state *shmmap_s;
788 struct shmmap_entry *shmmap_se; 788 struct shmmap_entry *shmmap_se;
789 789
790 SHMPRINTF(("shmfork %p->%p\n", vm1, vm2)); 790 SHMPRINTF(("shmfork %p->%p\n", vm1, vm2));
791 mutex_enter(&shm_lock); 791 mutex_enter(&shm_lock);
792 vm2->vm_shm = vm1->vm_shm; 792 vm2->vm_shm = vm1->vm_shm;
793 if (vm1->vm_shm) { 793 if (vm1->vm_shm) {
794 shmmap_s = (struct shmmap_state *)vm1->vm_shm; 794 shmmap_s = (struct shmmap_state *)vm1->vm_shm;
795 SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) 795 SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
796 shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch++; 796 shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch++;
797 shmmap_s->nrefs++; 797 shmmap_s->nrefs++;
798 } 798 }
799 mutex_exit(&shm_lock); 799 mutex_exit(&shm_lock);
800} 800}
801 801
802void 802void
803shmexit(struct vmspace *vm) 803shmexit(struct vmspace *vm)
804{ 804{
805 struct shmmap_state *shmmap_s; 805 struct shmmap_state *shmmap_s;
806 struct shmmap_entry *shmmap_se; 806 struct shmmap_entry *shmmap_se;
807 807
808 mutex_enter(&shm_lock); 808 mutex_enter(&shm_lock);
809 shmmap_s = (struct shmmap_state *)vm->vm_shm; 809 shmmap_s = (struct shmmap_state *)vm->vm_shm;
810 if (shmmap_s == NULL) { 810 if (shmmap_s == NULL) {
811 mutex_exit(&shm_lock); 811 mutex_exit(&shm_lock);
812 return; 812 return;
813 } 813 }
814 vm->vm_shm = NULL; 814 vm->vm_shm = NULL;
815 815
816 if (--shmmap_s->nrefs > 0) { 816 if (--shmmap_s->nrefs > 0) {
817 SHMPRINTF(("shmexit: vm %p drop ref (%d entries), refs = %d\n", 817 SHMPRINTF(("shmexit: vm %p drop ref (%d entries), refs = %d\n",
818 vm, shmmap_s->nitems, shmmap_s->nrefs)); 818 vm, shmmap_s->nitems, shmmap_s->nrefs));
819 SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) { 819 SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
820 shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch--; 820 shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch--;
821 } 821 }
822 mutex_exit(&shm_lock); 822 mutex_exit(&shm_lock);
823 return; 823 return;
824 } 824 }
825 825
826 SHMPRINTF(("shmexit: vm %p cleanup (%d entries)\n", vm, shmmap_s->nitems)); 826 SHMPRINTF(("shmexit: vm %p cleanup (%d entries)\n", vm, shmmap_s->nitems));
827 if (shmmap_s->nitems == 0) { 827 if (shmmap_s->nitems == 0) {
828 mutex_exit(&shm_lock); 828 mutex_exit(&shm_lock);
829 kmem_free(shmmap_s, sizeof(struct shmmap_state)); 829 kmem_free(shmmap_s, sizeof(struct shmmap_state));
830 return; 830 return;
831 } 831 }
832 832
833 /* 833 /*
834 * Delete the entry from shm map. 834 * Delete the entry from shm map.
835 */ 835 */
836 for (;;) { 836 for (;;) {
837 struct shmid_ds *shmseg; 837 struct shmid_ds *shmseg;
838 struct uvm_object *uobj; 838 struct uvm_object *uobj;
839 size_t sz; 839 size_t sz;
840 840
841 shmmap_se = SLIST_FIRST(&shmmap_s->entries); 841 shmmap_se = SLIST_FIRST(&shmmap_s->entries);
842 KASSERT(shmmap_se != NULL); 842 KASSERT(shmmap_se != NULL);
843 843
844 shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)]; 844 shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
845 sz = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; 845 sz = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
846 /* shm_delete_mapping() removes from the list. */ 846 /* shm_delete_mapping() removes from the list. */
847 uobj = shm_delete_mapping(shmmap_s, shmmap_se); 847 uobj = shm_delete_mapping(shmmap_s, shmmap_se);
848 mutex_exit(&shm_lock); 848 mutex_exit(&shm_lock);
849 849
850 uvm_deallocate(&vm->vm_map, shmmap_se->va, sz); 850 uvm_deallocate(&vm->vm_map, shmmap_se->va, sz);
851 if (uobj != NULL) { 851 if (uobj != NULL) {
852 uao_detach(uobj); 852 uao_detach(uobj);
853 } 853 }
854 kmem_free(shmmap_se, sizeof(struct shmmap_entry)); 854 kmem_free(shmmap_se, sizeof(struct shmmap_entry));
855 855
856 if (SLIST_EMPTY(&shmmap_s->entries)) { 856 if (SLIST_EMPTY(&shmmap_s->entries)) {
857 break; 857 break;
858 } 858 }
859 mutex_enter(&shm_lock); 859 mutex_enter(&shm_lock);
860 KASSERT(!SLIST_EMPTY(&shmmap_s->entries)); 860 KASSERT(!SLIST_EMPTY(&shmmap_s->entries));
861 } 861 }
862 kmem_free(shmmap_s, sizeof(struct shmmap_state)); 862 kmem_free(shmmap_s, sizeof(struct shmmap_state));
863} 863}
864 864
865static int 865static int
866shmrealloc(int newshmni) 866shmrealloc(int newshmni)
867{ 867{
868 vaddr_t v; 868 vaddr_t v;
869 struct shmid_ds *oldshmsegs, *newshmsegs; 869 struct shmid_ds *oldshmsegs, *newshmsegs;
870 kcondvar_t *newshm_cv, *oldshm_cv; 870 kcondvar_t *newshm_cv, *oldshm_cv;
871 size_t sz; 871 size_t sz;
872 int i, lsegid, oldshmni; 872 int i, lsegid, oldshmni;
873 873
874 if (newshmni < 1) 874 if (newshmni < 1)
875 return EINVAL; 875 return EINVAL;
876 876
877 /* Allocate new memory area */ 877 /* Allocate new memory area */
878 sz = ALIGN(newshmni * sizeof(struct shmid_ds)) + 878 sz = ALIGN(newshmni * sizeof(struct shmid_ds)) +
879 ALIGN(newshmni * sizeof(kcondvar_t)); 879 ALIGN(newshmni * sizeof(kcondvar_t));
880 v = uvm_km_alloc(kernel_map, round_page(sz), 0, 880 sz = round_page(sz);
881 UVM_KMF_WIRED|UVM_KMF_ZERO); 881 v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
882 if (v == 0) 882 if (v == 0)
883 return ENOMEM; 883 return ENOMEM;
884 884
885 mutex_enter(&shm_lock); 885 mutex_enter(&shm_lock);
886 while (shm_realloc_state || shm_realloc_disable) 886 while (shm_realloc_state || shm_realloc_disable)
887 cv_wait(&shm_realloc_cv, &shm_lock); 887 cv_wait(&shm_realloc_cv, &shm_lock);
888 888
889 /* 889 /*
890 	 * Get the index of the last used segment. Fail if we are trying 890 	 * Get the index of the last used segment. Fail if we are trying
891 	 * to reallocate fewer segments than are currently in use. 891 	 * to reallocate fewer segments than are currently in use.
892 */ 892 */
893 lsegid = 0; 893 lsegid = 0;
894 for (i = 0; i < shminfo.shmmni; i++) 894 for (i = 0; i < shminfo.shmmni; i++)
895 if ((shmsegs[i].shm_perm.mode & SHMSEG_FREE) == 0) 895 if ((shmsegs[i].shm_perm.mode & SHMSEG_FREE) == 0)
896 lsegid = i; 896 lsegid = i;
897 if (lsegid >= newshmni) { 897 if (lsegid >= newshmni) {
898 mutex_exit(&shm_lock); 898 mutex_exit(&shm_lock);
899 uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED); 899 uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
900 return EBUSY; 900 return EBUSY;
901 } 901 }
902 shm_realloc_state = true; 902 shm_realloc_state = true;
903 903
904 newshmsegs = (void *)v; 904 newshmsegs = (void *)v;
905 newshm_cv = (void *)((uintptr_t)newshmsegs + 905 newshm_cv = (void *)((uintptr_t)newshmsegs +
906 ALIGN(newshmni * sizeof(struct shmid_ds))); 906 ALIGN(newshmni * sizeof(struct shmid_ds)));
907 907
908 /* Copy all memory to the new area */ 908 /* Copy all memory to the new area */
909 for (i = 0; i < shm_nused; i++) 909 for (i = 0; i < shm_nused; i++)
910 (void)memcpy(&newshmsegs[i], &shmsegs[i], 910 (void)memcpy(&newshmsegs[i], &shmsegs[i],
911 sizeof(newshmsegs[0])); 911 sizeof(newshmsegs[0]));
912 912
913 	/* Mark all new segments as free, if there are any */ 913 	/* Mark all new segments as free, if there are any */
914 for (; i < newshmni; i++) { 914 for (; i < newshmni; i++) {
915 cv_init(&newshm_cv[i], "shmwait"); 915 cv_init(&newshm_cv[i], "shmwait");
916 newshmsegs[i].shm_perm.mode = SHMSEG_FREE; 916 newshmsegs[i].shm_perm.mode = SHMSEG_FREE;
917 newshmsegs[i].shm_perm._seq = 0; 917 newshmsegs[i].shm_perm._seq = 0;
918 } 918 }
919 919
920 oldshmsegs = shmsegs; 920 oldshmsegs = shmsegs;
921 oldshmni = shminfo.shmmni; 921 oldshmni = shminfo.shmmni;
922 shminfo.shmmni = newshmni; 922 shminfo.shmmni = newshmni;
923 shmsegs = newshmsegs; 923 shmsegs = newshmsegs;
924 shm_cv = newshm_cv; 924 shm_cv = newshm_cv;
925 925
926 /* Reallocation completed - notify all waiters, if any */ 926 /* Reallocation completed - notify all waiters, if any */
927 shm_realloc_state = false; 927 shm_realloc_state = false;
928 cv_broadcast(&shm_realloc_cv); 928 cv_broadcast(&shm_realloc_cv);
929 mutex_exit(&shm_lock); 929 mutex_exit(&shm_lock);
930 930
931 /* Release now unused resources. */ 931 /* Release now unused resources. */
932 oldshm_cv = (void *)((uintptr_t)oldshmsegs + 932 oldshm_cv = (void *)((uintptr_t)oldshmsegs +
933 ALIGN(oldshmni * sizeof(struct shmid_ds))); 933 ALIGN(oldshmni * sizeof(struct shmid_ds)));
934 for (i = 0; i < oldshmni; i++) 934 for (i = 0; i < oldshmni; i++)
935 cv_destroy(&oldshm_cv[i]); 935 cv_destroy(&oldshm_cv[i]);
936 936
937 sz = ALIGN(oldshmni * sizeof(struct shmid_ds)) + 937 sz = ALIGN(oldshmni * sizeof(struct shmid_ds)) +
938 ALIGN(oldshmni * sizeof(kcondvar_t)); 938 ALIGN(oldshmni * sizeof(kcondvar_t));
 939 sz = round_page(sz);
939 uvm_km_free(kernel_map, (vaddr_t)oldshmsegs, sz, UVM_KMF_WIRED); 940 uvm_km_free(kernel_map, (vaddr_t)oldshmsegs, sz, UVM_KMF_WIRED);
940 941
941 return 0; 942 return 0;
942} 943}
943 944
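shmrealloc() is the function this commit fixes on the sysv_shm.c side: the
shmid_ds and kcondvar_t arrays share a single wired allocation, so
uvm_km_free(9) must be given the same page-rounded size that was passed to
uvm_km_alloc(9), both in the EBUSY error path and when releasing the old
arrays. A self-contained sketch (not part of the diff) of the size
computation, using stand-ins for the kernel's ALIGN() and round_page()
macros:

#include <stddef.h>

/* Assumed stand-ins for the kernel macros used in this file. */
#define ALIGNBYTES	(sizeof(long) - 1)
#define ALIGN(n)	(((size_t)(n) + ALIGNBYTES) & ~ALIGNBYTES)
#define PGOFSET		(4096UL - 1)	/* assumed 4 KB pages */
#define round_page(n)	(((size_t)(n) + PGOFSET) & ~PGOFSET)

static size_t
shm_arrays_size(size_t nsegs, size_t seg_size, size_t cv_size)
{
	/* Two back-to-back arrays, each padded to the alignment unit. */
	size_t sz = ALIGN(nsegs * seg_size) + ALIGN(nsegs * cv_size);

	/* Both the alloc and the free must see this rounded value. */
	return round_page(sz);
}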
944void 945void
945shminit(void) 946shminit(void)
946{ 947{
947 vaddr_t v; 948 vaddr_t v;
948 size_t sz; 949 size_t sz;
949 int i; 950 int i;
950 951
951 mutex_init(&shm_lock, MUTEX_DEFAULT, IPL_NONE); 952 mutex_init(&shm_lock, MUTEX_DEFAULT, IPL_NONE);
952 cv_init(&shm_realloc_cv, "shmrealc"); 953 cv_init(&shm_realloc_cv, "shmrealc");
953 954
954 /* Allocate the wired memory for our structures */ 955 /* Allocate the wired memory for our structures */
955 sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) + 956 sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
956 ALIGN(shminfo.shmmni * sizeof(kcondvar_t)); 957 ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
957 v = uvm_km_alloc(kernel_map, round_page(sz), 0, 958 sz = round_page(sz);
958 UVM_KMF_WIRED|UVM_KMF_ZERO); 959 v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
959 if (v == 0) 960 if (v == 0)
960 panic("sysv_shm: cannot allocate memory"); 961 panic("sysv_shm: cannot allocate memory");
961 shmsegs = (void *)v; 962 shmsegs = (void *)v;
962 shm_cv = (void *)((uintptr_t)shmsegs + 963 shm_cv = (void *)((uintptr_t)shmsegs +
963 ALIGN(shminfo.shmmni * sizeof(struct shmid_ds))); 964 ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)));
964 965
965 if (shminfo.shmmax == 0) 966 if (shminfo.shmmax == 0)
966 shminfo.shmmax = max(physmem / 4, 1024) * PAGE_SIZE; 967 shminfo.shmmax = max(physmem / 4, 1024) * PAGE_SIZE;
967 else 968 else
968 shminfo.shmmax *= PAGE_SIZE; 969 shminfo.shmmax *= PAGE_SIZE;
969 shminfo.shmall = shminfo.shmmax / PAGE_SIZE; 970 shminfo.shmall = shminfo.shmmax / PAGE_SIZE;
970 971
971 for (i = 0; i < shminfo.shmmni; i++) { 972 for (i = 0; i < shminfo.shmmni; i++) {
972 cv_init(&shm_cv[i], "shmwait"); 973 cv_init(&shm_cv[i], "shmwait");
973 shmsegs[i].shm_perm.mode = SHMSEG_FREE; 974 shmsegs[i].shm_perm.mode = SHMSEG_FREE;
974 shmsegs[i].shm_perm._seq = 0; 975 shmsegs[i].shm_perm._seq = 0;
975 } 976 }
976 shm_last_free = 0; 977 shm_last_free = 0;
977 shm_nused = 0; 978 shm_nused = 0;
978 shm_committed = 0; 979 shm_committed = 0;
979 shm_realloc_disable = 0; 980 shm_realloc_disable = 0;
980 shm_realloc_state = false; 981 shm_realloc_state = false;
981} 982}
982 983
983static int 984static int
984sysctl_ipc_shmmni(SYSCTLFN_ARGS) 985sysctl_ipc_shmmni(SYSCTLFN_ARGS)
985{ 986{
986 int newsize, error; 987 int newsize, error;
987 struct sysctlnode node; 988 struct sysctlnode node;
988 node = *rnode; 989 node = *rnode;
989 node.sysctl_data = &newsize; 990 node.sysctl_data = &newsize;
990 991
991 newsize = shminfo.shmmni; 992 newsize = shminfo.shmmni;
992 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 993 error = sysctl_lookup(SYSCTLFN_CALL(&node));
993 if (error || newp == NULL) 994 if (error || newp == NULL)
994 return error; 995 return error;
995 996
996 sysctl_unlock(); 997 sysctl_unlock();
997 error = shmrealloc(newsize); 998 error = shmrealloc(newsize);
998 sysctl_relock(); 999 sysctl_relock();
999 return error; 1000 return error;
1000} 1001}
1001 1002
1002static int 1003static int
1003sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS) 1004sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS)
1004{ 1005{
1005 uint32_t newsize; 1006 uint32_t newsize;
1006 int error; 1007 int error;
1007 struct sysctlnode node; 1008 struct sysctlnode node;
1008 node = *rnode; 1009 node = *rnode;
1009 node.sysctl_data = &newsize; 1010 node.sysctl_data = &newsize;
1010 1011
1011 newsize = shminfo.shmall; 1012 newsize = shminfo.shmall;
1012 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1013 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1013 if (error || newp == NULL) 1014 if (error || newp == NULL)
1014 return error; 1015 return error;
1015 1016
1016 if (newsize < 1) 1017 if (newsize < 1)
1017 return EINVAL; 1018 return EINVAL;
1018 1019
1019 shminfo.shmall = newsize; 1020 shminfo.shmall = newsize;
1020 shminfo.shmmax = (uint64_t)shminfo.shmall * PAGE_SIZE; 1021 shminfo.shmmax = (uint64_t)shminfo.shmall * PAGE_SIZE;
1021 1022
1022 return 0; 1023 return 0;
1023} 1024}
1024 1025
1025static int 1026static int
1026sysctl_ipc_shmmax(SYSCTLFN_ARGS) 1027sysctl_ipc_shmmax(SYSCTLFN_ARGS)
1027{ 1028{
1028 uint64_t newsize; 1029 uint64_t newsize;
1029 int error; 1030 int error;
1030 struct sysctlnode node; 1031 struct sysctlnode node;
1031 node = *rnode; 1032 node = *rnode;
1032 node.sysctl_data = &newsize; 1033 node.sysctl_data = &newsize;
1033 1034
1034 newsize = shminfo.shmmax; 1035 newsize = shminfo.shmmax;
1035 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1036 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1036 if (error || newp == NULL) 1037 if (error || newp == NULL)
1037 return error; 1038 return error;
1038 1039
1039 if (newsize < PAGE_SIZE) 1040 if (newsize < PAGE_SIZE)
1040 return EINVAL; 1041 return EINVAL;
1041 1042
1042 shminfo.shmmax = round_page(newsize); 1043 shminfo.shmmax = round_page(newsize);
1043 shminfo.shmall = shminfo.shmmax >> PAGE_SHIFT; 1044 shminfo.shmall = shminfo.shmmax >> PAGE_SHIFT;
1044 1045
1045 return 0; 1046 return 0;
1046} 1047}
1047 1048
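sysctl_ipc_shmmax() above accepts a new byte limit, rounds it to a page
multiple, and rederives shmall (the limit in pages) from it; values below
PAGE_SIZE are rejected with EINVAL. A hedged userland sketch (not part of
the diff) of reading and writing the knob through sysctlbyname(3); the new
value is arbitrary, and the write requires privilege:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t shmmax, newmax = 64ULL * 1024 * 1024;	/* 64 MB */
	size_t len = sizeof(shmmax);

	if (sysctlbyname("kern.ipc.shmmax", &shmmax, &len, NULL, 0) == -1)
		err(1, "sysctlbyname(read)");
	printf("kern.ipc.shmmax = %llu\n", (unsigned long long)shmmax);

	/* The handler rounds the new value to a page multiple. */
	if (sysctlbyname("kern.ipc.shmmax", NULL, NULL,
	    &newmax, sizeof(newmax)) == -1)
		err(1, "sysctlbyname(write)");
	return 0;
}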
1048SYSCTL_SETUP(sysctl_ipc_shm_setup, "sysctl kern.ipc subtree setup") 1049SYSCTL_SETUP(sysctl_ipc_shm_setup, "sysctl kern.ipc subtree setup")
1049{ 1050{
1050 1051
1051 sysctl_createv(clog, 0, NULL, NULL, 1052 sysctl_createv(clog, 0, NULL, NULL,
1052 CTLFLAG_PERMANENT, 1053 CTLFLAG_PERMANENT,
1053 CTLTYPE_NODE, "kern", NULL, 1054 CTLTYPE_NODE, "kern", NULL,
1054 NULL, 0, NULL, 0, 1055 NULL, 0, NULL, 0,
1055 CTL_KERN, CTL_EOL); 1056 CTL_KERN, CTL_EOL);
1056 sysctl_createv(clog, 0, NULL, NULL, 1057 sysctl_createv(clog, 0, NULL, NULL,
1057 CTLFLAG_PERMANENT, 1058 CTLFLAG_PERMANENT,
1058 CTLTYPE_NODE, "ipc", 1059 CTLTYPE_NODE, "ipc",
1059 SYSCTL_DESCR("SysV IPC options"), 1060 SYSCTL_DESCR("SysV IPC options"),
1060 NULL, 0, NULL, 0, 1061 NULL, 0, NULL, 0,
1061 CTL_KERN, KERN_SYSVIPC, CTL_EOL); 1062 CTL_KERN, KERN_SYSVIPC, CTL_EOL);
1062 sysctl_createv(clog, 0, NULL, NULL, 1063 sysctl_createv(clog, 0, NULL, NULL,
1063 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1064 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1064 CTLTYPE_QUAD, "shmmax", 1065 CTLTYPE_QUAD, "shmmax",
1065 SYSCTL_DESCR("Max shared memory segment size in bytes"), 1066 SYSCTL_DESCR("Max shared memory segment size in bytes"),
1066 sysctl_ipc_shmmax, 0, &shminfo.shmmax, 0, 1067 sysctl_ipc_shmmax, 0, &shminfo.shmmax, 0,
1067 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAX, CTL_EOL); 1068 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAX, CTL_EOL);
1068 sysctl_createv(clog, 0, NULL, NULL, 1069 sysctl_createv(clog, 0, NULL, NULL,
1069 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1070 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1070 CTLTYPE_INT, "shmmni", 1071 CTLTYPE_INT, "shmmni",
1071 SYSCTL_DESCR("Max number of shared memory identifiers"), 1072 SYSCTL_DESCR("Max number of shared memory identifiers"),
1072 sysctl_ipc_shmmni, 0, &shminfo.shmmni, 0, 1073 sysctl_ipc_shmmni, 0, &shminfo.shmmni, 0,
1073 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMNI, CTL_EOL); 1074 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMNI, CTL_EOL);
1074 sysctl_createv(clog, 0, NULL, NULL, 1075 sysctl_createv(clog, 0, NULL, NULL,
1075 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1076 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1076 CTLTYPE_INT, "shmseg", 1077 CTLTYPE_INT, "shmseg",
1077 SYSCTL_DESCR("Max shared memory segments per process"), 1078 SYSCTL_DESCR("Max shared memory segments per process"),
1078 NULL, 0, &shminfo.shmseg, 0, 1079 NULL, 0, &shminfo.shmseg, 0,
1079 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMSEG, CTL_EOL); 1080 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMSEG, CTL_EOL);
1080 sysctl_createv(clog, 0, NULL, NULL, 1081 sysctl_createv(clog, 0, NULL, NULL,
1081 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1082 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1082 CTLTYPE_INT, "shmmaxpgs", 1083 CTLTYPE_INT, "shmmaxpgs",
1083 SYSCTL_DESCR("Max amount of shared memory in pages"), 1084 SYSCTL_DESCR("Max amount of shared memory in pages"),
1084 sysctl_ipc_shmmaxpgs, 0, &shminfo.shmall, 0, 1085 sysctl_ipc_shmmaxpgs, 0, &shminfo.shmall, 0,
1085 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAXPGS, CTL_EOL); 1086 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAXPGS, CTL_EOL);
1086 sysctl_createv(clog, 0, NULL, NULL, 1087 sysctl_createv(clog, 0, NULL, NULL,
1087 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1088 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1088 CTLTYPE_INT, "shm_use_phys", 1089 CTLTYPE_INT, "shm_use_phys",
1089 SYSCTL_DESCR("Enable/disable locking of shared memory in " 1090 SYSCTL_DESCR("Enable/disable locking of shared memory in "
1090 "physical memory"), NULL, 0, &shm_use_phys, 0, 1091 "physical memory"), NULL, 0, &shm_use_phys, 0,
1091 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMUSEPHYS, CTL_EOL); 1092 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMUSEPHYS, CTL_EOL);
1092} 1093}