Mon Feb 17 20:40:06 2014 UTC, committed by para

replace vmem(9) custom boundary tag allocation with a pool(9)

diff -r1.200 -r1.201 src/sys/kern/subr_pool.c
diff -r1.87 -r1.88 src/sys/kern/subr_vmem.c
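
In outline: instead of hand-carving PAGE_SIZE chunks from kmem_meta_arena into
boundary tags (the old bt_refillglobal), boundary tags now come from a
dedicated pool(9), vmem_btag_pool, whose pages are provided by a custom
pool_allocator backed by kmem_meta_arena. A condensed sketch of the new
per-arena refill path follows; locking, the static bootstrap free list, and
the recursive-refill step are elided, and the name bt_refill_sketch is
illustrative only. The real code is in the subr_vmem.c diff below.

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/vmem.h>
#include <sys/vmem_impl.h>

static struct pool vmem_btag_pool;	/* backed by kmem_meta_arena */

/* Keep at least BT_MINRESERVE tags cached on the arena's private list. */
static int
bt_refill_sketch(vmem_t *vm, vm_flag_t flags)
{
	while (vm->vm_nfreetags <= BT_MINRESERVE) {
		bt_t *bt = pool_get(&vmem_btag_pool,
		    (flags & VM_SLEEP) ? PR_WAITOK : PR_NOWAIT);
		if (bt == NULL)
			return ENOMEM;	/* VM_NOSLEEP and the pool is empty */
		LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
		vm->vm_nfreetags++;
	}
	return 0;
}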

cvs diff -r1.200 -r1.201 src/sys/kern/subr_pool.c

--- src/sys/kern/subr_pool.c 2013/03/11 21:37:54 1.200
+++ src/sys/kern/subr_pool.c 2014/02/17 20:40:06 1.201
@@ -1,14 +1,14 @@
-/*	$NetBSD: subr_pool.c,v 1.200 2013/03/11 21:37:54 pooka Exp $	*/
+/*	$NetBSD: subr_pool.c,v 1.201 2014/02/17 20:40:06 para Exp $	*/
 
 /*-
  * Copyright (c) 1997, 1999, 2000, 2002, 2007, 2008, 2010
  *	The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
  * Simulation Facility, NASA Ames Research Center, and by Andrew Doran.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -22,27 +22,27 @@
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.200 2013/03/11 21:37:54 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.201 2014/02/17 20:40:06 para Exp $");
 
 #include "opt_ddb.h"
 #include "opt_lockdebug.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bitops.h>
 #include <sys/proc.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/vmem.h>
 #include <sys/pool.h>
 #include <sys/syslog.h>
@@ -551,29 +551,30 @@ pool_init(struct pool *pp, size_t size,
 	 * a returned item with its header based on the page address.
 	 * We use 1/16 of the page size and about 8 times of the item
 	 * size as the threshold (XXX: tune)
 	 *
 	 * However, we'll put the header into the page if we can put
 	 * it without wasting any items.
 	 *
 	 * Silently enforce `0 <= ioff < align'.
 	 */
 	pp->pr_itemoffset = ioff %= align;
 	/* See the comment below about reserved bytes. */
 	trysize = palloc->pa_pagesz - ((align - ioff) % align);
 	phsize = ALIGN(sizeof(struct pool_item_header));
-	if ((pp->pr_roflags & (PR_NOTOUCH | PR_NOALIGN)) == 0 &&
+	if (pp->pr_roflags & PR_PHINPAGE ||
+	    ((pp->pr_roflags & (PR_NOTOUCH | PR_NOALIGN)) == 0 &&
 	    (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) ||
-	    trysize / pp->pr_size == (trysize - phsize) / pp->pr_size)) {
+	    trysize / pp->pr_size == (trysize - phsize) / pp->pr_size))) {
 		/* Use the end of the page for the page header */
 		pp->pr_roflags |= PR_PHINPAGE;
 		pp->pr_phoffset = off = palloc->pa_pagesz - phsize;
 	} else {
 		/* The page header will be taken from our page header pool */
 		pp->pr_phoffset = 0;
 		off = palloc->pa_pagesz;
 		SPLAY_INIT(&pp->pr_phtree);
 	}
 
 	/*
 	 * Alignment is to take place at `ioff' within the item. This means
 	 * we must reserve up to `align - 1' bytes on the page to allow
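
The subr_pool.c half of the change makes pool_init() honor a caller-supplied
PR_PHINPAGE: when the flag is passed in, the page header is always placed at
the end of the pool page instead of coming from the separate page header
pool. The boundary tag pool needs this to avoid recursion, since allocating
an off-page header would itself consume boundary tags. Roughly, the decision
reduces to the following sketch (simplified: alignment slack and the
PR_NOTOUCH/PR_NOALIGN cases are ignored, and phinpage_sketch is an
illustrative name, not a function in the tree):

#include <sys/param.h>
#include <sys/pool.h>

/* Should the page header live inside the pool page itself? */
static int
phinpage_sketch(const struct pool *pp, size_t pagesz, size_t phsize)
{
	if (pp->pr_roflags & PR_PHINPAGE)
		return 1;	/* forced in-page, as vmem_btag_pool requests */
	/* small items, or stealing phsize bytes costs no whole item */
	return pp->pr_size < MIN(pagesz / 16, phsize << 3) ||
	    pagesz / pp->pr_size == (pagesz - phsize) / pp->pr_size;
}

For the vmembt pool the second test would usually pass anyway, given how
small sizeof(bt_t) is; passing the flag explicitly makes the no-recursion
guarantee unconditional.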

cvs diff -r1.87 -r1.88 src/sys/kern/subr_vmem.c

--- src/sys/kern/subr_vmem.c 2013/11/22 21:04:11 1.87
+++ src/sys/kern/subr_vmem.c 2014/02/17 20:40:06 1.88
@@ -1,14 +1,14 @@
-/*	$NetBSD: subr_vmem.c,v 1.87 2013/11/22 21:04:11 christos Exp $	*/
+/*	$NetBSD: subr_vmem.c,v 1.88 2014/02/17 20:40:06 para Exp $	*/
 
 /*-
  * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi,
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
@@ -21,30 +21,42 @@
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * reference:
  * -	Magazines and Vmem: Extending the Slab Allocator
  *	to Many CPUs and Arbitrary Resources
  *	http://www.usenix.org/event/usenix01/bonwick.html
+ *
+ * locking & the boundary tag pool:
+ * -	A pool(9) is used for vmem boundary tags.
+ * -	During a pool get call the global vmem_btag_refill_lock is taken,
+ *	to serialize access to the allocation reserve, but no other
+ *	vmem arena locks.
+ * -	During pool_put calls no vmem mutexes are locked.
+ * -	pool_drain doesn't hold the pool's mutex while releasing memory to
+ *	its backing, so there is no interference with any vmem mutexes.
+ * -	The boundary tag pool is forced to put page headers into pool pages
+ *	(PR_PHINPAGE) and not off page, to avoid pool recursion.
+ *	(Due to sizeof(bt_t) that should be the case anyway.)
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.87 2013/11/22 21:04:11 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.88 2014/02/17 20:40:06 para Exp $");
 
 #if defined(_KERNEL)
 #include "opt_ddb.h"
 #endif /* defined(_KERNEL) */
 
 #include <sys/param.h>
 #include <sys/hash.h>
 #include <sys/queue.h>
 #include <sys/bitops.h>
 
 #if defined(_KERNEL)
 #include <sys/systm.h>
 #include <sys/kernel.h>	/* hz */
@@ -65,34 +77,33 @@ __KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,
 #include <errno.h>
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 #include "../sys/vmem.h"
 #include "../sys/vmem_impl.h"
 #endif /* defined(_KERNEL) */
 
 
 #if defined(_KERNEL)
 #include <sys/evcnt.h>
 #define VMEM_EVCNT_DEFINE(name) \
 struct evcnt vmem_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \
-    "vmemev", #name); \
+    "vmem", #name); \
 EVCNT_ATTACH_STATIC(vmem_evcnt_##name);
 #define VMEM_EVCNT_INCR(ev)	vmem_evcnt_##ev.ev_count++
 #define VMEM_EVCNT_DECR(ev)	vmem_evcnt_##ev.ev_count--
 
-VMEM_EVCNT_DEFINE(bt_pages)
-VMEM_EVCNT_DEFINE(bt_count)
-VMEM_EVCNT_DEFINE(bt_inuse)
+VMEM_EVCNT_DEFINE(static_bt_count)
+VMEM_EVCNT_DEFINE(static_bt_inuse)
 
 #define VMEM_CONDVAR_INIT(vm, wchan)	cv_init(&vm->vm_cv, wchan)
 #define VMEM_CONDVAR_DESTROY(vm)	cv_destroy(&vm->vm_cv)
 #define VMEM_CONDVAR_WAIT(vm)		cv_wait(&vm->vm_cv, &vm->vm_lock)
 #define VMEM_CONDVAR_BROADCAST(vm)	cv_broadcast(&vm->vm_cv)
 
 #else /* defined(_KERNEL) */
 
 #define VMEM_EVCNT_INCR(ev)	/* nothing */
 #define VMEM_EVCNT_DECR(ev)	/* nothing */
 
 #define VMEM_CONDVAR_INIT(vm, wchan)	/* nothing */
 #define VMEM_CONDVAR_DESTROY(vm)	/* nothing */
@@ -165,164 +176,182 @@ static LIST_HEAD(, vmem) vmem_list = LIS
  * This reserve is 4 for each arena involved in allocating vmems memory.
  * BT_MAXFREE: don't cache excessive counts of bts in arenas
  */
 #define	STATIC_BT_COUNT 200
 #define	BT_MINRESERVE 4
 #define	BT_MAXFREE 64
 
 static struct vmem_btag static_bts[STATIC_BT_COUNT];
 static int static_bt_count = STATIC_BT_COUNT;
 
 static struct vmem kmem_va_meta_arena_store;
 vmem_t *kmem_va_meta_arena;
 static struct vmem kmem_meta_arena_store;
-vmem_t *kmem_meta_arena;
+vmem_t *kmem_meta_arena = NULL;
 
-static kmutex_t vmem_refill_lock;
+static kmutex_t vmem_btag_refill_lock;
 static kmutex_t vmem_btag_lock;
 static LIST_HEAD(, vmem_btag) vmem_btag_freelist;
 static size_t vmem_btag_freelist_count = 0;
-static size_t vmem_btag_count = STATIC_BT_COUNT;
+static struct pool vmem_btag_pool;
 
 /* ---- boundary tag */
 
-#define	BT_PER_PAGE	(PAGE_SIZE / sizeof(bt_t))
-
 static int bt_refill(vmem_t *vm, vm_flag_t flags);
 
-static int
-bt_refillglobal(vm_flag_t flags)
+static void *
+pool_page_alloc_vmem_meta(struct pool *pp, int flags)
 {
+	const vm_flag_t vflags = (flags & PR_WAITOK) ? VM_SLEEP: VM_NOSLEEP;
 	vmem_addr_t va;
-	bt_t *btp;
-	bt_t *bt;
-	int i;
-
-	mutex_enter(&vmem_refill_lock);
+	int ret;
 
-	mutex_enter(&vmem_btag_lock);
-	if (vmem_btag_freelist_count > 0) {
-		mutex_exit(&vmem_btag_lock);
-		mutex_exit(&vmem_refill_lock);
-		return 0;
-	}
-	mutex_exit(&vmem_btag_lock);
-
-	if (vmem_alloc(kmem_meta_arena, PAGE_SIZE,
-	    (flags & ~VM_FITMASK) | VM_INSTANTFIT | VM_POPULATING, &va) != 0) {
-		mutex_exit(&vmem_refill_lock);
-		return ENOMEM;
-	}
-	VMEM_EVCNT_INCR(bt_pages);
-
-	mutex_enter(&vmem_btag_lock);
-	btp = (void *) va;
-	for (i = 0; i < (BT_PER_PAGE); i++) {
-		bt = btp;
-		memset(bt, 0, sizeof(*bt));
-		LIST_INSERT_HEAD(&vmem_btag_freelist, bt,
-		    bt_freelist);
-		vmem_btag_freelist_count++;
-		vmem_btag_count++;
-		VMEM_EVCNT_INCR(bt_count);
-		btp++;
-	}
-	mutex_exit(&vmem_btag_lock);
+	ret = vmem_alloc(kmem_meta_arena, pp->pr_alloc->pa_pagesz,
+	    (vflags & ~VM_FITMASK) | VM_INSTANTFIT | VM_POPULATING, &va);
 
-	bt_refill(kmem_arena, (flags & ~VM_FITMASK)
-	    | VM_INSTANTFIT | VM_POPULATING);
-	bt_refill(kmem_va_meta_arena, (flags & ~VM_FITMASK)
-	    | VM_INSTANTFIT | VM_POPULATING);
-	bt_refill(kmem_meta_arena, (flags & ~VM_FITMASK)
-	    | VM_INSTANTFIT | VM_POPULATING);
+	return ret ? NULL : (void *)va;
+}
 
-	mutex_exit(&vmem_refill_lock);
+static void
+pool_page_free_vmem_meta(struct pool *pp, void *v)
+{
 
-	return 0;
+	vmem_free(kmem_meta_arena, (vmem_addr_t)v, pp->pr_alloc->pa_pagesz);
 }
 
+/* allocator for vmem-pool metadata */
+struct pool_allocator pool_allocator_vmem_meta = {
+	.pa_alloc = pool_page_alloc_vmem_meta,
+	.pa_free = pool_page_free_vmem_meta,
+	.pa_pagesz = 0
+};
+
 static int
 bt_refill(vmem_t *vm, vm_flag_t flags)
 {
 	bt_t *bt;
 
-	if (!(flags & VM_POPULATING)) {
-		bt_refillglobal(flags);
+	VMEM_LOCK(vm);
+	if (vm->vm_nfreetags > BT_MINRESERVE) {
+		VMEM_UNLOCK(vm);
+		return 0;
 	}
 
-	VMEM_LOCK(vm);
 	mutex_enter(&vmem_btag_lock);
 	while (!LIST_EMPTY(&vmem_btag_freelist) &&
 	    vm->vm_nfreetags <= BT_MINRESERVE) {
 		bt = LIST_FIRST(&vmem_btag_freelist);
 		LIST_REMOVE(bt, bt_freelist);
 		LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
 		vm->vm_nfreetags++;
 		vmem_btag_freelist_count--;
+		VMEM_EVCNT_INCR(static_bt_inuse);
 	}
 	mutex_exit(&vmem_btag_lock);
 
-	if (vm->vm_nfreetags == 0) {
+	while (vm->vm_nfreetags <= BT_MINRESERVE) {
 		VMEM_UNLOCK(vm);
-		return ENOMEM;
+		mutex_enter(&vmem_btag_refill_lock);
+		bt = pool_get(&vmem_btag_pool,
+		    (flags & VM_SLEEP) ? PR_WAITOK: PR_NOWAIT);
+		mutex_exit(&vmem_btag_refill_lock);
+		VMEM_LOCK(vm);
+		if (bt == NULL && (flags & VM_SLEEP) == 0)
+			break;
+		LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
+		vm->vm_nfreetags++;
 	}
+
 	VMEM_UNLOCK(vm);
 
+	if (vm->vm_nfreetags == 0) {
+		return ENOMEM;
+	}
+
+
+	if (kmem_meta_arena != NULL) {
+		bt_refill(kmem_arena, (flags & ~VM_FITMASK)
+		    | VM_INSTANTFIT | VM_POPULATING);
+		bt_refill(kmem_va_meta_arena, (flags & ~VM_FITMASK)
+		    | VM_INSTANTFIT | VM_POPULATING);
+		bt_refill(kmem_meta_arena, (flags & ~VM_FITMASK)
+		    | VM_INSTANTFIT | VM_POPULATING);
+	}
+
 	return 0;
 }
 
-static inline bt_t *
+static bt_t *
 bt_alloc(vmem_t *vm, vm_flag_t flags)
 {
 	bt_t *bt;
-again:
 	VMEM_LOCK(vm);
-	if (vm->vm_nfreetags <= BT_MINRESERVE &&
-	    (flags & VM_POPULATING) == 0) {
+	while (vm->vm_nfreetags <= BT_MINRESERVE && (flags & VM_POPULATING) == 0) {
 		VMEM_UNLOCK(vm);
 		if (bt_refill(vm, VM_NOSLEEP | VM_INSTANTFIT)) {
 			return NULL;
 		}
-		goto again;
+		VMEM_LOCK(vm);
 	}
 	bt = LIST_FIRST(&vm->vm_freetags);
 	LIST_REMOVE(bt, bt_freelist);
 	vm->vm_nfreetags--;
 	VMEM_UNLOCK(vm);
-	VMEM_EVCNT_INCR(bt_inuse);
 
 	return bt;
 }
 
-static inline void
+static void
 bt_free(vmem_t *vm, bt_t *bt)
 {
 
 	VMEM_LOCK(vm);
 	LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
 	vm->vm_nfreetags++;
-	while (vm->vm_nfreetags > BT_MAXFREE) {
-		bt = LIST_FIRST(&vm->vm_freetags);
+	VMEM_UNLOCK(vm);
+}
+
+static void
+bt_freetrim(vmem_t *vm, int freelimit)
+{
+	bt_t *t;
+	LIST_HEAD(, vmem_btag) tofree;
+
+	LIST_INIT(&tofree);
+
+	VMEM_LOCK(vm);
+	while (vm->vm_nfreetags > freelimit) {
+		bt_t *bt = LIST_FIRST(&vm->vm_freetags);
 		LIST_REMOVE(bt, bt_freelist);
 		vm->vm_nfreetags--;
-		mutex_enter(&vmem_btag_lock);
-		LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
-		vmem_btag_freelist_count++;
-		mutex_exit(&vmem_btag_lock);
+		if (bt >= static_bts
+		    && bt < static_bts + sizeof(static_bts)) {
+			mutex_enter(&vmem_btag_lock);
+			LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
+			vmem_btag_freelist_count++;
+			mutex_exit(&vmem_btag_lock);
+			VMEM_EVCNT_DECR(static_bt_inuse);
+		} else {
+			LIST_INSERT_HEAD(&tofree, bt, bt_freelist);
+		}
 	}
+
 	VMEM_UNLOCK(vm);
-	VMEM_EVCNT_DECR(bt_inuse);
+	while (!LIST_EMPTY(&tofree)) {
+		t = LIST_FIRST(&tofree);
+		LIST_REMOVE(t, bt_freelist);
+		pool_put(&vmem_btag_pool, t);
+	}
 }
-
 #endif /* defined(_KERNEL) */
 
 /*
  * freelist[0] ... [1, 1]
  * freelist[1] ... [2, 3]
  * freelist[2] ... [4, 7]
  * freelist[3] ... [8, 15]
  *	:
  * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
  *	:
  */
 
 static struct vmem_freelist *
@@ -593,51 +622,54 @@ qc_destroy(vmem_t *vm)
 		}
 		pool_cache_destroy(qc->qc_cache);
 		prevqc = qc;
 	}
 }
 #endif
 
 #if defined(_KERNEL)
 static void
 vmem_bootstrap(void)
 {
 
 	mutex_init(&vmem_list_lock, MUTEX_DEFAULT, IPL_VM);
-	mutex_init(&vmem_refill_lock, MUTEX_DEFAULT, IPL_VM);
 	mutex_init(&vmem_btag_lock, MUTEX_DEFAULT, IPL_VM);
+	mutex_init(&vmem_btag_refill_lock, MUTEX_DEFAULT, IPL_VM);
 
 	while (static_bt_count-- > 0) {
 		bt_t *bt = &static_bts[static_bt_count];
 		LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
-		VMEM_EVCNT_INCR(bt_count);
+		VMEM_EVCNT_INCR(static_bt_count);
 		vmem_btag_freelist_count++;
 	}
 	vmem_bootstrapped = TRUE;
 }
 
 void
 vmem_subsystem_init(vmem_t *vm)
 {
 
 	kmem_va_meta_arena = vmem_init(&kmem_va_meta_arena_store, "vmem-va",
 	    0, 0, PAGE_SIZE, vmem_alloc, vmem_free, vm,
 	    0, VM_NOSLEEP | VM_BOOTSTRAP | VM_LARGEIMPORT,
 	    IPL_VM);
 
 	kmem_meta_arena = vmem_init(&kmem_meta_arena_store, "vmem-meta",
 	    0, 0, PAGE_SIZE,
 	    uvm_km_kmem_alloc, uvm_km_kmem_free, kmem_va_meta_arena,
 	    0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);
+
+	pool_init(&vmem_btag_pool, sizeof(bt_t), 0, 0, PR_PHINPAGE,
+	    "vmembt", &pool_allocator_vmem_meta, IPL_VM);
 }
 #endif /* defined(_KERNEL) */
 
 static int
 vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
     int spanbttype)
 {
 	bt_t *btspan;
 	bt_t *btfree;
 
 	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
 	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
 	KASSERT(spanbttype == BT_TYPE_SPAN ||
@@ -685,37 +717,27 @@ vmem_destroy1(vmem_t *vm)
 			bt_t *bt;
 
 			while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
 				KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);
 				bt_free(vm, bt);
 			}
 		}
 		if (vm->vm_hashlist != &vm->vm_hash0) {
 			xfree(vm->vm_hashlist,
 			    sizeof(struct vmem_hashlist *) * vm->vm_hashsize);
 		}
 	}
 
-	while (vm->vm_nfreetags > 0) {
-		bt_t *bt = LIST_FIRST(&vm->vm_freetags);
-		LIST_REMOVE(bt, bt_freelist);
-		vm->vm_nfreetags--;
-		mutex_enter(&vmem_btag_lock);
-#if defined (_KERNEL)
-		LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
-		vmem_btag_freelist_count++;
-#endif /* defined(_KERNEL) */
-		mutex_exit(&vmem_btag_lock);
-	}
+	bt_freetrim(vm, 0);
 
 	VMEM_CONDVAR_DESTROY(vm);
 	VMEM_LOCK_DESTROY(vm);
 	xfree(vm, sizeof(*vm));
 }
 
 static int
 vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)
 {
 	vmem_addr_t addr;
 	int rc;
 
 	if (vm->vm_importfn == NULL) {
@@ -1301,26 +1323,28 @@ vmem_xfree(vmem_t *vm, vmem_addr_t addr,
 		VMEM_UNLOCK(vm);
 		(*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize);
 	} else {
 		bt_insfree(vm, bt);
 		VMEM_CONDVAR_BROADCAST(vm);
 		VMEM_UNLOCK(vm);
 	}
 
 	while (!LIST_EMPTY(&tofree)) {
 		t = LIST_FIRST(&tofree);
 		LIST_REMOVE(t, bt_freelist);
 		bt_free(vm, t);
 	}
+
+	bt_freetrim(vm, BT_MAXFREE);
 }
 
 /*
  * vmem_add:
  *
  * => caller must ensure appropriate spl,
  *    if the arena can be accessed from interrupt context.
  */
 
 int
 vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
 {
 
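
Freed tags are now trimmed through the new bt_freetrim(): vmem_xfree() caps
each arena's cache at BT_MAXFREE tags, and vmem_destroy1() trims to zero.
Trimming must distinguish the STATIC_BT_COUNT bootstrap tags, which go back
on the global static free list, from pool-allocated tags, which are returned
with pool_put(). A minimal sketch of that classification; bt_is_static_sketch
is an illustrative name, and note that the committed check spells the upper
bound as static_bts + sizeof(static_bts), which C pointer arithmetic scales
by sizeof(bt_t), so the element-count bound below is the tight form:

#include <stdbool.h>

/* Does this boundary tag come from the static bootstrap array? */
static bool
bt_is_static_sketch(const bt_t *bt)
{
	return bt >= static_bts && bt < &static_bts[STATIC_BT_COUNT];
}

A tag failing this test is safe to hand to pool_put(&vmem_btag_pool, bt); a
static tag must instead go back on vmem_btag_freelist, since it was never
allocated from the pool.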