Sat Jan 8 09:40:05 2011 UTC
Do a minidehumanizenumber for RUMP_MEMLIMIT.  Now you can set it
to e.g. 16m instead of having to type out 16777216.


(pooka)
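The limit is read from the host environment via rumpuser_getenv(), so after this change it can be given as e.g. RUMP_MEMLIMIT=16m instead of RUMP_MEMLIMIT=16777216. As a rough standalone illustration of the "mini-dehumanize-number" parsing the hunk below adds to uvm_init() (the function and program here are made up for the sketch, not part of the NetBSD tree):

    /* sketch only: same k/m/g suffix handling as the new uvm_init() code */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static unsigned long
    parse_memlimit(const char *buf)     /* returns 0 on bad input */
    {
        unsigned long tmp, mult = 1;
        char *ep;

        tmp = strtoul(buf, &ep, 10);
        if (strlen(ep) > 1)
            return 0;
        switch (*ep) {
        case 'k': mult = 1024UL; break;
        case 'm': mult = 1024UL*1024; break;
        case 'g': mult = 1024UL*1024*1024; break;
        case '\0': break;
        default: return 0;
        }
        if (tmp != 0 && (tmp * mult) / mult != tmp)
            return 0;                   /* overflow, also checked by the patch */
        return tmp * mult;
    }

    int
    main(void)
    {
        printf("%lu\n", parse_memlimit("16m"));   /* prints 16777216 */
        return 0;
    }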
diff -r1.104 -r1.105 src/sys/rump/librump/rumpkern/vm.c

cvs diff -r1.104 -r1.105 src/sys/rump/librump/rumpkern/vm.c

--- src/sys/rump/librump/rumpkern/vm.c 2010/12/01 20:29:57 1.104
+++ src/sys/rump/librump/rumpkern/vm.c 2011/01/08 09:40:05 1.105
@@ -1,1123 +1,1152 @@
1/* $NetBSD: vm.c,v 1.104 2010/12/01 20:29:57 pooka Exp $ */ 1/* $NetBSD: vm.c,v 1.105 2011/01/08 09:40:05 pooka Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved. 4 * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved.
5 * 5 *
6 * Development of this software was supported by 6 * Development of this software was supported by
7 * The Finnish Cultural Foundation and the Research Foundation of 7 * The Finnish Cultural Foundation and the Research Foundation of
8 * The Helsinki University of Technology. 8 * The Helsinki University of Technology.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
20 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE. 29 * SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * Virtual memory emulation routines. 33 * Virtual memory emulation routines.
34 */ 34 */
35 35
36/* 36/*
37 * XXX: we abuse pg->uanon for the virtual address of the storage 37 * XXX: we abuse pg->uanon for the virtual address of the storage
38 * for each page. phys_addr would fit the job description better, 38 * for each page. phys_addr would fit the job description better,
39 * except that it will create unnecessary lossage on some platforms 39 * except that it will create unnecessary lossage on some platforms
40 * due to not being a pointer type. 40 * due to not being a pointer type.
41 */ 41 */
42 42
43#include <sys/cdefs.h> 43#include <sys/cdefs.h>
44__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.104 2010/12/01 20:29:57 pooka Exp $"); 44__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.105 2011/01/08 09:40:05 pooka Exp $");
45 45
46#include <sys/param.h> 46#include <sys/param.h>
47#include <sys/atomic.h> 47#include <sys/atomic.h>
48#include <sys/buf.h> 48#include <sys/buf.h>
49#include <sys/kernel.h> 49#include <sys/kernel.h>
50#include <sys/kmem.h> 50#include <sys/kmem.h>
51#include <sys/mman.h> 51#include <sys/mman.h>
52#include <sys/null.h> 52#include <sys/null.h>
53#include <sys/vnode.h> 53#include <sys/vnode.h>
54 54
55#include <machine/pmap.h> 55#include <machine/pmap.h>
56 56
57#include <rump/rumpuser.h> 57#include <rump/rumpuser.h>
58 58
59#include <uvm/uvm.h> 59#include <uvm/uvm.h>
60#include <uvm/uvm_ddb.h> 60#include <uvm/uvm_ddb.h>
61#include <uvm/uvm_pdpolicy.h> 61#include <uvm/uvm_pdpolicy.h>
62#include <uvm/uvm_prot.h> 62#include <uvm/uvm_prot.h>
63#include <uvm/uvm_readahead.h> 63#include <uvm/uvm_readahead.h>
64 64
65#include "rump_private.h" 65#include "rump_private.h"
66#include "rump_vfs_private.h" 66#include "rump_vfs_private.h"
67 67
68kmutex_t uvm_pageqlock; 68kmutex_t uvm_pageqlock;
69kmutex_t uvm_swap_data_lock; 69kmutex_t uvm_swap_data_lock;
70 70
71struct uvmexp uvmexp; 71struct uvmexp uvmexp;
72int *uvmexp_pagesize; 72int *uvmexp_pagesize;
73int *uvmexp_pagemask; 73int *uvmexp_pagemask;
74int *uvmexp_pageshift; 74int *uvmexp_pageshift;
75struct uvm uvm; 75struct uvm uvm;
76 76
77struct vm_map rump_vmmap; 77struct vm_map rump_vmmap;
78static struct vm_map_kernel kmem_map_store; 78static struct vm_map_kernel kmem_map_store;
79struct vm_map *kmem_map = &kmem_map_store.vmk_map; 79struct vm_map *kmem_map = &kmem_map_store.vmk_map;
80 80
81static struct vm_map_kernel kernel_map_store; 81static struct vm_map_kernel kernel_map_store;
82struct vm_map *kernel_map = &kernel_map_store.vmk_map; 82struct vm_map *kernel_map = &kernel_map_store.vmk_map;
83 83
84static unsigned int pdaemon_waiters; 84static unsigned int pdaemon_waiters;
85static kmutex_t pdaemonmtx; 85static kmutex_t pdaemonmtx;
86static kcondvar_t pdaemoncv, oomwait; 86static kcondvar_t pdaemoncv, oomwait;
87 87
88unsigned long rump_physmemlimit = RUMPMEM_UNLIMITED; 88unsigned long rump_physmemlimit = RUMPMEM_UNLIMITED;
89static unsigned long curphysmem; 89static unsigned long curphysmem;
90static unsigned long dddlim; /* 90% of memory limit used */ 90static unsigned long dddlim; /* 90% of memory limit used */
91#define NEED_PAGEDAEMON() \ 91#define NEED_PAGEDAEMON() \
92 (rump_physmemlimit != RUMPMEM_UNLIMITED && curphysmem > dddlim) 92 (rump_physmemlimit != RUMPMEM_UNLIMITED && curphysmem > dddlim)
93 93
94/* 94/*
95 * Try to free two pages worth of pages from objects. 95 * Try to free two pages worth of pages from objects.
96 * If this succesfully frees a full page cache page, we'll 96 * If this succesfully frees a full page cache page, we'll
 97 * free the released page plus PAGE_SIZE/sizeof(vm_page). 97 * free the released page plus PAGE_SIZE/sizeof(vm_page).
98 */ 98 */
99#define PAGEDAEMON_OBJCHUNK (2*PAGE_SIZE / sizeof(struct vm_page)) 99#define PAGEDAEMON_OBJCHUNK (2*PAGE_SIZE / sizeof(struct vm_page))
100 100
101/* 101/*
102 * Keep a list of least recently used pages. Since the only way a 102 * Keep a list of least recently used pages. Since the only way a
103 * rump kernel can "access" a page is via lookup, we put the page 103 * rump kernel can "access" a page is via lookup, we put the page
104 * at the back of queue every time a lookup for it is done. If the 104 * at the back of queue every time a lookup for it is done. If the
105 * page is in front of this global queue and we're short of memory,  105 * page is in front of this global queue and we're short of memory,
106 * it's a candidate for pageout. 106 * it's a candidate for pageout.
107 */ 107 */
108static struct pglist vmpage_lruqueue; 108static struct pglist vmpage_lruqueue;
109static unsigned vmpage_onqueue; 109static unsigned vmpage_onqueue;
110 110
111static int 111static int
112pg_compare_key(void *ctx, const void *n, const void *key) 112pg_compare_key(void *ctx, const void *n, const void *key)
113{ 113{
114 voff_t a = ((const struct vm_page *)n)->offset; 114 voff_t a = ((const struct vm_page *)n)->offset;
115 voff_t b = *(const voff_t *)key; 115 voff_t b = *(const voff_t *)key;
116 116
117 if (a < b) 117 if (a < b)
118 return -1; 118 return -1;
119 else if (a > b) 119 else if (a > b)
120 return 1; 120 return 1;
121 else 121 else
122 return 0; 122 return 0;
123} 123}
124 124
125static int 125static int
126pg_compare_nodes(void *ctx, const void *n1, const void *n2) 126pg_compare_nodes(void *ctx, const void *n1, const void *n2)
127{ 127{
128 128
129 return pg_compare_key(ctx, n1, &((const struct vm_page *)n2)->offset); 129 return pg_compare_key(ctx, n1, &((const struct vm_page *)n2)->offset);
130} 130}
131 131
132const rb_tree_ops_t uvm_page_tree_ops = { 132const rb_tree_ops_t uvm_page_tree_ops = {
133 .rbto_compare_nodes = pg_compare_nodes, 133 .rbto_compare_nodes = pg_compare_nodes,
134 .rbto_compare_key = pg_compare_key, 134 .rbto_compare_key = pg_compare_key,
135 .rbto_node_offset = offsetof(struct vm_page, rb_node), 135 .rbto_node_offset = offsetof(struct vm_page, rb_node),
136 .rbto_context = NULL 136 .rbto_context = NULL
137}; 137};
138 138
139/* 139/*
140 * vm pages  140 * vm pages
141 */ 141 */
142 142
143static int 143static int
144pgctor(void *arg, void *obj, int flags) 144pgctor(void *arg, void *obj, int flags)
145{ 145{
146 struct vm_page *pg = obj; 146 struct vm_page *pg = obj;
147 147
148 memset(pg, 0, sizeof(*pg)); 148 memset(pg, 0, sizeof(*pg));
149 pg->uanon = rump_hypermalloc(PAGE_SIZE, PAGE_SIZE, 149 pg->uanon = rump_hypermalloc(PAGE_SIZE, PAGE_SIZE,
150 (flags & PR_WAITOK) == PR_WAITOK, "pgalloc"); 150 (flags & PR_WAITOK) == PR_WAITOK, "pgalloc");
151 return pg->uanon == NULL; 151 return pg->uanon == NULL;
152} 152}
153 153
154static void 154static void
155pgdtor(void *arg, void *obj) 155pgdtor(void *arg, void *obj)
156{ 156{
157 struct vm_page *pg = obj; 157 struct vm_page *pg = obj;
158 158
159 rump_hyperfree(pg->uanon, PAGE_SIZE); 159 rump_hyperfree(pg->uanon, PAGE_SIZE);
160} 160}
161 161
162static struct pool_cache pagecache; 162static struct pool_cache pagecache;
163 163
164/* 164/*
165 * Called with the object locked. We don't support anons. 165 * Called with the object locked. We don't support anons.
166 */ 166 */
167struct vm_page * 167struct vm_page *
168uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon, 168uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon,
169 int flags, int strat, int free_list) 169 int flags, int strat, int free_list)
170{ 170{
171 struct vm_page *pg; 171 struct vm_page *pg;
172 172
173 KASSERT(uobj && mutex_owned(&uobj->vmobjlock)); 173 KASSERT(uobj && mutex_owned(&uobj->vmobjlock));
174 KASSERT(anon == NULL); 174 KASSERT(anon == NULL);
175 175
176 pg = pool_cache_get(&pagecache, PR_NOWAIT); 176 pg = pool_cache_get(&pagecache, PR_NOWAIT);
177 if (__predict_false(pg == NULL)) { 177 if (__predict_false(pg == NULL)) {
178 return NULL; 178 return NULL;
179 } 179 }
180 180
181 pg->offset = off; 181 pg->offset = off;
182 pg->uobject = uobj; 182 pg->uobject = uobj;
183 183
184 pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE; 184 pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;
185 if (flags & UVM_PGA_ZERO) { 185 if (flags & UVM_PGA_ZERO) {
186 uvm_pagezero(pg); 186 uvm_pagezero(pg);
187 } 187 }
188 188
189 TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue); 189 TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
190 (void)rb_tree_insert_node(&uobj->rb_tree, pg); 190 (void)rb_tree_insert_node(&uobj->rb_tree, pg);
191 191
192 /* 192 /*
193 * Don't put anons on the LRU page queue. We can't flush them 193 * Don't put anons on the LRU page queue. We can't flush them
194 * (there's no concept of swap in a rump kernel), so no reason 194 * (there's no concept of swap in a rump kernel), so no reason
195 * to bother with them. 195 * to bother with them.
196 */ 196 */
197 if (!UVM_OBJ_IS_AOBJ(uobj)) { 197 if (!UVM_OBJ_IS_AOBJ(uobj)) {
198 atomic_inc_uint(&vmpage_onqueue); 198 atomic_inc_uint(&vmpage_onqueue);
199 mutex_enter(&uvm_pageqlock); 199 mutex_enter(&uvm_pageqlock);
200 TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue); 200 TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue);
201 mutex_exit(&uvm_pageqlock); 201 mutex_exit(&uvm_pageqlock);
202 } 202 }
203 203
204 uobj->uo_npages++; 204 uobj->uo_npages++;
205 205
206 return pg; 206 return pg;
207} 207}
208 208
209/* 209/*
210 * Release a page. 210 * Release a page.
211 * 211 *
212 * Called with the vm object locked. 212 * Called with the vm object locked.
213 */ 213 */
214void 214void
215uvm_pagefree(struct vm_page *pg) 215uvm_pagefree(struct vm_page *pg)
216{ 216{
217 struct uvm_object *uobj = pg->uobject; 217 struct uvm_object *uobj = pg->uobject;
218 218
219 KASSERT(mutex_owned(&uvm_pageqlock)); 219 KASSERT(mutex_owned(&uvm_pageqlock));
220 KASSERT(mutex_owned(&uobj->vmobjlock)); 220 KASSERT(mutex_owned(&uobj->vmobjlock));
221 221
222 if (pg->flags & PG_WANTED) 222 if (pg->flags & PG_WANTED)
223 wakeup(pg); 223 wakeup(pg);
224 224
225 TAILQ_REMOVE(&uobj->memq, pg, listq.queue); 225 TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
226 226
227 uobj->uo_npages--; 227 uobj->uo_npages--;
228 rb_tree_remove_node(&uobj->rb_tree, pg); 228 rb_tree_remove_node(&uobj->rb_tree, pg);
229 229
230 if (!UVM_OBJ_IS_AOBJ(uobj)) { 230 if (!UVM_OBJ_IS_AOBJ(uobj)) {
231 TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue); 231 TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
232 atomic_dec_uint(&vmpage_onqueue); 232 atomic_dec_uint(&vmpage_onqueue);
233 } 233 }
234 234
235 pool_cache_put(&pagecache, pg); 235 pool_cache_put(&pagecache, pg);
236} 236}
237 237
238void 238void
239uvm_pagezero(struct vm_page *pg) 239uvm_pagezero(struct vm_page *pg)
240{ 240{
241 241
242 pg->flags &= ~PG_CLEAN; 242 pg->flags &= ~PG_CLEAN;
243 memset((void *)pg->uanon, 0, PAGE_SIZE); 243 memset((void *)pg->uanon, 0, PAGE_SIZE);
244} 244}
245 245
246/* 246/*
247 * Misc routines 247 * Misc routines
248 */ 248 */
249 249
250static kmutex_t pagermtx; 250static kmutex_t pagermtx;
251 251
252void 252void
253uvm_init(void) 253uvm_init(void)
254{ 254{
255 char buf[64]; 255 char buf[64];
256 int error; 256 int error;
257 257
258 if (rumpuser_getenv("RUMP_MEMLIMIT", buf, sizeof(buf), &error) == 0) { 258 if (rumpuser_getenv("RUMP_MEMLIMIT", buf, sizeof(buf), &error) == 0) {
259 rump_physmemlimit = strtoll(buf, NULL, 10); 259 unsigned long tmp;
 260 char *ep;
 261 int mult;
 262
 263 tmp = strtoll(buf, &ep, 10);
 264 if (strlen(ep) > 1)
 265 panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);
 266
 267 /* mini-dehumanize-number */
 268 mult = 1;
 269 switch (*ep) {
 270 case 'k':
 271 mult = 1024;
 272 break;
 273 case 'm':
 274 mult = 1024*1024;
 275 break;
 276 case 'g':
 277 mult = 1024*1024*1024;
 278 break;
 279 case 0:
 280 break;
 281 default:
 282 panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);
 283 }
 284 rump_physmemlimit = tmp * mult;
 285
 286 if (rump_physmemlimit / mult != tmp)
 287 panic("uvm_init: RUMP_MEMLIMIT overflow: %s", buf);
260 /* it's not like we'd get far with, say, 1 byte, but ... */ 288 /* it's not like we'd get far with, say, 1 byte, but ... */
261 if (rump_physmemlimit == 0) 289 if (rump_physmemlimit == 0)
262 panic("uvm_init: no memory available"); 290 panic("uvm_init: no memory");
 291
263#define HUMANIZE_BYTES 9 292#define HUMANIZE_BYTES 9
264 CTASSERT(sizeof(buf) >= HUMANIZE_BYTES); 293 CTASSERT(sizeof(buf) >= HUMANIZE_BYTES);
265 format_bytes(buf, HUMANIZE_BYTES, rump_physmemlimit); 294 format_bytes(buf, HUMANIZE_BYTES, rump_physmemlimit);
266#undef HUMANIZE_BYTES 295#undef HUMANIZE_BYTES
267 dddlim = 9 * (rump_physmemlimit / 10); 296 dddlim = 9 * (rump_physmemlimit / 10);
268 } else { 297 } else {
269 strlcpy(buf, "unlimited (host limit)", sizeof(buf)); 298 strlcpy(buf, "unlimited (host limit)", sizeof(buf));
270 } 299 }
271 aprint_verbose("total memory = %s\n", buf); 300 aprint_verbose("total memory = %s\n", buf);
272 301
273 TAILQ_INIT(&vmpage_lruqueue); 302 TAILQ_INIT(&vmpage_lruqueue);
274 303
275 uvmexp.free = 1024*1024; /* XXX: arbitrary & not updated */ 304 uvmexp.free = 1024*1024; /* XXX: arbitrary & not updated */
276 305
277 mutex_init(&pagermtx, MUTEX_DEFAULT, 0); 306 mutex_init(&pagermtx, MUTEX_DEFAULT, 0);
278 mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0); 307 mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0);
279 mutex_init(&uvm_swap_data_lock, MUTEX_DEFAULT, 0); 308 mutex_init(&uvm_swap_data_lock, MUTEX_DEFAULT, 0);
280 309
281 mutex_init(&pdaemonmtx, MUTEX_DEFAULT, 0); 310 mutex_init(&pdaemonmtx, MUTEX_DEFAULT, 0);
282 cv_init(&pdaemoncv, "pdaemon"); 311 cv_init(&pdaemoncv, "pdaemon");
283 cv_init(&oomwait, "oomwait"); 312 cv_init(&oomwait, "oomwait");
284 313
285 kernel_map->pmap = pmap_kernel(); 314 kernel_map->pmap = pmap_kernel();
286 callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM); 315 callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM);
287 kmem_map->pmap = pmap_kernel(); 316 kmem_map->pmap = pmap_kernel();
288 callback_head_init(&kmem_map_store.vmk_reclaim_callback, IPL_VM); 317 callback_head_init(&kmem_map_store.vmk_reclaim_callback, IPL_VM);
289 318
290 pool_cache_bootstrap(&pagecache, sizeof(struct vm_page), 0, 0, 0, 319 pool_cache_bootstrap(&pagecache, sizeof(struct vm_page), 0, 0, 0,
291 "page$", NULL, IPL_NONE, pgctor, pgdtor, NULL); 320 "page$", NULL, IPL_NONE, pgctor, pgdtor, NULL);
292} 321}
293 322
294void 323void
295uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax) 324uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax)
296{ 325{
297 326
298 vm->vm_map.pmap = pmap_kernel(); 327 vm->vm_map.pmap = pmap_kernel();
299 vm->vm_refcnt = 1; 328 vm->vm_refcnt = 1;
300} 329}
301 330
302void 331void
303uvm_pagewire(struct vm_page *pg) 332uvm_pagewire(struct vm_page *pg)
304{ 333{
305 334
306 /* nada */ 335 /* nada */
307} 336}
308 337
309void 338void
310uvm_pageunwire(struct vm_page *pg) 339uvm_pageunwire(struct vm_page *pg)
311{ 340{
312 341
313 /* nada */ 342 /* nada */
314} 343}
315 344
316/* where's your schmonz now? */ 345/* where's your schmonz now? */
317#define PUNLIMIT(a) \ 346#define PUNLIMIT(a) \
318p->p_rlimit[a].rlim_cur = p->p_rlimit[a].rlim_max = RLIM_INFINITY; 347p->p_rlimit[a].rlim_cur = p->p_rlimit[a].rlim_max = RLIM_INFINITY;
319void 348void
320uvm_init_limits(struct proc *p) 349uvm_init_limits(struct proc *p)
321{ 350{
322 351
323 PUNLIMIT(RLIMIT_STACK); 352 PUNLIMIT(RLIMIT_STACK);
324 PUNLIMIT(RLIMIT_DATA); 353 PUNLIMIT(RLIMIT_DATA);
325 PUNLIMIT(RLIMIT_RSS); 354 PUNLIMIT(RLIMIT_RSS);
326 PUNLIMIT(RLIMIT_AS); 355 PUNLIMIT(RLIMIT_AS);
327 /* nice, cascade */ 356 /* nice, cascade */
328} 357}
329#undef PUNLIMIT 358#undef PUNLIMIT
330 359
331/* 360/*
332 * This satisfies the "disgusting mmap hack" used by proplib. 361 * This satisfies the "disgusting mmap hack" used by proplib.
333 * We probably should grow some more assertables to make sure we're 362 * We probably should grow some more assertables to make sure we're
334 * not satisfying anything we shouldn't be satisfying. 363 * not satisfying anything we shouldn't be satisfying.
335 */ 364 */
336int 365int
337uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot, 366uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
338 vm_prot_t maxprot, int flags, void *handle, voff_t off, vsize_t locklim) 367 vm_prot_t maxprot, int flags, void *handle, voff_t off, vsize_t locklim)
339{ 368{
340 void *uaddr; 369 void *uaddr;
341 int error; 370 int error;
342 371
343 if (prot != (VM_PROT_READ | VM_PROT_WRITE)) 372 if (prot != (VM_PROT_READ | VM_PROT_WRITE))
344 panic("uvm_mmap() variant unsupported"); 373 panic("uvm_mmap() variant unsupported");
345 if (flags != (MAP_PRIVATE | MAP_ANON)) 374 if (flags != (MAP_PRIVATE | MAP_ANON))
346 panic("uvm_mmap() variant unsupported"); 375 panic("uvm_mmap() variant unsupported");
347 376
348 /* no reason in particular, but cf. uvm_default_mapaddr() */ 377 /* no reason in particular, but cf. uvm_default_mapaddr() */
349 if (*addr != 0) 378 if (*addr != 0)
350 panic("uvm_mmap() variant unsupported"); 379 panic("uvm_mmap() variant unsupported");
351 380
352 if (curproc->p_vmspace == vmspace_kernel()) { 381 if (curproc->p_vmspace == vmspace_kernel()) {
353 uaddr = rumpuser_anonmmap(NULL, size, 0, 0, &error); 382 uaddr = rumpuser_anonmmap(NULL, size, 0, 0, &error);
354 } else { 383 } else {
355 error = rumpuser_sp_anonmmap(curproc->p_vmspace->vm_map.pmap, 384 error = rumpuser_sp_anonmmap(curproc->p_vmspace->vm_map.pmap,
356 size, &uaddr); 385 size, &uaddr);
357 } 386 }
358 if (uaddr == NULL) 387 if (uaddr == NULL)
359 return error; 388 return error;
360 389
361 *addr = (vaddr_t)uaddr; 390 *addr = (vaddr_t)uaddr;
362 return 0; 391 return 0;
363} 392}
364 393
365struct pagerinfo { 394struct pagerinfo {
366 vaddr_t pgr_kva; 395 vaddr_t pgr_kva;
367 int pgr_npages; 396 int pgr_npages;
368 struct vm_page **pgr_pgs; 397 struct vm_page **pgr_pgs;
369 bool pgr_read; 398 bool pgr_read;
370 399
371 LIST_ENTRY(pagerinfo) pgr_entries; 400 LIST_ENTRY(pagerinfo) pgr_entries;
372}; 401};
373static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist); 402static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist);
374 403
375/* 404/*
376 * Pager "map" in routine. Instead of mapping, we allocate memory 405 * Pager "map" in routine. Instead of mapping, we allocate memory
377 * and copy page contents there. Not optimal or even strictly 406 * and copy page contents there. Not optimal or even strictly
378 * correct (the caller might modify the page contents after mapping 407 * correct (the caller might modify the page contents after mapping
379 * them in), but what the heck. Assumes UVMPAGER_MAPIN_WAITOK. 408 * them in), but what the heck. Assumes UVMPAGER_MAPIN_WAITOK.
380 */ 409 */
381vaddr_t 410vaddr_t
382uvm_pagermapin(struct vm_page **pgs, int npages, int flags) 411uvm_pagermapin(struct vm_page **pgs, int npages, int flags)
383{ 412{
384 struct pagerinfo *pgri; 413 struct pagerinfo *pgri;
385 vaddr_t curkva; 414 vaddr_t curkva;
386 int i; 415 int i;
387 416
388 /* allocate structures */ 417 /* allocate structures */
389 pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP); 418 pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP);
390 pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP); 419 pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP);
391 pgri->pgr_npages = npages; 420 pgri->pgr_npages = npages;
392 pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP); 421 pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP);
393 pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0; 422 pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0;
394 423
395 /* copy contents to "mapped" memory */ 424 /* copy contents to "mapped" memory */
396 for (i = 0, curkva = pgri->pgr_kva; 425 for (i = 0, curkva = pgri->pgr_kva;
397 i < npages; 426 i < npages;
398 i++, curkva += PAGE_SIZE) { 427 i++, curkva += PAGE_SIZE) {
399 /* 428 /*
400 * We need to copy the previous contents of the pages to 429 * We need to copy the previous contents of the pages to
401 * the window even if we are reading from the 430 * the window even if we are reading from the
402 * device, since the device might not fill the contents of 431 * device, since the device might not fill the contents of
403 * the full mapped range and we will end up corrupting 432 * the full mapped range and we will end up corrupting
404 * data when we unmap the window. 433 * data when we unmap the window.
405 */ 434 */
406 memcpy((void*)curkva, pgs[i]->uanon, PAGE_SIZE); 435 memcpy((void*)curkva, pgs[i]->uanon, PAGE_SIZE);
407 pgri->pgr_pgs[i] = pgs[i]; 436 pgri->pgr_pgs[i] = pgs[i];
408 } 437 }
409 438
410 mutex_enter(&pagermtx); 439 mutex_enter(&pagermtx);
411 LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries); 440 LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries);
412 mutex_exit(&pagermtx); 441 mutex_exit(&pagermtx);
413 442
414 return pgri->pgr_kva; 443 return pgri->pgr_kva;
415} 444}
416 445
417/* 446/*
418 * map out the pager window. return contents from VA to page storage 447 * map out the pager window. return contents from VA to page storage
419 * and free structures. 448 * and free structures.
420 * 449 *
421 * Note: does not currently support partial frees 450 * Note: does not currently support partial frees
422 */ 451 */
423void 452void
424uvm_pagermapout(vaddr_t kva, int npages) 453uvm_pagermapout(vaddr_t kva, int npages)
425{ 454{
426 struct pagerinfo *pgri; 455 struct pagerinfo *pgri;
427 vaddr_t curkva; 456 vaddr_t curkva;
428 int i; 457 int i;
429 458
430 mutex_enter(&pagermtx); 459 mutex_enter(&pagermtx);
431 LIST_FOREACH(pgri, &pagerlist, pgr_entries) { 460 LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
432 if (pgri->pgr_kva == kva) 461 if (pgri->pgr_kva == kva)
433 break; 462 break;
434 } 463 }
435 KASSERT(pgri); 464 KASSERT(pgri);
436 if (pgri->pgr_npages != npages) 465 if (pgri->pgr_npages != npages)
437 panic("uvm_pagermapout: partial unmapping not supported"); 466 panic("uvm_pagermapout: partial unmapping not supported");
438 LIST_REMOVE(pgri, pgr_entries); 467 LIST_REMOVE(pgri, pgr_entries);
439 mutex_exit(&pagermtx); 468 mutex_exit(&pagermtx);
440 469
441 if (pgri->pgr_read) { 470 if (pgri->pgr_read) {
442 for (i = 0, curkva = pgri->pgr_kva; 471 for (i = 0, curkva = pgri->pgr_kva;
443 i < pgri->pgr_npages; 472 i < pgri->pgr_npages;
444 i++, curkva += PAGE_SIZE) { 473 i++, curkva += PAGE_SIZE) {
445 memcpy(pgri->pgr_pgs[i]->uanon,(void*)curkva,PAGE_SIZE); 474 memcpy(pgri->pgr_pgs[i]->uanon,(void*)curkva,PAGE_SIZE);
446 } 475 }
447 } 476 }
448 477
449 kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *)); 478 kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *));
450 kmem_free((void*)pgri->pgr_kva, npages * PAGE_SIZE); 479 kmem_free((void*)pgri->pgr_kva, npages * PAGE_SIZE);
451 kmem_free(pgri, sizeof(*pgri)); 480 kmem_free(pgri, sizeof(*pgri));
452} 481}
453 482
454/* 483/*
455 * convert va in pager window to page structure. 484 * convert va in pager window to page structure.
456 * XXX: how expensive is this (global lock, list traversal)? 485 * XXX: how expensive is this (global lock, list traversal)?
457 */ 486 */
458struct vm_page * 487struct vm_page *
459uvm_pageratop(vaddr_t va) 488uvm_pageratop(vaddr_t va)
460{ 489{
461 struct pagerinfo *pgri; 490 struct pagerinfo *pgri;
462 struct vm_page *pg = NULL; 491 struct vm_page *pg = NULL;
463 int i; 492 int i;
464 493
465 mutex_enter(&pagermtx); 494 mutex_enter(&pagermtx);
466 LIST_FOREACH(pgri, &pagerlist, pgr_entries) { 495 LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
467 if (pgri->pgr_kva <= va 496 if (pgri->pgr_kva <= va
468 && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE) 497 && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE)
469 break; 498 break;
470 } 499 }
471 if (pgri) { 500 if (pgri) {
472 i = (va - pgri->pgr_kva) >> PAGE_SHIFT; 501 i = (va - pgri->pgr_kva) >> PAGE_SHIFT;
473 pg = pgri->pgr_pgs[i]; 502 pg = pgri->pgr_pgs[i];
474 } 503 }
475 mutex_exit(&pagermtx); 504 mutex_exit(&pagermtx);
476 505
477 return pg; 506 return pg;
478} 507}
479 508
480/* 509/*
481 * Called with the vm object locked. 510 * Called with the vm object locked.
482 * 511 *
483 * Put vnode object pages at the end of the access queue to indicate 512 * Put vnode object pages at the end of the access queue to indicate
484 * they have been recently accessed and should not be immediate 513 * they have been recently accessed and should not be immediate
485 * candidates for pageout. Do not do this for lookups done by 514 * candidates for pageout. Do not do this for lookups done by
486 * the pagedaemon to mimic pmap_kentered mappings which don't track 515 * the pagedaemon to mimic pmap_kentered mappings which don't track
487 * access information. 516 * access information.
488 */ 517 */
489struct vm_page * 518struct vm_page *
490uvm_pagelookup(struct uvm_object *uobj, voff_t off) 519uvm_pagelookup(struct uvm_object *uobj, voff_t off)
491{ 520{
492 struct vm_page *pg; 521 struct vm_page *pg;
493 bool ispagedaemon = curlwp == uvm.pagedaemon_lwp; 522 bool ispagedaemon = curlwp == uvm.pagedaemon_lwp;
494 523
495 pg = rb_tree_find_node(&uobj->rb_tree, &off); 524 pg = rb_tree_find_node(&uobj->rb_tree, &off);
496 if (pg && !UVM_OBJ_IS_AOBJ(pg->uobject) && !ispagedaemon) { 525 if (pg && !UVM_OBJ_IS_AOBJ(pg->uobject) && !ispagedaemon) {
497 mutex_enter(&uvm_pageqlock); 526 mutex_enter(&uvm_pageqlock);
498 TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue); 527 TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
499 TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue); 528 TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue);
500 mutex_exit(&uvm_pageqlock); 529 mutex_exit(&uvm_pageqlock);
501 } 530 }
502 531
503 return pg; 532 return pg;
504} 533}
505 534
506void 535void
507uvm_page_unbusy(struct vm_page **pgs, int npgs) 536uvm_page_unbusy(struct vm_page **pgs, int npgs)
508{ 537{
509 struct vm_page *pg; 538 struct vm_page *pg;
510 int i; 539 int i;
511 540
512 KASSERT(npgs > 0); 541 KASSERT(npgs > 0);
513 KASSERT(mutex_owned(&pgs[0]->uobject->vmobjlock)); 542 KASSERT(mutex_owned(&pgs[0]->uobject->vmobjlock));
514 543
515 for (i = 0; i < npgs; i++) { 544 for (i = 0; i < npgs; i++) {
516 pg = pgs[i]; 545 pg = pgs[i];
517 if (pg == NULL) 546 if (pg == NULL)
518 continue; 547 continue;
519 548
520 KASSERT(pg->flags & PG_BUSY); 549 KASSERT(pg->flags & PG_BUSY);
521 if (pg->flags & PG_WANTED) 550 if (pg->flags & PG_WANTED)
522 wakeup(pg); 551 wakeup(pg);
523 if (pg->flags & PG_RELEASED) 552 if (pg->flags & PG_RELEASED)
524 uvm_pagefree(pg); 553 uvm_pagefree(pg);
525 else 554 else
526 pg->flags &= ~(PG_WANTED|PG_BUSY); 555 pg->flags &= ~(PG_WANTED|PG_BUSY);
527 } 556 }
528} 557}
529 558
530void 559void
531uvm_estimatepageable(int *active, int *inactive) 560uvm_estimatepageable(int *active, int *inactive)
532{ 561{
533 562
534 /* XXX: guessing game */ 563 /* XXX: guessing game */
535 *active = 1024; 564 *active = 1024;
536 *inactive = 1024; 565 *inactive = 1024;
537} 566}
538 567
539struct vm_map_kernel * 568struct vm_map_kernel *
540vm_map_to_kernel(struct vm_map *map) 569vm_map_to_kernel(struct vm_map *map)
541{ 570{
542 571
543 return (struct vm_map_kernel *)map; 572 return (struct vm_map_kernel *)map;
544} 573}
545 574
546bool 575bool
547vm_map_starved_p(struct vm_map *map) 576vm_map_starved_p(struct vm_map *map)
548{ 577{
549 578
550 if (map->flags & VM_MAP_WANTVA) 579 if (map->flags & VM_MAP_WANTVA)
551 return true; 580 return true;
552 581
553 return false; 582 return false;
554} 583}
555 584
556int 585int
557uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags) 586uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
558{ 587{
559 588
560 panic("%s: unimplemented", __func__); 589 panic("%s: unimplemented", __func__);
561} 590}
562 591
563void 592void
564uvm_unloan(void *v, int npages, int flags) 593uvm_unloan(void *v, int npages, int flags)
565{ 594{
566 595
567 panic("%s: unimplemented", __func__); 596 panic("%s: unimplemented", __func__);
568} 597}
569 598
570int 599int
571uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages, 600uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
572 struct vm_page **opp) 601 struct vm_page **opp)
573{ 602{
574 603
575 return EBUSY; 604 return EBUSY;
576} 605}
577 606
578#ifdef DEBUGPRINT 607#ifdef DEBUGPRINT
579void 608void
580uvm_object_printit(struct uvm_object *uobj, bool full, 609uvm_object_printit(struct uvm_object *uobj, bool full,
581 void (*pr)(const char *, ...)) 610 void (*pr)(const char *, ...))
582{ 611{
583 612
584 pr("VM OBJECT at %p, refs %d", uobj, uobj->uo_refs); 613 pr("VM OBJECT at %p, refs %d", uobj, uobj->uo_refs);
585} 614}
586#endif 615#endif
587 616
588vaddr_t 617vaddr_t
589uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz) 618uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz)
590{ 619{
591 620
592 return 0; 621 return 0;
593} 622}
594 623
595int 624int
596uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 625uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
597 vm_prot_t prot, bool set_max) 626 vm_prot_t prot, bool set_max)
598{ 627{
599 628
600 return EOPNOTSUPP; 629 return EOPNOTSUPP;
601} 630}
602 631
603/* 632/*
604 * UVM km 633 * UVM km
605 */ 634 */
606 635
607vaddr_t 636vaddr_t
608uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags) 637uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
609{ 638{
610 void *rv, *desired = NULL; 639 void *rv, *desired = NULL;
611 int alignbit, error; 640 int alignbit, error;
612 641
613#ifdef __x86_64__ 642#ifdef __x86_64__
614 /* 643 /*
615 * On amd64, allocate all module memory from the lowest 2GB. 644 * On amd64, allocate all module memory from the lowest 2GB.
616 * This is because NetBSD kernel modules are compiled 645 * This is because NetBSD kernel modules are compiled
617 * with -mcmodel=kernel and reserve only 4 bytes for 646 * with -mcmodel=kernel and reserve only 4 bytes for
618 * offsets. If we load code compiled with -mcmodel=kernel 647 * offsets. If we load code compiled with -mcmodel=kernel
619 * anywhere except the lowest or highest 2GB, it will not 648 * anywhere except the lowest or highest 2GB, it will not
620 * work. Since userspace does not have access to the highest 649 * work. Since userspace does not have access to the highest
621 * 2GB, use the lowest 2GB. 650 * 2GB, use the lowest 2GB.
622 *  651 *
623 * Note: this assumes the rump kernel resides in 652 * Note: this assumes the rump kernel resides in
624 * the lowest 2GB as well. 653 * the lowest 2GB as well.
625 * 654 *
626 * Note2: yes, it's a quick hack, but since this the only 655 * Note2: yes, it's a quick hack, but since this the only
627 * place where we care about the map we're allocating from, 656 * place where we care about the map we're allocating from,
628 * just use a simple "if" instead of coming up with a fancy 657 * just use a simple "if" instead of coming up with a fancy
629 * generic solution. 658 * generic solution.
630 */ 659 */
631 extern struct vm_map *module_map; 660 extern struct vm_map *module_map;
632 if (map == module_map) { 661 if (map == module_map) {
633 desired = (void *)(0x80000000 - size); 662 desired = (void *)(0x80000000 - size);
634 } 663 }
635#endif 664#endif
636 665
637 alignbit = 0; 666 alignbit = 0;
638 if (align) { 667 if (align) {
639 alignbit = ffs(align)-1; 668 alignbit = ffs(align)-1;
640 } 669 }
641 670
642 rv = rumpuser_anonmmap(desired, size, alignbit, flags & UVM_KMF_EXEC, 671 rv = rumpuser_anonmmap(desired, size, alignbit, flags & UVM_KMF_EXEC,
643 &error); 672 &error);
644 if (rv == NULL) { 673 if (rv == NULL) {
645 if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT)) 674 if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT))
646 return 0; 675 return 0;
647 else 676 else
648 panic("uvm_km_alloc failed"); 677 panic("uvm_km_alloc failed");
649 } 678 }
650 679
651 if (flags & UVM_KMF_ZERO) 680 if (flags & UVM_KMF_ZERO)
652 memset(rv, 0, size); 681 memset(rv, 0, size);
653 682
654 return (vaddr_t)rv; 683 return (vaddr_t)rv;
655} 684}
656 685
657void 686void
658uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags) 687uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
659{ 688{
660 689
661 rumpuser_unmap((void *)vaddr, size); 690 rumpuser_unmap((void *)vaddr, size);
662} 691}
663 692
664struct vm_map * 693struct vm_map *
665uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr, 694uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
666 vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap) 695 vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap)
667{ 696{
668 697
669 return (struct vm_map *)417416; 698 return (struct vm_map *)417416;
670} 699}
671 700
672vaddr_t 701vaddr_t
673uvm_km_alloc_poolpage(struct vm_map *map, bool waitok) 702uvm_km_alloc_poolpage(struct vm_map *map, bool waitok)
674{ 703{
675 704
676 return (vaddr_t)rump_hypermalloc(PAGE_SIZE, PAGE_SIZE, 705 return (vaddr_t)rump_hypermalloc(PAGE_SIZE, PAGE_SIZE,
677 waitok, "kmalloc"); 706 waitok, "kmalloc");
678} 707}
679 708
680void 709void
681uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr) 710uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr)
682{ 711{
683 712
684 rump_hyperfree((void *)addr, PAGE_SIZE); 713 rump_hyperfree((void *)addr, PAGE_SIZE);
685} 714}
686 715
687vaddr_t 716vaddr_t
688uvm_km_alloc_poolpage_cache(struct vm_map *map, bool waitok) 717uvm_km_alloc_poolpage_cache(struct vm_map *map, bool waitok)
689{ 718{
690 719
691 return uvm_km_alloc_poolpage(map, waitok); 720 return uvm_km_alloc_poolpage(map, waitok);
692} 721}
693 722
694void 723void
695uvm_km_free_poolpage_cache(struct vm_map *map, vaddr_t vaddr) 724uvm_km_free_poolpage_cache(struct vm_map *map, vaddr_t vaddr)
696{ 725{
697 726
698 uvm_km_free_poolpage(map, vaddr); 727 uvm_km_free_poolpage(map, vaddr);
699} 728}
700 729
701void 730void
702uvm_km_va_drain(struct vm_map *map, uvm_flag_t flags) 731uvm_km_va_drain(struct vm_map *map, uvm_flag_t flags)
703{ 732{
704 733
705 /* we eventually maybe want some model for available memory */ 734 /* we eventually maybe want some model for available memory */
706} 735}
707 736
708/* 737/*
709 * VM space locking routines. We don't really have to do anything, 738 * VM space locking routines. We don't really have to do anything,
710 * since the pages are always "wired" (both local and remote processes). 739 * since the pages are always "wired" (both local and remote processes).
711 */ 740 */
712int 741int
713uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access) 742uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access)
714{ 743{
715 744
716 return 0; 745 return 0;
717} 746}
718 747
719void 748void
720uvm_vsunlock(struct vmspace *vs, void *addr, size_t len) 749uvm_vsunlock(struct vmspace *vs, void *addr, size_t len)
721{ 750{
722 751
723} 752}
724 753
725/* 754/*
726 * For the local case the buffer mappers don't need to do anything. 755 * For the local case the buffer mappers don't need to do anything.
727 * For the remote case we need to reserve space and copy data in or 756 * For the remote case we need to reserve space and copy data in or
728 * out, depending on B_READ/B_WRITE. 757 * out, depending on B_READ/B_WRITE.
729 */ 758 */
730void 759void
731vmapbuf(struct buf *bp, vsize_t len) 760vmapbuf(struct buf *bp, vsize_t len)
732{ 761{
733 762
734 bp->b_saveaddr = bp->b_data; 763 bp->b_saveaddr = bp->b_data;
735 764
736 /* remote case */ 765 /* remote case */
737 if (curproc->p_vmspace != vmspace_kernel()) { 766 if (curproc->p_vmspace != vmspace_kernel()) {
738 bp->b_data = rump_hypermalloc(len, 0, true, "vmapbuf"); 767 bp->b_data = rump_hypermalloc(len, 0, true, "vmapbuf");
739 if (BUF_ISWRITE(bp)) { 768 if (BUF_ISWRITE(bp)) {
740 copyin(bp->b_saveaddr, bp->b_data, len); 769 copyin(bp->b_saveaddr, bp->b_data, len);
741 } 770 }
742 } 771 }
743} 772}
744 773
745void 774void
746vunmapbuf(struct buf *bp, vsize_t len) 775vunmapbuf(struct buf *bp, vsize_t len)
747{ 776{
748 777
749 /* remote case */ 778 /* remote case */
750 if (bp->b_proc->p_vmspace != vmspace_kernel()) { 779 if (bp->b_proc->p_vmspace != vmspace_kernel()) {
751 if (BUF_ISREAD(bp)) { 780 if (BUF_ISREAD(bp)) {
752 copyout_proc(bp->b_proc, 781 copyout_proc(bp->b_proc,
753 bp->b_data, bp->b_saveaddr, len); 782 bp->b_data, bp->b_saveaddr, len);
754 } 783 }
755 rump_hyperfree(bp->b_data, len); 784 rump_hyperfree(bp->b_data, len);
756 } 785 }
757 786
758 bp->b_data = bp->b_saveaddr; 787 bp->b_data = bp->b_saveaddr;
759 bp->b_saveaddr = 0; 788 bp->b_saveaddr = 0;
760} 789}
761 790
762void 791void
763uvmspace_addref(struct vmspace *vm) 792uvmspace_addref(struct vmspace *vm)
764{ 793{
765 794
766 /* 795 /*
767 * No dynamically allocated vmspaces exist. 796 * No dynamically allocated vmspaces exist.
768 */ 797 */
769} 798}
770 799
771void 800void
772uvmspace_free(struct vmspace *vm) 801uvmspace_free(struct vmspace *vm)
773{ 802{
774 803
775 /* nothing for now */ 804 /* nothing for now */
776} 805}
777 806
778/* 807/*
779 * page life cycle stuff. it really doesn't exist, so just stubs. 808 * page life cycle stuff. it really doesn't exist, so just stubs.
780 */ 809 */
781 810
782void 811void
783uvm_pageactivate(struct vm_page *pg) 812uvm_pageactivate(struct vm_page *pg)
784{ 813{
785 814
786 /* nada */ 815 /* nada */
787} 816}
788 817
789void 818void
790uvm_pagedeactivate(struct vm_page *pg) 819uvm_pagedeactivate(struct vm_page *pg)
791{ 820{
792 821
793 /* nada */ 822 /* nada */
794} 823}
795 824
796void 825void
797uvm_pagedequeue(struct vm_page *pg) 826uvm_pagedequeue(struct vm_page *pg)
798{ 827{
799 828
800 /* nada*/ 829 /* nada*/
801} 830}
802 831
803void 832void
804uvm_pageenqueue(struct vm_page *pg) 833uvm_pageenqueue(struct vm_page *pg)
805{ 834{
806 835
807 /* nada */ 836 /* nada */
808} 837}
809 838
810void 839void
811uvmpdpol_anfree(struct vm_anon *an) 840uvmpdpol_anfree(struct vm_anon *an)
812{ 841{
813 842
814 /* nada */ 843 /* nada */
815} 844}
816 845
817/* 846/*
818 * Physical address accessors. 847 * Physical address accessors.
819 */ 848 */
820 849
821struct vm_page * 850struct vm_page *
822uvm_phys_to_vm_page(paddr_t pa) 851uvm_phys_to_vm_page(paddr_t pa)
823{ 852{
824 853
825 return NULL; 854 return NULL;
826} 855}
827 856
828paddr_t 857paddr_t
829uvm_vm_page_to_phys(const struct vm_page *pg) 858uvm_vm_page_to_phys(const struct vm_page *pg)
830{ 859{
831 860
832 return 0; 861 return 0;
833} 862}
834 863
835/* 864/*
836 * Routines related to the Page Baroness. 865 * Routines related to the Page Baroness.
837 */ 866 */
838 867
839void 868void
840uvm_wait(const char *msg) 869uvm_wait(const char *msg)
841{ 870{
842 871
843 if (__predict_false(curlwp == uvm.pagedaemon_lwp)) 872 if (__predict_false(curlwp == uvm.pagedaemon_lwp))
844 panic("pagedaemon out of memory"); 873 panic("pagedaemon out of memory");
845 if (__predict_false(rump_threads == 0)) 874 if (__predict_false(rump_threads == 0))
846 panic("pagedaemon missing (RUMP_THREADS = 0)"); 875 panic("pagedaemon missing (RUMP_THREADS = 0)");
847 876
848 mutex_enter(&pdaemonmtx); 877 mutex_enter(&pdaemonmtx);
849 pdaemon_waiters++; 878 pdaemon_waiters++;
850 cv_signal(&pdaemoncv); 879 cv_signal(&pdaemoncv);
851 cv_wait(&oomwait, &pdaemonmtx); 880 cv_wait(&oomwait, &pdaemonmtx);
852 mutex_exit(&pdaemonmtx); 881 mutex_exit(&pdaemonmtx);
853} 882}
854 883
855void 884void
856uvm_pageout_start(int npages) 885uvm_pageout_start(int npages)
857{ 886{
858 887
859 /* we don't have the heuristics */ 888 /* we don't have the heuristics */
860} 889}
861 890
862void 891void
863uvm_pageout_done(int npages) 892uvm_pageout_done(int npages)
864{ 893{
865 894
866 /* could wakeup waiters, but just let the pagedaemon do it */ 895 /* could wakeup waiters, but just let the pagedaemon do it */
867} 896}
868 897
869static bool 898static bool
870processpage(struct vm_page *pg, bool *lockrunning) 899processpage(struct vm_page *pg, bool *lockrunning)
871{ 900{
872 struct uvm_object *uobj; 901 struct uvm_object *uobj;
873 902
874 uobj = pg->uobject; 903 uobj = pg->uobject;
875 if (mutex_tryenter(&uobj->vmobjlock)) { 904 if (mutex_tryenter(&uobj->vmobjlock)) {
876 if ((pg->flags & PG_BUSY) == 0) { 905 if ((pg->flags & PG_BUSY) == 0) {
877 mutex_exit(&uvm_pageqlock); 906 mutex_exit(&uvm_pageqlock);
878 uobj->pgops->pgo_put(uobj, pg->offset, 907 uobj->pgops->pgo_put(uobj, pg->offset,
879 pg->offset + PAGE_SIZE, 908 pg->offset + PAGE_SIZE,
880 PGO_CLEANIT|PGO_FREE); 909 PGO_CLEANIT|PGO_FREE);
881 KASSERT(!mutex_owned(&uobj->vmobjlock)); 910 KASSERT(!mutex_owned(&uobj->vmobjlock));
882 return true; 911 return true;
883 } else { 912 } else {
884 mutex_exit(&uobj->vmobjlock); 913 mutex_exit(&uobj->vmobjlock);
885 } 914 }
886 } else if (*lockrunning == false && ncpu > 1) { 915 } else if (*lockrunning == false && ncpu > 1) {
887 CPU_INFO_ITERATOR cii; 916 CPU_INFO_ITERATOR cii;
888 struct cpu_info *ci; 917 struct cpu_info *ci;
889 struct lwp *l; 918 struct lwp *l;
890 919
891 l = mutex_owner(&uobj->vmobjlock); 920 l = mutex_owner(&uobj->vmobjlock);
892 for (CPU_INFO_FOREACH(cii, ci)) { 921 for (CPU_INFO_FOREACH(cii, ci)) {
893 if (ci->ci_curlwp == l) { 922 if (ci->ci_curlwp == l) {
894 *lockrunning = true; 923 *lockrunning = true;
895 break; 924 break;
896 } 925 }
897 } 926 }
898 } 927 }
899 928
900 return false; 929 return false;
901} 930}
902 931
903/* 932/*
904 * The Diabolical pageDaemon Director (DDD). 933 * The Diabolical pageDaemon Director (DDD).
905 */ 934 */
906void 935void
907uvm_pageout(void *arg) 936uvm_pageout(void *arg)
908{ 937{
909 struct vm_page *pg; 938 struct vm_page *pg;
910 struct pool *pp, *pp_first; 939 struct pool *pp, *pp_first;
911 uint64_t where; 940 uint64_t where;
912 int timo = 0; 941 int timo = 0;
913 int cleaned, skip, skipped; 942 int cleaned, skip, skipped;
914 bool succ = false; 943 bool succ = false;
915 bool lockrunning; 944 bool lockrunning;
916 945
917 mutex_enter(&pdaemonmtx); 946 mutex_enter(&pdaemonmtx);
918 for (;;) { 947 for (;;) {
919 if (succ) { 948 if (succ) {
920 kernel_map->flags &= ~VM_MAP_WANTVA; 949 kernel_map->flags &= ~VM_MAP_WANTVA;
921 kmem_map->flags &= ~VM_MAP_WANTVA; 950 kmem_map->flags &= ~VM_MAP_WANTVA;
922 timo = 0; 951 timo = 0;
923 if (pdaemon_waiters) { 952 if (pdaemon_waiters) {
924 pdaemon_waiters = 0; 953 pdaemon_waiters = 0;
925 cv_broadcast(&oomwait); 954 cv_broadcast(&oomwait);
926 } 955 }
927 } 956 }
928 succ = false; 957 succ = false;
929 958
930 if (pdaemon_waiters == 0) { 959 if (pdaemon_waiters == 0) {
931 cv_timedwait(&pdaemoncv, &pdaemonmtx, timo); 960 cv_timedwait(&pdaemoncv, &pdaemonmtx, timo);
932 uvmexp.pdwoke++; 961 uvmexp.pdwoke++;
933 } 962 }
934 963
935 /* tell the world that we are hungry */ 964 /* tell the world that we are hungry */
936 kernel_map->flags |= VM_MAP_WANTVA; 965 kernel_map->flags |= VM_MAP_WANTVA;
937 kmem_map->flags |= VM_MAP_WANTVA; 966 kmem_map->flags |= VM_MAP_WANTVA;
938 967
939 if (pdaemon_waiters == 0 && !NEED_PAGEDAEMON()) 968 if (pdaemon_waiters == 0 && !NEED_PAGEDAEMON())
940 continue; 969 continue;
941 mutex_exit(&pdaemonmtx); 970 mutex_exit(&pdaemonmtx);
942 971
943 /* 972 /*
944 * step one: reclaim the page cache. this should give 973 * step one: reclaim the page cache. this should give
945 * us the biggest earnings since whole pages are released 974 * us the biggest earnings since whole pages are released
946 * into backing memory. 975 * into backing memory.
947 */ 976 */
948 pool_cache_reclaim(&pagecache); 977 pool_cache_reclaim(&pagecache);
949 if (!NEED_PAGEDAEMON()) { 978 if (!NEED_PAGEDAEMON()) {
950 succ = true; 979 succ = true;
951 mutex_enter(&pdaemonmtx); 980 mutex_enter(&pdaemonmtx);
952 continue; 981 continue;
953 } 982 }
954 983
955 /* 984 /*
956 * Ok, so that didn't help. Next, try to hunt memory 985 * Ok, so that didn't help. Next, try to hunt memory
957 * by pushing out vnode pages. The pages might contain 986 * by pushing out vnode pages. The pages might contain
958 * useful cached data, but we need the memory. 987 * useful cached data, but we need the memory.
959 */ 988 */
960 cleaned = 0; 989 cleaned = 0;
961 skip = 0; 990 skip = 0;
962 lockrunning = false; 991 lockrunning = false;
963 again: 992 again:
964 mutex_enter(&uvm_pageqlock); 993 mutex_enter(&uvm_pageqlock);
965 while (cleaned < PAGEDAEMON_OBJCHUNK) { 994 while (cleaned < PAGEDAEMON_OBJCHUNK) {
966 skipped = 0; 995 skipped = 0;
967 TAILQ_FOREACH(pg, &vmpage_lruqueue, pageq.queue) { 996 TAILQ_FOREACH(pg, &vmpage_lruqueue, pageq.queue) {
968 997
969 /* 998 /*
970 * skip over pages we _might_ have tried 999 * skip over pages we _might_ have tried
971 * to handle earlier. they might not be 1000 * to handle earlier. they might not be
972 * exactly the same ones, but I'm not too 1001 * exactly the same ones, but I'm not too
973 * concerned. 1002 * concerned.
974 */ 1003 */
975 while (skipped++ < skip) 1004 while (skipped++ < skip)
976 continue; 1005 continue;
977 1006
978 if (processpage(pg, &lockrunning)) { 1007 if (processpage(pg, &lockrunning)) {
979 cleaned++; 1008 cleaned++;
980 goto again; 1009 goto again;
981 } 1010 }
982 1011
983 skip++; 1012 skip++;
984 } 1013 }
985 break; 1014 break;
986 } 1015 }
987 mutex_exit(&uvm_pageqlock); 1016 mutex_exit(&uvm_pageqlock);
988 1017
989 /* 1018 /*
990 * Ok, someone is running with an object lock held. 1019 * Ok, someone is running with an object lock held.
991 * We want to yield the host CPU to make sure the 1020 * We want to yield the host CPU to make sure the
992 * thread is not parked on the host. Since sched_yield() 1021 * thread is not parked on the host. Since sched_yield()
993 * doesn't appear to do anything on NetBSD, nanosleep 1022 * doesn't appear to do anything on NetBSD, nanosleep
994 * for the smallest possible time and hope we're back in 1023 * for the smallest possible time and hope we're back in
995 * the game soon. 1024 * the game soon.
996 */ 1025 */
997 if (cleaned == 0 && lockrunning) { 1026 if (cleaned == 0 && lockrunning) {
998 uint64_t sec, nsec; 1027 uint64_t sec, nsec;
999 1028
1000 sec = 0; 1029 sec = 0;
1001 nsec = 1; 1030 nsec = 1;
1002 rumpuser_nanosleep(&sec, &nsec, NULL); 1031 rumpuser_nanosleep(&sec, &nsec, NULL);
1003 1032
1004 lockrunning = false; 1033 lockrunning = false;
1005 skip = 0; 1034 skip = 0;
1006 1035
1007 /* and here we go again */ 1036 /* and here we go again */
1008 goto again; 1037 goto again;
1009 } 1038 }
1010 1039
1011 /* 1040 /*
1012 * And of course we need to reclaim the page cache 1041 * And of course we need to reclaim the page cache
1013 * again to actually release memory. 1042 * again to actually release memory.
1014 */ 1043 */
1015 pool_cache_reclaim(&pagecache); 1044 pool_cache_reclaim(&pagecache);
1016 if (!NEED_PAGEDAEMON()) { 1045 if (!NEED_PAGEDAEMON()) {
1017 succ = true; 1046 succ = true;
1018 mutex_enter(&pdaemonmtx); 1047 mutex_enter(&pdaemonmtx);
1019 continue; 1048 continue;
1020 } 1049 }
1021 1050
1022 /* 1051 /*
1023 * Still not there? sleeves come off right about now. 1052 * Still not there? sleeves come off right about now.
1024 * First: do reclaim on kernel/kmem map. 1053 * First: do reclaim on kernel/kmem map.
1025 */ 1054 */
1026 callback_run_roundrobin(&kernel_map_store.vmk_reclaim_callback, 1055 callback_run_roundrobin(&kernel_map_store.vmk_reclaim_callback,
1027 NULL); 1056 NULL);
1028 callback_run_roundrobin(&kmem_map_store.vmk_reclaim_callback, 1057 callback_run_roundrobin(&kmem_map_store.vmk_reclaim_callback,
1029 NULL); 1058 NULL);
1030 1059
1031 /* 1060 /*
1032 * And then drain the pools. Wipe them out ... all of them. 1061 * And then drain the pools. Wipe them out ... all of them.
1033 */ 1062 */
1034 1063
1035 pool_drain_start(&pp_first, &where); 1064 pool_drain_start(&pp_first, &where);
1036 pp = pp_first; 1065 pp = pp_first;
1037 for (;;) { 1066 for (;;) {
1038 rump_vfs_drainbufs(10 /* XXX: estimate better */); 1067 rump_vfs_drainbufs(10 /* XXX: estimate better */);
1039 succ = pool_drain_end(pp, where); 1068 succ = pool_drain_end(pp, where);
1040 if (succ) 1069 if (succ)
1041 break; 1070 break;
1042 pool_drain_start(&pp, &where); 1071 pool_drain_start(&pp, &where);
1043 if (pp == pp_first) { 1072 if (pp == pp_first) {
1044 succ = pool_drain_end(pp, where); 1073 succ = pool_drain_end(pp, where);
1045 break; 1074 break;
1046 } 1075 }
1047 } 1076 }
1048 1077
1049 /* 1078 /*
1050 * Need to use PYEC on our bag of tricks. 1079 * Need to use PYEC on our bag of tricks.
1051 * Unfortunately, the wife just borrowed it. 1080 * Unfortunately, the wife just borrowed it.
1052 */ 1081 */
1053 1082
1054 if (!succ && cleaned == 0) { 1083 if (!succ && cleaned == 0) {
1055 rumpuser_dprintf("pagedaemoness: failed to reclaim " 1084 rumpuser_dprintf("pagedaemoness: failed to reclaim "
1056 "memory ... sleeping (deadlock?)\n"); 1085 "memory ... sleeping (deadlock?)\n");
1057 timo = hz; 1086 timo = hz;
1058 } 1087 }
1059 1088
1060 mutex_enter(&pdaemonmtx); 1089 mutex_enter(&pdaemonmtx);
1061 } 1090 }
1062 1091
1063 panic("you can swap out any time you like, but you can never leave"); 1092 panic("you can swap out any time you like, but you can never leave");
1064} 1093}
1065 1094
1066void 1095void
1067uvm_kick_pdaemon() 1096uvm_kick_pdaemon()
1068{ 1097{
1069 1098
1070 /* 1099 /*
1071 * Wake up the diabolical pagedaemon director if we are over 1100 * Wake up the diabolical pagedaemon director if we are over
1072 * 90% of the memory limit. This is a complete and utter 1101 * 90% of the memory limit. This is a complete and utter
1073 * stetson-harrison decision which you are allowed to finetune. 1102 * stetson-harrison decision which you are allowed to finetune.
1074 * Don't bother locking. If we have some unflushed caches, 1103 * Don't bother locking. If we have some unflushed caches,
1075 * other waker-uppers will deal with the issue. 1104 * other waker-uppers will deal with the issue.
1076 */ 1105 */
1077 if (NEED_PAGEDAEMON()) { 1106 if (NEED_PAGEDAEMON()) {
1078 cv_signal(&pdaemoncv); 1107 cv_signal(&pdaemoncv);
1079 } 1108 }
1080} 1109}
1081 1110
1082void * 1111void *
1083rump_hypermalloc(size_t howmuch, int alignment, bool waitok, const char *wmsg) 1112rump_hypermalloc(size_t howmuch, int alignment, bool waitok, const char *wmsg)
1084{ 1113{
1085 unsigned long newmem; 1114 unsigned long newmem;
1086 void *rv; 1115 void *rv;
1087 1116
1088 uvm_kick_pdaemon(); /* ouch */ 1117 uvm_kick_pdaemon(); /* ouch */
1089 1118
1090 /* first we must be within the limit */ 1119 /* first we must be within the limit */
1091 limitagain: 1120 limitagain:
1092 if (rump_physmemlimit != RUMPMEM_UNLIMITED) { 1121 if (rump_physmemlimit != RUMPMEM_UNLIMITED) {
1093 newmem = atomic_add_long_nv(&curphysmem, howmuch); 1122 newmem = atomic_add_long_nv(&curphysmem, howmuch);
1094 if (newmem > rump_physmemlimit) { 1123 if (newmem > rump_physmemlimit) {
1095 newmem = atomic_add_long_nv(&curphysmem, -howmuch); 1124 newmem = atomic_add_long_nv(&curphysmem, -howmuch);
1096 if (!waitok) { 1125 if (!waitok) {
1097 return NULL; 1126 return NULL;
1098 } 1127 }
1099 uvm_wait(wmsg); 1128 uvm_wait(wmsg);
1100 goto limitagain; 1129 goto limitagain;
1101 } 1130 }
1102 } 1131 }
1103 1132
1104 /* second, we must get something from the backend */ 1133 /* second, we must get something from the backend */
1105 again: 1134 again:
1106 rv = rumpuser_malloc(howmuch, alignment); 1135 rv = rumpuser_malloc(howmuch, alignment);
1107 if (__predict_false(rv == NULL && waitok)) { 1136 if (__predict_false(rv == NULL && waitok)) {
1108 uvm_wait(wmsg); 1137 uvm_wait(wmsg);
1109 goto again; 1138 goto again;
1110 } 1139 }
1111 1140
1112 return rv; 1141 return rv;
1113} 1142}
1114 1143
1115void 1144void
1116rump_hyperfree(void *what, size_t size) 1145rump_hyperfree(void *what, size_t size)
1117{ 1146{
1118 1147
1119 if (rump_physmemlimit != RUMPMEM_UNLIMITED) { 1148 if (rump_physmemlimit != RUMPMEM_UNLIMITED) {
1120 atomic_add_long(&curphysmem, -size); 1149 atomic_add_long(&curphysmem, -size);
1121 } 1150 }
1122 rumpuser_free(what); 1151 rumpuser_free(what);
1123} 1152}
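
A worked footnote on the numbers above (illustrative only, reusing the names from the diff): with RUMP_MEMLIMIT=16m the limit becomes 16777216 bytes, and dddlim, the 90% mark that NEED_PAGEDAEMON() compares curphysmem against, works out like this:

    #include <stdio.h>

    int
    main(void)
    {
        unsigned long rump_physmemlimit = 16UL * 1024 * 1024;  /* "16m" */
        unsigned long dddlim = 9 * (rump_physmemlimit / 10);   /* as in uvm_init() */

        /* NEED_PAGEDAEMON() becomes true once curphysmem exceeds dddlim */
        printf("limit = %lu bytes, pagedaemon threshold = %lu bytes\n",
            rump_physmemlimit, dddlim);                        /* 16777216 / 15099489 */
        return 0;
    }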