| @@ -1,1177 +1,1177 @@ | | | @@ -1,1177 +1,1177 @@ |
1 | /* $NetBSD: vm.c,v 1.123 2012/02/19 09:19:41 martin Exp $ */ | | 1 | /* $NetBSD: vm.c,v 1.124 2012/03/05 13:43:56 para Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Copyright (c) 2007-2011 Antti Kantee. All Rights Reserved. | | 4 | * Copyright (c) 2007-2011 Antti Kantee. All Rights Reserved. |
5 | * | | 5 | * |
6 | * Development of this software was supported by | | 6 | * Development of this software was supported by |
7 | * The Finnish Cultural Foundation and the Research Foundation of | | 7 | * The Finnish Cultural Foundation and the Research Foundation of |
8 | * The Helsinki University of Technology. | | 8 | * The Helsinki University of Technology. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright | | 15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the | | 16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. | | 17 | * documentation and/or other materials provided with the distribution. |
18 | * | | 18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS | | 19 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS |
20 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | | 20 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
21 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | | 21 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
22 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | | 22 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | | 24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
25 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 25 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
29 | * SUCH DAMAGE. | | 29 | * SUCH DAMAGE. |
30 | */ | | 30 | */ |
31 | | | 31 | |
32 | /* | | 32 | /* |
33 | * Virtual memory emulation routines. | | 33 | * Virtual memory emulation routines. |
34 | */ | | 34 | */ |
35 | | | 35 | |
36 | /* | | 36 | /* |
37 | * XXX: we abuse pg->uanon for the virtual address of the storage | | 37 | * XXX: we abuse pg->uanon for the virtual address of the storage |
38 | * for each page. phys_addr would fit the job description better, | | 38 | * for each page. phys_addr would fit the job description better, |
39 | * except that it will create unnecessary lossage on some platforms | | 39 | * except that it will create unnecessary lossage on some platforms |
40 | * due to not being a pointer type. | | 40 | * due to not being a pointer type. |
41 | */ | | 41 | */ |
42 | | | 42 | |
43 | #include <sys/cdefs.h> | | 43 | #include <sys/cdefs.h> |
44 | __KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.123 2012/02/19 09:19:41 martin Exp $"); | | 44 | __KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.124 2012/03/05 13:43:56 para Exp $"); |
45 | | | 45 | |
46 | #include <sys/param.h> | | 46 | #include <sys/param.h> |
47 | #include <sys/atomic.h> | | 47 | #include <sys/atomic.h> |
48 | #include <sys/buf.h> | | 48 | #include <sys/buf.h> |
49 | #include <sys/kernel.h> | | 49 | #include <sys/kernel.h> |
50 | #include <sys/kmem.h> | | 50 | #include <sys/kmem.h> |
51 | #include <sys/vmem.h> | | 51 | #include <sys/vmem.h> |
52 | #include <sys/mman.h> | | 52 | #include <sys/mman.h> |
53 | #include <sys/null.h> | | 53 | #include <sys/null.h> |
54 | #include <sys/vnode.h> | | 54 | #include <sys/vnode.h> |
55 | | | 55 | |
56 | #include <machine/pmap.h> | | 56 | #include <machine/pmap.h> |
57 | | | 57 | |
58 | #include <rump/rumpuser.h> | | 58 | #include <rump/rumpuser.h> |
59 | | | 59 | |
60 | #include <uvm/uvm.h> | | 60 | #include <uvm/uvm.h> |
61 | #include <uvm/uvm_ddb.h> | | 61 | #include <uvm/uvm_ddb.h> |
62 | #include <uvm/uvm_pdpolicy.h> | | 62 | #include <uvm/uvm_pdpolicy.h> |
63 | #include <uvm/uvm_prot.h> | | 63 | #include <uvm/uvm_prot.h> |
64 | #include <uvm/uvm_readahead.h> | | 64 | #include <uvm/uvm_readahead.h> |
65 | | | 65 | |
66 | #include "rump_private.h" | | 66 | #include "rump_private.h" |
67 | #include "rump_vfs_private.h" | | 67 | #include "rump_vfs_private.h" |
68 | | | 68 | |
69 | kmutex_t uvm_pageqlock; | | 69 | kmutex_t uvm_pageqlock; |
70 | kmutex_t uvm_swap_data_lock; | | 70 | kmutex_t uvm_swap_data_lock; |
71 | | | 71 | |
72 | struct uvmexp uvmexp; | | 72 | struct uvmexp uvmexp; |
73 | struct uvm uvm; | | 73 | struct uvm uvm; |
74 | | | 74 | |
75 | #ifdef __uvmexp_pagesize | | 75 | #ifdef __uvmexp_pagesize |
76 | const int * const uvmexp_pagesize = &uvmexp.pagesize; | | 76 | const int * const uvmexp_pagesize = &uvmexp.pagesize; |
77 | const int * const uvmexp_pagemask = &uvmexp.pagemask; | | 77 | const int * const uvmexp_pagemask = &uvmexp.pagemask; |
78 | const int * const uvmexp_pageshift = &uvmexp.pageshift; | | 78 | const int * const uvmexp_pageshift = &uvmexp.pageshift; |
79 | #endif | | 79 | #endif |
80 | | | 80 | |
81 | struct vm_map rump_vmmap; | | 81 | struct vm_map rump_vmmap; |
82 | | | 82 | |
83 | static struct vm_map kernel_map_store; | | 83 | static struct vm_map kernel_map_store; |
84 | struct vm_map *kernel_map = &kernel_map_store; | | 84 | struct vm_map *kernel_map = &kernel_map_store; |
85 | | | 85 | |
86 | vmem_t *kmem_arena; | | 86 | vmem_t *kmem_arena; |
87 | vmem_t *kmem_va_arena; | | 87 | vmem_t *kmem_va_arena; |
88 | | | 88 | |
89 | static unsigned int pdaemon_waiters; | | 89 | static unsigned int pdaemon_waiters; |
90 | static kmutex_t pdaemonmtx; | | 90 | static kmutex_t pdaemonmtx; |
91 | static kcondvar_t pdaemoncv, oomwait; | | 91 | static kcondvar_t pdaemoncv, oomwait; |
92 | | | 92 | |
93 | unsigned long rump_physmemlimit = RUMPMEM_UNLIMITED; | | 93 | unsigned long rump_physmemlimit = RUMPMEM_UNLIMITED; |
94 | static unsigned long curphysmem; | | 94 | static unsigned long curphysmem; |
95 | static unsigned long dddlim; /* 90% of memory limit used */ | | 95 | static unsigned long dddlim; /* 90% of memory limit used */ |
96 | #define NEED_PAGEDAEMON() \ | | 96 | #define NEED_PAGEDAEMON() \ |
97 | (rump_physmemlimit != RUMPMEM_UNLIMITED && curphysmem > dddlim) | | 97 | (rump_physmemlimit != RUMPMEM_UNLIMITED && curphysmem > dddlim) |
98 | | | 98 | |
99 | /* | | 99 | /* |
100 | * Try to free two pages worth of pages from objects. | | 100 | * Try to free two pages worth of pages from objects. |
101 | * If this successfully frees a full page cache page, we'll | | 101 | * If this successfully frees a full page cache page, we'll |
102 | * free the released page plus PAGE_SIZE/sizeof(vm_page). | | 102 | * free the released page plus PAGE_SIZE/sizeof(vm_page). |
103 | */ | | 103 | */ |
104 | #define PAGEDAEMON_OBJCHUNK (2*PAGE_SIZE / sizeof(struct vm_page)) | | 104 | #define PAGEDAEMON_OBJCHUNK (2*PAGE_SIZE / sizeof(struct vm_page)) |
105 | | | 105 | |
106 | /* | | 106 | /* |
107 | * Keep a list of least recently used pages. Since the only way a | | 107 | * Keep a list of least recently used pages. Since the only way a |
108 | * rump kernel can "access" a page is via lookup, we put the page | | 108 | * rump kernel can "access" a page is via lookup, we put the page |
109 | * at the back of queue every time a lookup for it is done. If the | | 109 | * at the back of queue every time a lookup for it is done. If the |
110 | * page is in front of this global queue and we're short of memory, | | 110 | * page is in front of this global queue and we're short of memory, |
111 | * it's a candidate for pageout. | | 111 | * it's a candidate for pageout. |
112 | */ | | 112 | */ |
113 | static struct pglist vmpage_lruqueue; | | 113 | static struct pglist vmpage_lruqueue; |
114 | static unsigned vmpage_onqueue; | | 114 | static unsigned vmpage_onqueue; |
115 | | | 115 | |
116 | static int | | 116 | static int |
117 | pg_compare_key(void *ctx, const void *n, const void *key) | | 117 | pg_compare_key(void *ctx, const void *n, const void *key) |
118 | { | | 118 | { |
119 | voff_t a = ((const struct vm_page *)n)->offset; | | 119 | voff_t a = ((const struct vm_page *)n)->offset; |
120 | voff_t b = *(const voff_t *)key; | | 120 | voff_t b = *(const voff_t *)key; |
121 | | | 121 | |
122 | if (a < b) | | 122 | if (a < b) |
123 | return -1; | | 123 | return -1; |
124 | else if (a > b) | | 124 | else if (a > b) |
125 | return 1; | | 125 | return 1; |
126 | else | | 126 | else |
127 | return 0; | | 127 | return 0; |
128 | } | | 128 | } |
129 | | | 129 | |
130 | static int | | 130 | static int |
131 | pg_compare_nodes(void *ctx, const void *n1, const void *n2) | | 131 | pg_compare_nodes(void *ctx, const void *n1, const void *n2) |
132 | { | | 132 | { |
133 | | | 133 | |
134 | return pg_compare_key(ctx, n1, &((const struct vm_page *)n2)->offset); | | 134 | return pg_compare_key(ctx, n1, &((const struct vm_page *)n2)->offset); |
135 | } | | 135 | } |
136 | | | 136 | |
137 | const rb_tree_ops_t uvm_page_tree_ops = { | | 137 | const rb_tree_ops_t uvm_page_tree_ops = { |
138 | .rbto_compare_nodes = pg_compare_nodes, | | 138 | .rbto_compare_nodes = pg_compare_nodes, |
139 | .rbto_compare_key = pg_compare_key, | | 139 | .rbto_compare_key = pg_compare_key, |
140 | .rbto_node_offset = offsetof(struct vm_page, rb_node), | | 140 | .rbto_node_offset = offsetof(struct vm_page, rb_node), |
141 | .rbto_context = NULL | | 141 | .rbto_context = NULL |
142 | }; | | 142 | }; |
143 | | | 143 | |
144 | /* | | 144 | /* |
145 | * vm pages | | 145 | * vm pages |
146 | */ | | 146 | */ |
147 | | | 147 | |
148 | static int | | 148 | static int |
149 | pgctor(void *arg, void *obj, int flags) | | 149 | pgctor(void *arg, void *obj, int flags) |
150 | { | | 150 | { |
151 | struct vm_page *pg = obj; | | 151 | struct vm_page *pg = obj; |
152 | | | 152 | |
153 | memset(pg, 0, sizeof(*pg)); | | 153 | memset(pg, 0, sizeof(*pg)); |
154 | pg->uanon = rump_hypermalloc(PAGE_SIZE, PAGE_SIZE, | | 154 | pg->uanon = rump_hypermalloc(PAGE_SIZE, PAGE_SIZE, |
155 | (flags & PR_WAITOK) == PR_WAITOK, "pgalloc"); | | 155 | (flags & PR_WAITOK) == PR_WAITOK, "pgalloc"); |
156 | return pg->uanon == NULL; | | 156 | return pg->uanon == NULL; |
157 | } | | 157 | } |
158 | | | 158 | |
159 | static void | | 159 | static void |
160 | pgdtor(void *arg, void *obj) | | 160 | pgdtor(void *arg, void *obj) |
161 | { | | 161 | { |
162 | struct vm_page *pg = obj; | | 162 | struct vm_page *pg = obj; |
163 | | | 163 | |
164 | rump_hyperfree(pg->uanon, PAGE_SIZE); | | 164 | rump_hyperfree(pg->uanon, PAGE_SIZE); |
165 | } | | 165 | } |
166 | | | 166 | |
167 | static struct pool_cache pagecache; | | 167 | static struct pool_cache pagecache; |
168 | | | 168 | |
169 | /* | | 169 | /* |
170 | * Called with the object locked. We don't support anons. | | 170 | * Called with the object locked. We don't support anons. |
171 | */ | | 171 | */ |
172 | struct vm_page * | | 172 | struct vm_page * |
173 | uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon, | | 173 | uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon, |
174 | int flags, int strat, int free_list) | | 174 | int flags, int strat, int free_list) |
175 | { | | 175 | { |
176 | struct vm_page *pg; | | 176 | struct vm_page *pg; |
177 | | | 177 | |
178 | KASSERT(uobj && mutex_owned(uobj->vmobjlock)); | | 178 | KASSERT(uobj && mutex_owned(uobj->vmobjlock)); |
179 | KASSERT(anon == NULL); | | 179 | KASSERT(anon == NULL); |
180 | | | 180 | |
181 | pg = pool_cache_get(&pagecache, PR_NOWAIT); | | 181 | pg = pool_cache_get(&pagecache, PR_NOWAIT); |
182 | if (__predict_false(pg == NULL)) { | | 182 | if (__predict_false(pg == NULL)) { |
183 | return NULL; | | 183 | return NULL; |
184 | } | | 184 | } |
185 | | | 185 | |
186 | pg->offset = off; | | 186 | pg->offset = off; |
187 | pg->uobject = uobj; | | 187 | pg->uobject = uobj; |
188 | | | 188 | |
189 | pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE; | | 189 | pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE; |
190 | if (flags & UVM_PGA_ZERO) { | | 190 | if (flags & UVM_PGA_ZERO) { |
191 | uvm_pagezero(pg); | | 191 | uvm_pagezero(pg); |
192 | } | | 192 | } |
193 | | | 193 | |
194 | TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue); | | 194 | TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue); |
195 | (void)rb_tree_insert_node(&uobj->rb_tree, pg); | | 195 | (void)rb_tree_insert_node(&uobj->rb_tree, pg); |
196 | | | 196 | |
197 | /* | | 197 | /* |
198 | * Don't put anons on the LRU page queue. We can't flush them | | 198 | * Don't put anons on the LRU page queue. We can't flush them |
199 | * (there's no concept of swap in a rump kernel), so no reason | | 199 | * (there's no concept of swap in a rump kernel), so no reason |
200 | * to bother with them. | | 200 | * to bother with them. |
201 | */ | | 201 | */ |
202 | if (!UVM_OBJ_IS_AOBJ(uobj)) { | | 202 | if (!UVM_OBJ_IS_AOBJ(uobj)) { |
203 | atomic_inc_uint(&vmpage_onqueue); | | 203 | atomic_inc_uint(&vmpage_onqueue); |
204 | mutex_enter(&uvm_pageqlock); | | 204 | mutex_enter(&uvm_pageqlock); |
205 | TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue); | | 205 | TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue); |
206 | mutex_exit(&uvm_pageqlock); | | 206 | mutex_exit(&uvm_pageqlock); |
207 | } | | 207 | } |
208 | | | 208 | |
209 | uobj->uo_npages++; | | 209 | uobj->uo_npages++; |
210 | | | 210 | |
211 | return pg; | | 211 | return pg; |
212 | } | | 212 | } |
213 | | | 213 | |
214 | /* | | 214 | /* |
215 | * Release a page. | | 215 | * Release a page. |
216 | * | | 216 | * |
217 | * Called with the vm object locked. | | 217 | * Called with the vm object locked. |
218 | */ | | 218 | */ |
219 | void | | 219 | void |
220 | uvm_pagefree(struct vm_page *pg) | | 220 | uvm_pagefree(struct vm_page *pg) |
221 | { | | 221 | { |
222 | struct uvm_object *uobj = pg->uobject; | | 222 | struct uvm_object *uobj = pg->uobject; |
223 | | | 223 | |
224 | KASSERT(mutex_owned(&uvm_pageqlock)); | | 224 | KASSERT(mutex_owned(&uvm_pageqlock)); |
225 | KASSERT(mutex_owned(uobj->vmobjlock)); | | 225 | KASSERT(mutex_owned(uobj->vmobjlock)); |
226 | | | 226 | |
227 | if (pg->flags & PG_WANTED) | | 227 | if (pg->flags & PG_WANTED) |
228 | wakeup(pg); | | 228 | wakeup(pg); |
229 | | | 229 | |
230 | TAILQ_REMOVE(&uobj->memq, pg, listq.queue); | | 230 | TAILQ_REMOVE(&uobj->memq, pg, listq.queue); |
231 | | | 231 | |
232 | uobj->uo_npages--; | | 232 | uobj->uo_npages--; |
233 | rb_tree_remove_node(&uobj->rb_tree, pg); | | 233 | rb_tree_remove_node(&uobj->rb_tree, pg); |
234 | | | 234 | |
235 | if (!UVM_OBJ_IS_AOBJ(uobj)) { | | 235 | if (!UVM_OBJ_IS_AOBJ(uobj)) { |
236 | TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue); | | 236 | TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue); |
237 | atomic_dec_uint(&vmpage_onqueue); | | 237 | atomic_dec_uint(&vmpage_onqueue); |
238 | } | | 238 | } |
239 | | | 239 | |
240 | pool_cache_put(&pagecache, pg); | | 240 | pool_cache_put(&pagecache, pg); |
241 | } | | 241 | } |
242 | | | 242 | |
243 | void | | 243 | void |
244 | uvm_pagezero(struct vm_page *pg) | | 244 | uvm_pagezero(struct vm_page *pg) |
245 | { | | 245 | { |
246 | | | 246 | |
247 | pg->flags &= ~PG_CLEAN; | | 247 | pg->flags &= ~PG_CLEAN; |
248 | memset((void *)pg->uanon, 0, PAGE_SIZE); | | 248 | memset((void *)pg->uanon, 0, PAGE_SIZE); |
249 | } | | 249 | } |
250 | | | 250 | |
251 | /* | | 251 | /* |
252 | * Misc routines | | 252 | * Misc routines |
253 | */ | | 253 | */ |
254 | | | 254 | |
255 | static kmutex_t pagermtx; | | 255 | static kmutex_t pagermtx; |
256 | | | 256 | |
257 | void | | 257 | void |
258 | uvm_init(void) | | 258 | uvm_init(void) |
259 | { | | 259 | { |
260 | char buf[64]; | | 260 | char buf[64]; |
261 | int error; | | 261 | int error; |
262 | | | 262 | |
263 | if (rumpuser_getenv("RUMP_MEMLIMIT", buf, sizeof(buf), &error) == 0) { | | 263 | if (rumpuser_getenv("RUMP_MEMLIMIT", buf, sizeof(buf), &error) == 0) { |
264 | unsigned long tmp; | | 264 | unsigned long tmp; |
265 | char *ep; | | 265 | char *ep; |
266 | int mult; | | 266 | int mult; |
267 | | | 267 | |
268 | tmp = strtoul(buf, &ep, 10); | | 268 | tmp = strtoul(buf, &ep, 10); |
269 | if (strlen(ep) > 1) | | 269 | if (strlen(ep) > 1) |
270 | panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf); | | 270 | panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf); |
271 | | | 271 | |
272 | /* mini-dehumanize-number */ | | 272 | /* mini-dehumanize-number */ |
273 | mult = 1; | | 273 | mult = 1; |
274 | switch (*ep) { | | 274 | switch (*ep) { |
275 | case 'k': | | 275 | case 'k': |
276 | mult = 1024; | | 276 | mult = 1024; |
277 | break; | | 277 | break; |
278 | case 'm': | | 278 | case 'm': |
279 | mult = 1024*1024; | | 279 | mult = 1024*1024; |
280 | break; | | 280 | break; |
281 | case 'g': | | 281 | case 'g': |
282 | mult = 1024*1024*1024; | | 282 | mult = 1024*1024*1024; |
283 | break; | | 283 | break; |
284 | case 0: | | 284 | case 0: |
285 | break; | | 285 | break; |
286 | default: | | 286 | default: |
287 | panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf); | | 287 | panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf); |
288 | } | | 288 | } |
289 | rump_physmemlimit = tmp * mult; | | 289 | rump_physmemlimit = tmp * mult; |
290 | | | 290 | |
291 | if (rump_physmemlimit / mult != tmp) | | 291 | if (rump_physmemlimit / mult != tmp) |
292 | panic("uvm_init: RUMP_MEMLIMIT overflow: %s", buf); | | 292 | panic("uvm_init: RUMP_MEMLIMIT overflow: %s", buf); |
293 | /* it's not like we'd get far with, say, 1 byte, but ... */ | | 293 | /* it's not like we'd get far with, say, 1 byte, but ... */ |
294 | if (rump_physmemlimit == 0) | | 294 | if (rump_physmemlimit == 0) |
295 | panic("uvm_init: no memory"); | | 295 | panic("uvm_init: no memory"); |
296 | | | 296 | |
297 | #define HUMANIZE_BYTES 9 | | 297 | #define HUMANIZE_BYTES 9 |
298 | CTASSERT(sizeof(buf) >= HUMANIZE_BYTES); | | 298 | CTASSERT(sizeof(buf) >= HUMANIZE_BYTES); |
299 | format_bytes(buf, HUMANIZE_BYTES, rump_physmemlimit); | | 299 | format_bytes(buf, HUMANIZE_BYTES, rump_physmemlimit); |
300 | #undef HUMANIZE_BYTES | | 300 | #undef HUMANIZE_BYTES |
301 | dddlim = 9 * (rump_physmemlimit / 10); | | 301 | dddlim = 9 * (rump_physmemlimit / 10); |
302 | } else { | | 302 | } else { |
303 | strlcpy(buf, "unlimited (host limit)", sizeof(buf)); | | 303 | strlcpy(buf, "unlimited (host limit)", sizeof(buf)); |
304 | } | | 304 | } |
305 | aprint_verbose("total memory = %s\n", buf); | | 305 | aprint_verbose("total memory = %s\n", buf); |
306 | | | 306 | |
307 | TAILQ_INIT(&vmpage_lruqueue); | | 307 | TAILQ_INIT(&vmpage_lruqueue); |
308 | | | 308 | |
309 | uvmexp.free = 1024*1024; /* XXX: arbitrary & not updated */ | | 309 | uvmexp.free = 1024*1024; /* XXX: arbitrary & not updated */ |
310 | | | 310 | |
311 | #ifndef __uvmexp_pagesize | | 311 | #ifndef __uvmexp_pagesize |
312 | uvmexp.pagesize = PAGE_SIZE; | | 312 | uvmexp.pagesize = PAGE_SIZE; |
313 | uvmexp.pagemask = PAGE_MASK; | | 313 | uvmexp.pagemask = PAGE_MASK; |
314 | uvmexp.pageshift = PAGE_SHIFT; | | 314 | uvmexp.pageshift = PAGE_SHIFT; |
315 | #else | | 315 | #else |
316 | #define FAKE_PAGE_SHIFT 12 | | 316 | #define FAKE_PAGE_SHIFT 12 |
317 | uvmexp.pageshift = FAKE_PAGE_SHIFT; | | 317 | uvmexp.pageshift = FAKE_PAGE_SHIFT; |
318 | uvmexp.pagesize = 1<<FAKE_PAGE_SHIFT; | | 318 | uvmexp.pagesize = 1<<FAKE_PAGE_SHIFT; |
319 | uvmexp.pagemask = (1<<FAKE_PAGE_SHIFT)-1; | | 319 | uvmexp.pagemask = (1<<FAKE_PAGE_SHIFT)-1; |
320 | #undef FAKE_PAGE_SHIFT | | 320 | #undef FAKE_PAGE_SHIFT |
321 | #endif | | 321 | #endif |
322 | | | 322 | |
323 | mutex_init(&pagermtx, MUTEX_DEFAULT, 0); | | 323 | mutex_init(&pagermtx, MUTEX_DEFAULT, 0); |
324 | mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0); | | 324 | mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0); |
325 | mutex_init(&uvm_swap_data_lock, MUTEX_DEFAULT, 0); | | 325 | mutex_init(&uvm_swap_data_lock, MUTEX_DEFAULT, 0); |
326 | | | 326 | |
327 | mutex_init(&pdaemonmtx, MUTEX_DEFAULT, 0); | | 327 | mutex_init(&pdaemonmtx, MUTEX_DEFAULT, 0); |
328 | cv_init(&pdaemoncv, "pdaemon"); | | 328 | cv_init(&pdaemoncv, "pdaemon"); |
329 | cv_init(&oomwait, "oomwait"); | | 329 | cv_init(&oomwait, "oomwait"); |
330 | | | 330 | |
331 | kernel_map->pmap = pmap_kernel(); | | 331 | kernel_map->pmap = pmap_kernel(); |
332 | | | 332 | |
333 | pool_subsystem_init(); | | 333 | pool_subsystem_init(); |
334 | vmem_bootstrap(); | | 334 | vmem_bootstrap(); |
335 | kmem_arena = vmem_create("kmem", 0, 1024*1024, PAGE_SIZE, | | 335 | kmem_arena = vmem_create("kmem", 0, 1024*1024, PAGE_SIZE, |
336 | NULL, NULL, NULL, | | 336 | NULL, NULL, NULL, |
337 | 0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM); | | 337 | 0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM); |
338 | | | 338 | |
339 | vmem_init(kmem_arena); | | 339 | vmem_init(kmem_arena); |
340 | | | 340 | |
341 | kmem_va_arena = vmem_create("kva", 0, 0, PAGE_SIZE, | | 341 | kmem_va_arena = vmem_create("kva", 0, 0, PAGE_SIZE, |
342 | vmem_alloc, vmem_free, kmem_arena, | | 342 | vmem_alloc, vmem_free, kmem_arena, |
343 | 32 * PAGE_SIZE, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM); | | 343 | 8 * PAGE_SIZE, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM); |
344 | | | 344 | |
345 | pool_cache_bootstrap(&pagecache, sizeof(struct vm_page), 0, 0, 0, | | 345 | pool_cache_bootstrap(&pagecache, sizeof(struct vm_page), 0, 0, 0, |
346 | "page$", NULL, IPL_NONE, pgctor, pgdtor, NULL); | | 346 | "page$", NULL, IPL_NONE, pgctor, pgdtor, NULL); |
347 | } | | 347 | } |
348 | | | 348 | |
349 | void | | 349 | void |
350 | uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax) | | 350 | uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax) |
351 | { | | 351 | { |
352 | | | 352 | |
353 | vm->vm_map.pmap = pmap_kernel(); | | 353 | vm->vm_map.pmap = pmap_kernel(); |
354 | vm->vm_refcnt = 1; | | 354 | vm->vm_refcnt = 1; |
355 | } | | 355 | } |
356 | | | 356 | |
357 | void | | 357 | void |
358 | uvm_pagewire(struct vm_page *pg) | | 358 | uvm_pagewire(struct vm_page *pg) |
359 | { | | 359 | { |
360 | | | 360 | |
361 | /* nada */ | | 361 | /* nada */ |
362 | } | | 362 | } |
363 | | | 363 | |
364 | void | | 364 | void |
365 | uvm_pageunwire(struct vm_page *pg) | | 365 | uvm_pageunwire(struct vm_page *pg) |
366 | { | | 366 | { |
367 | | | 367 | |
368 | /* nada */ | | 368 | /* nada */ |
369 | } | | 369 | } |
370 | | | 370 | |
371 | /* where's your schmonz now? */ | | 371 | /* where's your schmonz now? */ |
372 | #define PUNLIMIT(a) \ | | 372 | #define PUNLIMIT(a) \ |
373 | p->p_rlimit[a].rlim_cur = p->p_rlimit[a].rlim_max = RLIM_INFINITY; | | 373 | p->p_rlimit[a].rlim_cur = p->p_rlimit[a].rlim_max = RLIM_INFINITY; |
374 | void | | 374 | void |
375 | uvm_init_limits(struct proc *p) | | 375 | uvm_init_limits(struct proc *p) |
376 | { | | 376 | { |
377 | | | 377 | |
378 | PUNLIMIT(RLIMIT_STACK); | | 378 | PUNLIMIT(RLIMIT_STACK); |
379 | PUNLIMIT(RLIMIT_DATA); | | 379 | PUNLIMIT(RLIMIT_DATA); |
380 | PUNLIMIT(RLIMIT_RSS); | | 380 | PUNLIMIT(RLIMIT_RSS); |
381 | PUNLIMIT(RLIMIT_AS); | | 381 | PUNLIMIT(RLIMIT_AS); |
382 | /* nice, cascade */ | | 382 | /* nice, cascade */ |
383 | } | | 383 | } |
384 | #undef PUNLIMIT | | 384 | #undef PUNLIMIT |
385 | | | 385 | |
386 | /* | | 386 | /* |
387 | * This satisfies the "disgusting mmap hack" used by proplib. | | 387 | * This satisfies the "disgusting mmap hack" used by proplib. |
388 | * We probably should grow some more assertables to make sure we're | | 388 | * We probably should grow some more assertables to make sure we're |
389 | * not satisfying anything we shouldn't be satisfying. | | 389 | * not satisfying anything we shouldn't be satisfying. |
390 | */ | | 390 | */ |
391 | int | | 391 | int |
392 | uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot, | | 392 | uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot, |
393 | vm_prot_t maxprot, int flags, void *handle, voff_t off, vsize_t locklim) | | 393 | vm_prot_t maxprot, int flags, void *handle, voff_t off, vsize_t locklim) |
394 | { | | 394 | { |
395 | void *uaddr; | | 395 | void *uaddr; |
396 | int error; | | 396 | int error; |
397 | | | 397 | |
398 | if (prot != (VM_PROT_READ | VM_PROT_WRITE)) | | 398 | if (prot != (VM_PROT_READ | VM_PROT_WRITE)) |
399 | panic("uvm_mmap() variant unsupported"); | | 399 | panic("uvm_mmap() variant unsupported"); |
400 | if (flags != (MAP_PRIVATE | MAP_ANON)) | | 400 | if (flags != (MAP_PRIVATE | MAP_ANON)) |
401 | panic("uvm_mmap() variant unsupported"); | | 401 | panic("uvm_mmap() variant unsupported"); |
402 | | | 402 | |
403 | /* no reason in particular, but cf. uvm_default_mapaddr() */ | | 403 | /* no reason in particular, but cf. uvm_default_mapaddr() */ |
404 | if (*addr != 0) | | 404 | if (*addr != 0) |
405 | panic("uvm_mmap() variant unsupported"); | | 405 | panic("uvm_mmap() variant unsupported"); |
406 | | | 406 | |
407 | if (RUMP_LOCALPROC_P(curproc)) { | | 407 | if (RUMP_LOCALPROC_P(curproc)) { |
408 | uaddr = rumpuser_anonmmap(NULL, size, 0, 0, &error); | | 408 | uaddr = rumpuser_anonmmap(NULL, size, 0, 0, &error); |
409 | } else { | | 409 | } else { |
410 | error = rumpuser_sp_anonmmap(curproc->p_vmspace->vm_map.pmap, | | 410 | error = rumpuser_sp_anonmmap(curproc->p_vmspace->vm_map.pmap, |
411 | size, &uaddr); | | 411 | size, &uaddr); |
412 | } | | 412 | } |
413 | if (uaddr == NULL) | | 413 | if (uaddr == NULL) |
414 | return error; | | 414 | return error; |
415 | | | 415 | |
416 | *addr = (vaddr_t)uaddr; | | 416 | *addr = (vaddr_t)uaddr; |
417 | return 0; | | 417 | return 0; |
418 | } | | 418 | } |
419 | | | 419 | |
420 | struct pagerinfo { | | 420 | struct pagerinfo { |
421 | vaddr_t pgr_kva; | | 421 | vaddr_t pgr_kva; |
422 | int pgr_npages; | | 422 | int pgr_npages; |
423 | struct vm_page **pgr_pgs; | | 423 | struct vm_page **pgr_pgs; |
424 | bool pgr_read; | | 424 | bool pgr_read; |
425 | | | 425 | |
426 | LIST_ENTRY(pagerinfo) pgr_entries; | | 426 | LIST_ENTRY(pagerinfo) pgr_entries; |
427 | }; | | 427 | }; |
428 | static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist); | | 428 | static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist); |
429 | | | 429 | |
430 | /* | | 430 | /* |
431 | * Pager "map" in routine. Instead of mapping, we allocate memory | | 431 | * Pager "map" in routine. Instead of mapping, we allocate memory |
432 | * and copy page contents there. Not optimal or even strictly | | 432 | * and copy page contents there. Not optimal or even strictly |
433 | * correct (the caller might modify the page contents after mapping | | 433 | * correct (the caller might modify the page contents after mapping |
434 | * them in), but what the heck. Assumes UVMPAGER_MAPIN_WAITOK. | | 434 | * them in), but what the heck. Assumes UVMPAGER_MAPIN_WAITOK. |
435 | */ | | 435 | */ |
436 | vaddr_t | | 436 | vaddr_t |
437 | uvm_pagermapin(struct vm_page **pgs, int npages, int flags) | | 437 | uvm_pagermapin(struct vm_page **pgs, int npages, int flags) |
438 | { | | 438 | { |
439 | struct pagerinfo *pgri; | | 439 | struct pagerinfo *pgri; |
440 | vaddr_t curkva; | | 440 | vaddr_t curkva; |
441 | int i; | | 441 | int i; |
442 | | | 442 | |
443 | /* allocate structures */ | | 443 | /* allocate structures */ |
444 | pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP); | | 444 | pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP); |
445 | pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP); | | 445 | pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP); |
446 | pgri->pgr_npages = npages; | | 446 | pgri->pgr_npages = npages; |
447 | pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP); | | 447 | pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP); |
448 | pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0; | | 448 | pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0; |
449 | | | 449 | |
450 | /* copy contents to "mapped" memory */ | | 450 | /* copy contents to "mapped" memory */ |
451 | for (i = 0, curkva = pgri->pgr_kva; | | 451 | for (i = 0, curkva = pgri->pgr_kva; |
452 | i < npages; | | 452 | i < npages; |
453 | i++, curkva += PAGE_SIZE) { | | 453 | i++, curkva += PAGE_SIZE) { |
454 | /* | | 454 | /* |
455 | * We need to copy the previous contents of the pages to | | 455 | * We need to copy the previous contents of the pages to |
456 | * the window even if we are reading from the | | 456 | * the window even if we are reading from the |
457 | * device, since the device might not fill the contents of | | 457 | * device, since the device might not fill the contents of |
458 | * the full mapped range and we will end up corrupting | | 458 | * the full mapped range and we will end up corrupting |
459 | * data when we unmap the window. | | 459 | * data when we unmap the window. |
460 | */ | | 460 | */ |
461 | memcpy((void*)curkva, pgs[i]->uanon, PAGE_SIZE); | | 461 | memcpy((void*)curkva, pgs[i]->uanon, PAGE_SIZE); |
462 | pgri->pgr_pgs[i] = pgs[i]; | | 462 | pgri->pgr_pgs[i] = pgs[i]; |
463 | } | | 463 | } |
464 | | | 464 | |
465 | mutex_enter(&pagermtx); | | 465 | mutex_enter(&pagermtx); |
466 | LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries); | | 466 | LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries); |
467 | mutex_exit(&pagermtx); | | 467 | mutex_exit(&pagermtx); |
468 | | | 468 | |
469 | return pgri->pgr_kva; | | 469 | return pgri->pgr_kva; |
470 | } | | 470 | } |
471 | | | 471 | |
472 | /* | | 472 | /* |
473 | * map out the pager window. return contents from VA to page storage | | 473 | * map out the pager window. return contents from VA to page storage |
474 | * and free structures. | | 474 | * and free structures. |
475 | * | | 475 | * |
476 | * Note: does not currently support partial frees | | 476 | * Note: does not currently support partial frees |
477 | */ | | 477 | */ |
478 | void | | 478 | void |
479 | uvm_pagermapout(vaddr_t kva, int npages) | | 479 | uvm_pagermapout(vaddr_t kva, int npages) |
480 | { | | 480 | { |
481 | struct pagerinfo *pgri; | | 481 | struct pagerinfo *pgri; |
482 | vaddr_t curkva; | | 482 | vaddr_t curkva; |
483 | int i; | | 483 | int i; |
484 | | | 484 | |
485 | mutex_enter(&pagermtx); | | 485 | mutex_enter(&pagermtx); |
486 | LIST_FOREACH(pgri, &pagerlist, pgr_entries) { | | 486 | LIST_FOREACH(pgri, &pagerlist, pgr_entries) { |
487 | if (pgri->pgr_kva == kva) | | 487 | if (pgri->pgr_kva == kva) |
488 | break; | | 488 | break; |
489 | } | | 489 | } |
490 | KASSERT(pgri); | | 490 | KASSERT(pgri); |
491 | if (pgri->pgr_npages != npages) | | 491 | if (pgri->pgr_npages != npages) |
492 | panic("uvm_pagermapout: partial unmapping not supported"); | | 492 | panic("uvm_pagermapout: partial unmapping not supported"); |
493 | LIST_REMOVE(pgri, pgr_entries); | | 493 | LIST_REMOVE(pgri, pgr_entries); |
494 | mutex_exit(&pagermtx); | | 494 | mutex_exit(&pagermtx); |
495 | | | 495 | |
496 | if (pgri->pgr_read) { | | 496 | if (pgri->pgr_read) { |
497 | for (i = 0, curkva = pgri->pgr_kva; | | 497 | for (i = 0, curkva = pgri->pgr_kva; |
498 | i < pgri->pgr_npages; | | 498 | i < pgri->pgr_npages; |
499 | i++, curkva += PAGE_SIZE) { | | 499 | i++, curkva += PAGE_SIZE) { |
500 | memcpy(pgri->pgr_pgs[i]->uanon, (void*)curkva, PAGE_SIZE); | | 500 | memcpy(pgri->pgr_pgs[i]->uanon, (void*)curkva, PAGE_SIZE); |
501 | } | | 501 | } |
502 | } | | 502 | } |
503 | | | 503 | |
504 | kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *)); | | 504 | kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *)); |
505 | kmem_free((void*)pgri->pgr_kva, npages * PAGE_SIZE); | | 505 | kmem_free((void*)pgri->pgr_kva, npages * PAGE_SIZE); |
506 | kmem_free(pgri, sizeof(*pgri)); | | 506 | kmem_free(pgri, sizeof(*pgri)); |
507 | } | | 507 | } |
508 | | | 508 | |
509 | /* | | 509 | /* |
510 | * convert va in pager window to page structure. | | 510 | * convert va in pager window to page structure. |
511 | * XXX: how expensive is this (global lock, list traversal)? | | 511 | * XXX: how expensive is this (global lock, list traversal)? |
512 | */ | | 512 | */ |
513 | struct vm_page * | | 513 | struct vm_page * |
514 | uvm_pageratop(vaddr_t va) | | 514 | uvm_pageratop(vaddr_t va) |
515 | { | | 515 | { |
516 | struct pagerinfo *pgri; | | 516 | struct pagerinfo *pgri; |
517 | struct vm_page *pg = NULL; | | 517 | struct vm_page *pg = NULL; |
518 | int i; | | 518 | int i; |
519 | | | 519 | |
520 | mutex_enter(&pagermtx); | | 520 | mutex_enter(&pagermtx); |
521 | LIST_FOREACH(pgri, &pagerlist, pgr_entries) { | | 521 | LIST_FOREACH(pgri, &pagerlist, pgr_entries) { |
522 | if (pgri->pgr_kva <= va | | 522 | if (pgri->pgr_kva <= va |
523 | && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE) | | 523 | && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE) |
524 | break; | | 524 | break; |
525 | } | | 525 | } |
526 | if (pgri) { | | 526 | if (pgri) { |
527 | i = (va - pgri->pgr_kva) >> PAGE_SHIFT; | | 527 | i = (va - pgri->pgr_kva) >> PAGE_SHIFT; |
528 | pg = pgri->pgr_pgs[i]; | | 528 | pg = pgri->pgr_pgs[i]; |
529 | } | | 529 | } |
530 | mutex_exit(&pagermtx); | | 530 | mutex_exit(&pagermtx); |
531 | | | 531 | |
532 | return pg; | | 532 | return pg; |
533 | } | | 533 | } |
534 | | | 534 | |
535 | /* | | 535 | /* |
536 | * Called with the vm object locked. | | 536 | * Called with the vm object locked. |
537 | * | | 537 | * |
538 | * Put vnode object pages at the end of the access queue to indicate | | 538 | * Put vnode object pages at the end of the access queue to indicate |
539 | * they have been recently accessed and should not be immediate | | 539 | * they have been recently accessed and should not be immediate |
540 | * candidates for pageout. Do not do this for lookups done by | | 540 | * candidates for pageout. Do not do this for lookups done by |
541 | * the pagedaemon to mimic pmap_kentered mappings which don't track | | 541 | * the pagedaemon to mimic pmap_kentered mappings which don't track |
542 | * access information. | | 542 | * access information. |
543 | */ | | 543 | */ |
544 | struct vm_page * | | 544 | struct vm_page * |
545 | uvm_pagelookup(struct uvm_object *uobj, voff_t off) | | 545 | uvm_pagelookup(struct uvm_object *uobj, voff_t off) |
546 | { | | 546 | { |
547 | struct vm_page *pg; | | 547 | struct vm_page *pg; |
548 | bool ispagedaemon = curlwp == uvm.pagedaemon_lwp; | | 548 | bool ispagedaemon = curlwp == uvm.pagedaemon_lwp; |
549 | | | 549 | |
550 | pg = rb_tree_find_node(&uobj->rb_tree, &off); | | 550 | pg = rb_tree_find_node(&uobj->rb_tree, &off); |
551 | if (pg && !UVM_OBJ_IS_AOBJ(pg->uobject) && !ispagedaemon) { | | 551 | if (pg && !UVM_OBJ_IS_AOBJ(pg->uobject) && !ispagedaemon) { |
552 | mutex_enter(&uvm_pageqlock); | | 552 | mutex_enter(&uvm_pageqlock); |
553 | TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue); | | 553 | TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue); |
554 | TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue); | | 554 | TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue); |
555 | mutex_exit(&uvm_pageqlock); | | 555 | mutex_exit(&uvm_pageqlock); |
556 | } | | 556 | } |
557 | | | 557 | |
558 | return pg; | | 558 | return pg; |
559 | } | | 559 | } |
560 | | | 560 | |
561 | void | | 561 | void |
562 | uvm_page_unbusy(struct vm_page **pgs, int npgs) | | 562 | uvm_page_unbusy(struct vm_page **pgs, int npgs) |
563 | { | | 563 | { |
564 | struct vm_page *pg; | | 564 | struct vm_page *pg; |
565 | int i; | | 565 | int i; |
566 | | | 566 | |
567 | KASSERT(npgs > 0); | | 567 | KASSERT(npgs > 0); |
568 | KASSERT(mutex_owned(pgs[0]->uobject->vmobjlock)); | | 568 | KASSERT(mutex_owned(pgs[0]->uobject->vmobjlock)); |
569 | | | 569 | |
570 | for (i = 0; i < npgs; i++) { | | 570 | for (i = 0; i < npgs; i++) { |
571 | pg = pgs[i]; | | 571 | pg = pgs[i]; |
572 | if (pg == NULL) | | 572 | if (pg == NULL) |
573 | continue; | | 573 | continue; |
574 | | | 574 | |
575 | KASSERT(pg->flags & PG_BUSY); | | 575 | KASSERT(pg->flags & PG_BUSY); |
576 | if (pg->flags & PG_WANTED) | | 576 | if (pg->flags & PG_WANTED) |
577 | wakeup(pg); | | 577 | wakeup(pg); |
578 | if (pg->flags & PG_RELEASED) | | 578 | if (pg->flags & PG_RELEASED) |
579 | uvm_pagefree(pg); | | 579 | uvm_pagefree(pg); |
580 | else | | 580 | else |
581 | pg->flags &= ~(PG_WANTED|PG_BUSY); | | 581 | pg->flags &= ~(PG_WANTED|PG_BUSY); |
582 | } | | 582 | } |
583 | } | | 583 | } |
584 | | | 584 | |
585 | void | | 585 | void |
586 | uvm_estimatepageable(int *active, int *inactive) | | 586 | uvm_estimatepageable(int *active, int *inactive) |
587 | { | | 587 | { |
588 | | | 588 | |
589 | /* XXX: guessing game */ | | 589 | /* XXX: guessing game */ |
590 | *active = 1024; | | 590 | *active = 1024; |
591 | *inactive = 1024; | | 591 | *inactive = 1024; |
592 | } | | 592 | } |
593 | | | 593 | |
594 | bool | | 594 | bool |
595 | vm_map_starved_p(struct vm_map *map) | | 595 | vm_map_starved_p(struct vm_map *map) |
596 | { | | 596 | { |
597 | | | 597 | |
598 | if (map->flags & VM_MAP_WANTVA) | | 598 | if (map->flags & VM_MAP_WANTVA) |
599 | return true; | | 599 | return true; |
600 | | | 600 | |
601 | return false; | | 601 | return false; |
602 | } | | 602 | } |
603 | | | 603 | |
604 | int | | 604 | int |
605 | uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags) | | 605 | uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags) |
606 | { | | 606 | { |
607 | | | 607 | |
608 | panic("%s: unimplemented", __func__); | | 608 | panic("%s: unimplemented", __func__); |
609 | } | | 609 | } |
610 | | | 610 | |
611 | void | | 611 | void |
612 | uvm_unloan(void *v, int npages, int flags) | | 612 | uvm_unloan(void *v, int npages, int flags) |
613 | { | | 613 | { |
614 | | | 614 | |
615 | panic("%s: unimplemented", __func__); | | 615 | panic("%s: unimplemented", __func__); |
616 | } | | 616 | } |
617 | | | 617 | |
618 | int | | 618 | int |
619 | uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages, | | 619 | uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages, |
620 | struct vm_page **opp) | | 620 | struct vm_page **opp) |
621 | { | | 621 | { |
622 | | | 622 | |
623 | return EBUSY; | | 623 | return EBUSY; |
624 | } | | 624 | } |
625 | | | 625 | |
626 | struct vm_page * | | 626 | struct vm_page * |
627 | uvm_loanbreak(struct vm_page *pg) | | 627 | uvm_loanbreak(struct vm_page *pg) |
628 | { | | 628 | { |
629 | | | 629 | |
630 | panic("%s: unimplemented", __func__); | | 630 | panic("%s: unimplemented", __func__); |
631 | } | | 631 | } |
632 | | | 632 | |
633 | void | | 633 | void |
634 | ubc_purge(struct uvm_object *uobj) | | 634 | ubc_purge(struct uvm_object *uobj) |
635 | { | | 635 | { |
636 | | | 636 | |
637 | } | | 637 | } |
638 | | | 638 | |
639 | #ifdef DEBUGPRINT | | 639 | #ifdef DEBUGPRINT |
640 | void | | 640 | void |
641 | uvm_object_printit(struct uvm_object *uobj, bool full, | | 641 | uvm_object_printit(struct uvm_object *uobj, bool full, |
642 | void (*pr)(const char *, ...)) | | 642 | void (*pr)(const char *, ...)) |
643 | { | | 643 | { |
644 | | | 644 | |
645 | pr("VM OBJECT at %p, refs %d", uobj, uobj->uo_refs); | | 645 | pr("VM OBJECT at %p, refs %d", uobj, uobj->uo_refs); |
646 | } | | 646 | } |
647 | #endif | | 647 | #endif |
648 | | | 648 | |
649 | vaddr_t | | 649 | vaddr_t |
650 | uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz) | | 650 | uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz) |
651 | { | | 651 | { |
652 | | | 652 | |
653 | return 0; | | 653 | return 0; |
654 | } | | 654 | } |
655 | | | 655 | |
656 | int | | 656 | int |
657 | uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, | | 657 | uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, |
658 | vm_prot_t prot, bool set_max) | | 658 | vm_prot_t prot, bool set_max) |
659 | { | | 659 | { |
660 | | | 660 | |
661 | return EOPNOTSUPP; | | 661 | return EOPNOTSUPP; |
662 | } | | 662 | } |
663 | | | 663 | |
664 | /* | | 664 | /* |
665 | * UVM km | | 665 | * UVM km |
666 | */ | | 666 | */ |
667 | | | 667 | |
668 | vaddr_t | | 668 | vaddr_t |
669 | uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags) | | 669 | uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags) |
670 | { | | 670 | { |
671 | void *rv, *desired = NULL; | | 671 | void *rv, *desired = NULL; |
672 | int alignbit, error; | | 672 | int alignbit, error; |
673 | | | 673 | |
674 | #ifdef __x86_64__ | | 674 | #ifdef __x86_64__ |
675 | /* | | 675 | /* |
676 | * On amd64, allocate all module memory from the lowest 2GB. | | 676 | * On amd64, allocate all module memory from the lowest 2GB. |
677 | * This is because NetBSD kernel modules are compiled | | 677 | * This is because NetBSD kernel modules are compiled |
678 | * with -mcmodel=kernel and reserve only 4 bytes for | | 678 | * with -mcmodel=kernel and reserve only 4 bytes for |
679 | * offsets. If we load code compiled with -mcmodel=kernel | | 679 | * offsets. If we load code compiled with -mcmodel=kernel |
680 | * anywhere except the lowest or highest 2GB, it will not | | 680 | * anywhere except the lowest or highest 2GB, it will not |
681 | * work. Since userspace does not have access to the highest | | 681 | * work. Since userspace does not have access to the highest |
682 | * 2GB, use the lowest 2GB. | | 682 | * 2GB, use the lowest 2GB. |
683 | * | | 683 | * |
684 | * Note: this assumes the rump kernel resides in | | 684 | * Note: this assumes the rump kernel resides in |
685 | * the lowest 2GB as well. | | 685 | * the lowest 2GB as well. |
686 | * | | 686 | * |
687 | * Note2: yes, it's a quick hack, but since this is the only | | 687 | * Note2: yes, it's a quick hack, but since this is the only |
688 | * place where we care about the map we're allocating from, | | 688 | * place where we care about the map we're allocating from, |
689 | * just use a simple "if" instead of coming up with a fancy | | 689 | * just use a simple "if" instead of coming up with a fancy |
690 | * generic solution. | | 690 | * generic solution. |
691 | */ | | 691 | */ |
692 | extern struct vm_map *module_map; | | 692 | extern struct vm_map *module_map; |
693 | if (map == module_map) { | | 693 | if (map == module_map) { |
694 | desired = (void *)(0x80000000 - size); | | 694 | desired = (void *)(0x80000000 - size); |
695 | } | | 695 | } |
696 | #endif | | 696 | #endif |
697 | | | 697 | |
698 | alignbit = 0; | | 698 | alignbit = 0; |
699 | if (align) { | | 699 | if (align) { |
700 | alignbit = ffs(align)-1; | | 700 | alignbit = ffs(align)-1; |
701 | } | | 701 | } |
702 | | | 702 | |
703 | rv = rumpuser_anonmmap(desired, size, alignbit, flags & UVM_KMF_EXEC, | | 703 | rv = rumpuser_anonmmap(desired, size, alignbit, flags & UVM_KMF_EXEC, |
704 | &error); | | 704 | &error); |
705 | if (rv == NULL) { | | 705 | if (rv == NULL) { |
706 | if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT)) | | 706 | if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT)) |
707 | return 0; | | 707 | return 0; |
708 | else | | 708 | else |
709 | panic("uvm_km_alloc failed"); | | 709 | panic("uvm_km_alloc failed"); |
710 | } | | 710 | } |
711 | | | 711 | |
712 | if (flags & UVM_KMF_ZERO) | | 712 | if (flags & UVM_KMF_ZERO) |
713 | memset(rv, 0, size); | | 713 | memset(rv, 0, size); |
714 | | | 714 | |
715 | return (vaddr_t)rv; | | 715 | return (vaddr_t)rv; |
716 | } | | 716 | } |
717 | | | 717 | |
718 | void | | 718 | void |
719 | uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags) | | 719 | uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags) |
720 | { | | 720 | { |
721 | | | 721 | |
722 | rumpuser_unmap((void *)vaddr, size); | | 722 | rumpuser_unmap((void *)vaddr, size); |
723 | } | | 723 | } |
724 | | | 724 | |
725 | struct vm_map * | | 725 | struct vm_map * |
726 | uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr, | | 726 | uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr, |
727 | vsize_t size, int pageable, bool fixed, struct vm_map *submap) | | 727 | vsize_t size, int pageable, bool fixed, struct vm_map *submap) |
728 | { | | 728 | { |
729 | | | 729 | |
730 | return (struct vm_map *)417416; | | 730 | return (struct vm_map *)417416; |
731 | } | | 731 | } |
732 | | | 732 | |
733 | int | | 733 | int |
734 | uvm_km_kmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags, | | 734 | uvm_km_kmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags, |
735 | vmem_addr_t *addr) | | 735 | vmem_addr_t *addr) |
736 | { | | 736 | { |
737 | vaddr_t va; | | 737 | vaddr_t va; |
738 | va = (vaddr_t)rump_hypermalloc(size, PAGE_SIZE, | | 738 | va = (vaddr_t)rump_hypermalloc(size, PAGE_SIZE, |
739 | (flags & VM_SLEEP), "kmalloc"); | | 739 | (flags & VM_SLEEP), "kmalloc"); |
740 | | | 740 | |
741 | if (va) { | | 741 | if (va) { |
742 | *addr = va; | | 742 | *addr = va; |
743 | return 0; | | 743 | return 0; |
744 | } else { | | 744 | } else { |
745 | return ENOMEM; | | 745 | return ENOMEM; |
746 | } | | 746 | } |
747 | } | | 747 | } |
748 | | | 748 | |
749 | void | | 749 | void |
750 | uvm_km_kmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size) | | 750 | uvm_km_kmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size) |
751 | { | | 751 | { |
752 | | | 752 | |
753 | rump_hyperfree((void *)addr, size); | | 753 | rump_hyperfree((void *)addr, size); |
754 | } | | 754 | } |
755 | | | 755 | |
756 | /* | | 756 | /* |
757 | * VM space locking routines. We don't really have to do anything, | | 757 | * VM space locking routines. We don't really have to do anything, |
758 | * since the pages are always "wired" (both local and remote processes). | | 758 | * since the pages are always "wired" (both local and remote processes). |
759 | */ | | 759 | */ |
760 | int | | 760 | int |
761 | uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access) | | 761 | uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access) |
762 | { | | 762 | { |
763 | | | 763 | |
764 | return 0; | | 764 | return 0; |
765 | } | | 765 | } |
766 | | | 766 | |
767 | void | | 767 | void |
768 | uvm_vsunlock(struct vmspace *vs, void *addr, size_t len) | | 768 | uvm_vsunlock(struct vmspace *vs, void *addr, size_t len) |
769 | { | | 769 | { |
770 | | | 770 | |
771 | } | | 771 | } |
772 | | | 772 | |
773 | /* | | 773 | /* |
774 | * For the local case the buffer mappers don't need to do anything. | | 774 | * For the local case the buffer mappers don't need to do anything. |
775 | * For the remote case we need to reserve space and copy data in or | | 775 | * For the remote case we need to reserve space and copy data in or |
776 | * out, depending on B_READ/B_WRITE. | | 776 | * out, depending on B_READ/B_WRITE. |
777 | */ | | 777 | */ |
778 | int | | 778 | int |
779 | vmapbuf(struct buf *bp, vsize_t len) | | 779 | vmapbuf(struct buf *bp, vsize_t len) |
780 | { | | 780 | { |
781 | int error = 0; | | 781 | int error = 0; |
782 | | | 782 | |
783 | bp->b_saveaddr = bp->b_data; | | 783 | bp->b_saveaddr = bp->b_data; |
784 | | | 784 | |
785 | /* remote case */ | | 785 | /* remote case */ |
786 | if (!RUMP_LOCALPROC_P(curproc)) { | | 786 | if (!RUMP_LOCALPROC_P(curproc)) { |
787 | bp->b_data = rump_hypermalloc(len, 0, true, "vmapbuf"); | | 787 | bp->b_data = rump_hypermalloc(len, 0, true, "vmapbuf"); |
788 | if (BUF_ISWRITE(bp)) { | | 788 | if (BUF_ISWRITE(bp)) { |
789 | error = copyin(bp->b_saveaddr, bp->b_data, len); | | 789 | error = copyin(bp->b_saveaddr, bp->b_data, len); |
790 | if (error) { | | 790 | if (error) { |
791 | rump_hyperfree(bp->b_data, len); | | 791 | rump_hyperfree(bp->b_data, len); |
792 | bp->b_data = bp->b_saveaddr; | | 792 | bp->b_data = bp->b_saveaddr; |
793 | bp->b_saveaddr = 0; | | 793 | bp->b_saveaddr = 0; |
794 | } | | 794 | } |
795 | } | | 795 | } |
796 | } | | 796 | } |
797 | | | 797 | |
798 | return error; | | 798 | return error; |
799 | } | | 799 | } |
800 | | | 800 | |
801 | void | | 801 | void |
802 | vunmapbuf(struct buf *bp, vsize_t len) | | 802 | vunmapbuf(struct buf *bp, vsize_t len) |
803 | { | | 803 | { |
804 | | | 804 | |
805 | /* remote case */ | | 805 | /* remote case */ |
806 | if (!RUMP_LOCALPROC_P(bp->b_proc)) { | | 806 | if (!RUMP_LOCALPROC_P(bp->b_proc)) { |
807 | if (BUF_ISREAD(bp)) { | | 807 | if (BUF_ISREAD(bp)) { |
808 | bp->b_error = copyout_proc(bp->b_proc, | | 808 | bp->b_error = copyout_proc(bp->b_proc, |
809 | bp->b_data, bp->b_saveaddr, len); | | 809 | bp->b_data, bp->b_saveaddr, len); |
810 | } | | 810 | } |
811 | rump_hyperfree(bp->b_data, len); | | 811 | rump_hyperfree(bp->b_data, len); |
812 | } | | 812 | } |
813 | | | 813 | |
814 | bp->b_data = bp->b_saveaddr; | | 814 | bp->b_data = bp->b_saveaddr; |
815 | bp->b_saveaddr = 0; | | 815 | bp->b_saveaddr = 0; |
816 | } | | 816 | } |
817 | | | 817 | |
818 | void | | 818 | void |
819 | uvmspace_addref(struct vmspace *vm) | | 819 | uvmspace_addref(struct vmspace *vm) |
820 | { | | 820 | { |
821 | | | 821 | |
822 | /* | | 822 | /* |
823 | * No dynamically allocated vmspaces exist. | | 823 | * No dynamically allocated vmspaces exist. |
824 | */ | | 824 | */ |
825 | } | | 825 | } |
826 | | | 826 | |
827 | void | | 827 | void |
828 | uvmspace_free(struct vmspace *vm) | | 828 | uvmspace_free(struct vmspace *vm) |
829 | { | | 829 | { |
830 | | | 830 | |
831 | /* nothing for now */ | | 831 | /* nothing for now */ |
832 | } | | 832 | } |
833 | | | 833 | |
834 | /* | | 834 | /* |
835 | * page life cycle stuff. it really doesn't exist, so just stubs. | | 835 | * page life cycle stuff. it really doesn't exist, so just stubs. |
836 | */ | | 836 | */ |
837 | | | 837 | |
838 | void | | 838 | void |
839 | uvm_pageactivate(struct vm_page *pg) | | 839 | uvm_pageactivate(struct vm_page *pg) |
840 | { | | 840 | { |
841 | | | 841 | |
842 | /* nada */ | | 842 | /* nada */ |
843 | } | | 843 | } |
844 | | | 844 | |
845 | void | | 845 | void |
846 | uvm_pagedeactivate(struct vm_page *pg) | | 846 | uvm_pagedeactivate(struct vm_page *pg) |
847 | { | | 847 | { |
848 | | | 848 | |
849 | /* nada */ | | 849 | /* nada */ |
850 | } | | 850 | } |
851 | | | 851 | |
852 | void | | 852 | void |
853 | uvm_pagedequeue(struct vm_page *pg) | | 853 | uvm_pagedequeue(struct vm_page *pg) |
854 | { | | 854 | { |
855 | | | 855 | |
856 | /* nada */ | | 856 | /* nada */ |
857 | } | | 857 | } |
858 | | | 858 | |
859 | void | | 859 | void |
860 | uvm_pageenqueue(struct vm_page *pg) | | 860 | uvm_pageenqueue(struct vm_page *pg) |
861 | { | | 861 | { |
862 | | | 862 | |
863 | /* nada */ | | 863 | /* nada */ |
864 | } | | 864 | } |
865 | | | 865 | |
866 | void | | 866 | void |
867 | uvmpdpol_anfree(struct vm_anon *an) | | 867 | uvmpdpol_anfree(struct vm_anon *an) |
868 | { | | 868 | { |
869 | | | 869 | |
870 | /* nada */ | | 870 | /* nada */ |
871 | } | | 871 | } |
872 | | | 872 | |
873 | /* | | 873 | /* |
874 | * Physical address accessors. | | 874 | * Physical address accessors. |
875 | */ | | 875 | */ |
876 | | | 876 | |
877 | struct vm_page * | | 877 | struct vm_page * |
878 | uvm_phys_to_vm_page(paddr_t pa) | | 878 | uvm_phys_to_vm_page(paddr_t pa) |
879 | { | | 879 | { |
880 | | | 880 | |
881 | return NULL; | | 881 | return NULL; |
882 | } | | 882 | } |
883 | | | 883 | |
884 | paddr_t | | 884 | paddr_t |
885 | uvm_vm_page_to_phys(const struct vm_page *pg) | | 885 | uvm_vm_page_to_phys(const struct vm_page *pg) |
886 | { | | 886 | { |
887 | | | 887 | |
888 | return 0; | | 888 | return 0; |
889 | } | | 889 | } |
890 | | | 890 | |
891 | /* | | 891 | /* |
892 | * Routines related to the Page Baroness. | | 892 | * Routines related to the Page Baroness. |
893 | */ | | 893 | */ |
894 | | | 894 | |
895 | void | | 895 | void |
896 | uvm_wait(const char *msg) | | 896 | uvm_wait(const char *msg) |
897 | { | | 897 | { |
898 | | | 898 | |
899 | if (__predict_false(curlwp == uvm.pagedaemon_lwp)) | | 899 | if (__predict_false(curlwp == uvm.pagedaemon_lwp)) |
900 | panic("pagedaemon out of memory"); | | 900 | panic("pagedaemon out of memory"); |
901 | if (__predict_false(rump_threads == 0)) | | 901 | if (__predict_false(rump_threads == 0)) |
902 | panic("pagedaemon missing (RUMP_THREADS = 0)"); | | 902 | panic("pagedaemon missing (RUMP_THREADS = 0)"); |
903 | | | 903 | |
904 | mutex_enter(&pdaemonmtx); | | 904 | mutex_enter(&pdaemonmtx); |
905 | pdaemon_waiters++; | | 905 | pdaemon_waiters++; |
906 | cv_signal(&pdaemoncv); | | 906 | cv_signal(&pdaemoncv); |
907 | cv_wait(&oomwait, &pdaemonmtx); | | 907 | cv_wait(&oomwait, &pdaemonmtx); |
908 | mutex_exit(&pdaemonmtx); | | 908 | mutex_exit(&pdaemonmtx); |
909 | } | | 909 | } |
910 | | | 910 | |
911 | void | | 911 | void |
912 | uvm_pageout_start(int npages) | | 912 | uvm_pageout_start(int npages) |
913 | { | | 913 | { |
914 | | | 914 | |
915 | mutex_enter(&pdaemonmtx); | | 915 | mutex_enter(&pdaemonmtx); |
916 | uvmexp.paging += npages; | | 916 | uvmexp.paging += npages; |
917 | mutex_exit(&pdaemonmtx); | | 917 | mutex_exit(&pdaemonmtx); |
918 | } | | 918 | } |
919 | | | 919 | |
920 | void | | 920 | void |
921 | uvm_pageout_done(int npages) | | 921 | uvm_pageout_done(int npages) |
922 | { | | 922 | { |
923 | | | 923 | |
924 | if (!npages) | | 924 | if (!npages) |
925 | return; | | 925 | return; |
926 | | | 926 | |
927 | mutex_enter(&pdaemonmtx); | | 927 | mutex_enter(&pdaemonmtx); |
928 | KASSERT(uvmexp.paging >= npages); | | 928 | KASSERT(uvmexp.paging >= npages); |
929 | uvmexp.paging -= npages; | | 929 | uvmexp.paging -= npages; |
930 | | | 930 | |
931 | if (pdaemon_waiters) { | | 931 | if (pdaemon_waiters) { |
932 | pdaemon_waiters = 0; | | 932 | pdaemon_waiters = 0; |
933 | cv_broadcast(&oomwait); | | 933 | cv_broadcast(&oomwait); |
934 | } | | 934 | } |
935 | mutex_exit(&pdaemonmtx); | | 935 | mutex_exit(&pdaemonmtx); |
936 | } | | 936 | } |

static bool
processpage(struct vm_page *pg, bool *lockrunning)
{
	struct uvm_object *uobj;

	uobj = pg->uobject;
	if (mutex_tryenter(uobj->vmobjlock)) {
		if ((pg->flags & PG_BUSY) == 0) {
			mutex_exit(&uvm_pageqlock);
			uobj->pgops->pgo_put(uobj, pg->offset,
			    pg->offset + PAGE_SIZE,
			    PGO_CLEANIT|PGO_FREE);
			KASSERT(!mutex_owned(uobj->vmobjlock));
			return true;
		} else {
			mutex_exit(uobj->vmobjlock);
		}
	} else if (*lockrunning == false && ncpu > 1) {
		CPU_INFO_ITERATOR cii;
		struct cpu_info *ci;
		struct lwp *l;

		l = mutex_owner(uobj->vmobjlock);
		for (CPU_INFO_FOREACH(cii, ci)) {
			if (ci->ci_curlwp == l) {
				*lockrunning = true;
				break;
			}
		}
	}

	return false;
}
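
/*
 * A note on the try-lock above: processpage() is called with
 * uvm_pageqlock held, while the usual UVM lock order takes the object
 * lock before the page queue lock.  Blocking on vmobjlock here could
 * therefore deadlock, so the routine only try-enters it and, on
 * failure, records in *lockrunning whether the lock owner is
 * currently running on a CPU.  uvm_pageout() uses that hint to decide
 * whether briefly yielding the host CPU might let the owner finish
 * and release the lock.
 */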

/*
 * The Diabolical pageDaemon Director (DDD).
 *
 * This routine can always use better heuristics.
 */
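
/*
 * Each wakeup escalates through roughly the following stages, going
 * back to sleep as soon as NEED_PAGEDAEMON() is no longer true:
 *
 *	cv_wait(&pdaemoncv, &pdaemonmtx);	0. sleep until kicked
 *	pool_cache_reclaim(&pagecache);		1. free cached pages
 *	processpage(pg, &lockrunning);		2. push out vnode pages
 *	pool_cache_reclaim(&pagecache);		   ... and collect them
 *	pool_drain_start(); pool_drain_end();	3. drain kernel pools
 *	cv_broadcast(&oomwait);			4. wake memory waiters
 */
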
void
uvm_pageout(void *arg)
{
	struct vm_page *pg;
	struct pool *pp, *pp_first;
	uint64_t where;
	int cleaned, skip, skipped;
	bool succ;
	bool lockrunning;

	mutex_enter(&pdaemonmtx);
	for (;;) {
		if (!NEED_PAGEDAEMON()) {
			kernel_map->flags &= ~VM_MAP_WANTVA;
		}

		if (pdaemon_waiters) {
			pdaemon_waiters = 0;
			cv_broadcast(&oomwait);
		}

		cv_wait(&pdaemoncv, &pdaemonmtx);
		uvmexp.pdwoke++;

		/* tell the world that we are hungry */
		kernel_map->flags |= VM_MAP_WANTVA;
		mutex_exit(&pdaemonmtx);

		/*
		 * step one: reclaim the page cache.  this should give
		 * us the biggest earnings since whole pages are released
		 * into backing memory.
		 */
		pool_cache_reclaim(&pagecache);
		if (!NEED_PAGEDAEMON()) {
			mutex_enter(&pdaemonmtx);
			continue;
		}

		/*
		 * Ok, so that didn't help.  Next, try to hunt memory
		 * by pushing out vnode pages.  The pages might contain
		 * useful cached data, but we need the memory.
		 */
		cleaned = 0;
		skip = 0;
		lockrunning = false;
 again:
		mutex_enter(&uvm_pageqlock);
		while (cleaned < PAGEDAEMON_OBJCHUNK) {
			skipped = 0;
			TAILQ_FOREACH(pg, &vmpage_lruqueue, pageq.queue) {

				/*
				 * skip over pages we _might_ have tried
				 * to handle earlier.  they might not be
				 * exactly the same ones, but I'm not too
				 * concerned.
				 */
				if (skipped++ < skip)
					continue;

				if (processpage(pg, &lockrunning)) {
					cleaned++;
					goto again;
				}

				skip++;
			}
			break;
		}
		mutex_exit(&uvm_pageqlock);

		/*
		 * Ok, someone is running with an object lock held.
		 * We want to yield the host CPU to make sure the
		 * thread is not parked on the host.  Since sched_yield()
		 * doesn't appear to do anything on NetBSD, nanosleep
		 * for the smallest possible time and hope we're back in
		 * the game soon.
		 */
		if (cleaned == 0 && lockrunning) {
			uint64_t sec, nsec;

			sec = 0;
			nsec = 1;
			rumpuser_nanosleep(&sec, &nsec, NULL);

			lockrunning = false;
			skip = 0;

			/* and here we go again */
			goto again;
		}

		/*
		 * And of course we need to reclaim the page cache
		 * again to actually release memory.
		 */
		pool_cache_reclaim(&pagecache);
		if (!NEED_PAGEDAEMON()) {
			mutex_enter(&pdaemonmtx);
			continue;
		}

		/*
		 * And then drain the pools.  Wipe them out ... all of them.
		 */

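		/*
		 * pool_drain_start() hands back one pool and a
		 * completion token in "where"; pool_drain_end()
		 * finishes that drain and reports whether anything was
		 * actually freed.  The loop below thus visits one pool
		 * per iteration and stops on the first successful
		 * drain, or once it has come full circle to pp_first.
		 */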
		pool_drain_start(&pp_first, &where);
		pp = pp_first;
		for (;;) {
			rump_vfs_drainbufs(10 /* XXX: estimate better */);
			succ = pool_drain_end(pp, where);
			if (succ)
				break;
			pool_drain_start(&pp, &where);
			if (pp == pp_first) {
				succ = pool_drain_end(pp, where);
				break;
			}
		}

		/*
		 * Need to use PYEC on our bag of tricks.
		 * Unfortunately, the wife just borrowed it.
		 */

		mutex_enter(&pdaemonmtx);
		if (!succ && cleaned == 0 && pdaemon_waiters &&
		    uvmexp.paging == 0) {
			rumpuser_dprintf("pagedaemoness: failed to reclaim "
			    "memory ... sleeping (deadlock?)\n");
			/* cv_timedwait() reacquires pdaemonmtx before returning */
			cv_timedwait(&pdaemoncv, &pdaemonmtx, hz);
		}
	}

	panic("you can swap out any time you like, but you can never leave");
}

void
uvm_kick_pdaemon(void)
{

	/*
	 * Wake up the diabolical pagedaemon director if we are over
	 * 90% of the memory limit.  This is a complete and utter
	 * Stetson-Harrison decision which you are allowed to fine-tune.
	 * Don't bother locking.  If we have some unflushed caches,
	 * other waker-uppers will deal with the issue.
	 */
	if (NEED_PAGEDAEMON()) {
		cv_signal(&pdaemoncv);
	}
}
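
/*
 * Worked example of the rule above (numbers illustrative only): with
 * rump_physmemlimit set to 100 MB, NEED_PAGEDAEMON() (defined earlier
 * in this file) starts evaluating true once allocations push the
 * accounted memory past roughly 90 MB, and from then on every call
 * here signals pdaemoncv.  The unlocked signal is lossy by design:
 * a missed wakeup is harmless, since the next allocation attempt
 * kicks again.
 */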

void *
rump_hypermalloc(size_t howmuch, int alignment, bool waitok, const char *wmsg)
{
	unsigned long newmem;
	void *rv;

	uvm_kick_pdaemon(); /* ouch */

	/* first we must be within the limit */
 limitagain:
	if (rump_physmemlimit != RUMPMEM_UNLIMITED) {
		newmem = atomic_add_long_nv(&curphysmem, howmuch);
		if (newmem > rump_physmemlimit) {
			/* over the limit: undo the bump, then fail or wait */
			atomic_add_long_nv(&curphysmem, -howmuch);
			if (!waitok) {
				return NULL;
			}
			uvm_wait(wmsg);
			goto limitagain;
		}
	}

	/* second, we must get something from the backend */
 again:
	rv = rumpuser_malloc(howmuch, alignment);
	if (__predict_false(rv == NULL && waitok)) {
		uvm_wait(wmsg);
		goto again;
	}

	return rv;
}
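
/*
 * Illustrative sketch (hypothetical caller): grab a page-aligned
 * scratch buffer and sleep until the allocation can be satisfied:
 *
 *	void *buf;
 *
 *	buf = rump_hypermalloc(PAGE_SIZE, PAGE_SIZE, true, "scratch");
 *	(with waitok == true the call loops in uvm_wait() instead of
 *	failing, so buf is never NULL here)
 *	...
 *	rump_hyperfree(buf, PAGE_SIZE);
 *
 * With waitok == false, the return value must be checked for NULL.
 */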

void
rump_hyperfree(void *what, size_t size)
{

	if (rump_physmemlimit != RUMPMEM_UNLIMITED) {
		atomic_add_long(&curphysmem, -size);
	}
	rumpuser_free(what);
}
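
/*
 * Note that the accounting above trusts the caller: the size passed
 * to rump_hyperfree() must match what was handed to rump_hypermalloc(),
 * since curphysmem is adjusted by the caller-supplied value on both
 * paths and no per-allocation size is recorded.
 */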