| @@ -1,1123 +1,1152 @@ | | | @@ -1,1123 +1,1152 @@ |
1 | /* $NetBSD: vm.c,v 1.104 2010/12/01 20:29:57 pooka Exp $ */ | | 1 | /* $NetBSD: vm.c,v 1.105 2011/01/08 09:40:05 pooka Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved. | | 4 | * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved. |
5 | * | | 5 | * |
6 | * Development of this software was supported by | | 6 | * Development of this software was supported by |
7 | * The Finnish Cultural Foundation and the Research Foundation of | | 7 | * The Finnish Cultural Foundation and the Research Foundation of |
8 | * The Helsinki University of Technology. | | 8 | * The Helsinki University of Technology. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright | | 15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the | | 16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. | | 17 | * documentation and/or other materials provided with the distribution. |
18 | * | | 18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS | | 19 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS |
20 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | | 20 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
21 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | | 21 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
22 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | | 22 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | | 24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
25 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 25 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
29 | * SUCH DAMAGE. | | 29 | * SUCH DAMAGE. |
30 | */ | | 30 | */ |
31 | | | 31 | |
32 | /* | | 32 | /* |
33 | * Virtual memory emulation routines. | | 33 | * Virtual memory emulation routines. |
34 | */ | | 34 | */ |
35 | | | 35 | |
36 | /* | | 36 | /* |
37 | * XXX: we abuse pg->uanon for the virtual address of the storage | | 37 | * XXX: we abuse pg->uanon for the virtual address of the storage |
38 | * for each page. phys_addr would fit the job description better, | | 38 | * for each page. phys_addr would fit the job description better, |
39 | * except that it will create unnecessary lossage on some platforms | | 39 | * except that it will create unnecessary lossage on some platforms |
40 | * due to not being a pointer type. | | 40 | * due to not being a pointer type. |
41 | */ | | 41 | */ |
42 | | | 42 | |
43 | #include <sys/cdefs.h> | | 43 | #include <sys/cdefs.h> |
44 | __KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.104 2010/12/01 20:29:57 pooka Exp $"); | | 44 | __KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.105 2011/01/08 09:40:05 pooka Exp $"); |
45 | | | 45 | |
46 | #include <sys/param.h> | | 46 | #include <sys/param.h> |
47 | #include <sys/atomic.h> | | 47 | #include <sys/atomic.h> |
48 | #include <sys/buf.h> | | 48 | #include <sys/buf.h> |
49 | #include <sys/kernel.h> | | 49 | #include <sys/kernel.h> |
50 | #include <sys/kmem.h> | | 50 | #include <sys/kmem.h> |
51 | #include <sys/mman.h> | | 51 | #include <sys/mman.h> |
52 | #include <sys/null.h> | | 52 | #include <sys/null.h> |
53 | #include <sys/vnode.h> | | 53 | #include <sys/vnode.h> |
54 | | | 54 | |
55 | #include <machine/pmap.h> | | 55 | #include <machine/pmap.h> |
56 | | | 56 | |
57 | #include <rump/rumpuser.h> | | 57 | #include <rump/rumpuser.h> |
58 | | | 58 | |
59 | #include <uvm/uvm.h> | | 59 | #include <uvm/uvm.h> |
60 | #include <uvm/uvm_ddb.h> | | 60 | #include <uvm/uvm_ddb.h> |
61 | #include <uvm/uvm_pdpolicy.h> | | 61 | #include <uvm/uvm_pdpolicy.h> |
62 | #include <uvm/uvm_prot.h> | | 62 | #include <uvm/uvm_prot.h> |
63 | #include <uvm/uvm_readahead.h> | | 63 | #include <uvm/uvm_readahead.h> |
64 | | | 64 | |
65 | #include "rump_private.h" | | 65 | #include "rump_private.h" |
66 | #include "rump_vfs_private.h" | | 66 | #include "rump_vfs_private.h" |
67 | | | 67 | |
68 | kmutex_t uvm_pageqlock; | | 68 | kmutex_t uvm_pageqlock; |
69 | kmutex_t uvm_swap_data_lock; | | 69 | kmutex_t uvm_swap_data_lock; |
70 | | | 70 | |
71 | struct uvmexp uvmexp; | | 71 | struct uvmexp uvmexp; |
72 | int *uvmexp_pagesize; | | 72 | int *uvmexp_pagesize; |
73 | int *uvmexp_pagemask; | | 73 | int *uvmexp_pagemask; |
74 | int *uvmexp_pageshift; | | 74 | int *uvmexp_pageshift; |
75 | struct uvm uvm; | | 75 | struct uvm uvm; |
76 | | | 76 | |
77 | struct vm_map rump_vmmap; | | 77 | struct vm_map rump_vmmap; |
78 | static struct vm_map_kernel kmem_map_store; | | 78 | static struct vm_map_kernel kmem_map_store; |
79 | struct vm_map *kmem_map = &kmem_map_store.vmk_map; | | 79 | struct vm_map *kmem_map = &kmem_map_store.vmk_map; |
80 | | | 80 | |
81 | static struct vm_map_kernel kernel_map_store; | | 81 | static struct vm_map_kernel kernel_map_store; |
82 | struct vm_map *kernel_map = &kernel_map_store.vmk_map; | | 82 | struct vm_map *kernel_map = &kernel_map_store.vmk_map; |
83 | | | 83 | |
84 | static unsigned int pdaemon_waiters; | | 84 | static unsigned int pdaemon_waiters; |
85 | static kmutex_t pdaemonmtx; | | 85 | static kmutex_t pdaemonmtx; |
86 | static kcondvar_t pdaemoncv, oomwait; | | 86 | static kcondvar_t pdaemoncv, oomwait; |
87 | | | 87 | |
88 | unsigned long rump_physmemlimit = RUMPMEM_UNLIMITED; | | 88 | unsigned long rump_physmemlimit = RUMPMEM_UNLIMITED; |
89 | static unsigned long curphysmem; | | 89 | static unsigned long curphysmem; |
90 | static unsigned long dddlim; /* 90% of memory limit used */ | | 90 | static unsigned long dddlim; /* 90% of memory limit used */ |
91 | #define NEED_PAGEDAEMON() \ | | 91 | #define NEED_PAGEDAEMON() \ |
92 | (rump_physmemlimit != RUMPMEM_UNLIMITED && curphysmem > dddlim) | | 92 | (rump_physmemlimit != RUMPMEM_UNLIMITED && curphysmem > dddlim) |
93 | | | 93 | |
94 | /* | | 94 | /* |
95 | * Try to free two pages worth of pages from objects. | | 95 | * Try to free two pages worth of pages from objects. |
96 | * If this successfully frees a full page cache page, we'll | | 96 | * If this successfully frees a full page cache page, we'll |
97 | * free the released page plus PAGE_SIZE/sizeof(vm_page). | | 97 | * free the released page plus PAGE_SIZE/sizeof(vm_page). |
98 | */ | | 98 | */ |
99 | #define PAGEDAEMON_OBJCHUNK (2*PAGE_SIZE / sizeof(struct vm_page)) | | 99 | #define PAGEDAEMON_OBJCHUNK (2*PAGE_SIZE / sizeof(struct vm_page)) |
100 | | | 100 | |
101 | /* | | 101 | /* |
102 | * Keep a list of least recently used pages. Since the only way a | | 102 | * Keep a list of least recently used pages. Since the only way a |
103 | * rump kernel can "access" a page is via lookup, we put the page | | 103 | * rump kernel can "access" a page is via lookup, we put the page |
104 | * at the back of queue every time a lookup for it is done. If the | | 104 | * at the back of queue every time a lookup for it is done. If the |
105 | * page is in front of this global queue and we're short of memory, | | 105 | * page is in front of this global queue and we're short of memory, |
106 | * it's a candidate for pageout. | | 106 | * it's a candidate for pageout. |
107 | */ | | 107 | */ |
108 | static struct pglist vmpage_lruqueue; | | 108 | static struct pglist vmpage_lruqueue; |
109 | static unsigned vmpage_onqueue; | | 109 | static unsigned vmpage_onqueue; |
110 | | | 110 | |
111 | static int | | 111 | static int |
112 | pg_compare_key(void *ctx, const void *n, const void *key) | | 112 | pg_compare_key(void *ctx, const void *n, const void *key) |
113 | { | | 113 | { |
114 | voff_t a = ((const struct vm_page *)n)->offset; | | 114 | voff_t a = ((const struct vm_page *)n)->offset; |
115 | voff_t b = *(const voff_t *)key; | | 115 | voff_t b = *(const voff_t *)key; |
116 | | | 116 | |
117 | if (a < b) | | 117 | if (a < b) |
118 | return -1; | | 118 | return -1; |
119 | else if (a > b) | | 119 | else if (a > b) |
120 | return 1; | | 120 | return 1; |
121 | else | | 121 | else |
122 | return 0; | | 122 | return 0; |
123 | } | | 123 | } |
124 | | | 124 | |
125 | static int | | 125 | static int |
126 | pg_compare_nodes(void *ctx, const void *n1, const void *n2) | | 126 | pg_compare_nodes(void *ctx, const void *n1, const void *n2) |
127 | { | | 127 | { |
128 | | | 128 | |
129 | return pg_compare_key(ctx, n1, &((const struct vm_page *)n2)->offset); | | 129 | return pg_compare_key(ctx, n1, &((const struct vm_page *)n2)->offset); |
130 | } | | 130 | } |
131 | | | 131 | |
132 | const rb_tree_ops_t uvm_page_tree_ops = { | | 132 | const rb_tree_ops_t uvm_page_tree_ops = { |
133 | .rbto_compare_nodes = pg_compare_nodes, | | 133 | .rbto_compare_nodes = pg_compare_nodes, |
134 | .rbto_compare_key = pg_compare_key, | | 134 | .rbto_compare_key = pg_compare_key, |
135 | .rbto_node_offset = offsetof(struct vm_page, rb_node), | | 135 | .rbto_node_offset = offsetof(struct vm_page, rb_node), |
136 | .rbto_context = NULL | | 136 | .rbto_context = NULL |
137 | }; | | 137 | }; |
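
These two comparators order pages by their offset within the owning uvm_object and plug into NetBSD's rbtree(3). For orientation, a sketch of how an ops table like this is wired up and queried; the find call matches the one in uvm_pagelookup() further down, while the init call is an assumption about object-setup code that lives outside this file:

    rb_tree_init(&uobj->rb_tree, &uvm_page_tree_ops);  /* attach comparators */
    pg = rb_tree_find_node(&uobj->rb_tree, &off);      /* lookup keyed by pg->offset */
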
138 | | | 138 | |
139 | /* | | 139 | /* |
140 | * vm pages | | 140 | * vm pages |
141 | */ | | 141 | */ |
142 | | | 142 | |
143 | static int | | 143 | static int |
144 | pgctor(void *arg, void *obj, int flags) | | 144 | pgctor(void *arg, void *obj, int flags) |
145 | { | | 145 | { |
146 | struct vm_page *pg = obj; | | 146 | struct vm_page *pg = obj; |
147 | | | 147 | |
148 | memset(pg, 0, sizeof(*pg)); | | 148 | memset(pg, 0, sizeof(*pg)); |
149 | pg->uanon = rump_hypermalloc(PAGE_SIZE, PAGE_SIZE, | | 149 | pg->uanon = rump_hypermalloc(PAGE_SIZE, PAGE_SIZE, |
150 | (flags & PR_WAITOK) == PR_WAITOK, "pgalloc"); | | 150 | (flags & PR_WAITOK) == PR_WAITOK, "pgalloc"); |
151 | return pg->uanon == NULL; | | 151 | return pg->uanon == NULL; |
152 | } | | 152 | } |
153 | | | 153 | |
154 | static void | | 154 | static void |
155 | pgdtor(void *arg, void *obj) | | 155 | pgdtor(void *arg, void *obj) |
156 | { | | 156 | { |
157 | struct vm_page *pg = obj; | | 157 | struct vm_page *pg = obj; |
158 | | | 158 | |
159 | rump_hyperfree(pg->uanon, PAGE_SIZE); | | 159 | rump_hyperfree(pg->uanon, PAGE_SIZE); |
160 | } | | 160 | } |
161 | | | 161 | |
162 | static struct pool_cache pagecache; | | 162 | static struct pool_cache pagecache; |
163 | | | 163 | |
164 | /* | | 164 | /* |
165 | * Called with the object locked. We don't support anons. | | 165 | * Called with the object locked. We don't support anons. |
166 | */ | | 166 | */ |
167 | struct vm_page * | | 167 | struct vm_page * |
168 | uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon, | | 168 | uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon, |
169 | int flags, int strat, int free_list) | | 169 | int flags, int strat, int free_list) |
170 | { | | 170 | { |
171 | struct vm_page *pg; | | 171 | struct vm_page *pg; |
172 | | | 172 | |
173 | KASSERT(uobj && mutex_owned(&uobj->vmobjlock)); | | 173 | KASSERT(uobj && mutex_owned(&uobj->vmobjlock)); |
174 | KASSERT(anon == NULL); | | 174 | KASSERT(anon == NULL); |
175 | | | 175 | |
176 | pg = pool_cache_get(&pagecache, PR_NOWAIT); | | 176 | pg = pool_cache_get(&pagecache, PR_NOWAIT); |
177 | if (__predict_false(pg == NULL)) { | | 177 | if (__predict_false(pg == NULL)) { |
178 | return NULL; | | 178 | return NULL; |
179 | } | | 179 | } |
180 | | | 180 | |
181 | pg->offset = off; | | 181 | pg->offset = off; |
182 | pg->uobject = uobj; | | 182 | pg->uobject = uobj; |
183 | | | 183 | |
184 | pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE; | | 184 | pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE; |
185 | if (flags & UVM_PGA_ZERO) { | | 185 | if (flags & UVM_PGA_ZERO) { |
186 | uvm_pagezero(pg); | | 186 | uvm_pagezero(pg); |
187 | } | | 187 | } |
188 | | | 188 | |
189 | TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue); | | 189 | TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue); |
190 | (void)rb_tree_insert_node(&uobj->rb_tree, pg); | | 190 | (void)rb_tree_insert_node(&uobj->rb_tree, pg); |
191 | | | 191 | |
192 | /* | | 192 | /* |
193 | * Don't put anons on the LRU page queue. We can't flush them | | 193 | * Don't put anons on the LRU page queue. We can't flush them |
194 | * (there's no concept of swap in a rump kernel), so no reason | | 194 | * (there's no concept of swap in a rump kernel), so no reason |
195 | * to bother with them. | | 195 | * to bother with them. |
196 | */ | | 196 | */ |
197 | if (!UVM_OBJ_IS_AOBJ(uobj)) { | | 197 | if (!UVM_OBJ_IS_AOBJ(uobj)) { |
198 | atomic_inc_uint(&vmpage_onqueue); | | 198 | atomic_inc_uint(&vmpage_onqueue); |
199 | mutex_enter(&uvm_pageqlock); | | 199 | mutex_enter(&uvm_pageqlock); |
200 | TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue); | | 200 | TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue); |
201 | mutex_exit(&uvm_pageqlock); | | 201 | mutex_exit(&uvm_pageqlock); |
202 | } | | 202 | } |
203 | | | 203 | |
204 | uobj->uo_npages++; | | 204 | uobj->uo_npages++; |
205 | | | 205 | |
206 | return pg; | | 206 | return pg; |
207 | } | | 207 | } |
208 | | | 208 | |
209 | /* | | 209 | /* |
210 | * Release a page. | | 210 | * Release a page. |
211 | * | | 211 | * |
212 | * Called with the vm object locked. | | 212 | * Called with the vm object locked. |
213 | */ | | 213 | */ |
214 | void | | 214 | void |
215 | uvm_pagefree(struct vm_page *pg) | | 215 | uvm_pagefree(struct vm_page *pg) |
216 | { | | 216 | { |
217 | struct uvm_object *uobj = pg->uobject; | | 217 | struct uvm_object *uobj = pg->uobject; |
218 | | | 218 | |
219 | KASSERT(mutex_owned(&uvm_pageqlock)); | | 219 | KASSERT(mutex_owned(&uvm_pageqlock)); |
220 | KASSERT(mutex_owned(&uobj->vmobjlock)); | | 220 | KASSERT(mutex_owned(&uobj->vmobjlock)); |
221 | | | 221 | |
222 | if (pg->flags & PG_WANTED) | | 222 | if (pg->flags & PG_WANTED) |
223 | wakeup(pg); | | 223 | wakeup(pg); |
224 | | | 224 | |
225 | TAILQ_REMOVE(&uobj->memq, pg, listq.queue); | | 225 | TAILQ_REMOVE(&uobj->memq, pg, listq.queue); |
226 | | | 226 | |
227 | uobj->uo_npages--; | | 227 | uobj->uo_npages--; |
228 | rb_tree_remove_node(&uobj->rb_tree, pg); | | 228 | rb_tree_remove_node(&uobj->rb_tree, pg); |
229 | | | 229 | |
230 | if (!UVM_OBJ_IS_AOBJ(uobj)) { | | 230 | if (!UVM_OBJ_IS_AOBJ(uobj)) { |
231 | TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue); | | 231 | TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue); |
232 | atomic_dec_uint(&vmpage_onqueue); | | 232 | atomic_dec_uint(&vmpage_onqueue); |
233 | } | | 233 | } |
234 | | | 234 | |
235 | pool_cache_put(&pagecache, pg); | | 235 | pool_cache_put(&pagecache, pg); |
236 | } | | 236 | } |
237 | | | 237 | |
238 | void | | 238 | void |
239 | uvm_pagezero(struct vm_page *pg) | | 239 | uvm_pagezero(struct vm_page *pg) |
240 | { | | 240 | { |
241 | | | 241 | |
242 | pg->flags &= ~PG_CLEAN; | | 242 | pg->flags &= ~PG_CLEAN; |
243 | memset((void *)pg->uanon, 0, PAGE_SIZE); | | 243 | memset((void *)pg->uanon, 0, PAGE_SIZE); |
244 | } | | 244 | } |
245 | | | 245 | |
246 | /* | | 246 | /* |
247 | * Misc routines | | 247 | * Misc routines |
248 | */ | | 248 | */ |
249 | | | 249 | |
250 | static kmutex_t pagermtx; | | 250 | static kmutex_t pagermtx; |
251 | | | 251 | |
252 | void | | 252 | void |
253 | uvm_init(void) | | 253 | uvm_init(void) |
254 | { | | 254 | { |
255 | char buf[64]; | | 255 | char buf[64]; |
256 | int error; | | 256 | int error; |
257 | | | 257 | |
258 | if (rumpuser_getenv("RUMP_MEMLIMIT", buf, sizeof(buf), &error) == 0) { | | 258 | if (rumpuser_getenv("RUMP_MEMLIMIT", buf, sizeof(buf), &error) == 0) { |
259 | rump_physmemlimit = strtoll(buf, NULL, 10); | | 259 | unsigned long tmp; |
| | | 260 | char *ep; |
| | | 261 | int mult; |
| | | 262 | |
| | | 263 | tmp = strtoll(buf, &ep, 10); |
| | | 264 | if (strlen(ep) > 1) |
| | | 265 | panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf); |
| | | 266 | |
| | | 267 | /* mini-dehumanize-number */ |
| | | 268 | mult = 1; |
| | | 269 | switch (*ep) { |
| | | 270 | case 'k': |
| | | 271 | mult = 1024; |
| | | 272 | break; |
| | | 273 | case 'm': |
| | | 274 | mult = 1024*1024; |
| | | 275 | break; |
| | | 276 | case 'g': |
| | | 277 | mult = 1024*1024*1024; |
| | | 278 | break; |
| | | 279 | case 0: |
| | | 280 | break; |
| | | 281 | default: |
| | | 282 | panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf); |
| | | 283 | } |
| | | 284 | rump_physmemlimit = tmp * mult; |
| | | 285 | |
| | | 286 | if (rump_physmemlimit / mult != tmp) |
| | | 287 | panic("uvm_init: RUMP_MEMLIMIT overflow: %s", buf); |
260 | /* it's not like we'd get far with, say, 1 byte, but ... */ | | 288 | /* it's not like we'd get far with, say, 1 byte, but ... */ |
261 | if (rump_physmemlimit == 0) | | 289 | if (rump_physmemlimit == 0) |
262 | panic("uvm_init: no memory available"); | | 290 | panic("uvm_init: no memory"); |
| | | 291 | |
263 | #define HUMANIZE_BYTES 9 | | 292 | #define HUMANIZE_BYTES 9 |
264 | CTASSERT(sizeof(buf) >= HUMANIZE_BYTES); | | 293 | CTASSERT(sizeof(buf) >= HUMANIZE_BYTES); |
265 | format_bytes(buf, HUMANIZE_BYTES, rump_physmemlimit); | | 294 | format_bytes(buf, HUMANIZE_BYTES, rump_physmemlimit); |
266 | #undef HUMANIZE_BYTES | | 295 | #undef HUMANIZE_BYTES |
267 | dddlim = 9 * (rump_physmemlimit / 10); | | 296 | dddlim = 9 * (rump_physmemlimit / 10); |
268 | } else { | | 297 | } else { |
269 | strlcpy(buf, "unlimited (host limit)", sizeof(buf)); | | 298 | strlcpy(buf, "unlimited (host limit)", sizeof(buf)); |
270 | } | | 299 | } |
271 | aprint_verbose("total memory = %s\n", buf); | | 300 | aprint_verbose("total memory = %s\n", buf); |
272 | | | 301 | |
273 | TAILQ_INIT(&vmpage_lruqueue); | | 302 | TAILQ_INIT(&vmpage_lruqueue); |
274 | | | 303 | |
275 | uvmexp.free = 1024*1024; /* XXX: arbitrary & not updated */ | | 304 | uvmexp.free = 1024*1024; /* XXX: arbitrary & not updated */ |
276 | | | 305 | |
277 | mutex_init(&pagermtx, MUTEX_DEFAULT, 0); | | 306 | mutex_init(&pagermtx, MUTEX_DEFAULT, 0); |
278 | mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0); | | 307 | mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0); |
279 | mutex_init(&uvm_swap_data_lock, MUTEX_DEFAULT, 0); | | 308 | mutex_init(&uvm_swap_data_lock, MUTEX_DEFAULT, 0); |
280 | | | 309 | |
281 | mutex_init(&pdaemonmtx, MUTEX_DEFAULT, 0); | | 310 | mutex_init(&pdaemonmtx, MUTEX_DEFAULT, 0); |
282 | cv_init(&pdaemoncv, "pdaemon"); | | 311 | cv_init(&pdaemoncv, "pdaemon"); |
283 | cv_init(&oomwait, "oomwait"); | | 312 | cv_init(&oomwait, "oomwait"); |
284 | | | 313 | |
285 | kernel_map->pmap = pmap_kernel(); | | 314 | kernel_map->pmap = pmap_kernel(); |
286 | callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM); | | 315 | callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM); |
287 | kmem_map->pmap = pmap_kernel(); | | 316 | kmem_map->pmap = pmap_kernel(); |
288 | callback_head_init(&kmem_map_store.vmk_reclaim_callback, IPL_VM); | | 317 | callback_head_init(&kmem_map_store.vmk_reclaim_callback, IPL_VM); |
289 | | | 318 | |
290 | pool_cache_bootstrap(&pagecache, sizeof(struct vm_page), 0, 0, 0, | | 319 | pool_cache_bootstrap(&pagecache, sizeof(struct vm_page), 0, 0, 0, |
291 | "page$", NULL, IPL_NONE, pgctor, pgdtor, NULL); | | 320 | "page$", NULL, IPL_NONE, pgctor, pgdtor, NULL); |
292 | } | | 321 | } |
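
The bulk of this revision is the RUMP_MEMLIMIT handling above: rev. 1.105 replaces the bare strtoll() with a "mini-dehumanize-number" that accepts an optional k/m/g suffix and checks the multiplication for overflow. A minimal standalone sketch of the same logic, using a hypothetical helper name (the kernel code panics where this returns -1):

    #include <stdlib.h>
    #include <string.h>

    /* Sketch of the suffix parsing added in rev. 1.105; not kernel code. */
    static int
    parse_memlimit(const char *buf, unsigned long *limitp)
    {
            unsigned long tmp, mult = 1;
            char *ep;

            tmp = strtoul(buf, &ep, 10);
            if (strlen(ep) > 1)
                    return -1;              /* at most one trailing character */

            /* mini-dehumanize-number: optional k/m/g multiplier */
            switch (*ep) {
            case 'k':  mult = 1024UL; break;
            case 'm':  mult = 1024UL*1024; break;
            case 'g':  mult = 1024UL*1024*1024; break;
            case '\0': break;
            default:   return -1;           /* unknown suffix */
            }

            *limitp = tmp * mult;
            if (*limitp / mult != tmp)
                    return -1;              /* multiplication overflowed */
            return 0;
    }

So e.g. RUMP_MEMLIMIT=16m now yields a 16777216-byte limit, and dddlim (the pagedaemon trigger) is set to 90% of it.
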
293 | | | 322 | |
294 | void | | 323 | void |
295 | uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax) | | 324 | uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax) |
296 | { | | 325 | { |
297 | | | 326 | |
298 | vm->vm_map.pmap = pmap_kernel(); | | 327 | vm->vm_map.pmap = pmap_kernel(); |
299 | vm->vm_refcnt = 1; | | 328 | vm->vm_refcnt = 1; |
300 | } | | 329 | } |
301 | | | 330 | |
302 | void | | 331 | void |
303 | uvm_pagewire(struct vm_page *pg) | | 332 | uvm_pagewire(struct vm_page *pg) |
304 | { | | 333 | { |
305 | | | 334 | |
306 | /* nada */ | | 335 | /* nada */ |
307 | } | | 336 | } |
308 | | | 337 | |
309 | void | | 338 | void |
310 | uvm_pageunwire(struct vm_page *pg) | | 339 | uvm_pageunwire(struct vm_page *pg) |
311 | { | | 340 | { |
312 | | | 341 | |
313 | /* nada */ | | 342 | /* nada */ |
314 | } | | 343 | } |
315 | | | 344 | |
316 | /* where's your schmonz now? */ | | 345 | /* where's your schmonz now? */ |
317 | #define PUNLIMIT(a) \ | | 346 | #define PUNLIMIT(a) \ |
318 | p->p_rlimit[a].rlim_cur = p->p_rlimit[a].rlim_max = RLIM_INFINITY; | | 347 | p->p_rlimit[a].rlim_cur = p->p_rlimit[a].rlim_max = RLIM_INFINITY; |
319 | void | | 348 | void |
320 | uvm_init_limits(struct proc *p) | | 349 | uvm_init_limits(struct proc *p) |
321 | { | | 350 | { |
322 | | | 351 | |
323 | PUNLIMIT(RLIMIT_STACK); | | 352 | PUNLIMIT(RLIMIT_STACK); |
324 | PUNLIMIT(RLIMIT_DATA); | | 353 | PUNLIMIT(RLIMIT_DATA); |
325 | PUNLIMIT(RLIMIT_RSS); | | 354 | PUNLIMIT(RLIMIT_RSS); |
326 | PUNLIMIT(RLIMIT_AS); | | 355 | PUNLIMIT(RLIMIT_AS); |
327 | /* nice, cascade */ | | 356 | /* nice, cascade */ |
328 | } | | 357 | } |
329 | #undef PUNLIMIT | | 358 | #undef PUNLIMIT |
330 | | | 359 | |
331 | /* | | 360 | /* |
332 | * This satisfies the "disgusting mmap hack" used by proplib. | | 361 | * This satisfies the "disgusting mmap hack" used by proplib. |
333 | * We probably should grow some more assertables to make sure we're | | 362 | * We probably should grow some more assertables to make sure we're |
334 | * not satisfying anything we shouldn't be satisfying. | | 363 | * not satisfying anything we shouldn't be satisfying. |
335 | */ | | 364 | */ |
336 | int | | 365 | int |
337 | uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot, | | 366 | uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot, |
338 | vm_prot_t maxprot, int flags, void *handle, voff_t off, vsize_t locklim) | | 367 | vm_prot_t maxprot, int flags, void *handle, voff_t off, vsize_t locklim) |
339 | { | | 368 | { |
340 | void *uaddr; | | 369 | void *uaddr; |
341 | int error; | | 370 | int error; |
342 | | | 371 | |
343 | if (prot != (VM_PROT_READ | VM_PROT_WRITE)) | | 372 | if (prot != (VM_PROT_READ | VM_PROT_WRITE)) |
344 | panic("uvm_mmap() variant unsupported"); | | 373 | panic("uvm_mmap() variant unsupported"); |
345 | if (flags != (MAP_PRIVATE | MAP_ANON)) | | 374 | if (flags != (MAP_PRIVATE | MAP_ANON)) |
346 | panic("uvm_mmap() variant unsupported"); | | 375 | panic("uvm_mmap() variant unsupported"); |
347 | | | 376 | |
348 | /* no reason in particular, but cf. uvm_default_mapaddr() */ | | 377 | /* no reason in particular, but cf. uvm_default_mapaddr() */ |
349 | if (*addr != 0) | | 378 | if (*addr != 0) |
350 | panic("uvm_mmap() variant unsupported"); | | 379 | panic("uvm_mmap() variant unsupported"); |
351 | | | 380 | |
352 | if (curproc->p_vmspace == vmspace_kernel()) { | | 381 | if (curproc->p_vmspace == vmspace_kernel()) { |
353 | uaddr = rumpuser_anonmmap(NULL, size, 0, 0, &error); | | 382 | uaddr = rumpuser_anonmmap(NULL, size, 0, 0, &error); |
354 | } else { | | 383 | } else { |
355 | error = rumpuser_sp_anonmmap(curproc->p_vmspace->vm_map.pmap, | | 384 | error = rumpuser_sp_anonmmap(curproc->p_vmspace->vm_map.pmap, |
356 | size, &uaddr); | | 385 | size, &uaddr); |
357 | } | | 386 | } |
358 | if (uaddr == NULL) | | 387 | if (uaddr == NULL) |
359 | return error; | | 388 | return error; |
360 | | | 389 | |
361 | *addr = (vaddr_t)uaddr; | | 390 | *addr = (vaddr_t)uaddr; |
362 | return 0; | | 391 | return 0; |
363 | } | | 392 | } |
364 | | | 393 | |
365 | struct pagerinfo { | | 394 | struct pagerinfo { |
366 | vaddr_t pgr_kva; | | 395 | vaddr_t pgr_kva; |
367 | int pgr_npages; | | 396 | int pgr_npages; |
368 | struct vm_page **pgr_pgs; | | 397 | struct vm_page **pgr_pgs; |
369 | bool pgr_read; | | 398 | bool pgr_read; |
370 | | | 399 | |
371 | LIST_ENTRY(pagerinfo) pgr_entries; | | 400 | LIST_ENTRY(pagerinfo) pgr_entries; |
372 | }; | | 401 | }; |
373 | static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist); | | 402 | static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist); |
374 | | | 403 | |
375 | /* | | 404 | /* |
376 | * Pager "map" in routine. Instead of mapping, we allocate memory | | 405 | * Pager "map" in routine. Instead of mapping, we allocate memory |
377 | * and copy page contents there. Not optimal or even strictly | | 406 | * and copy page contents there. Not optimal or even strictly |
378 | * correct (the caller might modify the page contents after mapping | | 407 | * correct (the caller might modify the page contents after mapping |
379 | * them in), but what the heck. Assumes UVMPAGER_MAPIN_WAITOK. | | 408 | * them in), but what the heck. Assumes UVMPAGER_MAPIN_WAITOK. |
380 | */ | | 409 | */ |
381 | vaddr_t | | 410 | vaddr_t |
382 | uvm_pagermapin(struct vm_page **pgs, int npages, int flags) | | 411 | uvm_pagermapin(struct vm_page **pgs, int npages, int flags) |
383 | { | | 412 | { |
384 | struct pagerinfo *pgri; | | 413 | struct pagerinfo *pgri; |
385 | vaddr_t curkva; | | 414 | vaddr_t curkva; |
386 | int i; | | 415 | int i; |
387 | | | 416 | |
388 | /* allocate structures */ | | 417 | /* allocate structures */ |
389 | pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP); | | 418 | pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP); |
390 | pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP); | | 419 | pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP); |
391 | pgri->pgr_npages = npages; | | 420 | pgri->pgr_npages = npages; |
392 | pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP); | | 421 | pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP); |
393 | pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0; | | 422 | pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0; |
394 | | | 423 | |
395 | /* copy contents to "mapped" memory */ | | 424 | /* copy contents to "mapped" memory */ |
396 | for (i = 0, curkva = pgri->pgr_kva; | | 425 | for (i = 0, curkva = pgri->pgr_kva; |
397 | i < npages; | | 426 | i < npages; |
398 | i++, curkva += PAGE_SIZE) { | | 427 | i++, curkva += PAGE_SIZE) { |
399 | /* | | 428 | /* |
400 | * We need to copy the previous contents of the pages to | | 429 | * We need to copy the previous contents of the pages to |
401 | * the window even if we are reading from the | | 430 | * the window even if we are reading from the |
402 | * device, since the device might not fill the contents of | | 431 | * device, since the device might not fill the contents of |
403 | * the full mapped range and we will end up corrupting | | 432 | * the full mapped range and we will end up corrupting |
404 | * data when we unmap the window. | | 433 | * data when we unmap the window. |
405 | */ | | 434 | */ |
406 | memcpy((void*)curkva, pgs[i]->uanon, PAGE_SIZE); | | 435 | memcpy((void*)curkva, pgs[i]->uanon, PAGE_SIZE); |
407 | pgri->pgr_pgs[i] = pgs[i]; | | 436 | pgri->pgr_pgs[i] = pgs[i]; |
408 | } | | 437 | } |
409 | | | 438 | |
410 | mutex_enter(&pagermtx); | | 439 | mutex_enter(&pagermtx); |
411 | LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries); | | 440 | LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries); |
412 | mutex_exit(&pagermtx); | | 441 | mutex_exit(&pagermtx); |
413 | | | 442 | |
414 | return pgri->pgr_kva; | | 443 | return pgri->pgr_kva; |
415 | } | | 444 | } |
416 | | | 445 | |
417 | /* | | 446 | /* |
418 | * map out the pager window. return contents from VA to page storage | | 447 | * map out the pager window. return contents from VA to page storage |
419 | * and free structures. | | 448 | * and free structures. |
420 | * | | 449 | * |
421 | * Note: does not currently support partial frees | | 450 | * Note: does not currently support partial frees |
422 | */ | | 451 | */ |
423 | void | | 452 | void |
424 | uvm_pagermapout(vaddr_t kva, int npages) | | 453 | uvm_pagermapout(vaddr_t kva, int npages) |
425 | { | | 454 | { |
426 | struct pagerinfo *pgri; | | 455 | struct pagerinfo *pgri; |
427 | vaddr_t curkva; | | 456 | vaddr_t curkva; |
428 | int i; | | 457 | int i; |
429 | | | 458 | |
430 | mutex_enter(&pagermtx); | | 459 | mutex_enter(&pagermtx); |
431 | LIST_FOREACH(pgri, &pagerlist, pgr_entries) { | | 460 | LIST_FOREACH(pgri, &pagerlist, pgr_entries) { |
432 | if (pgri->pgr_kva == kva) | | 461 | if (pgri->pgr_kva == kva) |
433 | break; | | 462 | break; |
434 | } | | 463 | } |
435 | KASSERT(pgri); | | 464 | KASSERT(pgri); |
436 | if (pgri->pgr_npages != npages) | | 465 | if (pgri->pgr_npages != npages) |
437 | panic("uvm_pagermapout: partial unmapping not supported"); | | 466 | panic("uvm_pagermapout: partial unmapping not supported"); |
438 | LIST_REMOVE(pgri, pgr_entries); | | 467 | LIST_REMOVE(pgri, pgr_entries); |
439 | mutex_exit(&pagermtx); | | 468 | mutex_exit(&pagermtx); |
440 | | | 469 | |
441 | if (pgri->pgr_read) { | | 470 | if (pgri->pgr_read) { |
442 | for (i = 0, curkva = pgri->pgr_kva; | | 471 | for (i = 0, curkva = pgri->pgr_kva; |
443 | i < pgri->pgr_npages; | | 472 | i < pgri->pgr_npages; |
444 | i++, curkva += PAGE_SIZE) { | | 473 | i++, curkva += PAGE_SIZE) { |
445 | memcpy(pgri->pgr_pgs[i]->uanon,(void*)curkva,PAGE_SIZE); | | 474 | memcpy(pgri->pgr_pgs[i]->uanon,(void*)curkva,PAGE_SIZE); |
446 | } | | 475 | } |
447 | } | | 476 | } |
448 | | | 477 | |
449 | kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *)); | | 478 | kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *)); |
450 | kmem_free((void*)pgri->pgr_kva, npages * PAGE_SIZE); | | 479 | kmem_free((void*)pgri->pgr_kva, npages * PAGE_SIZE); |
451 | kmem_free(pgri, sizeof(*pgri)); | | 480 | kmem_free(pgri, sizeof(*pgri)); |
452 | } | | 481 | } |
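
As the comments above note, the pager "window" is not a real mapping but a bounce buffer: page contents are copied into a fresh allocation on map-in and, for reads, copied back to page storage on map-out. A self-contained sketch of that pattern, with hypothetical names rather than the rump API (error handling omitted):

    #include <stdlib.h>
    #include <string.h>

    #define PAGE_SIZE 4096

    /* "map in": copy each page's storage into a contiguous window */
    static void *
    window_mapin(void **pages, int npages)
    {
            char *win = malloc((size_t)npages * PAGE_SIZE);
            int i;

            for (i = 0; i < npages; i++)
                    memcpy(win + i*PAGE_SIZE, pages[i], PAGE_SIZE);
            return win;
    }

    /* "map out": optionally copy results back, then free the window */
    static void
    window_mapout(void *window, void **pages, int npages, int copyback)
    {
            char *win = window;
            int i;

            if (copyback)
                    for (i = 0; i < npages; i++)
                            memcpy(pages[i], win + i*PAGE_SIZE, PAGE_SIZE);
            free(window);
    }
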
453 | | | 482 | |
454 | /* | | 483 | /* |
455 | * convert va in pager window to page structure. | | 484 | * convert va in pager window to page structure. |
456 | * XXX: how expensive is this (global lock, list traversal)? | | 485 | * XXX: how expensive is this (global lock, list traversal)? |
457 | */ | | 486 | */ |
458 | struct vm_page * | | 487 | struct vm_page * |
459 | uvm_pageratop(vaddr_t va) | | 488 | uvm_pageratop(vaddr_t va) |
460 | { | | 489 | { |
461 | struct pagerinfo *pgri; | | 490 | struct pagerinfo *pgri; |
462 | struct vm_page *pg = NULL; | | 491 | struct vm_page *pg = NULL; |
463 | int i; | | 492 | int i; |
464 | | | 493 | |
465 | mutex_enter(&pagermtx); | | 494 | mutex_enter(&pagermtx); |
466 | LIST_FOREACH(pgri, &pagerlist, pgr_entries) { | | 495 | LIST_FOREACH(pgri, &pagerlist, pgr_entries) { |
467 | if (pgri->pgr_kva <= va | | 496 | if (pgri->pgr_kva <= va |
468 | && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE) | | 497 | && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE) |
469 | break; | | 498 | break; |
470 | } | | 499 | } |
471 | if (pgri) { | | 500 | if (pgri) { |
472 | i = (va - pgri->pgr_kva) >> PAGE_SHIFT; | | 501 | i = (va - pgri->pgr_kva) >> PAGE_SHIFT; |
473 | pg = pgri->pgr_pgs[i]; | | 502 | pg = pgri->pgr_pgs[i]; |
474 | } | | 503 | } |
475 | mutex_exit(&pagermtx); | | 504 | mutex_exit(&pagermtx); |
476 | | | 505 | |
477 | return pg; | | 506 | return pg; |
478 | } | | 507 | } |
479 | | | 508 | |
480 | /* | | 509 | /* |
481 | * Called with the vm object locked. | | 510 | * Called with the vm object locked. |
482 | * | | 511 | * |
483 | * Put vnode object pages at the end of the access queue to indicate | | 512 | * Put vnode object pages at the end of the access queue to indicate |
484 | * they have been recently accessed and should not be immediate | | 513 | * they have been recently accessed and should not be immediate |
485 | * candidates for pageout. Do not do this for lookups done by | | 514 | * candidates for pageout. Do not do this for lookups done by |
486 | * the pagedaemon to mimic pmap_kentered mappings which don't track | | 515 | * the pagedaemon to mimic pmap_kentered mappings which don't track |
487 | * access information. | | 516 | * access information. |
488 | */ | | 517 | */ |
489 | struct vm_page * | | 518 | struct vm_page * |
490 | uvm_pagelookup(struct uvm_object *uobj, voff_t off) | | 519 | uvm_pagelookup(struct uvm_object *uobj, voff_t off) |
491 | { | | 520 | { |
492 | struct vm_page *pg; | | 521 | struct vm_page *pg; |
493 | bool ispagedaemon = curlwp == uvm.pagedaemon_lwp; | | 522 | bool ispagedaemon = curlwp == uvm.pagedaemon_lwp; |
494 | | | 523 | |
495 | pg = rb_tree_find_node(&uobj->rb_tree, &off); | | 524 | pg = rb_tree_find_node(&uobj->rb_tree, &off); |
496 | if (pg && !UVM_OBJ_IS_AOBJ(pg->uobject) && !ispagedaemon) { | | 525 | if (pg && !UVM_OBJ_IS_AOBJ(pg->uobject) && !ispagedaemon) { |
497 | mutex_enter(&uvm_pageqlock); | | 526 | mutex_enter(&uvm_pageqlock); |
498 | TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue); | | 527 | TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue); |
499 | TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue); | | 528 | TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue); |
500 | mutex_exit(&uvm_pageqlock); | | 529 | mutex_exit(&uvm_pageqlock); |
501 | } | | 530 | } |
502 | | | 531 | |
503 | return pg; | | 532 | return pg; |
504 | } | | 533 | } |
505 | | | 534 | |
506 | void | | 535 | void |
507 | uvm_page_unbusy(struct vm_page **pgs, int npgs) | | 536 | uvm_page_unbusy(struct vm_page **pgs, int npgs) |
508 | { | | 537 | { |
509 | struct vm_page *pg; | | 538 | struct vm_page *pg; |
510 | int i; | | 539 | int i; |
511 | | | 540 | |
512 | KASSERT(npgs > 0); | | 541 | KASSERT(npgs > 0); |
513 | KASSERT(mutex_owned(&pgs[0]->uobject->vmobjlock)); | | 542 | KASSERT(mutex_owned(&pgs[0]->uobject->vmobjlock)); |
514 | | | 543 | |
515 | for (i = 0; i < npgs; i++) { | | 544 | for (i = 0; i < npgs; i++) { |
516 | pg = pgs[i]; | | 545 | pg = pgs[i]; |
517 | if (pg == NULL) | | 546 | if (pg == NULL) |
518 | continue; | | 547 | continue; |
519 | | | 548 | |
520 | KASSERT(pg->flags & PG_BUSY); | | 549 | KASSERT(pg->flags & PG_BUSY); |
521 | if (pg->flags & PG_WANTED) | | 550 | if (pg->flags & PG_WANTED) |
522 | wakeup(pg); | | 551 | wakeup(pg); |
523 | if (pg->flags & PG_RELEASED) | | 552 | if (pg->flags & PG_RELEASED) |
524 | uvm_pagefree(pg); | | 553 | uvm_pagefree(pg); |
525 | else | | 554 | else |
526 | pg->flags &= ~(PG_WANTED|PG_BUSY); | | 555 | pg->flags &= ~(PG_WANTED|PG_BUSY); |
527 | } | | 556 | } |
528 | } | | 557 | } |
529 | | | 558 | |
530 | void | | 559 | void |
531 | uvm_estimatepageable(int *active, int *inactive) | | 560 | uvm_estimatepageable(int *active, int *inactive) |
532 | { | | 561 | { |
533 | | | 562 | |
534 | /* XXX: guessing game */ | | 563 | /* XXX: guessing game */ |
535 | *active = 1024; | | 564 | *active = 1024; |
536 | *inactive = 1024; | | 565 | *inactive = 1024; |
537 | } | | 566 | } |
538 | | | 567 | |
539 | struct vm_map_kernel * | | 568 | struct vm_map_kernel * |
540 | vm_map_to_kernel(struct vm_map *map) | | 569 | vm_map_to_kernel(struct vm_map *map) |
541 | { | | 570 | { |
542 | | | 571 | |
543 | return (struct vm_map_kernel *)map; | | 572 | return (struct vm_map_kernel *)map; |
544 | } | | 573 | } |
545 | | | 574 | |
546 | bool | | 575 | bool |
547 | vm_map_starved_p(struct vm_map *map) | | 576 | vm_map_starved_p(struct vm_map *map) |
548 | { | | 577 | { |
549 | | | 578 | |
550 | if (map->flags & VM_MAP_WANTVA) | | 579 | if (map->flags & VM_MAP_WANTVA) |
551 | return true; | | 580 | return true; |
552 | | | 581 | |
553 | return false; | | 582 | return false; |
554 | } | | 583 | } |
555 | | | 584 | |
556 | int | | 585 | int |
557 | uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags) | | 586 | uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags) |
558 | { | | 587 | { |
559 | | | 588 | |
560 | panic("%s: unimplemented", __func__); | | 589 | panic("%s: unimplemented", __func__); |
561 | } | | 590 | } |
562 | | | 591 | |
563 | void | | 592 | void |
564 | uvm_unloan(void *v, int npages, int flags) | | 593 | uvm_unloan(void *v, int npages, int flags) |
565 | { | | 594 | { |
566 | | | 595 | |
567 | panic("%s: unimplemented", __func__); | | 596 | panic("%s: unimplemented", __func__); |
568 | } | | 597 | } |
569 | | | 598 | |
570 | int | | 599 | int |
571 | uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages, | | 600 | uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages, |
572 | struct vm_page **opp) | | 601 | struct vm_page **opp) |
573 | { | | 602 | { |
574 | | | 603 | |
575 | return EBUSY; | | 604 | return EBUSY; |
576 | } | | 605 | } |
577 | | | 606 | |
578 | #ifdef DEBUGPRINT | | 607 | #ifdef DEBUGPRINT |
579 | void | | 608 | void |
580 | uvm_object_printit(struct uvm_object *uobj, bool full, | | 609 | uvm_object_printit(struct uvm_object *uobj, bool full, |
581 | void (*pr)(const char *, ...)) | | 610 | void (*pr)(const char *, ...)) |
582 | { | | 611 | { |
583 | | | 612 | |
584 | pr("VM OBJECT at %p, refs %d", uobj, uobj->uo_refs); | | 613 | pr("VM OBJECT at %p, refs %d", uobj, uobj->uo_refs); |
585 | } | | 614 | } |
586 | #endif | | 615 | #endif |
587 | | | 616 | |
588 | vaddr_t | | 617 | vaddr_t |
589 | uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz) | | 618 | uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz) |
590 | { | | 619 | { |
591 | | | 620 | |
592 | return 0; | | 621 | return 0; |
593 | } | | 622 | } |
594 | | | 623 | |
595 | int | | 624 | int |
596 | uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, | | 625 | uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, |
597 | vm_prot_t prot, bool set_max) | | 626 | vm_prot_t prot, bool set_max) |
598 | { | | 627 | { |
599 | | | 628 | |
600 | return EOPNOTSUPP; | | 629 | return EOPNOTSUPP; |
601 | } | | 630 | } |
602 | | | 631 | |
603 | /* | | 632 | /* |
604 | * UVM km | | 633 | * UVM km |
605 | */ | | 634 | */ |
606 | | | 635 | |
607 | vaddr_t | | 636 | vaddr_t |
608 | uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags) | | 637 | uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags) |
609 | { | | 638 | { |
610 | void *rv, *desired = NULL; | | 639 | void *rv, *desired = NULL; |
611 | int alignbit, error; | | 640 | int alignbit, error; |
612 | | | 641 | |
613 | #ifdef __x86_64__ | | 642 | #ifdef __x86_64__ |
614 | /* | | 643 | /* |
615 | * On amd64, allocate all module memory from the lowest 2GB. | | 644 | * On amd64, allocate all module memory from the lowest 2GB. |
616 | * This is because NetBSD kernel modules are compiled | | 645 | * This is because NetBSD kernel modules are compiled |
617 | * with -mcmodel=kernel and reserve only 4 bytes for | | 646 | * with -mcmodel=kernel and reserve only 4 bytes for |
618 | * offsets. If we load code compiled with -mcmodel=kernel | | 647 | * offsets. If we load code compiled with -mcmodel=kernel |
619 | * anywhere except the lowest or highest 2GB, it will not | | 648 | * anywhere except the lowest or highest 2GB, it will not |
620 | * work. Since userspace does not have access to the highest | | 649 | * work. Since userspace does not have access to the highest |
621 | * 2GB, use the lowest 2GB. | | 650 | * 2GB, use the lowest 2GB. |
622 | * | | 651 | * |
623 | * Note: this assumes the rump kernel resides in | | 652 | * Note: this assumes the rump kernel resides in |
624 | * the lowest 2GB as well. | | 653 | * the lowest 2GB as well. |
625 | * | | 654 | * |
626 | * Note2: yes, it's a quick hack, but since this is the only | | 655 | * Note2: yes, it's a quick hack, but since this is the only |
627 | * place where we care about the map we're allocating from, | | 656 | * place where we care about the map we're allocating from, |
628 | * just use a simple "if" instead of coming up with a fancy | | 657 | * just use a simple "if" instead of coming up with a fancy |
629 | * generic solution. | | 658 | * generic solution. |
630 | */ | | 659 | */ |
631 | extern struct vm_map *module_map; | | 660 | extern struct vm_map *module_map; |
632 | if (map == module_map) { | | 661 | if (map == module_map) { |
633 | desired = (void *)(0x80000000 - size); | | 662 | desired = (void *)(0x80000000 - size); |
634 | } | | 663 | } |
635 | #endif | | 664 | #endif |
636 | | | 665 | |
637 | alignbit = 0; | | 666 | alignbit = 0; |
638 | if (align) { | | 667 | if (align) { |
639 | alignbit = ffs(align)-1; | | 668 | alignbit = ffs(align)-1; |
640 | } | | 669 | } |
641 | | | 670 | |
642 | rv = rumpuser_anonmmap(desired, size, alignbit, flags & UVM_KMF_EXEC, | | 671 | rv = rumpuser_anonmmap(desired, size, alignbit, flags & UVM_KMF_EXEC, |
643 | &error); | | 672 | &error); |
644 | if (rv == NULL) { | | 673 | if (rv == NULL) { |
645 | if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT)) | | 674 | if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT)) |
646 | return 0; | | 675 | return 0; |
647 | else | | 676 | else |
648 | panic("uvm_km_alloc failed"); | | 677 | panic("uvm_km_alloc failed"); |
649 | } | | 678 | } |
650 | | | 679 | |
651 | if (flags & UVM_KMF_ZERO) | | 680 | if (flags & UVM_KMF_ZERO) |
652 | memset(rv, 0, size); | | 681 | memset(rv, 0, size); |
653 | | | 682 | |
654 | return (vaddr_t)rv; | | 683 | return (vaddr_t)rv; |
655 | } | | 684 | } |
656 | | | 685 | |
657 | void | | 686 | void |
658 | uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags) | | 687 | uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags) |
659 | { | | 688 | { |
660 | | | 689 | |
661 | rumpuser_unmap((void *)vaddr, size); | | 690 | rumpuser_unmap((void *)vaddr, size); |
662 | } | | 691 | } |
663 | | | 692 | |
664 | struct vm_map * | | 693 | struct vm_map * |
665 | uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr, | | 694 | uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr, |
666 | vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap) | | 695 | vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap) |
667 | { | | 696 | { |
668 | | | 697 | |
669 | return (struct vm_map *)417416; | | 698 | return (struct vm_map *)417416; |
670 | } | | 699 | } |
671 | | | 700 | |
672 | vaddr_t | | 701 | vaddr_t |
673 | uvm_km_alloc_poolpage(struct vm_map *map, bool waitok) | | 702 | uvm_km_alloc_poolpage(struct vm_map *map, bool waitok) |
674 | { | | 703 | { |
675 | | | 704 | |
676 | return (vaddr_t)rump_hypermalloc(PAGE_SIZE, PAGE_SIZE, | | 705 | return (vaddr_t)rump_hypermalloc(PAGE_SIZE, PAGE_SIZE, |
677 | waitok, "kmalloc"); | | 706 | waitok, "kmalloc"); |
678 | } | | 707 | } |
679 | | | 708 | |
680 | void | | 709 | void |
681 | uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr) | | 710 | uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr) |
682 | { | | 711 | { |
683 | | | 712 | |
684 | rump_hyperfree((void *)addr, PAGE_SIZE); | | 713 | rump_hyperfree((void *)addr, PAGE_SIZE); |
685 | } | | 714 | } |
686 | | | 715 | |
687 | vaddr_t | | 716 | vaddr_t |
688 | uvm_km_alloc_poolpage_cache(struct vm_map *map, bool waitok) | | 717 | uvm_km_alloc_poolpage_cache(struct vm_map *map, bool waitok) |
689 | { | | 718 | { |
690 | | | 719 | |
691 | return uvm_km_alloc_poolpage(map, waitok); | | 720 | return uvm_km_alloc_poolpage(map, waitok); |
692 | } | | 721 | } |
693 | | | 722 | |
694 | void | | 723 | void |
695 | uvm_km_free_poolpage_cache(struct vm_map *map, vaddr_t vaddr) | | 724 | uvm_km_free_poolpage_cache(struct vm_map *map, vaddr_t vaddr) |
696 | { | | 725 | { |
697 | | | 726 | |
698 | uvm_km_free_poolpage(map, vaddr); | | 727 | uvm_km_free_poolpage(map, vaddr); |
699 | } | | 728 | } |
700 | | | 729 | |
701 | void | | 730 | void |
702 | uvm_km_va_drain(struct vm_map *map, uvm_flag_t flags) | | 731 | uvm_km_va_drain(struct vm_map *map, uvm_flag_t flags) |
703 | { | | 732 | { |
704 | | | 733 | |
705 | /* we eventually maybe want some model for available memory */ | | 734 | /* we eventually maybe want some model for available memory */ |
706 | } | | 735 | } |
707 | | | 736 | |
708 | /* | | 737 | /* |
709 | * VM space locking routines. We don't really have to do anything, | | 738 | * VM space locking routines. We don't really have to do anything, |
710 | * since the pages are always "wired" (both local and remote processes). | | 739 | * since the pages are always "wired" (both local and remote processes). |
711 | */ | | 740 | */ |
712 | int | | 741 | int |
713 | uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access) | | 742 | uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access) |
714 | { | | 743 | { |
715 | | | 744 | |
716 | return 0; | | 745 | return 0; |
717 | } | | 746 | } |
718 | | | 747 | |
719 | void | | 748 | void |
720 | uvm_vsunlock(struct vmspace *vs, void *addr, size_t len) | | 749 | uvm_vsunlock(struct vmspace *vs, void *addr, size_t len) |
721 | { | | 750 | { |
722 | | | 751 | |
723 | } | | 752 | } |
724 | | | 753 | |
725 | /* | | 754 | /* |
726 | * For the local case the buffer mappers don't need to do anything. | | 755 | * For the local case the buffer mappers don't need to do anything. |
727 | * For the remote case we need to reserve space and copy data in or | | 756 | * For the remote case we need to reserve space and copy data in or |
728 | * out, depending on B_READ/B_WRITE. | | 757 | * out, depending on B_READ/B_WRITE. |
729 | */ | | 758 | */ |
730 | void | | 759 | void |
731 | vmapbuf(struct buf *bp, vsize_t len) | | 760 | vmapbuf(struct buf *bp, vsize_t len) |
732 | { | | 761 | { |
733 | | | 762 | |
734 | bp->b_saveaddr = bp->b_data; | | 763 | bp->b_saveaddr = bp->b_data; |
735 | | | 764 | |
736 | /* remote case */ | | 765 | /* remote case */ |
737 | if (curproc->p_vmspace != vmspace_kernel()) { | | 766 | if (curproc->p_vmspace != vmspace_kernel()) { |
738 | bp->b_data = rump_hypermalloc(len, 0, true, "vmapbuf"); | | 767 | bp->b_data = rump_hypermalloc(len, 0, true, "vmapbuf"); |
739 | if (BUF_ISWRITE(bp)) { | | 768 | if (BUF_ISWRITE(bp)) { |
740 | copyin(bp->b_saveaddr, bp->b_data, len); | | 769 | copyin(bp->b_saveaddr, bp->b_data, len); |
741 | } | | 770 | } |
742 | } | | 771 | } |
743 | } | | 772 | } |
744 | | | 773 | |
745 | void | | 774 | void |
746 | vunmapbuf(struct buf *bp, vsize_t len) | | 775 | vunmapbuf(struct buf *bp, vsize_t len) |
747 | { | | 776 | { |
748 | | | 777 | |
749 | /* remote case */ | | 778 | /* remote case */ |
750 | if (bp->b_proc->p_vmspace != vmspace_kernel()) { | | 779 | if (bp->b_proc->p_vmspace != vmspace_kernel()) { |
751 | if (BUF_ISREAD(bp)) { | | 780 | if (BUF_ISREAD(bp)) { |
752 | copyout_proc(bp->b_proc, | | 781 | copyout_proc(bp->b_proc, |
753 | bp->b_data, bp->b_saveaddr, len); | | 782 | bp->b_data, bp->b_saveaddr, len); |
754 | } | | 783 | } |
755 | rump_hyperfree(bp->b_data, len); | | 784 | rump_hyperfree(bp->b_data, len); |
756 | } | | 785 | } |
757 | | | 786 | |
758 | bp->b_data = bp->b_saveaddr; | | 787 | bp->b_data = bp->b_saveaddr; |
759 | bp->b_saveaddr = 0; | | 788 | bp->b_saveaddr = 0; |
760 | } | | 789 | } |
761 | | | 790 | |
762 | void | | 791 | void |
763 | uvmspace_addref(struct vmspace *vm) | | 792 | uvmspace_addref(struct vmspace *vm) |
764 | { | | 793 | { |
765 | | | 794 | |
766 | /* | | 795 | /* |
767 | * No dynamically allocated vmspaces exist. | | 796 | * No dynamically allocated vmspaces exist. |
768 | */ | | 797 | */ |
769 | } | | 798 | } |
770 | | | 799 | |
771 | void | | 800 | void |
772 | uvmspace_free(struct vmspace *vm) | | 801 | uvmspace_free(struct vmspace *vm) |
773 | { | | 802 | { |
774 | | | 803 | |
775 | /* nothing for now */ | | 804 | /* nothing for now */ |
776 | } | | 805 | } |
777 | | | 806 | |
778 | /* | | 807 | /* |
779 | * page life cycle stuff. it really doesn't exist, so just stubs. | | 808 | * page life cycle stuff. it really doesn't exist, so just stubs. |
780 | */ | | 809 | */ |
781 | | | 810 | |
782 | void | | 811 | void |
783 | uvm_pageactivate(struct vm_page *pg) | | 812 | uvm_pageactivate(struct vm_page *pg) |
784 | { | | 813 | { |
785 | | | 814 | |
786 | /* nada */ | | 815 | /* nada */ |
787 | } | | 816 | } |
788 | | | 817 | |
789 | void | | 818 | void |
790 | uvm_pagedeactivate(struct vm_page *pg) | | 819 | uvm_pagedeactivate(struct vm_page *pg) |
791 | { | | 820 | { |
792 | | | 821 | |
793 | /* nada */ | | 822 | /* nada */ |
794 | } | | 823 | } |
795 | | | 824 | |
796 | void | | 825 | void |
797 | uvm_pagedequeue(struct vm_page *pg) | | 826 | uvm_pagedequeue(struct vm_page *pg) |
798 | { | | 827 | { |
799 | | | 828 | |
800 | /* nada*/ | | 829 | /* nada*/ |
801 | } | | 830 | } |
802 | | | 831 | |
803 | void | | 832 | void |
804 | uvm_pageenqueue(struct vm_page *pg) | | 833 | uvm_pageenqueue(struct vm_page *pg) |
805 | { | | 834 | { |
806 | | | 835 | |
807 | /* nada */ | | 836 | /* nada */ |
808 | } | | 837 | } |
809 | | | 838 | |
810 | void | | 839 | void |
811 | uvmpdpol_anfree(struct vm_anon *an) | | 840 | uvmpdpol_anfree(struct vm_anon *an) |
812 | { | | 841 | { |
813 | | | 842 | |
814 | /* nada */ | | 843 | /* nada */ |
815 | } | | 844 | } |
816 | | | 845 | |
817 | /* | | 846 | /* |
818 | * Physical address accessors. | | 847 | * Physical address accessors. |
819 | */ | | 848 | */ |
820 | | | 849 | |
821 | struct vm_page * | | 850 | struct vm_page * |
822 | uvm_phys_to_vm_page(paddr_t pa) | | 851 | uvm_phys_to_vm_page(paddr_t pa) |
823 | { | | 852 | { |
824 | | | 853 | |
825 | return NULL; | | 854 | return NULL; |
826 | } | | 855 | } |
827 | | | 856 | |
828 | paddr_t | | 857 | paddr_t |
829 | uvm_vm_page_to_phys(const struct vm_page *pg) | | 858 | uvm_vm_page_to_phys(const struct vm_page *pg) |
830 | { | | 859 | { |
831 | | | 860 | |
832 | return 0; | | 861 | return 0; |
833 | } | | 862 | } |
834 | | | 863 | |
835 | /* | | 864 | /* |
836 | * Routines related to the Page Baroness. | | 865 | * Routines related to the Page Baroness. |
837 | */ | | 866 | */ |
838 | | | 867 | |
839 | void | | 868 | void |
840 | uvm_wait(const char *msg) | | 869 | uvm_wait(const char *msg) |
841 | { | | 870 | { |
842 | | | 871 | |
843 | if (__predict_false(curlwp == uvm.pagedaemon_lwp)) | | 872 | if (__predict_false(curlwp == uvm.pagedaemon_lwp)) |
844 | panic("pagedaemon out of memory"); | | 873 | panic("pagedaemon out of memory"); |
845 | if (__predict_false(rump_threads == 0)) | | 874 | if (__predict_false(rump_threads == 0)) |
846 | panic("pagedaemon missing (RUMP_THREADS = 0)"); | | 875 | panic("pagedaemon missing (RUMP_THREADS = 0)"); |
847 | | | 876 | |
848 | mutex_enter(&pdaemonmtx); | | 877 | mutex_enter(&pdaemonmtx); |
849 | pdaemon_waiters++; | | 878 | pdaemon_waiters++; |
850 | cv_signal(&pdaemoncv); | | 879 | cv_signal(&pdaemoncv); |
851 | cv_wait(&oomwait, &pdaemonmtx); | | 880 | cv_wait(&oomwait, &pdaemonmtx); |
852 | mutex_exit(&pdaemonmtx); | | 881 | mutex_exit(&pdaemonmtx); |
853 | } | | 882 | } |
854 | | | 883 | |
855 | void | | 884 | void |
856 | uvm_pageout_start(int npages) | | 885 | uvm_pageout_start(int npages) |
857 | { | | 886 | { |
858 | | | 887 | |
859 | /* we don't have the heuristics */ | | 888 | /* we don't have the heuristics */ |
860 | } | | 889 | } |
861 | | | 890 | |
862 | void | | 891 | void |
863 | uvm_pageout_done(int npages) | | 892 | uvm_pageout_done(int npages) |
864 | { | | 893 | { |
865 | | | 894 | |
866 | /* could wakeup waiters, but just let the pagedaemon do it */ | | 895 | /* could wakeup waiters, but just let the pagedaemon do it */ |
867 | } | | 896 | } |
868 | | | 897 | |
869 | static bool | | 898 | static bool |
870 | processpage(struct vm_page *pg, bool *lockrunning) | | 899 | processpage(struct vm_page *pg, bool *lockrunning) |
871 | { | | 900 | { |
872 | struct uvm_object *uobj; | | 901 | struct uvm_object *uobj; |
873 | | | 902 | |
874 | uobj = pg->uobject; | | 903 | uobj = pg->uobject; |
875 | if (mutex_tryenter(&uobj->vmobjlock)) { | | 904 | if (mutex_tryenter(&uobj->vmobjlock)) { |
876 | if ((pg->flags & PG_BUSY) == 0) { | | 905 | if ((pg->flags & PG_BUSY) == 0) { |
877 | mutex_exit(&uvm_pageqlock); | | 906 | mutex_exit(&uvm_pageqlock); |
878 | uobj->pgops->pgo_put(uobj, pg->offset, | | 907 | uobj->pgops->pgo_put(uobj, pg->offset, |
879 | pg->offset + PAGE_SIZE, | | 908 | pg->offset + PAGE_SIZE, |
880 | PGO_CLEANIT|PGO_FREE); | | 909 | PGO_CLEANIT|PGO_FREE); |
881 | KASSERT(!mutex_owned(&uobj->vmobjlock)); | | 910 | KASSERT(!mutex_owned(&uobj->vmobjlock)); |
882 | return true; | | 911 | return true; |
883 | } else { | | 912 | } else { |
884 | mutex_exit(&uobj->vmobjlock); | | 913 | mutex_exit(&uobj->vmobjlock); |
885 | } | | 914 | } |
886 | } else if (*lockrunning == false && ncpu > 1) { | | 915 | } else if (*lockrunning == false && ncpu > 1) { |
887 | CPU_INFO_ITERATOR cii; | | 916 | CPU_INFO_ITERATOR cii; |
888 | struct cpu_info *ci; | | 917 | struct cpu_info *ci; |
889 | struct lwp *l; | | 918 | struct lwp *l; |
890 | | | 919 | |
891 | l = mutex_owner(&uobj->vmobjlock); | | 920 | l = mutex_owner(&uobj->vmobjlock); |
892 | for (CPU_INFO_FOREACH(cii, ci)) { | | 921 | for (CPU_INFO_FOREACH(cii, ci)) { |
893 | if (ci->ci_curlwp == l) { | | 922 | if (ci->ci_curlwp == l) { |
894 | *lockrunning = true; | | 923 | *lockrunning = true; |
895 | break; | | 924 | break; |
896 | } | | 925 | } |
897 | } | | 926 | } |
898 | } | | 927 | } |
899 | | | 928 | |
900 | return false; | | 929 | return false; |
901 | } | | 930 | } |
902 | | | 931 | |
903 | /* | | 932 | /* |
904 | * The Diabolical pageDaemon Director (DDD). | | 933 | * The Diabolical pageDaemon Director (DDD). |
905 | */ | | 934 | */ |
906 | void | | 935 | void |
907 | uvm_pageout(void *arg) | | 936 | uvm_pageout(void *arg) |
908 | { | | 937 | { |
909 | struct vm_page *pg; | | 938 | struct vm_page *pg; |
910 | struct pool *pp, *pp_first; | | 939 | struct pool *pp, *pp_first; |
911 | uint64_t where; | | 940 | uint64_t where; |
912 | int timo = 0; | | 941 | int timo = 0; |
913 | int cleaned, skip, skipped; | | 942 | int cleaned, skip, skipped; |
914 | bool succ = false; | | 943 | bool succ = false; |
915 | bool lockrunning; | | 944 | bool lockrunning; |
916 | | | 945 | |
917 | mutex_enter(&pdaemonmtx); | | 946 | mutex_enter(&pdaemonmtx); |
918 | for (;;) { | | 947 | for (;;) { |
919 | if (succ) { | | 948 | if (succ) { |
920 | kernel_map->flags &= ~VM_MAP_WANTVA; | | 949 | kernel_map->flags &= ~VM_MAP_WANTVA; |
921 | kmem_map->flags &= ~VM_MAP_WANTVA; | | 950 | kmem_map->flags &= ~VM_MAP_WANTVA; |
922 | timo = 0; | | 951 | timo = 0; |
923 | if (pdaemon_waiters) { | | 952 | if (pdaemon_waiters) { |
924 | pdaemon_waiters = 0; | | 953 | pdaemon_waiters = 0; |
925 | cv_broadcast(&oomwait); | | 954 | cv_broadcast(&oomwait); |
926 | } | | 955 | } |
927 | } | | 956 | } |
928 | succ = false; | | 957 | succ = false; |
929 | | | 958 | |
930 | if (pdaemon_waiters == 0) { | | 959 | if (pdaemon_waiters == 0) { |
931 | cv_timedwait(&pdaemoncv, &pdaemonmtx, timo); | | 960 | cv_timedwait(&pdaemoncv, &pdaemonmtx, timo); |
932 | uvmexp.pdwoke++; | | 961 | uvmexp.pdwoke++; |
933 | } | | 962 | } |
934 | | | 963 | |
935 | /* tell the world that we are hungry */ | | 964 | /* tell the world that we are hungry */ |
936 | kernel_map->flags |= VM_MAP_WANTVA; | | 965 | kernel_map->flags |= VM_MAP_WANTVA; |
937 | kmem_map->flags |= VM_MAP_WANTVA; | | 966 | kmem_map->flags |= VM_MAP_WANTVA; |

		if (pdaemon_waiters == 0 && !NEED_PAGEDAEMON())
			continue;
		mutex_exit(&pdaemonmtx);

		/*
		 * step one: reclaim the page cache.  this should give
		 * us the biggest earnings since whole pages are released
		 * into backing memory.
		 */
		pool_cache_reclaim(&pagecache);
		if (!NEED_PAGEDAEMON()) {
			succ = true;
			mutex_enter(&pdaemonmtx);
			continue;
		}
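
		/*
		 * (pool_cache_reclaim() frees the idle vm_page objects
		 * cached by the pool layer; each cached entry holds a
		 * full page of hypervisor memory, which makes this the
		 * cheapest win.)
		 */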

		/*
		 * Ok, so that didn't help.  Next, try to hunt memory
		 * by pushing out vnode pages.  The pages might contain
		 * useful cached data, but we need the memory.
		 */
		cleaned = 0;
		skip = 0;
		lockrunning = false;
 again:
		mutex_enter(&uvm_pageqlock);
		while (cleaned < PAGEDAEMON_OBJCHUNK) {
			skipped = 0;
			TAILQ_FOREACH(pg, &vmpage_lruqueue, pageq.queue) {

				/*
				 * skip over pages we _might_ have tried
				 * to handle earlier.  they might not be
				 * exactly the same ones, but I'm not too
				 * concerned.
				 */
				if (skipped++ < skip)
					continue;

				if (processpage(pg, &lockrunning)) {
					cleaned++;
					goto again;
				}

				skip++;
			}
			break;
		}
		mutex_exit(&uvm_pageqlock);
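
		/*
		 * The skip counter keeps a page that processpage()
		 * already failed on from being retried after every
		 * successful clean: each restart at "again" fast-
		 * forwards past the prefix of the LRU queue examined
		 * so far.  As the comment in the loop notes, the queue
		 * may have changed in between, so this is a heuristic
		 * rather than an exact cursor.
		 */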

		/*
		 * Ok, someone is running with an object lock held.
		 * We want to yield the host CPU to make sure the
		 * thread is not parked on the host.  Since sched_yield()
		 * doesn't appear to do anything on NetBSD, nanosleep
		 * for the smallest possible time and hope we're back in
		 * the game soon.
		 */
		if (cleaned == 0 && lockrunning) {
			uint64_t sec, nsec;

			sec = 0;
			nsec = 1;
			rumpuser_nanosleep(&sec, &nsec, NULL);

			lockrunning = false;
			skip = 0;

			/* and here we go again */
			goto again;
		}

		/*
		 * And of course we need to reclaim the page cache
		 * again to actually release memory.
		 */
		pool_cache_reclaim(&pagecache);
		if (!NEED_PAGEDAEMON()) {
			succ = true;
			mutex_enter(&pdaemonmtx);
			continue;
		}

		/*
		 * Still not there?  sleeves come off right about now.
		 * First: do reclaim on kernel/kmem map.
		 */
		callback_run_roundrobin(&kernel_map_store.vmk_reclaim_callback,
		    NULL);
		callback_run_roundrobin(&kmem_map_store.vmk_reclaim_callback,
		    NULL);
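
		/*
		 * (The reclaim callbacks let subsystems holding kernel
		 * virtual address space return what they can;
		 * callback_run_roundrobin() runs each registered
		 * callback, rotating the starting point between calls.)
		 */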

		/*
		 * And then drain the pools.  Wipe them out ... all of them.
		 */

		pool_drain_start(&pp_first, &where);
		pp = pp_first;
		for (;;) {
			rump_vfs_drainbufs(10 /* XXX: estimate better */);
			succ = pool_drain_end(pp, where);
			if (succ)
				break;
			pool_drain_start(&pp, &where);
			if (pp == pp_first) {
				succ = pool_drain_end(pp, where);
				break;
			}
		}
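
		/*
		 * pool_drain_start() picks the next pool and kicks off
		 * a drain; pool_drain_end() waits for it to finish and
		 * reports whether anything was freed.  The loop stops
		 * on the first success or once it has cycled through
		 * every pool back to pp_first.
		 */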

		/*
		 * Need to use PYEC on our bag of tricks.
		 * Unfortunately, the wife just borrowed it.
		 */

		if (!succ && cleaned == 0) {
			rumpuser_dprintf("pagedaemoness: failed to reclaim "
			    "memory ... sleeping (deadlock?)\n");
			timo = hz;
		}

		mutex_enter(&pdaemonmtx);
	}

	panic("you can swap out any time you like, but you can never leave");
}

void
uvm_kick_pdaemon(void)
{

	/*
	 * Wake up the diabolical pagedaemon director if we are over
	 * 90% of the memory limit.  This is a complete and utter
	 * Stetson-Harrison decision which you are allowed to fine-tune.
	 * Don't bother locking.  If we have some unflushed caches,
	 * other waker-uppers will deal with the issue.
	 */
	if (NEED_PAGEDAEMON()) {
		cv_signal(&pdaemoncv);
	}
}

void *
rump_hypermalloc(size_t howmuch, int alignment, bool waitok, const char *wmsg)
{
	unsigned long newmem;
	void *rv;

	uvm_kick_pdaemon(); /* ouch */

	/* first we must be within the limit */
 limitagain:
	if (rump_physmemlimit != RUMPMEM_UNLIMITED) {
		newmem = atomic_add_long_nv(&curphysmem, howmuch);
		if (newmem > rump_physmemlimit) {
			newmem = atomic_add_long_nv(&curphysmem, -howmuch);
			if (!waitok) {
				return NULL;
			}
			uvm_wait(wmsg);
			goto limitagain;
		}
	}
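
	/*
	 * The accounting above is optimistic: reserve the memory with
	 * an atomic add, and if that overshoots the limit, back the
	 * reservation out with a second atomic add and either fail
	 * (!waitok) or wait for the pagedaemon to free something.
	 */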

	/* second, we must get something from the backend */
 again:
	rv = rumpuser_malloc(howmuch, alignment);
	if (__predict_false(rv == NULL && waitok)) {
		uvm_wait(wmsg);
		goto again;
	}

	return rv;
}
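
/*
 * Note that the limit accounting in rump_hypermalloc() is by request
 * size: rump_hyperfree() must be passed the same size so that
 * curphysmem balances out.
 */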

void
rump_hyperfree(void *what, size_t size)
{

	if (rump_physmemlimit != RUMPMEM_UNLIMITED) {
		atomic_add_long(&curphysmem, -size);
	}
	rumpuser_free(what);
}