Tue Sep 1 11:24:14 2020 UTC
Fix braino in pmap_find_gnt(): really return the gnt entry covering the range,
and not one that starts just after.
Fixes a KASSERT in pmap_remove_gnt().

(bouyer)
cvs diff -r1.403 -r1.404 src/sys/arch/x86/x86/pmap.c

--- src/sys/arch/x86/x86/pmap.c 2020/08/04 06:23:46 1.403
+++ src/sys/arch/x86/x86/pmap.c 2020/09/01 11:24:14 1.404
@@ -1,1132 +1,1132 @@
1/* $NetBSD: pmap.c,v 1.403 2020/08/04 06:23:46 skrll Exp $ */ 1/* $NetBSD: pmap.c,v 1.404 2020/09/01 11:24:14 bouyer Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2008, 2010, 2016, 2017, 2019, 2020 The NetBSD Foundation, Inc. 4 * Copyright (c) 2008, 2010, 2016, 2017, 2019, 2020 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran, and by Maxime Villard. 8 * by Andrew Doran, and by Maxime Villard.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * Copyright (c) 2007 Manuel Bouyer. 33 * Copyright (c) 2007 Manuel Bouyer.
34 * 34 *
35 * Redistribution and use in source and binary forms, with or without 35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions 36 * modification, are permitted provided that the following conditions
37 * are met: 37 * are met:
38 * 1. Redistributions of source code must retain the above copyright 38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer. 39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright 40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the 41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution. 42 * documentation and/or other materials provided with the distribution.
43 * 43 *
44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
47 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 47 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
49 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 49 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
50 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 50 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
51 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 51 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
52 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 52 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
53 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 53 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
54 */ 54 */
55 55
56/* 56/*
57 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr> 57 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
58 * 58 *
59 * Permission to use, copy, modify, and distribute this software for any 59 * Permission to use, copy, modify, and distribute this software for any
60 * purpose with or without fee is hereby granted, provided that the above 60 * purpose with or without fee is hereby granted, provided that the above
61 * copyright notice and this permission notice appear in all copies. 61 * copyright notice and this permission notice appear in all copies.
62 * 62 *
63 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 63 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
64 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 64 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
65 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 65 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
66 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 66 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
67 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 67 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
68 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 68 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
69 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 69 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
70 */ 70 */
71 71
72/* 72/*
73 * Copyright 2001 (c) Wasabi Systems, Inc. 73 * Copyright 2001 (c) Wasabi Systems, Inc.
74 * All rights reserved. 74 * All rights reserved.
75 * 75 *
76 * Written by Frank van der Linden for Wasabi Systems, Inc. 76 * Written by Frank van der Linden for Wasabi Systems, Inc.
77 * 77 *
78 * Redistribution and use in source and binary forms, with or without 78 * Redistribution and use in source and binary forms, with or without
79 * modification, are permitted provided that the following conditions 79 * modification, are permitted provided that the following conditions
80 * are met: 80 * are met:
81 * 1. Redistributions of source code must retain the above copyright 81 * 1. Redistributions of source code must retain the above copyright
82 * notice, this list of conditions and the following disclaimer. 82 * notice, this list of conditions and the following disclaimer.
83 * 2. Redistributions in binary form must reproduce the above copyright 83 * 2. Redistributions in binary form must reproduce the above copyright
84 * notice, this list of conditions and the following disclaimer in the 84 * notice, this list of conditions and the following disclaimer in the
85 * documentation and/or other materials provided with the distribution. 85 * documentation and/or other materials provided with the distribution.
86 * 3. All advertising materials mentioning features or use of this software 86 * 3. All advertising materials mentioning features or use of this software
87 * must display the following acknowledgement: 87 * must display the following acknowledgement:
88 * This product includes software developed for the NetBSD Project by 88 * This product includes software developed for the NetBSD Project by
89 * Wasabi Systems, Inc. 89 * Wasabi Systems, Inc.
90 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 90 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
91 * or promote products derived from this software without specific prior 91 * or promote products derived from this software without specific prior
92 * written permission. 92 * written permission.
93 * 93 *
94 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 94 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
95 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 95 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
96 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 96 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
97 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 97 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
98 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 98 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
99 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 99 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
100 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 100 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
101 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 101 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
102 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 102 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
103 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 103 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
104 * POSSIBILITY OF SUCH DAMAGE. 104 * POSSIBILITY OF SUCH DAMAGE.
105 */ 105 */
106 106
107/* 107/*
108 * Copyright (c) 1997 Charles D. Cranor and Washington University. 108 * Copyright (c) 1997 Charles D. Cranor and Washington University.
109 * All rights reserved. 109 * All rights reserved.
110 * 110 *
111 * Redistribution and use in source and binary forms, with or without 111 * Redistribution and use in source and binary forms, with or without
112 * modification, are permitted provided that the following conditions 112 * modification, are permitted provided that the following conditions
113 * are met: 113 * are met:
114 * 1. Redistributions of source code must retain the above copyright 114 * 1. Redistributions of source code must retain the above copyright
115 * notice, this list of conditions and the following disclaimer. 115 * notice, this list of conditions and the following disclaimer.
116 * 2. Redistributions in binary form must reproduce the above copyright 116 * 2. Redistributions in binary form must reproduce the above copyright
117 * notice, this list of conditions and the following disclaimer in the 117 * notice, this list of conditions and the following disclaimer in the
118 * documentation and/or other materials provided with the distribution. 118 * documentation and/or other materials provided with the distribution.
119 * 119 *
120 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 120 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
121 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 121 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
122 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 122 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
123 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 123 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
124 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 124 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
125 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 125 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
126 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 126 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
127 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 127 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
128 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 128 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
129 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 129 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
130 */ 130 */
131 131
132#include <sys/cdefs.h> 132#include <sys/cdefs.h>
133__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.403 2020/08/04 06:23:46 skrll Exp $"); 133__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.404 2020/09/01 11:24:14 bouyer Exp $");
134 134
135#include "opt_user_ldt.h" 135#include "opt_user_ldt.h"
136#include "opt_lockdebug.h" 136#include "opt_lockdebug.h"
137#include "opt_multiprocessor.h" 137#include "opt_multiprocessor.h"
138#include "opt_xen.h" 138#include "opt_xen.h"
139#include "opt_svs.h" 139#include "opt_svs.h"
140#include "opt_kaslr.h" 140#include "opt_kaslr.h"
141 141
142#define __MUTEX_PRIVATE /* for assertions */ 142#define __MUTEX_PRIVATE /* for assertions */
143 143
144#include <sys/param.h> 144#include <sys/param.h>
145#include <sys/systm.h> 145#include <sys/systm.h>
146#include <sys/proc.h> 146#include <sys/proc.h>
147#include <sys/pool.h> 147#include <sys/pool.h>
148#include <sys/kernel.h> 148#include <sys/kernel.h>
149#include <sys/atomic.h> 149#include <sys/atomic.h>
150#include <sys/cpu.h> 150#include <sys/cpu.h>
151#include <sys/intr.h> 151#include <sys/intr.h>
152#include <sys/xcall.h> 152#include <sys/xcall.h>
153#include <sys/kcore.h> 153#include <sys/kcore.h>
154#include <sys/kmem.h> 154#include <sys/kmem.h>
155#include <sys/asan.h> 155#include <sys/asan.h>
156#include <sys/msan.h> 156#include <sys/msan.h>
157#include <sys/entropy.h> 157#include <sys/entropy.h>
158 158
159#include <uvm/uvm.h> 159#include <uvm/uvm.h>
160#include <uvm/pmap/pmap_pvt.h> 160#include <uvm/pmap/pmap_pvt.h>
161 161
162#include <dev/isa/isareg.h> 162#include <dev/isa/isareg.h>
163 163
164#include <machine/specialreg.h> 164#include <machine/specialreg.h>
165#include <machine/gdt.h> 165#include <machine/gdt.h>
166#include <machine/isa_machdep.h> 166#include <machine/isa_machdep.h>
167#include <machine/cpuvar.h> 167#include <machine/cpuvar.h>
168#include <machine/cputypes.h> 168#include <machine/cputypes.h>
169 169
170#include <x86/pmap.h> 170#include <x86/pmap.h>
171#include <x86/pmap_pv.h> 171#include <x86/pmap_pv.h>
172 172
173#include <x86/i82489reg.h> 173#include <x86/i82489reg.h>
174#include <x86/i82489var.h> 174#include <x86/i82489var.h>
175 175
176#ifdef XEN 176#ifdef XEN
177#include <xen/include/public/xen.h> 177#include <xen/include/public/xen.h>
178#include <xen/hypervisor.h> 178#include <xen/hypervisor.h>
179#endif 179#endif
180 180
181/* 181/*
182 * general info: 182 * general info:
183 * 183 *
184 * - for an explanation of how the x86 MMU hardware works see 184 * - for an explanation of how the x86 MMU hardware works see
185 * the comments in <machine/pte.h>. 185 * the comments in <machine/pte.h>.
186 * 186 *
187 * - for an explanation of the general memory structure used by 187 * - for an explanation of the general memory structure used by
188 * this pmap (including the recursive mapping), see the comments 188 * this pmap (including the recursive mapping), see the comments
189 * in <machine/pmap.h>. 189 * in <machine/pmap.h>.
190 * 190 *
191 * this file contains the code for the "pmap module." the module's 191 * this file contains the code for the "pmap module." the module's
192 * job is to manage the hardware's virtual to physical address mappings. 192 * job is to manage the hardware's virtual to physical address mappings.
193 * note that there are two levels of mapping in the VM system: 193 * note that there are two levels of mapping in the VM system:
194 * 194 *
195 * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's 195 * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's
196 * to map ranges of virtual address space to objects/files. for 196 * to map ranges of virtual address space to objects/files. for
197 * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only 197 * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only
198 * to the file /bin/ls starting at offset zero." note that 198 * to the file /bin/ls starting at offset zero." note that
199 * the upper layer mapping is not concerned with how individual 199 * the upper layer mapping is not concerned with how individual
200 * vm_pages are mapped. 200 * vm_pages are mapped.
201 * 201 *
202 * [2] the lower layer of the VM system (the pmap) maintains the mappings 202 * [2] the lower layer of the VM system (the pmap) maintains the mappings
203 * from virtual addresses. it is concerned with which vm_page is 203 * from virtual addresses. it is concerned with which vm_page is
204 * mapped where. for example, when you run /bin/ls and start 204 * mapped where. for example, when you run /bin/ls and start
205 * at page 0x1000 the fault routine may lookup the correct page 205 * at page 0x1000 the fault routine may lookup the correct page
206 * of the /bin/ls file and then ask the pmap layer to establish 206 * of the /bin/ls file and then ask the pmap layer to establish
207 * a mapping for it. 207 * a mapping for it.
208 * 208 *
209 * note that information in the lower layer of the VM system can be 209 * note that information in the lower layer of the VM system can be
210 * thrown away since it can easily be reconstructed from the info 210 * thrown away since it can easily be reconstructed from the info
211 * in the upper layer. 211 * in the upper layer.
212 * 212 *
213 * data structures we use include: 213 * data structures we use include:
214 * 214 *
215 * - struct pmap: describes the address space of one thread 215 * - struct pmap: describes the address space of one thread
216 * - struct pmap_page: describes one pv-tracked page, without 216 * - struct pmap_page: describes one pv-tracked page, without
217 * necessarily a corresponding vm_page 217 * necessarily a corresponding vm_page
218 * - struct pv_entry: describes one <PMAP,VA> mapping of a PA 218 * - struct pv_entry: describes one <PMAP,VA> mapping of a PA
219 * - pmap_page::pp_pvlist: there is one list per pv-tracked page of 219 * - pmap_page::pp_pvlist: there is one list per pv-tracked page of
220 * physical memory. the pp_pvlist points to a list of pv_entry 220 * physical memory. the pp_pvlist points to a list of pv_entry
221 * structures which describe all the <PMAP,VA> pairs that this 221 * structures which describe all the <PMAP,VA> pairs that this
222 * page is mapped in. this is critical for page based operations 222 * page is mapped in. this is critical for page based operations
223 * such as pmap_page_protect() [change protection on _all_ mappings 223 * such as pmap_page_protect() [change protection on _all_ mappings
224 * of a page] 224 * of a page]
225 */ 225 */
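/*
 * Illustrative sketch of the pv-tracking idea described above (the
 * structures here are simplified stand-ins, not the real layout): each
 * pv-tracked physical page carries a list of <PMAP,VA> pairs, and
 * page-based operations such as pmap_page_protect() simply walk that
 * list to reach every mapping of the page.
 */
struct demo_pv_entry {
	LIST_ENTRY(demo_pv_entry) dpe_list;	/* link on the page's list */
	struct pmap *dpe_pmap;			/* pmap holding this mapping */
	vaddr_t dpe_va;				/* VA of this mapping */
};

struct demo_pmap_page {
	LIST_HEAD(, demo_pv_entry) dpp_pvlist;	/* all mappings of the page */
};

static void
demo_foreach_mapping(struct demo_pmap_page *pp)
{
	struct demo_pv_entry *pve;

	LIST_FOREACH(pve, &pp->dpp_pvlist, dpe_list) {
		/* operate on the mapping <pve->dpe_pmap, pve->dpe_va> */
	}
}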
226 226
227/* 227/*
228 * Locking 228 * Locking
229 * 229 *
230 * We have the following locks that we must deal with, listed in the order 230 * We have the following locks that we must deal with, listed in the order
231 * that they are acquired: 231 * that they are acquired:
232 * 232 *
233 * pg->uobject->vmobjlock, pg->uanon->an_lock 233 * pg->uobject->vmobjlock, pg->uanon->an_lock
234 * 234 *
235 * For managed pages, these per-object locks are taken by the VM system 235 * For managed pages, these per-object locks are taken by the VM system
236 * before calling into the pmap module - either a read or write hold. 236 * before calling into the pmap module - either a read or write hold.
 237 * The lock hold prevents pages from changing identity while the pmap is 237 * The lock hold prevents pages from changing identity while the pmap is
238 * operating on them. For example, the same lock is held across a call 238 * operating on them. For example, the same lock is held across a call
239 * to pmap_remove() and the following call to pmap_update(), so that a 239 * to pmap_remove() and the following call to pmap_update(), so that a
240 * page does not gain a new identity while its TLB visibility is stale. 240 * page does not gain a new identity while its TLB visibility is stale.
241 * 241 *
242 * pmap->pm_lock 242 * pmap->pm_lock
243 * 243 *
244 * This lock protects the fields in the pmap structure including the 244 * This lock protects the fields in the pmap structure including the
245 * non-kernel PDEs in the PDP, the PTEs, and PTPs and connected data 245 * non-kernel PDEs in the PDP, the PTEs, and PTPs and connected data
246 * structures. For modifying unmanaged kernel PTEs it is not needed as 246 * structures. For modifying unmanaged kernel PTEs it is not needed as
247 * kernel PDEs are never freed, and the kernel is expected to be self 247 * kernel PDEs are never freed, and the kernel is expected to be self
248 * consistent (and the lock can't be taken for unmanaged kernel PTEs, 248 * consistent (and the lock can't be taken for unmanaged kernel PTEs,
249 * because they can be modified from interrupt context). 249 * because they can be modified from interrupt context).
250 * 250 *
251 * pmaps_lock 251 * pmaps_lock
252 * 252 *
253 * This lock protects the list of active pmaps (headed by "pmaps"). 253 * This lock protects the list of active pmaps (headed by "pmaps").
254 * It's acquired when adding or removing pmaps or adjusting kernel PDEs. 254 * It's acquired when adding or removing pmaps or adjusting kernel PDEs.
255 * 255 *
256 * pp_lock 256 * pp_lock
257 * 257 *
258 * This per-page lock protects PV entry lists and the embedded PV entry 258 * This per-page lock protects PV entry lists and the embedded PV entry
259 * in each vm_page, allowing for concurrent operation on pages by 259 * in each vm_page, allowing for concurrent operation on pages by
260 * different pmaps. This is a spin mutex at IPL_VM, because at the 260 * different pmaps. This is a spin mutex at IPL_VM, because at the
261 * points it is taken context switching is usually not tolerable, and 261 * points it is taken context switching is usually not tolerable, and
262 * spin mutexes must block out interrupts that could take kernel_lock. 262 * spin mutexes must block out interrupts that could take kernel_lock.
263 */ 263 */
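/*
 * A minimal sketch of the acquisition order documented above, assuming
 * the caller already holds the relevant vmobjlock/an_lock: the pmap's
 * adaptive pm_lock is taken before the per-page spin mutex pp_lock
 * (IPL_VM), and the two are released in reverse order.  Illustrative
 * only; the real lock dances live in pmap_enter(), pmap_remove() and
 * friends.
 */
static void
demo_lock_order(struct pmap *pmap, struct pmap_page *pp)
{

	mutex_enter(&pmap->pm_lock);	/* adaptive mutex, may sleep */
	mutex_enter(&pp->pp_lock);	/* spin mutex at IPL_VM */
	/* ... manipulate PV entries and PTEs here ... */
	mutex_exit(&pp->pp_lock);
	mutex_exit(&pmap->pm_lock);
}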
264 264
265/* uvm_object is abused here to index pmap_pages; make assertions happy. */ 265/* uvm_object is abused here to index pmap_pages; make assertions happy. */
266#ifdef DIAGNOSTIC 266#ifdef DIAGNOSTIC
267#define PMAP_DUMMY_LOCK(pm) rw_enter(&(pm)->pm_dummy_lock, RW_WRITER) 267#define PMAP_DUMMY_LOCK(pm) rw_enter(&(pm)->pm_dummy_lock, RW_WRITER)
268#define PMAP_DUMMY_UNLOCK(pm) rw_exit(&(pm)->pm_dummy_lock) 268#define PMAP_DUMMY_UNLOCK(pm) rw_exit(&(pm)->pm_dummy_lock)
269#else 269#else
270#define PMAP_DUMMY_LOCK(pm) 270#define PMAP_DUMMY_LOCK(pm)
271#define PMAP_DUMMY_UNLOCK(pm) 271#define PMAP_DUMMY_UNLOCK(pm)
272#endif 272#endif
273 273
274static const struct uvm_pagerops pmap_pager = { 274static const struct uvm_pagerops pmap_pager = {
275 /* nothing */ 275 /* nothing */
276}; 276};
277 277
278const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER; 278const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER;
279const vaddr_t ptp_frames[] = PTP_FRAME_INITIALIZER; 279const vaddr_t ptp_frames[] = PTP_FRAME_INITIALIZER;
280const int ptp_shifts[] = PTP_SHIFT_INITIALIZER; 280const int ptp_shifts[] = PTP_SHIFT_INITIALIZER;
281const long nkptpmax[] = NKPTPMAX_INITIALIZER; 281const long nkptpmax[] = NKPTPMAX_INITIALIZER;
282const long nbpd[] = NBPD_INITIALIZER; 282const long nbpd[] = NBPD_INITIALIZER;
283#ifdef i386 283#ifdef i386
284pd_entry_t * const normal_pdes[] = PDES_INITIALIZER; 284pd_entry_t * const normal_pdes[] = PDES_INITIALIZER;
285#else 285#else
286pd_entry_t *normal_pdes[3]; 286pd_entry_t *normal_pdes[3];
287#endif 287#endif
288 288
289long nkptp[] = NKPTP_INITIALIZER; 289long nkptp[] = NKPTP_INITIALIZER;
290 290
291struct pmap_head pmaps; 291struct pmap_head pmaps;
292kmutex_t pmaps_lock __cacheline_aligned; 292kmutex_t pmaps_lock __cacheline_aligned;
293 293
294struct pcpu_area *pcpuarea __read_mostly; 294struct pcpu_area *pcpuarea __read_mostly;
295 295
296static vaddr_t pmap_maxkvaddr; 296static vaddr_t pmap_maxkvaddr;
297 297
298/* 298/*
299 * Misc. event counters. 299 * Misc. event counters.
300 */ 300 */
301struct evcnt pmap_iobmp_evcnt; 301struct evcnt pmap_iobmp_evcnt;
302struct evcnt pmap_ldt_evcnt; 302struct evcnt pmap_ldt_evcnt;
303 303
304/* 304/*
305 * PAT 305 * PAT
306 */ 306 */
307static bool cpu_pat_enabled __read_mostly = false; 307static bool cpu_pat_enabled __read_mostly = false;
308 308
309/* 309/*
310 * Global data structures 310 * Global data structures
311 */ 311 */
312 312
313static struct pmap kernel_pmap_store __cacheline_aligned; /* kernel's pmap */ 313static struct pmap kernel_pmap_store __cacheline_aligned; /* kernel's pmap */
314struct pmap *const kernel_pmap_ptr = &kernel_pmap_store; 314struct pmap *const kernel_pmap_ptr = &kernel_pmap_store;
315static rb_tree_t pmap_kernel_rb __cacheline_aligned; 315static rb_tree_t pmap_kernel_rb __cacheline_aligned;
316 316
317struct bootspace bootspace __read_mostly; 317struct bootspace bootspace __read_mostly;
318struct slotspace slotspace __read_mostly; 318struct slotspace slotspace __read_mostly;
319 319
320/* Set to PTE_NX if supported. */ 320/* Set to PTE_NX if supported. */
321pd_entry_t pmap_pg_nx __read_mostly = 0; 321pd_entry_t pmap_pg_nx __read_mostly = 0;
322 322
323/* Set to PTE_G if supported. */ 323/* Set to PTE_G if supported. */
324pd_entry_t pmap_pg_g __read_mostly = 0; 324pd_entry_t pmap_pg_g __read_mostly = 0;
325 325
326/* Set to true if large pages are supported. */ 326/* Set to true if large pages are supported. */
327int pmap_largepages __read_mostly = 0; 327int pmap_largepages __read_mostly = 0;
328 328
329paddr_t lowmem_rsvd __read_mostly; 329paddr_t lowmem_rsvd __read_mostly;
330paddr_t avail_start __read_mostly; /* PA of first available physical page */ 330paddr_t avail_start __read_mostly; /* PA of first available physical page */
331paddr_t avail_end __read_mostly; /* PA of last available physical page */ 331paddr_t avail_end __read_mostly; /* PA of last available physical page */
332 332
333#ifdef XENPV 333#ifdef XENPV
334paddr_t pmap_pa_start; /* PA of first physical page for this domain */ 334paddr_t pmap_pa_start; /* PA of first physical page for this domain */
335paddr_t pmap_pa_end; /* PA of last physical page for this domain */ 335paddr_t pmap_pa_end; /* PA of last physical page for this domain */
336#endif 336#endif
337 337
338#define VM_PAGE_TO_PP(pg) (&(pg)->mdpage.mp_pp) 338#define VM_PAGE_TO_PP(pg) (&(pg)->mdpage.mp_pp)
339#define PMAP_CHECK_PP(pp) \ 339#define PMAP_CHECK_PP(pp) \
340 KASSERTMSG((pp)->pp_lock.mtx_ipl._ipl == IPL_VM, "bad pmap_page %p", pp) 340 KASSERTMSG((pp)->pp_lock.mtx_ipl._ipl == IPL_VM, "bad pmap_page %p", pp)
341 341
342/* 342/*
343 * Other data structures 343 * Other data structures
344 */ 344 */
345 345
346static pt_entry_t protection_codes[8] __read_mostly; 346static pt_entry_t protection_codes[8] __read_mostly;
347 347
348static bool pmap_initialized __read_mostly = false; /* pmap_init done yet? */ 348static bool pmap_initialized __read_mostly = false; /* pmap_init done yet? */
349 349
350/* 350/*
351 * The following two vaddr_t's are used during system startup to keep track of 351 * The following two vaddr_t's are used during system startup to keep track of
352 * how much of the kernel's VM space we have used. Once the system is started, 352 * how much of the kernel's VM space we have used. Once the system is started,
353 * the management of the remaining kernel VM space is turned over to the 353 * the management of the remaining kernel VM space is turned over to the
354 * kernel_map vm_map. 354 * kernel_map vm_map.
355 */ 355 */
356static vaddr_t virtual_avail __read_mostly; /* VA of first free KVA */ 356static vaddr_t virtual_avail __read_mostly; /* VA of first free KVA */
357static vaddr_t virtual_end __read_mostly; /* VA of last free KVA */ 357static vaddr_t virtual_end __read_mostly; /* VA of last free KVA */
358 358
359#ifndef XENPV 359#ifndef XENPV
360/* 360/*
361 * LAPIC virtual address, and fake physical address. 361 * LAPIC virtual address, and fake physical address.
362 */ 362 */
363volatile vaddr_t local_apic_va __read_mostly; 363volatile vaddr_t local_apic_va __read_mostly;
364paddr_t local_apic_pa __read_mostly; 364paddr_t local_apic_pa __read_mostly;
365#endif 365#endif
366 366
367/* 367/*
368 * pool that pmap structures are allocated from 368 * pool that pmap structures are allocated from
369 */ 369 */
370struct pool_cache pmap_cache; 370struct pool_cache pmap_cache;
371static int pmap_ctor(void *, void *, int); 371static int pmap_ctor(void *, void *, int);
372static void pmap_dtor(void *, void *); 372static void pmap_dtor(void *, void *);
373 373
374/* 374/*
375 * pv_page cache 375 * pv_page cache
376 */ 376 */
377static struct pool_cache pmap_pvp_cache; 377static struct pool_cache pmap_pvp_cache;
378 378
379#ifdef __HAVE_DIRECT_MAP 379#ifdef __HAVE_DIRECT_MAP
380vaddr_t pmap_direct_base __read_mostly; 380vaddr_t pmap_direct_base __read_mostly;
381vaddr_t pmap_direct_end __read_mostly; 381vaddr_t pmap_direct_end __read_mostly;
382#endif 382#endif
383 383
384#ifndef __HAVE_DIRECT_MAP 384#ifndef __HAVE_DIRECT_MAP
385/* 385/*
386 * Special VAs and the PTEs that map them 386 * Special VAs and the PTEs that map them
387 */ 387 */
388static pt_entry_t *early_zero_pte; 388static pt_entry_t *early_zero_pte;
389static void pmap_vpage_cpualloc(struct cpu_info *); 389static void pmap_vpage_cpualloc(struct cpu_info *);
390#ifdef XENPV 390#ifdef XENPV
391char *early_zerop; /* also referenced from xen_locore() */ 391char *early_zerop; /* also referenced from xen_locore() */
392#else 392#else
393static char *early_zerop; 393static char *early_zerop;
394#endif 394#endif
395#endif 395#endif
396 396
397int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, u_int); 397int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, u_int);
398 398
399/* PDP pool and its callbacks */ 399/* PDP pool and its callbacks */
400static struct pool pmap_pdp_pool; 400static struct pool pmap_pdp_pool;
401static void pmap_pdp_init(pd_entry_t *); 401static void pmap_pdp_init(pd_entry_t *);
402static void pmap_pdp_fini(pd_entry_t *); 402static void pmap_pdp_fini(pd_entry_t *);
403 403
404#ifdef PAE 404#ifdef PAE
405/* need to allocate items of 4 pages */ 405/* need to allocate items of 4 pages */
406static void *pmap_pdp_alloc(struct pool *, int); 406static void *pmap_pdp_alloc(struct pool *, int);
407static void pmap_pdp_free(struct pool *, void *); 407static void pmap_pdp_free(struct pool *, void *);
408static struct pool_allocator pmap_pdp_allocator = { 408static struct pool_allocator pmap_pdp_allocator = {
409 .pa_alloc = pmap_pdp_alloc, 409 .pa_alloc = pmap_pdp_alloc,
410 .pa_free = pmap_pdp_free, 410 .pa_free = pmap_pdp_free,
411 .pa_pagesz = PAGE_SIZE * PDP_SIZE, 411 .pa_pagesz = PAGE_SIZE * PDP_SIZE,
412}; 412};
413#endif 413#endif
414 414
415extern vaddr_t idt_vaddr; 415extern vaddr_t idt_vaddr;
416extern paddr_t idt_paddr; 416extern paddr_t idt_paddr;
417extern vaddr_t gdt_vaddr; 417extern vaddr_t gdt_vaddr;
418extern paddr_t gdt_paddr; 418extern paddr_t gdt_paddr;
419extern vaddr_t ldt_vaddr; 419extern vaddr_t ldt_vaddr;
420extern paddr_t ldt_paddr; 420extern paddr_t ldt_paddr;
421 421
422#ifdef i386 422#ifdef i386
423/* stuff to fix the pentium f00f bug */ 423/* stuff to fix the pentium f00f bug */
424extern vaddr_t pentium_idt_vaddr; 424extern vaddr_t pentium_idt_vaddr;
425#endif 425#endif
426 426
427/* Array of freshly allocated PTPs, for pmap_get_ptp(). */ 427/* Array of freshly allocated PTPs, for pmap_get_ptp(). */
428struct pmap_ptparray { 428struct pmap_ptparray {
429 struct vm_page *pg[PTP_LEVELS + 1]; 429 struct vm_page *pg[PTP_LEVELS + 1];
430 bool alloced[PTP_LEVELS + 1]; 430 bool alloced[PTP_LEVELS + 1];
431}; 431};
432 432
433/* 433/*
434 * PV entries are allocated in page-sized chunks and cached per-pmap to 434 * PV entries are allocated in page-sized chunks and cached per-pmap to
435 * avoid intense pressure on memory allocators. 435 * avoid intense pressure on memory allocators.
436 */ 436 */
437 437
438struct pv_page { 438struct pv_page {
439 LIST_HEAD(, pv_entry) pvp_pves; 439 LIST_HEAD(, pv_entry) pvp_pves;
440 LIST_ENTRY(pv_page) pvp_list; 440 LIST_ENTRY(pv_page) pvp_list;
441 long pvp_nfree; 441 long pvp_nfree;
442 struct pmap *pvp_pmap; 442 struct pmap *pvp_pmap;
443}; 443};
444 444
445#define PVE_PER_PVP ((PAGE_SIZE / sizeof(struct pv_entry)) - 1) 445#define PVE_PER_PVP ((PAGE_SIZE / sizeof(struct pv_entry)) - 1)
446 446
447/* 447/*
448 * PV tree prototypes 448 * PV tree prototypes
449 */ 449 */
450 450
451static int pmap_compare_key(void *, const void *, const void *); 451static int pmap_compare_key(void *, const void *, const void *);
452static int pmap_compare_nodes(void *, const void *, const void *); 452static int pmap_compare_nodes(void *, const void *, const void *);
453 453
 454/* Red-black tree */ 454/* Red-black tree */
455static const rb_tree_ops_t pmap_rbtree_ops = { 455static const rb_tree_ops_t pmap_rbtree_ops = {
456 .rbto_compare_nodes = pmap_compare_nodes, 456 .rbto_compare_nodes = pmap_compare_nodes,
457 .rbto_compare_key = pmap_compare_key, 457 .rbto_compare_key = pmap_compare_key,
458 .rbto_node_offset = offsetof(struct pv_entry, pve_rb), 458 .rbto_node_offset = offsetof(struct pv_entry, pve_rb),
459 .rbto_context = NULL 459 .rbto_context = NULL
460}; 460};
461 461
462/* 462/*
463 * Local prototypes 463 * Local prototypes
464 */ 464 */
465 465
466#ifdef __HAVE_PCPU_AREA 466#ifdef __HAVE_PCPU_AREA
467static void pmap_init_pcpu(void); 467static void pmap_init_pcpu(void);
468#endif 468#endif
469#ifdef __HAVE_DIRECT_MAP 469#ifdef __HAVE_DIRECT_MAP
470static void pmap_init_directmap(struct pmap *); 470static void pmap_init_directmap(struct pmap *);
471#endif 471#endif
472#if !defined(XENPV) 472#if !defined(XENPV)
473static void pmap_remap_global(void); 473static void pmap_remap_global(void);
474#endif 474#endif
475#ifndef XENPV 475#ifndef XENPV
476static void pmap_init_lapic(void); 476static void pmap_init_lapic(void);
477static void pmap_remap_largepages(void); 477static void pmap_remap_largepages(void);
478#endif 478#endif
479 479
480static int pmap_get_ptp(struct pmap *, struct pmap_ptparray *, vaddr_t, int, 480static int pmap_get_ptp(struct pmap *, struct pmap_ptparray *, vaddr_t, int,
481 struct vm_page **); 481 struct vm_page **);
482static void pmap_unget_ptp(struct pmap *, struct pmap_ptparray *); 482static void pmap_unget_ptp(struct pmap *, struct pmap_ptparray *);
483static void pmap_install_ptp(struct pmap *, struct pmap_ptparray *, vaddr_t, 483static void pmap_install_ptp(struct pmap *, struct pmap_ptparray *, vaddr_t,
484 pd_entry_t * const *); 484 pd_entry_t * const *);
485static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, int); 485static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, int);
486static void pmap_freepage(struct pmap *, struct vm_page *, int); 486static void pmap_freepage(struct pmap *, struct vm_page *, int);
487static void pmap_free_ptp(struct pmap *, struct vm_page *, vaddr_t, 487static void pmap_free_ptp(struct pmap *, struct vm_page *, vaddr_t,
488 pt_entry_t *, pd_entry_t * const *); 488 pt_entry_t *, pd_entry_t * const *);
489static bool pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *, 489static bool pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
490 vaddr_t); 490 vaddr_t);
491static void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t, vaddr_t, 491static void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t, vaddr_t,
492 vaddr_t); 492 vaddr_t);
493static int pmap_pvp_ctor(void *, void *, int); 493static int pmap_pvp_ctor(void *, void *, int);
494static void pmap_pvp_dtor(void *, void *); 494static void pmap_pvp_dtor(void *, void *);
495static struct pv_entry *pmap_alloc_pv(struct pmap *); 495static struct pv_entry *pmap_alloc_pv(struct pmap *);
496static void pmap_free_pv(struct pmap *, struct pv_entry *); 496static void pmap_free_pv(struct pmap *, struct pv_entry *);
497static void pmap_drain_pv(struct pmap *); 497static void pmap_drain_pv(struct pmap *);
498 498
499static void pmap_alloc_level(struct pmap *, vaddr_t, long *); 499static void pmap_alloc_level(struct pmap *, vaddr_t, long *);
500 500
501static void pmap_load1(struct lwp *, struct pmap *, struct pmap *); 501static void pmap_load1(struct lwp *, struct pmap *, struct pmap *);
502static void pmap_reactivate(struct pmap *); 502static void pmap_reactivate(struct pmap *);
503 503
504/* 504/*
505 * p m a p h e l p e r f u n c t i o n s 505 * p m a p h e l p e r f u n c t i o n s
506 */ 506 */
507 507
508static inline void 508static inline void
509pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff) 509pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff)
510{ 510{
511 511
512 KASSERT(cold || mutex_owned(&pmap->pm_lock)); 512 KASSERT(cold || mutex_owned(&pmap->pm_lock));
513 pmap->pm_stats.resident_count += resid_diff; 513 pmap->pm_stats.resident_count += resid_diff;
514 pmap->pm_stats.wired_count += wired_diff; 514 pmap->pm_stats.wired_count += wired_diff;
515} 515}
516 516
517static inline void 517static inline void
518pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte) 518pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte)
519{ 519{
520 int resid_diff = ((npte & PTE_P) ? 1 : 0) - ((opte & PTE_P) ? 1 : 0); 520 int resid_diff = ((npte & PTE_P) ? 1 : 0) - ((opte & PTE_P) ? 1 : 0);
521 int wired_diff = ((npte & PTE_WIRED) ? 1 : 0) - ((opte & PTE_WIRED) ? 1 : 0); 521 int wired_diff = ((npte & PTE_WIRED) ? 1 : 0) - ((opte & PTE_WIRED) ? 1 : 0);
522 522
523 KASSERT((npte & (PTE_P | PTE_WIRED)) != PTE_WIRED); 523 KASSERT((npte & (PTE_P | PTE_WIRED)) != PTE_WIRED);
524 KASSERT((opte & (PTE_P | PTE_WIRED)) != PTE_WIRED); 524 KASSERT((opte & (PTE_P | PTE_WIRED)) != PTE_WIRED);
525 525
526 pmap_stats_update(pmap, resid_diff, wired_diff); 526 pmap_stats_update(pmap, resid_diff, wired_diff);
527} 527}
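/*
 * Example of the accounting above: replacing an empty PTE (opte == 0)
 * with a present, wired mapping (PTE_P | PTE_WIRED set in npte) gives
 * resid_diff = +1 and wired_diff = +1; tearing the same mapping down
 * gives -1/-1; a present but unwired mapping moves resident_count only.
 */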
528 528
529/* 529/*
530 * ptp_to_pmap: lookup pmap by ptp 530 * ptp_to_pmap: lookup pmap by ptp
531 */ 531 */
532static inline struct pmap * 532static inline struct pmap *
533ptp_to_pmap(struct vm_page *ptp) 533ptp_to_pmap(struct vm_page *ptp)
534{ 534{
535 struct pmap *pmap; 535 struct pmap *pmap;
536 536
537 if (ptp == NULL) { 537 if (ptp == NULL) {
538 return pmap_kernel(); 538 return pmap_kernel();
539 } 539 }
540 pmap = (struct pmap *)ptp->uobject; 540 pmap = (struct pmap *)ptp->uobject;
541 KASSERT(pmap != NULL); 541 KASSERT(pmap != NULL);
542 KASSERT(&pmap->pm_obj[0] == ptp->uobject); 542 KASSERT(&pmap->pm_obj[0] == ptp->uobject);
543 return pmap; 543 return pmap;
544} 544}
545 545
546static inline struct pv_pte * 546static inline struct pv_pte *
547pve_to_pvpte(struct pv_entry *pve) 547pve_to_pvpte(struct pv_entry *pve)
548{ 548{
549 549
550 if (pve == NULL) 550 if (pve == NULL)
551 return NULL; 551 return NULL;
552 KASSERT((void *)&pve->pve_pte == (void *)pve); 552 KASSERT((void *)&pve->pve_pte == (void *)pve);
553 return &pve->pve_pte; 553 return &pve->pve_pte;
554} 554}
555 555
556static inline struct pv_entry * 556static inline struct pv_entry *
557pvpte_to_pve(struct pv_pte *pvpte) 557pvpte_to_pve(struct pv_pte *pvpte)
558{ 558{
559 struct pv_entry *pve = (void *)pvpte; 559 struct pv_entry *pve = (void *)pvpte;
560 560
561 KASSERT(pve_to_pvpte(pve) == pvpte); 561 KASSERT(pve_to_pvpte(pve) == pvpte);
562 return pve; 562 return pve;
563} 563}
564 564
565/* 565/*
566 * Return true if the pmap page has an embedded PV entry. 566 * Return true if the pmap page has an embedded PV entry.
567 */ 567 */
568static inline bool 568static inline bool
569pv_pte_embedded(struct pmap_page *pp) 569pv_pte_embedded(struct pmap_page *pp)
570{ 570{
571 571
572 KASSERT(mutex_owned(&pp->pp_lock)); 572 KASSERT(mutex_owned(&pp->pp_lock));
573 return (bool)((vaddr_t)pp->pp_pte.pte_ptp | pp->pp_pte.pte_va); 573 return (bool)((vaddr_t)pp->pp_pte.pte_ptp | pp->pp_pte.pte_va);
574} 574}
575 575
576/* 576/*
577 * pv_pte_first, pv_pte_next: PV list iterator. 577 * pv_pte_first, pv_pte_next: PV list iterator.
578 */ 578 */
579static inline struct pv_pte * 579static inline struct pv_pte *
580pv_pte_first(struct pmap_page *pp) 580pv_pte_first(struct pmap_page *pp)
581{ 581{
582 582
583 KASSERT(mutex_owned(&pp->pp_lock)); 583 KASSERT(mutex_owned(&pp->pp_lock));
584 if (pv_pte_embedded(pp)) { 584 if (pv_pte_embedded(pp)) {
585 return &pp->pp_pte; 585 return &pp->pp_pte;
586 } 586 }
587 return pve_to_pvpte(LIST_FIRST(&pp->pp_pvlist)); 587 return pve_to_pvpte(LIST_FIRST(&pp->pp_pvlist));
588} 588}
589 589
590static inline struct pv_pte * 590static inline struct pv_pte *
591pv_pte_next(struct pmap_page *pp, struct pv_pte *pvpte) 591pv_pte_next(struct pmap_page *pp, struct pv_pte *pvpte)
592{ 592{
593 593
594 KASSERT(mutex_owned(&pp->pp_lock)); 594 KASSERT(mutex_owned(&pp->pp_lock));
595 KASSERT(pvpte != NULL); 595 KASSERT(pvpte != NULL);
596 if (pvpte == &pp->pp_pte) { 596 if (pvpte == &pp->pp_pte) {
597 return pve_to_pvpte(LIST_FIRST(&pp->pp_pvlist)); 597 return pve_to_pvpte(LIST_FIRST(&pp->pp_pvlist));
598 } 598 }
599 return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list)); 599 return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list));
600} 600}
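/*
 * Typical use of the iterator pair above (sketch; as the KASSERTs
 * require, pp_lock must be held across the walk): visit every
 * <PMAP,VA> mapping of a pv-tracked page, starting with the embedded
 * PV entry when present and then the dynamically allocated ones.
 */
static void
demo_walk_pvlist(struct pmap_page *pp)
{
	struct pv_pte *pvpte;

	KASSERT(mutex_owned(&pp->pp_lock));
	for (pvpte = pv_pte_first(pp); pvpte != NULL;
	    pvpte = pv_pte_next(pp, pvpte)) {
		/* pvpte->pte_ptp and pvpte->pte_va identify the mapping */
	}
}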
601 601
602static inline uint8_t 602static inline uint8_t
603pmap_pte_to_pp_attrs(pt_entry_t pte) 603pmap_pte_to_pp_attrs(pt_entry_t pte)
604{ 604{
605 uint8_t ret = 0; 605 uint8_t ret = 0;
606 if (pte & PTE_D) 606 if (pte & PTE_D)
607 ret |= PP_ATTRS_D; 607 ret |= PP_ATTRS_D;
608 if (pte & PTE_A) 608 if (pte & PTE_A)
609 ret |= PP_ATTRS_A; 609 ret |= PP_ATTRS_A;
610 if (pte & PTE_W) 610 if (pte & PTE_W)
611 ret |= PP_ATTRS_W; 611 ret |= PP_ATTRS_W;
612 return ret; 612 return ret;
613} 613}
614 614
615static inline pt_entry_t 615static inline pt_entry_t
616pmap_pp_attrs_to_pte(uint8_t attrs) 616pmap_pp_attrs_to_pte(uint8_t attrs)
617{ 617{
618 pt_entry_t pte = 0; 618 pt_entry_t pte = 0;
619 if (attrs & PP_ATTRS_D) 619 if (attrs & PP_ATTRS_D)
620 pte |= PTE_D; 620 pte |= PTE_D;
621 if (attrs & PP_ATTRS_A) 621 if (attrs & PP_ATTRS_A)
622 pte |= PTE_A; 622 pte |= PTE_A;
623 if (attrs & PP_ATTRS_W) 623 if (attrs & PP_ATTRS_W)
624 pte |= PTE_W; 624 pte |= PTE_W;
625 return pte; 625 return pte;
626} 626}
627 627
628/* 628/*
629 * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]? 629 * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]?
630 * of course the kernel is always loaded 630 * of course the kernel is always loaded
631 */ 631 */
632bool 632bool
633pmap_is_curpmap(struct pmap *pmap) 633pmap_is_curpmap(struct pmap *pmap)
634{ 634{
635 return ((pmap == pmap_kernel()) || (pmap == curcpu()->ci_pmap)); 635 return ((pmap == pmap_kernel()) || (pmap == curcpu()->ci_pmap));
636} 636}
637 637
638inline void 638inline void
639pmap_reference(struct pmap *pmap) 639pmap_reference(struct pmap *pmap)
640{ 640{
641 641
642 atomic_inc_uint(&pmap->pm_obj[0].uo_refs); 642 atomic_inc_uint(&pmap->pm_obj[0].uo_refs);
643} 643}
644 644
645/* 645/*
646 * rbtree: compare two nodes. 646 * rbtree: compare two nodes.
647 */ 647 */
648static int 648static int
649pmap_compare_nodes(void *context, const void *n1, const void *n2) 649pmap_compare_nodes(void *context, const void *n1, const void *n2)
650{ 650{
651 const struct pv_entry *pve1 = n1; 651 const struct pv_entry *pve1 = n1;
652 const struct pv_entry *pve2 = n2; 652 const struct pv_entry *pve2 = n2;
653 653
654 KASSERT(pve1->pve_pte.pte_ptp == pve2->pve_pte.pte_ptp); 654 KASSERT(pve1->pve_pte.pte_ptp == pve2->pve_pte.pte_ptp);
655 655
656 if (pve1->pve_pte.pte_va < pve2->pve_pte.pte_va) { 656 if (pve1->pve_pte.pte_va < pve2->pve_pte.pte_va) {
657 return -1; 657 return -1;
658 } 658 }
659 if (pve1->pve_pte.pte_va > pve2->pve_pte.pte_va) { 659 if (pve1->pve_pte.pte_va > pve2->pve_pte.pte_va) {
660 return 1; 660 return 1;
661 } 661 }
662 return 0; 662 return 0;
663} 663}
664 664
665/* 665/*
666 * rbtree: compare a node and a key. 666 * rbtree: compare a node and a key.
667 */ 667 */
668static int 668static int
669pmap_compare_key(void *context, const void *n, const void *k) 669pmap_compare_key(void *context, const void *n, const void *k)
670{ 670{
671 const struct pv_entry *pve = n; 671 const struct pv_entry *pve = n;
672 const vaddr_t key = (vaddr_t)k; 672 const vaddr_t key = (vaddr_t)k;
673 673
674 if (pve->pve_pte.pte_va < key) { 674 if (pve->pve_pte.pte_va < key) {
675 return -1; 675 return -1;
676 } 676 }
677 if (pve->pve_pte.pte_va > key) { 677 if (pve->pve_pte.pte_va > key) {
678 return 1; 678 return 1;
679 } 679 }
680 return 0; 680 return 0;
681} 681}
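/*
 * Sketch of how the comparators above are consumed through the
 * standard rbtree(9) interface (illustrative only; the real per-PTP
 * trees are created elsewhere in this file): a tree initialized with
 * rb_tree_init(&tree, &pmap_rbtree_ops) can then be searched by VA,
 * which pmap_compare_key() receives as the opaque key.
 */
static struct pv_entry *
demo_pve_lookup(rb_tree_t *tree, vaddr_t va)
{

	return rb_tree_find_node(tree, (void *)va);
}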
682 682
683/* 683/*
684 * pmap_ptp_range_set: abuse ptp->uanon to record minimum VA of PTE 684 * pmap_ptp_range_set: abuse ptp->uanon to record minimum VA of PTE
685 */ 685 */
686static inline void 686static inline void
687pmap_ptp_range_set(struct vm_page *ptp, vaddr_t va) 687pmap_ptp_range_set(struct vm_page *ptp, vaddr_t va)
688{ 688{
689 vaddr_t *min = (vaddr_t *)&ptp->uanon; 689 vaddr_t *min = (vaddr_t *)&ptp->uanon;
690 690
691 if (va < *min) { 691 if (va < *min) {
692 *min = va; 692 *min = va;
693 } 693 }
694} 694}
695 695
696/* 696/*
697 * pmap_ptp_range_clip: abuse ptp->uanon to clip range of PTEs to remove 697 * pmap_ptp_range_clip: abuse ptp->uanon to clip range of PTEs to remove
698 */ 698 */
699static inline void 699static inline void
700pmap_ptp_range_clip(struct vm_page *ptp, vaddr_t *startva, pt_entry_t **pte) 700pmap_ptp_range_clip(struct vm_page *ptp, vaddr_t *startva, pt_entry_t **pte)
701{ 701{
702 vaddr_t sclip; 702 vaddr_t sclip;
703 703
704 if (ptp == NULL) { 704 if (ptp == NULL) {
705 return; 705 return;
706 } 706 }
707 707
708 sclip = (vaddr_t)ptp->uanon; 708 sclip = (vaddr_t)ptp->uanon;
709 sclip = (*startva < sclip ? sclip : *startva); 709 sclip = (*startva < sclip ? sclip : *startva);
710 *pte += (sclip - *startva) / PAGE_SIZE; 710 *pte += (sclip - *startva) / PAGE_SIZE;
711 *startva = sclip; 711 *startva = sclip;
712} 712}
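/*
 * Worked example for the clipping above, with hypothetical addresses:
 * if pmap_ptp_range_set() recorded a minimum mapped VA of 0x5000 for
 * this PTP and a caller starts removing at *startva == 0x2000, then
 * sclip becomes 0x5000, *pte is advanced by (0x5000 - 0x2000) /
 * PAGE_SIZE == 3 entries (4KB pages), and *startva is bumped to
 * 0x5000, skipping the provably empty low part of the range.
 */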
713 713
714/* 714/*
715 * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in 715 * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
716 * 716 *
717 * there are several pmaps involved. some or all of them might be same. 717 * there are several pmaps involved. some or all of them might be same.
718 * 718 *
719 * - the pmap given by the first argument 719 * - the pmap given by the first argument
720 * our caller wants to access this pmap's PTEs. 720 * our caller wants to access this pmap's PTEs.
721 * 721 *
722 * - pmap_kernel() 722 * - pmap_kernel()
723 * the kernel pmap. note that it only contains the kernel part 723 * the kernel pmap. note that it only contains the kernel part
724 * of the address space which is shared by any pmap. ie. any 724 * of the address space which is shared by any pmap. ie. any
725 * pmap can be used instead of pmap_kernel() for our purpose. 725 * pmap can be used instead of pmap_kernel() for our purpose.
726 * 726 *
727 * - ci->ci_pmap 727 * - ci->ci_pmap
728 * pmap currently loaded on the cpu. 728 * pmap currently loaded on the cpu.
729 * 729 *
730 * - vm_map_pmap(&curproc->p_vmspace->vm_map) 730 * - vm_map_pmap(&curproc->p_vmspace->vm_map)
731 * current process' pmap. 731 * current process' pmap.
732 * 732 *
733 * => caller must lock pmap first (if not the kernel pmap) 733 * => caller must lock pmap first (if not the kernel pmap)
734 * => must be undone with pmap_unmap_ptes before returning 734 * => must be undone with pmap_unmap_ptes before returning
735 * => disables kernel preemption 735 * => disables kernel preemption
736 */ 736 */
737void 737void
738pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2, pd_entry_t **ptepp, 738pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2, pd_entry_t **ptepp,
739 pd_entry_t * const **pdeppp) 739 pd_entry_t * const **pdeppp)
740{ 740{
741 struct pmap *curpmap; 741 struct pmap *curpmap;
742 struct cpu_info *ci; 742 struct cpu_info *ci;
743 lwp_t *l; 743 lwp_t *l;
744 744
745 kpreempt_disable(); 745 kpreempt_disable();
746 746
747 /* The kernel's pmap is always accessible. */ 747 /* The kernel's pmap is always accessible. */
748 if (pmap == pmap_kernel()) { 748 if (pmap == pmap_kernel()) {
749 *pmap2 = NULL; 749 *pmap2 = NULL;
750 *ptepp = PTE_BASE; 750 *ptepp = PTE_BASE;
751 *pdeppp = normal_pdes; 751 *pdeppp = normal_pdes;
752 return; 752 return;
753 } 753 }
754 754
755 KASSERT(mutex_owned(&pmap->pm_lock)); 755 KASSERT(mutex_owned(&pmap->pm_lock));
756 756
757 l = curlwp; 757 l = curlwp;
758 ci = l->l_cpu; 758 ci = l->l_cpu;
759 curpmap = ci->ci_pmap; 759 curpmap = ci->ci_pmap;
760 if (pmap == curpmap) { 760 if (pmap == curpmap) {
761 /* 761 /*
762 * Already on the CPU: make it valid. This is very 762 * Already on the CPU: make it valid. This is very
763 * often the case during exit(), when we have switched 763 * often the case during exit(), when we have switched
764 * to the kernel pmap in order to destroy a user pmap. 764 * to the kernel pmap in order to destroy a user pmap.
765 */ 765 */
766 if (__predict_false(ci->ci_tlbstate != TLBSTATE_VALID)) { 766 if (__predict_false(ci->ci_tlbstate != TLBSTATE_VALID)) {
767 pmap_reactivate(pmap); 767 pmap_reactivate(pmap);
768 } 768 }
769 *pmap2 = NULL; 769 *pmap2 = NULL;
770 } else { 770 } else {
771 /* 771 /*
772 * Toss current pmap from CPU and install new pmap, but keep 772 * Toss current pmap from CPU and install new pmap, but keep
773 * a reference to the old one. Dropping the reference can 773 * a reference to the old one. Dropping the reference can
 774 * block as it needs to take locks, so defer that to 774 * block as it needs to take locks, so defer that to
775 * pmap_unmap_ptes(). 775 * pmap_unmap_ptes().
776 */ 776 */
777 pmap_reference(pmap); 777 pmap_reference(pmap);
778 pmap_load1(l, pmap, curpmap); 778 pmap_load1(l, pmap, curpmap);
779 *pmap2 = curpmap; 779 *pmap2 = curpmap;
780 } 780 }
781 KASSERT(ci->ci_tlbstate == TLBSTATE_VALID); 781 KASSERT(ci->ci_tlbstate == TLBSTATE_VALID);
782#ifdef DIAGNOSTIC 782#ifdef DIAGNOSTIC
783 pmap->pm_ncsw = lwp_pctr(); 783 pmap->pm_ncsw = lwp_pctr();
784#endif 784#endif
785 *ptepp = PTE_BASE; 785 *ptepp = PTE_BASE;
786 786
787#if defined(XENPV) && defined(__x86_64__) 787#if defined(XENPV) && defined(__x86_64__)
788 KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] == L4_BASE); 788 KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] == L4_BASE);
789 ci->ci_normal_pdes[PTP_LEVELS - 2] = pmap->pm_pdir; 789 ci->ci_normal_pdes[PTP_LEVELS - 2] = pmap->pm_pdir;
790 *pdeppp = ci->ci_normal_pdes; 790 *pdeppp = ci->ci_normal_pdes;
791#else 791#else
792 *pdeppp = normal_pdes; 792 *pdeppp = normal_pdes;
793#endif 793#endif
794} 794}
795 795
796/* 796/*
797 * pmap_unmap_ptes: unlock the PTE mapping of "pmap" 797 * pmap_unmap_ptes: unlock the PTE mapping of "pmap"
798 * 798 *
799 * => we cannot tolerate context switches while mapped in: assert this. 799 * => we cannot tolerate context switches while mapped in: assert this.
800 * => reenables kernel preemption. 800 * => reenables kernel preemption.
801 * => does not unlock pmap. 801 * => does not unlock pmap.
802 */ 802 */
803void 803void
804pmap_unmap_ptes(struct pmap *pmap, struct pmap * pmap2) 804pmap_unmap_ptes(struct pmap *pmap, struct pmap * pmap2)
805{ 805{
806 struct cpu_info *ci; 806 struct cpu_info *ci;
807 struct pmap *mypmap; 807 struct pmap *mypmap;
808 struct lwp *l; 808 struct lwp *l;
809 809
810 KASSERT(kpreempt_disabled()); 810 KASSERT(kpreempt_disabled());
811 811
812 /* The kernel's pmap is always accessible. */ 812 /* The kernel's pmap is always accessible. */
813 if (pmap == pmap_kernel()) { 813 if (pmap == pmap_kernel()) {
814 kpreempt_enable(); 814 kpreempt_enable();
815 return; 815 return;
816 } 816 }
817 817
818 l = curlwp; 818 l = curlwp;
819 ci = l->l_cpu; 819 ci = l->l_cpu;
820 820
821 KASSERT(mutex_owned(&pmap->pm_lock)); 821 KASSERT(mutex_owned(&pmap->pm_lock));
822 KASSERT(pmap->pm_ncsw == lwp_pctr()); 822 KASSERT(pmap->pm_ncsw == lwp_pctr());
823 823
824#if defined(XENPV) && defined(__x86_64__) 824#if defined(XENPV) && defined(__x86_64__)
825 KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] != L4_BASE); 825 KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] != L4_BASE);
826 ci->ci_normal_pdes[PTP_LEVELS - 2] = L4_BASE; 826 ci->ci_normal_pdes[PTP_LEVELS - 2] = L4_BASE;
827#endif 827#endif
828 828
829 /* If not our own pmap, mark whatever's on the CPU now as lazy. */ 829 /* If not our own pmap, mark whatever's on the CPU now as lazy. */
830 KASSERT(ci->ci_tlbstate == TLBSTATE_VALID); 830 KASSERT(ci->ci_tlbstate == TLBSTATE_VALID);
831 mypmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); 831 mypmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
832 if (ci->ci_pmap == vm_map_pmap(&l->l_proc->p_vmspace->vm_map)) { 832 if (ci->ci_pmap == vm_map_pmap(&l->l_proc->p_vmspace->vm_map)) {
833 ci->ci_want_pmapload = 0; 833 ci->ci_want_pmapload = 0;
834 } else { 834 } else {
835 ci->ci_want_pmapload = (mypmap != pmap_kernel()); 835 ci->ci_want_pmapload = (mypmap != pmap_kernel());
836 ci->ci_tlbstate = TLBSTATE_LAZY; 836 ci->ci_tlbstate = TLBSTATE_LAZY;
837 } 837 }
838 838
839 /* Now safe to re-enable preemption. */ 839 /* Now safe to re-enable preemption. */
840 kpreempt_enable(); 840 kpreempt_enable();
841 841
842 /* Toss reference to other pmap taken earlier. */ 842 /* Toss reference to other pmap taken earlier. */
843 if (pmap2 != NULL) { 843 if (pmap2 != NULL) {
844 pmap_destroy(pmap2); 844 pmap_destroy(pmap2);
845 } 845 }
846} 846}
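/*
 * Minimal usage sketch for the pair above, for a user pmap (pl1_i() is
 * assumed here to be the usual x86 L1 index macro): lock the pmap,
 * map its PTEs in, read one entry, then unmap before dropping the
 * lock, as the contract in the comments requires.
 */
static pd_entry_t
demo_peek_pte(struct pmap *pmap, vaddr_t va)
{
	struct pmap *pmap2;
	pd_entry_t *ptes, pte;
	pd_entry_t * const *pdes;

	mutex_enter(&pmap->pm_lock);
	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
	pte = ptes[pl1_i(va)];		/* read the PTE while mapped in */
	pmap_unmap_ptes(pmap, pmap2);
	mutex_exit(&pmap->pm_lock);

	return pte;
}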
847 847
848inline static void 848inline static void
849pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte) 849pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte)
850{ 850{
851 851
852#if !defined(__x86_64__) 852#if !defined(__x86_64__)
853 if (curproc == NULL || curproc->p_vmspace == NULL || 853 if (curproc == NULL || curproc->p_vmspace == NULL ||
854 pm != vm_map_pmap(&curproc->p_vmspace->vm_map)) 854 pm != vm_map_pmap(&curproc->p_vmspace->vm_map))
855 return; 855 return;
856 856
857 if ((opte ^ npte) & PTE_X) 857 if ((opte ^ npte) & PTE_X)
858 pmap_update_pg(va); 858 pmap_update_pg(va);
859 859
860 /* 860 /*
861 * Executability was removed on the last executable change. 861 * Executability was removed on the last executable change.
862 * Reset the code segment to something conservative and 862 * Reset the code segment to something conservative and
863 * let the trap handler deal with setting the right limit. 863 * let the trap handler deal with setting the right limit.
 864 * We can't do that here because of locking constraints on the vm map. 864 * We can't do that here because of locking constraints on the vm map.
865 */ 865 */
866 866
867 if ((opte & PTE_X) && (npte & PTE_X) == 0 && va == pm->pm_hiexec) { 867 if ((opte & PTE_X) && (npte & PTE_X) == 0 && va == pm->pm_hiexec) {
868 struct trapframe *tf = curlwp->l_md.md_regs; 868 struct trapframe *tf = curlwp->l_md.md_regs;
869 869
870 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 870 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
871 pm->pm_hiexec = I386_MAX_EXE_ADDR; 871 pm->pm_hiexec = I386_MAX_EXE_ADDR;
872 } 872 }
873#endif /* !defined(__x86_64__) */ 873#endif /* !defined(__x86_64__) */
874} 874}
875 875
876#if !defined(__x86_64__) 876#if !defined(__x86_64__)
877/* 877/*
878 * Fixup the code segment to cover all potential executable mappings. 878 * Fixup the code segment to cover all potential executable mappings.
879 * returns 0 if no changes to the code segment were made. 879 * returns 0 if no changes to the code segment were made.
880 */ 880 */
881int 881int
882pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb) 882pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb)
883{ 883{
884 struct vm_map_entry *ent; 884 struct vm_map_entry *ent;
885 struct pmap *pm = vm_map_pmap(map); 885 struct pmap *pm = vm_map_pmap(map);
886 vaddr_t va = 0; 886 vaddr_t va = 0;
887 887
888 vm_map_lock_read(map); 888 vm_map_lock_read(map);
889 for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) { 889 for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) {
890 /* 890 /*
891 * This entry has greater va than the entries before. 891 * This entry has greater va than the entries before.
892 * We need to make it point to the last page, not past it. 892 * We need to make it point to the last page, not past it.
893 */ 893 */
894 if (ent->protection & VM_PROT_EXECUTE) 894 if (ent->protection & VM_PROT_EXECUTE)
895 va = trunc_page(ent->end) - PAGE_SIZE; 895 va = trunc_page(ent->end) - PAGE_SIZE;
896 } 896 }
897 vm_map_unlock_read(map); 897 vm_map_unlock_read(map);
898 if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL)) 898 if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL))
899 return 0; 899 return 0;
900 900
901 pm->pm_hiexec = va; 901 pm->pm_hiexec = va;
902 if (pm->pm_hiexec > I386_MAX_EXE_ADDR) { 902 if (pm->pm_hiexec > I386_MAX_EXE_ADDR) {
903 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); 903 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
904 } else { 904 } else {
905 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 905 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
906 return 0; 906 return 0;
907 } 907 }
908 return 1; 908 return 1;
909} 909}
910#endif /* !defined(__x86_64__) */ 910#endif /* !defined(__x86_64__) */
911 911
912void 912void
913pat_init(struct cpu_info *ci) 913pat_init(struct cpu_info *ci)
914{ 914{
915 uint64_t pat; 915 uint64_t pat;
916 916
917 if (!(ci->ci_feat_val[0] & CPUID_PAT)) 917 if (!(ci->ci_feat_val[0] & CPUID_PAT))
918 return; 918 return;
919 919
 920 /* We change WT to WC. Leave all other entries at their default values. */ 920 /* We change WT to WC. Leave all other entries at their default values. */
921 pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) | 921 pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) |
922 PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) | 922 PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) |
923 PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) | 923 PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) |
924 PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC); 924 PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC);
925 925
926 wrmsr(MSR_CR_PAT, pat); 926 wrmsr(MSR_CR_PAT, pat);
927 cpu_pat_enabled = true; 927 cpu_pat_enabled = true;
928} 928}
929 929
930static pt_entry_t 930static pt_entry_t
931pmap_pat_flags(u_int flags) 931pmap_pat_flags(u_int flags)
932{ 932{
933 u_int cacheflags = (flags & PMAP_CACHE_MASK); 933 u_int cacheflags = (flags & PMAP_CACHE_MASK);
934 934
935 if (!cpu_pat_enabled) { 935 if (!cpu_pat_enabled) {
936 switch (cacheflags) { 936 switch (cacheflags) {
937 case PMAP_NOCACHE: 937 case PMAP_NOCACHE:
938 case PMAP_NOCACHE_OVR: 938 case PMAP_NOCACHE_OVR:
939 /* results in PGC_UCMINUS on cpus which have 939 /* results in PGC_UCMINUS on cpus which have
940 * the cpuid PAT but PAT "disabled" 940 * the cpuid PAT but PAT "disabled"
941 */ 941 */
942 return PTE_PCD; 942 return PTE_PCD;
943 default: 943 default:
944 return 0; 944 return 0;
945 } 945 }
946 } 946 }
947 947
948 switch (cacheflags) { 948 switch (cacheflags) {
949 case PMAP_NOCACHE: 949 case PMAP_NOCACHE:
950 return PGC_UC; 950 return PGC_UC;
951 case PMAP_WRITE_COMBINE: 951 case PMAP_WRITE_COMBINE:
952 return PGC_WC; 952 return PGC_WC;
953 case PMAP_WRITE_BACK: 953 case PMAP_WRITE_BACK:
954 return PGC_WB; 954 return PGC_WB;
955 case PMAP_NOCACHE_OVR: 955 case PMAP_NOCACHE_OVR:
956 return PGC_UCMINUS; 956 return PGC_UCMINUS;
957 } 957 }
958 958
959 return 0; 959 return 0;
960} 960}
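
pmap_pat_flags() is how the MI cache hints (PMAP_NOCACHE, PMAP_WRITE_COMBINE, ...) become PTE cache-control bits; callers simply pass the hint in the flags argument of the enter/kenter paths. A hedged sketch of mapping one page of device memory write-combined through pmap_kenter_pa() below (va and pa are placeholder values, not from this file):

	/*
	 * Map one page write-combining; on CPUs without PAT the hint
	 * silently degrades to the default per pmap_pat_flags() above.
	 */
	pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE,
	    PMAP_WRITE_COMBINE);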
961 961
962/* 962/*
963 * p m a p k e n t e r f u n c t i o n s 963 * p m a p k e n t e r f u n c t i o n s
964 * 964 *
965 * functions to quickly enter/remove pages from the kernel address 965 * functions to quickly enter/remove pages from the kernel address
966 * space. pmap_kremove is exported to MI kernel. we make use of 966 * space. pmap_kremove is exported to MI kernel. we make use of
967 * the recursive PTE mappings. 967 * the recursive PTE mappings.
968 */ 968 */
969 969
970/* 970/*
971 * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking 971 * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking
972 * 972 *
973 * => no need to lock anything, assume va is already allocated 973 * => no need to lock anything, assume va is already allocated
974 * => should be faster than normal pmap enter function 974 * => should be faster than normal pmap enter function
975 */ 975 */
976void 976void
977pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 977pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
978{ 978{
979 pt_entry_t *pte, opte, npte; 979 pt_entry_t *pte, opte, npte;
980 980
981 KASSERT(!(prot & ~VM_PROT_ALL)); 981 KASSERT(!(prot & ~VM_PROT_ALL));
982 982
983 if (va < VM_MIN_KERNEL_ADDRESS) 983 if (va < VM_MIN_KERNEL_ADDRESS)
984 pte = vtopte(va); 984 pte = vtopte(va);
985 else 985 else
986 pte = kvtopte(va); 986 pte = kvtopte(va);
987#if defined(XENPV) && defined(DOM0OPS) 987#if defined(XENPV) && defined(DOM0OPS)
988 if (pa < pmap_pa_start || pa >= pmap_pa_end) { 988 if (pa < pmap_pa_start || pa >= pmap_pa_end) {
989#ifdef DEBUG 989#ifdef DEBUG
990 printf_nolog("%s: pa %#" PRIxPADDR " for va %#" PRIxVADDR 990 printf_nolog("%s: pa %#" PRIxPADDR " for va %#" PRIxVADDR
991 " outside range\n", __func__, pa, va); 991 " outside range\n", __func__, pa, va);
992#endif /* DEBUG */ 992#endif /* DEBUG */
993 npte = pa; 993 npte = pa;
994 } else 994 } else
995#endif /* XENPV && DOM0OPS */ 995#endif /* XENPV && DOM0OPS */
996 npte = pmap_pa2pte(pa); 996 npte = pmap_pa2pte(pa);
997 npte |= protection_codes[prot] | PTE_P | pmap_pg_g; 997 npte |= protection_codes[prot] | PTE_P | pmap_pg_g;
998 npte |= pmap_pat_flags(flags); 998 npte |= pmap_pat_flags(flags);
999 opte = pmap_pte_testset(pte, npte); /* zap! */ 999 opte = pmap_pte_testset(pte, npte); /* zap! */
1000 1000
1001 /* 1001 /*
1002 * XXX: make sure we are not dealing with a large page, since the only 1002 * XXX: make sure we are not dealing with a large page, since the only
1003 * large pages created are for the kernel image, and they should never 1003 * large pages created are for the kernel image, and they should never
1004 * be kentered. 1004 * be kentered.
1005 */ 1005 */
1006 KASSERTMSG(!(opte & PTE_PS), "PTE_PS va=%#"PRIxVADDR, va); 1006 KASSERTMSG(!(opte & PTE_PS), "PTE_PS va=%#"PRIxVADDR, va);
1007 1007
1008 if ((opte & (PTE_P | PTE_A)) == (PTE_P | PTE_A)) { 1008 if ((opte & (PTE_P | PTE_A)) == (PTE_P | PTE_A)) {
1009 /* This should not happen. */ 1009 /* This should not happen. */
1010 printf_nolog("%s: mapping already present\n", __func__); 1010 printf_nolog("%s: mapping already present\n", __func__);
1011 kpreempt_disable(); 1011 kpreempt_disable();
1012 pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER); 1012 pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER);
1013 kpreempt_enable(); 1013 kpreempt_enable();
1014 } 1014 }
1015} 1015}
1016 1016
1017__strict_weak_alias(pmap_kenter_ma, pmap_kenter_pa); 1017__strict_weak_alias(pmap_kenter_ma, pmap_kenter_pa);
1018 1018
1019#if defined(__x86_64__) 1019#if defined(__x86_64__)
1020/* 1020/*
 1021 * Change protection for a virtual address. Local to the current CPU only; 1021 * Change protection for a virtual address. Local to the current CPU only;
 1022 * no TLB shootdowns are issued. 1022 * no TLB shootdowns are issued.
1023 * 1023 *
1024 * => must be called with preemption disabled 1024 * => must be called with preemption disabled
1025 */ 1025 */
1026void 1026void
1027pmap_changeprot_local(vaddr_t va, vm_prot_t prot) 1027pmap_changeprot_local(vaddr_t va, vm_prot_t prot)
1028{ 1028{
1029 pt_entry_t *pte, opte, npte; 1029 pt_entry_t *pte, opte, npte;
1030 1030
1031 KASSERT(kpreempt_disabled()); 1031 KASSERT(kpreempt_disabled());
1032 1032
1033 if (va < VM_MIN_KERNEL_ADDRESS) 1033 if (va < VM_MIN_KERNEL_ADDRESS)
1034 pte = vtopte(va); 1034 pte = vtopte(va);
1035 else 1035 else
1036 pte = kvtopte(va); 1036 pte = kvtopte(va);
1037 1037
1038 npte = opte = *pte; 1038 npte = opte = *pte;
1039 1039
1040 if ((prot & VM_PROT_WRITE) != 0) 1040 if ((prot & VM_PROT_WRITE) != 0)
1041 npte |= PTE_W; 1041 npte |= PTE_W;
1042 else 1042 else
1043 npte &= ~(PTE_W|PTE_D); 1043 npte &= ~(PTE_W|PTE_D);
1044 1044
1045 if (opte != npte) { 1045 if (opte != npte) {
1046 pmap_pte_set(pte, npte); 1046 pmap_pte_set(pte, npte);
1047 pmap_pte_flush(); 1047 pmap_pte_flush();
1048 invlpg(va); 1048 invlpg(va);
1049 } 1049 }
1050} 1050}
1051#endif /* defined(__x86_64__) */ 1051#endif /* defined(__x86_64__) */
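
pmap_changeprot_local() (x86_64 only, per the #if above) touches only the local CPU's view of the mapping, via pmap_pte_set() plus invlpg() and with no shootdown, so a caller has to stay on that CPU for as long as the changed protection matters. An illustrative calling discipline, assuming va is a kernel address the caller owns and temporarily needs writable:

	kpreempt_disable();
	pmap_changeprot_local(va, VM_PROT_READ | VM_PROT_WRITE); /* set PTE_W locally */
	/* ... briefly write to the page ... */
	pmap_changeprot_local(va, VM_PROT_READ);		  /* clear PTE_W again */
	kpreempt_enable();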
1052 1052
1053/* 1053/*
1054 * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking 1054 * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking
1055 * 1055 *
1056 * => no need to lock anything 1056 * => no need to lock anything
1057 * => caller must dispose of any vm_page mapped in the va range 1057 * => caller must dispose of any vm_page mapped in the va range
1058 * => note: not an inline function 1058 * => note: not an inline function
1059 * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE 1059 * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE
1060 * => we assume kernel only unmaps valid addresses and thus don't bother 1060 * => we assume kernel only unmaps valid addresses and thus don't bother
1061 * checking the valid bit before doing TLB flushing 1061 * checking the valid bit before doing TLB flushing
1062 * => must be followed by call to pmap_update() before reuse of page 1062 * => must be followed by call to pmap_update() before reuse of page
1063 */ 1063 */
1064static void 1064static void
1065pmap_kremove1(vaddr_t sva, vsize_t len, bool localonly) 1065pmap_kremove1(vaddr_t sva, vsize_t len, bool localonly)
1066{ 1066{
1067 pt_entry_t *pte, opte; 1067 pt_entry_t *pte, opte;
1068 vaddr_t va, eva; 1068 vaddr_t va, eva;
1069 1069
1070 eva = sva + len; 1070 eva = sva + len;
1071 1071
1072 kpreempt_disable(); 1072 kpreempt_disable();
1073 for (va = sva; va < eva; va += PAGE_SIZE) { 1073 for (va = sva; va < eva; va += PAGE_SIZE) {
1074 pte = kvtopte(va); 1074 pte = kvtopte(va);
1075 opte = pmap_pte_testset(pte, 0); /* zap! */ 1075 opte = pmap_pte_testset(pte, 0); /* zap! */
1076 if ((opte & (PTE_P | PTE_A)) == (PTE_P | PTE_A) && !localonly) { 1076 if ((opte & (PTE_P | PTE_A)) == (PTE_P | PTE_A) && !localonly) {
1077 pmap_tlb_shootdown(pmap_kernel(), va, opte, 1077 pmap_tlb_shootdown(pmap_kernel(), va, opte,
1078 TLBSHOOT_KREMOVE); 1078 TLBSHOOT_KREMOVE);
1079 } 1079 }
1080 KASSERTMSG((opte & PTE_PS) == 0, 1080 KASSERTMSG((opte & PTE_PS) == 0,
1081 "va %#" PRIxVADDR " is a large page", va); 1081 "va %#" PRIxVADDR " is a large page", va);
1082 KASSERTMSG((opte & PTE_PVLIST) == 0, 1082 KASSERTMSG((opte & PTE_PVLIST) == 0,
1083 "va %#" PRIxVADDR " is a pv tracked page", va); 1083 "va %#" PRIxVADDR " is a pv tracked page", va);
1084 } 1084 }
1085 if (localonly) { 1085 if (localonly) {
1086 tlbflushg(); 1086 tlbflushg();
1087 } 1087 }
1088 kpreempt_enable(); 1088 kpreempt_enable();
1089} 1089}
1090 1090
1091void 1091void
1092pmap_kremove(vaddr_t sva, vsize_t len) 1092pmap_kremove(vaddr_t sva, vsize_t len)
1093{ 1093{
1094 1094
1095 pmap_kremove1(sva, len, false); 1095 pmap_kremove1(sva, len, false);
1096} 1096}
1097 1097
1098/* 1098/*
1099 * pmap_kremove_local: like pmap_kremove(), but only worry about 1099 * pmap_kremove_local: like pmap_kremove(), but only worry about
1100 * TLB invalidations on the current CPU. this is only intended 1100 * TLB invalidations on the current CPU. this is only intended
1101 * for use while writing kernel crash dumps, either after panic 1101 * for use while writing kernel crash dumps, either after panic
1102 * or via reboot -d. 1102 * or via reboot -d.
1103 */ 1103 */
1104void 1104void
1105pmap_kremove_local(vaddr_t sva, vsize_t len) 1105pmap_kremove_local(vaddr_t sva, vsize_t len)
1106{ 1106{
1107 1107
1108 pmap_kremove1(sva, len, true); 1108 pmap_kremove1(sva, len, true);
1109} 1109}
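
Taken together, kenter/kremove form the unmanaged fast path: no pv tracking, no pmap lock, and (as the comment above pmap_kremove1() requires) a pmap_update() before the backing page is reused. A minimal round trip, with va and pa as placeholders:

	pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
	/* ... access the mapping at va ... */
	pmap_kremove(va, PAGE_SIZE);
	pmap_update(pmap_kernel());	/* flush before the page is reused */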
1110 1110
1111/* 1111/*
1112 * p m a p i n i t f u n c t i o n s 1112 * p m a p i n i t f u n c t i o n s
1113 * 1113 *
1114 * pmap_bootstrap and pmap_init are called during system startup 1114 * pmap_bootstrap and pmap_init are called during system startup
1115 * to init the pmap module. pmap_bootstrap() does a low level 1115 * to init the pmap module. pmap_bootstrap() does a low level
1116 * init just to get things rolling. pmap_init() finishes the job. 1116 * init just to get things rolling. pmap_init() finishes the job.
1117 */ 1117 */
1118 1118
1119/* 1119/*
1120 * pmap_bootstrap_valloc: allocate a virtual address in the bootstrap area. 1120 * pmap_bootstrap_valloc: allocate a virtual address in the bootstrap area.
1121 * This function is to be used before any VM system has been set up. 1121 * This function is to be used before any VM system has been set up.
1122 * 1122 *
1123 * The va is taken from virtual_avail. 1123 * The va is taken from virtual_avail.
1124 */ 1124 */
1125static vaddr_t 1125static vaddr_t
1126pmap_bootstrap_valloc(size_t npages) 1126pmap_bootstrap_valloc(size_t npages)
1127{ 1127{
1128 vaddr_t va = virtual_avail; 1128 vaddr_t va = virtual_avail;
1129 virtual_avail += npages * PAGE_SIZE; 1129 virtual_avail += npages * PAGE_SIZE;
1130 return va; 1130 return va;
1131} 1131}
1132 1132
@@ -4086,1999 +4086,1999 @@ pmap_remove_pte(struct pmap *pmap, struc @@ -4086,1999 +4086,1999 @@ pmap_remove_pte(struct pmap *pmap, struc
4086 return true; 4086 return true;
4087} 4087}
4088 4088
4089static void 4089static void
4090pmap_remove_locked(struct pmap *pmap, vaddr_t sva, vaddr_t eva) 4090pmap_remove_locked(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
4091{ 4091{
4092 pt_entry_t *ptes; 4092 pt_entry_t *ptes;
4093 pd_entry_t pde; 4093 pd_entry_t pde;
4094 pd_entry_t * const *pdes; 4094 pd_entry_t * const *pdes;
4095 bool result; 4095 bool result;
4096 vaddr_t blkendva, va = sva; 4096 vaddr_t blkendva, va = sva;
4097 struct vm_page *ptp; 4097 struct vm_page *ptp;
4098 struct pmap *pmap2; 4098 struct pmap *pmap2;
4099 int lvl; 4099 int lvl;
4100 4100
4101 KASSERT(mutex_owned(&pmap->pm_lock)); 4101 KASSERT(mutex_owned(&pmap->pm_lock));
4102 4102
4103 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); 4103 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
4104 4104
4105 /* 4105 /*
4106 * removing one page? take shortcut function. 4106 * removing one page? take shortcut function.
4107 */ 4107 */
4108 4108
4109 if (va + PAGE_SIZE == eva) { 4109 if (va + PAGE_SIZE == eva) {
4110 if (pmap_pdes_valid(va, pdes, &pde, &lvl)) { 4110 if (pmap_pdes_valid(va, pdes, &pde, &lvl)) {
4111 KASSERT(lvl == 1); 4111 KASSERT(lvl == 1);
4112 4112
4113 /* Get PTP if non-kernel mapping. */ 4113 /* Get PTP if non-kernel mapping. */
4114 if (pmap != pmap_kernel()) { 4114 if (pmap != pmap_kernel()) {
4115 ptp = pmap_find_ptp(pmap, va, 1); 4115 ptp = pmap_find_ptp(pmap, va, 1);
4116 KASSERTMSG(ptp != NULL, 4116 KASSERTMSG(ptp != NULL,
4117 "%s: unmanaged PTP detected", __func__); 4117 "%s: unmanaged PTP detected", __func__);
4118 } else { 4118 } else {
4119 /* Never free kernel PTPs. */ 4119 /* Never free kernel PTPs. */
4120 ptp = NULL; 4120 ptp = NULL;
4121 } 4121 }
4122 4122
4123 result = pmap_remove_pte(pmap, ptp, 4123 result = pmap_remove_pte(pmap, ptp,
4124 &ptes[pl1_i(va)], va); 4124 &ptes[pl1_i(va)], va);
4125 4125
4126 /* 4126 /*
4127 * if mapping removed and the PTP is no longer 4127 * if mapping removed and the PTP is no longer
4128 * being used, free it! 4128 * being used, free it!
4129 */ 4129 */
4130 4130
4131 if (result && ptp && ptp->wire_count <= 1) 4131 if (result && ptp && ptp->wire_count <= 1)
4132 pmap_free_ptp(pmap, ptp, va, ptes, pdes); 4132 pmap_free_ptp(pmap, ptp, va, ptes, pdes);
4133 } 4133 }
4134 } else for (/* null */ ; va < eva ; va = blkendva) { 4134 } else for (/* null */ ; va < eva ; va = blkendva) {
4135 /* determine range of block */ 4135 /* determine range of block */
4136 blkendva = x86_round_pdr(va+1); 4136 blkendva = x86_round_pdr(va+1);
4137 if (blkendva > eva) 4137 if (blkendva > eva)
4138 blkendva = eva; 4138 blkendva = eva;
4139 4139
4140 if (!pmap_pdes_valid(va, pdes, &pde, &lvl)) { 4140 if (!pmap_pdes_valid(va, pdes, &pde, &lvl)) {
4141 /* Skip a range corresponding to an invalid pde. */ 4141 /* Skip a range corresponding to an invalid pde. */
4142 blkendva = (va & ptp_frames[lvl - 1]) + nbpd[lvl - 1]; 4142 blkendva = (va & ptp_frames[lvl - 1]) + nbpd[lvl - 1];
4143 continue; 4143 continue;
4144 } 4144 }
4145 KASSERT(lvl == 1); 4145 KASSERT(lvl == 1);
4146 4146
4147 /* Get PTP if non-kernel mapping. */ 4147 /* Get PTP if non-kernel mapping. */
4148 if (pmap != pmap_kernel()) { 4148 if (pmap != pmap_kernel()) {
4149 ptp = pmap_find_ptp(pmap, va, 1); 4149 ptp = pmap_find_ptp(pmap, va, 1);
4150 KASSERTMSG(ptp != NULL, "%s: unmanaged PTP detected", 4150 KASSERTMSG(ptp != NULL, "%s: unmanaged PTP detected",
4151 __func__); 4151 __func__);
4152 } else { 4152 } else {
4153 /* Never free kernel PTPs. */ 4153 /* Never free kernel PTPs. */
4154 ptp = NULL; 4154 ptp = NULL;
4155 } 4155 }
4156 4156
4157 pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[pl1_i(va)], va, 4157 pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[pl1_i(va)], va,
4158 blkendva); 4158 blkendva);
4159 4159
4160 /* If PTP is no longer being used, free it. */ 4160 /* If PTP is no longer being used, free it. */
4161 if (ptp && ptp->wire_count <= 1) { 4161 if (ptp && ptp->wire_count <= 1) {
4162 pmap_free_ptp(pmap, ptp, va, ptes, pdes); 4162 pmap_free_ptp(pmap, ptp, va, ptes, pdes);
4163 } 4163 }
4164 } 4164 }
4165 pmap_unmap_ptes(pmap, pmap2); 4165 pmap_unmap_ptes(pmap, pmap2);
4166 pmap_drain_pv(pmap); 4166 pmap_drain_pv(pmap);
4167} 4167}
4168 4168
4169/* 4169/*
4170 * pmap_remove: mapping removal function. 4170 * pmap_remove: mapping removal function.
4171 * 4171 *
4172 * => caller should not be holding any pmap locks 4172 * => caller should not be holding any pmap locks
4173 */ 4173 */
4174void 4174void
4175pmap_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva) 4175pmap_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
4176{ 4176{
4177 if (__predict_false(pmap->pm_remove != NULL)) { 4177 if (__predict_false(pmap->pm_remove != NULL)) {
4178 (*pmap->pm_remove)(pmap, sva, eva); 4178 (*pmap->pm_remove)(pmap, sva, eva);
4179 return; 4179 return;
4180 } 4180 }
4181 4181
4182 mutex_enter(&pmap->pm_lock); 4182 mutex_enter(&pmap->pm_lock);
4183 pmap_remove_locked(pmap, sva, eva); 4183 pmap_remove_locked(pmap, sva, eva);
4184 mutex_exit(&pmap->pm_lock); 4184 mutex_exit(&pmap->pm_lock);
4185} 4185}
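
pmap_remove() and the locked variant above queue their TLB invalidations through pmap_tlb_shootdown() rather than flushing synchronously, so a caller is expected to finish the batch with pmap_update() before depending on the unmappings. A hypothetical caller, with sva/eva as placeholders:

	pmap_remove(pmap, sva, eva);
	pmap_update(pmap);		/* process the queued shootdowns */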
4186 4186
4187/* 4187/*
4188 * pmap_sync_pv: clear pte bits and return the old value of the pp_attrs. 4188 * pmap_sync_pv: clear pte bits and return the old value of the pp_attrs.
4189 * 4189 *
4190 * => The 'clearbits' parameter is either ~0 or PP_ATTRS_... 4190 * => The 'clearbits' parameter is either ~0 or PP_ATTRS_...
4191 * => Caller should disable kernel preemption. 4191 * => Caller should disable kernel preemption.
4192 * => issues tlb shootdowns if necessary. 4192 * => issues tlb shootdowns if necessary.
4193 */ 4193 */
4194static int 4194static int
4195pmap_sync_pv(struct pv_pte *pvpte, paddr_t pa, int clearbits, uint8_t *oattrs, 4195pmap_sync_pv(struct pv_pte *pvpte, paddr_t pa, int clearbits, uint8_t *oattrs,
4196 pt_entry_t *optep) 4196 pt_entry_t *optep)
4197{ 4197{
4198 struct pmap *pmap; 4198 struct pmap *pmap;
4199 struct vm_page *ptp; 4199 struct vm_page *ptp;
4200 vaddr_t va; 4200 vaddr_t va;
4201 pt_entry_t *ptep; 4201 pt_entry_t *ptep;
4202 pt_entry_t opte; 4202 pt_entry_t opte;
4203 pt_entry_t npte; 4203 pt_entry_t npte;
4204 pt_entry_t expect; 4204 pt_entry_t expect;
4205 bool need_shootdown; 4205 bool need_shootdown;
4206 4206
4207 ptp = pvpte->pte_ptp; 4207 ptp = pvpte->pte_ptp;
4208 va = pvpte->pte_va; 4208 va = pvpte->pte_va;
4209 KASSERT(ptp == NULL || ptp->uobject != NULL); 4209 KASSERT(ptp == NULL || ptp->uobject != NULL);
4210 KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset); 4210 KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
4211 pmap = ptp_to_pmap(ptp); 4211 pmap = ptp_to_pmap(ptp);
4212 KASSERT(kpreempt_disabled()); 4212 KASSERT(kpreempt_disabled());
4213 4213
4214 if (__predict_false(pmap->pm_sync_pv != NULL)) { 4214 if (__predict_false(pmap->pm_sync_pv != NULL)) {
4215 return (*pmap->pm_sync_pv)(ptp, va, pa, clearbits, oattrs, 4215 return (*pmap->pm_sync_pv)(ptp, va, pa, clearbits, oattrs,
4216 optep); 4216 optep);
4217 } 4217 }
4218 4218
4219 expect = pmap_pa2pte(pa) | PTE_P; 4219 expect = pmap_pa2pte(pa) | PTE_P;
4220 4220
4221 if (clearbits != ~0) { 4221 if (clearbits != ~0) {
4222 KASSERT((clearbits & ~(PP_ATTRS_D|PP_ATTRS_A|PP_ATTRS_W)) == 0); 4222 KASSERT((clearbits & ~(PP_ATTRS_D|PP_ATTRS_A|PP_ATTRS_W)) == 0);
4223 clearbits = pmap_pp_attrs_to_pte(clearbits); 4223 clearbits = pmap_pp_attrs_to_pte(clearbits);
4224 } 4224 }
4225 4225
4226 ptep = pmap_map_pte(pmap, ptp, va); 4226 ptep = pmap_map_pte(pmap, ptp, va);
4227 do { 4227 do {
4228 opte = *ptep; 4228 opte = *ptep;
4229 KASSERT((opte & (PTE_D | PTE_A)) != PTE_D); 4229 KASSERT((opte & (PTE_D | PTE_A)) != PTE_D);
4230 KASSERT((opte & (PTE_A | PTE_P)) != PTE_A); 4230 KASSERT((opte & (PTE_A | PTE_P)) != PTE_A);
4231 KASSERT(opte == 0 || (opte & PTE_P) != 0); 4231 KASSERT(opte == 0 || (opte & PTE_P) != 0);
4232 if ((opte & (PTE_FRAME | PTE_P)) != expect) { 4232 if ((opte & (PTE_FRAME | PTE_P)) != expect) {
4233 /* 4233 /*
4234 * We lost a race with a V->P operation like 4234 * We lost a race with a V->P operation like
4235 * pmap_remove(). Wait for the competitor 4235 * pmap_remove(). Wait for the competitor
 4236 * to finish reflecting pte bits into pp_attrs. 4236 * to finish reflecting pte bits into pp_attrs.
4237 */ 4237 */
4238 pmap_unmap_pte(); 4238 pmap_unmap_pte();
4239 return EAGAIN; 4239 return EAGAIN;
4240 } 4240 }
4241 4241
4242 /* 4242 /*
4243 * Check if there's anything to do on this PTE. 4243 * Check if there's anything to do on this PTE.
4244 */ 4244 */
4245 if ((opte & clearbits) == 0) { 4245 if ((opte & clearbits) == 0) {
4246 need_shootdown = false; 4246 need_shootdown = false;
4247 break; 4247 break;
4248 } 4248 }
4249 4249
4250 /* 4250 /*
4251 * We need a shootdown if the PTE is cached (PTE_A) ... 4251 * We need a shootdown if the PTE is cached (PTE_A) ...
4252 * ... Unless we are clearing only the PTE_W bit and 4252 * ... Unless we are clearing only the PTE_W bit and
4253 * it isn't cached as RW (PTE_D). 4253 * it isn't cached as RW (PTE_D).
4254 */ 4254 */
4255 need_shootdown = (opte & PTE_A) != 0 && 4255 need_shootdown = (opte & PTE_A) != 0 &&
4256 !(clearbits == PTE_W && (opte & PTE_D) == 0); 4256 !(clearbits == PTE_W && (opte & PTE_D) == 0);
4257 4257
4258 npte = opte & ~clearbits; 4258 npte = opte & ~clearbits;
4259 4259
4260 /* 4260 /*
4261 * If we need a shootdown anyway, clear PTE_A and PTE_D. 4261 * If we need a shootdown anyway, clear PTE_A and PTE_D.
4262 */ 4262 */
4263 if (need_shootdown) { 4263 if (need_shootdown) {
4264 npte &= ~(PTE_A | PTE_D); 4264 npte &= ~(PTE_A | PTE_D);
4265 } 4265 }
4266 KASSERT((npte & (PTE_D | PTE_A)) != PTE_D); 4266 KASSERT((npte & (PTE_D | PTE_A)) != PTE_D);
4267 KASSERT((npte & (PTE_A | PTE_P)) != PTE_A); 4267 KASSERT((npte & (PTE_A | PTE_P)) != PTE_A);
4268 KASSERT(npte == 0 || (opte & PTE_P) != 0); 4268 KASSERT(npte == 0 || (opte & PTE_P) != 0);
4269 } while (pmap_pte_cas(ptep, opte, npte) != opte); 4269 } while (pmap_pte_cas(ptep, opte, npte) != opte);
4270 4270
4271 if (need_shootdown) { 4271 if (need_shootdown) {
4272 pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_SYNC_PV); 4272 pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_SYNC_PV);
4273 } 4273 }
4274 pmap_unmap_pte(); 4274 pmap_unmap_pte();
4275 4275
4276 *oattrs = pmap_pte_to_pp_attrs(opte); 4276 *oattrs = pmap_pte_to_pp_attrs(opte);
4277 if (optep != NULL) 4277 if (optep != NULL)
4278 *optep = opte; 4278 *optep = opte;
4279 return 0; 4279 return 0;
4280} 4280}
4281 4281
4282static void 4282static void
4283pmap_pp_remove_ent(struct pmap *pmap, struct vm_page *ptp, pt_entry_t opte, 4283pmap_pp_remove_ent(struct pmap *pmap, struct vm_page *ptp, pt_entry_t opte,
4284 vaddr_t va) 4284 vaddr_t va)
4285{ 4285{
4286 struct pmap *pmap2; 4286 struct pmap *pmap2;
4287 pt_entry_t *ptes; 4287 pt_entry_t *ptes;
4288 pd_entry_t * const *pdes; 4288 pd_entry_t * const *pdes;
4289 4289
4290 KASSERT(mutex_owned(&pmap->pm_lock)); 4290 KASSERT(mutex_owned(&pmap->pm_lock));
4291 4291
4292 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); 4292 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
4293 pmap_stats_update_bypte(pmap, 0, opte); 4293 pmap_stats_update_bypte(pmap, 0, opte);
4294 ptp->wire_count--; 4294 ptp->wire_count--;
4295 if (ptp->wire_count <= 1) { 4295 if (ptp->wire_count <= 1) {
4296 pmap_free_ptp(pmap, ptp, va, ptes, pdes); 4296 pmap_free_ptp(pmap, ptp, va, ptes, pdes);
4297 } 4297 }
4298 pmap_unmap_ptes(pmap, pmap2); 4298 pmap_unmap_ptes(pmap, pmap2);
4299} 4299}
4300 4300
4301static void 4301static void
4302pmap_pp_remove(struct pmap_page *pp, paddr_t pa) 4302pmap_pp_remove(struct pmap_page *pp, paddr_t pa)
4303{ 4303{
4304 struct pv_pte *pvpte; 4304 struct pv_pte *pvpte;
4305 struct vm_page *ptp; 4305 struct vm_page *ptp;
4306 uintptr_t sum; 4306 uintptr_t sum;
4307 uint8_t oattrs; 4307 uint8_t oattrs;
4308 bool locked; 4308 bool locked;
4309 4309
4310 /* 4310 /*
4311 * Do an unlocked check to see if the page has no mappings, eg when 4311 * Do an unlocked check to see if the page has no mappings, eg when
4312 * pmap_remove_all() was called before amap_wipeout() for a process 4312 * pmap_remove_all() was called before amap_wipeout() for a process
4313 * private amap - common. The page being removed must be on the way 4313 * private amap - common. The page being removed must be on the way
4314 * out, so we don't have to worry about concurrent attempts to enter 4314 * out, so we don't have to worry about concurrent attempts to enter
4315 * it (otherwise the caller either doesn't care or has screwed up). 4315 * it (otherwise the caller either doesn't care or has screwed up).
4316 */ 4316 */
4317 sum = (uintptr_t)atomic_load_relaxed(&pp->pp_pte.pte_va); 4317 sum = (uintptr_t)atomic_load_relaxed(&pp->pp_pte.pte_va);
4318 sum |= (uintptr_t)atomic_load_relaxed(&pp->pp_pte.pte_ptp); 4318 sum |= (uintptr_t)atomic_load_relaxed(&pp->pp_pte.pte_ptp);
4319 sum |= (uintptr_t)atomic_load_relaxed(&pp->pp_pvlist.lh_first); 4319 sum |= (uintptr_t)atomic_load_relaxed(&pp->pp_pvlist.lh_first);
4320 if (sum == 0) { 4320 if (sum == 0) {
4321 return; 4321 return;
4322 } 4322 }
4323 4323
4324 kpreempt_disable(); 4324 kpreempt_disable();
4325 for (;;) { 4325 for (;;) {
4326 struct pmap *pmap; 4326 struct pmap *pmap;
4327 struct pv_entry *pve; 4327 struct pv_entry *pve;
4328 pt_entry_t opte; 4328 pt_entry_t opte;
4329 vaddr_t va; 4329 vaddr_t va;
4330 4330
4331 mutex_spin_enter(&pp->pp_lock); 4331 mutex_spin_enter(&pp->pp_lock);
4332 if ((pvpte = pv_pte_first(pp)) == NULL) { 4332 if ((pvpte = pv_pte_first(pp)) == NULL) {
4333 mutex_spin_exit(&pp->pp_lock); 4333 mutex_spin_exit(&pp->pp_lock);
4334 break; 4334 break;
4335 } 4335 }
4336 4336
4337 /* 4337 /*
4338 * Add a reference to the pmap before clearing the pte. 4338 * Add a reference to the pmap before clearing the pte.
4339 * Otherwise the pmap can disappear behind us. 4339 * Otherwise the pmap can disappear behind us.
4340 */ 4340 */
4341 ptp = pvpte->pte_ptp; 4341 ptp = pvpte->pte_ptp;
4342 pmap = ptp_to_pmap(ptp); 4342 pmap = ptp_to_pmap(ptp);
4343 KASSERT(pmap->pm_obj[0].uo_refs > 0); 4343 KASSERT(pmap->pm_obj[0].uo_refs > 0);
4344 if (ptp != NULL) { 4344 if (ptp != NULL) {
4345 pmap_reference(pmap); 4345 pmap_reference(pmap);
4346 } 4346 }
4347 4347
4348 /* 4348 /*
4349 * Now try to lock it. We need a direct handoff between 4349 * Now try to lock it. We need a direct handoff between
4350 * pp_lock and pm_lock to know the pv_entry is kept intact 4350 * pp_lock and pm_lock to know the pv_entry is kept intact
4351 * and kept associated with this pmap. If that can't be 4351 * and kept associated with this pmap. If that can't be
4352 * had, wait for the pmap's lock to become free and then 4352 * had, wait for the pmap's lock to become free and then
4353 * retry. 4353 * retry.
4354 */ 4354 */
4355 locked = mutex_tryenter(&pmap->pm_lock); 4355 locked = mutex_tryenter(&pmap->pm_lock);
4356 mutex_spin_exit(&pp->pp_lock); 4356 mutex_spin_exit(&pp->pp_lock);
4357 if (!locked) { 4357 if (!locked) {
4358 mutex_enter(&pmap->pm_lock); 4358 mutex_enter(&pmap->pm_lock);
4359 /* nothing, just wait for it */ 4359 /* nothing, just wait for it */
4360 mutex_exit(&pmap->pm_lock); 4360 mutex_exit(&pmap->pm_lock);
4361 if (ptp != NULL) { 4361 if (ptp != NULL) {
4362 pmap_destroy(pmap); 4362 pmap_destroy(pmap);
4363 } 4363 }
4364 continue; 4364 continue;
4365 } 4365 }
4366 va = pvpte->pte_va; 4366 va = pvpte->pte_va;
4367 4367
4368 KASSERTMSG(pmap->pm_stats.resident_count > PDP_SIZE, 4368 KASSERTMSG(pmap->pm_stats.resident_count > PDP_SIZE,
4369 "va %lx pmap %p ptp %p is empty", va, pmap, ptp); 4369 "va %lx pmap %p ptp %p is empty", va, pmap, ptp);
4370 KASSERTMSG(ptp == NULL || (ptp->flags & PG_FREE) == 0, 4370 KASSERTMSG(ptp == NULL || (ptp->flags & PG_FREE) == 0,
4371 "va %lx pmap %p ptp %p is free", va, pmap, ptp); 4371 "va %lx pmap %p ptp %p is free", va, pmap, ptp);
4372 KASSERTMSG(ptp == NULL || ptp->wire_count > 1, 4372 KASSERTMSG(ptp == NULL || ptp->wire_count > 1,
4373 "va %lx pmap %p ptp %p is empty", va, pmap, ptp); 4373 "va %lx pmap %p ptp %p is empty", va, pmap, ptp);
4374 4374
4375#ifdef DEBUG 4375#ifdef DEBUG
4376 pmap_check_pv(pmap, ptp, pp, pvpte->pte_va, true); 4376 pmap_check_pv(pmap, ptp, pp, pvpte->pte_va, true);
4377 rb_tree_t *tree = (ptp != NULL ? 4377 rb_tree_t *tree = (ptp != NULL ?
4378 &VM_PAGE_TO_PP(ptp)->pp_rb : &pmap_kernel_rb); 4378 &VM_PAGE_TO_PP(ptp)->pp_rb : &pmap_kernel_rb);
4379 pve = pmap_treelookup_pv(pmap, ptp, tree, va); 4379 pve = pmap_treelookup_pv(pmap, ptp, tree, va);
4380 if (pve == NULL) { 4380 if (pve == NULL) {
4381 KASSERTMSG(&pp->pp_pte == pvpte, 4381 KASSERTMSG(&pp->pp_pte == pvpte,
4382 "va %lx pmap %p ptp %p pvpte %p pve %p oops 1", 4382 "va %lx pmap %p ptp %p pvpte %p pve %p oops 1",
4383 va, pmap, ptp, pvpte, pve); 4383 va, pmap, ptp, pvpte, pve);
4384 } else { 4384 } else {
4385 KASSERTMSG(&pve->pve_pte == pvpte, 4385 KASSERTMSG(&pve->pve_pte == pvpte,
4386 "va %lx pmap %p ptp %p pvpte %p pve %p oops 2", 4386 "va %lx pmap %p ptp %p pvpte %p pve %p oops 2",
4387 va, pmap, ptp, pvpte, pve); 4387 va, pmap, ptp, pvpte, pve);
4388 } 4388 }
4389#endif 4389#endif
4390 4390
4391 if (pmap_sync_pv(pvpte, pa, ~0, &oattrs, &opte)) { 4391 if (pmap_sync_pv(pvpte, pa, ~0, &oattrs, &opte)) {
4392 panic("pmap_pp_remove: mapping not present"); 4392 panic("pmap_pp_remove: mapping not present");
4393 } 4393 }
4394 4394
4395 pve = pmap_lookup_pv(pmap, ptp, pp, va); 4395 pve = pmap_lookup_pv(pmap, ptp, pp, va);
4396 pmap_remove_pv(pmap, pp, ptp, va, pve, oattrs); 4396 pmap_remove_pv(pmap, pp, ptp, va, pve, oattrs);
4397 4397
4398 /* Update the PTP reference count. Free if last reference. */ 4398 /* Update the PTP reference count. Free if last reference. */
4399 if (ptp != NULL) { 4399 if (ptp != NULL) {
4400 KASSERT(pmap != pmap_kernel()); 4400 KASSERT(pmap != pmap_kernel());
4401 pmap_tlb_shootnow(); 4401 pmap_tlb_shootnow();
4402 if (__predict_false(pmap->pm_pp_remove_ent != NULL)) { 4402 if (__predict_false(pmap->pm_pp_remove_ent != NULL)) {
4403 (*pmap->pm_pp_remove_ent)(pmap, ptp, opte, va); 4403 (*pmap->pm_pp_remove_ent)(pmap, ptp, opte, va);
4404 } else { 4404 } else {
4405 pmap_pp_remove_ent(pmap, ptp, opte, va); 4405 pmap_pp_remove_ent(pmap, ptp, opte, va);
4406 } 4406 }
4407 } else { 4407 } else {
4408 KASSERT(pmap == pmap_kernel()); 4408 KASSERT(pmap == pmap_kernel());
4409 pmap_stats_update_bypte(pmap, 0, opte); 4409 pmap_stats_update_bypte(pmap, 0, opte);
4410 } 4410 }
4411 pmap_tlb_shootnow(); 4411 pmap_tlb_shootnow();
4412 pmap_drain_pv(pmap); 4412 pmap_drain_pv(pmap);
4413 mutex_exit(&pmap->pm_lock); 4413 mutex_exit(&pmap->pm_lock);
4414 if (ptp != NULL) { 4414 if (ptp != NULL) {
4415 pmap_destroy(pmap); 4415 pmap_destroy(pmap);
4416 } 4416 }
4417 } 4417 }
4418 kpreempt_enable(); 4418 kpreempt_enable();
4419} 4419}
4420 4420
4421/* 4421/*
4422 * pmap_page_remove: remove a managed vm_page from all pmaps that map it 4422 * pmap_page_remove: remove a managed vm_page from all pmaps that map it
4423 * 4423 *
4424 * => R/M bits are sync'd back to attrs 4424 * => R/M bits are sync'd back to attrs
4425 */ 4425 */
4426void 4426void
4427pmap_page_remove(struct vm_page *pg) 4427pmap_page_remove(struct vm_page *pg)
4428{ 4428{
4429 struct pmap_page *pp; 4429 struct pmap_page *pp;
4430 paddr_t pa; 4430 paddr_t pa;
4431 4431
4432 pp = VM_PAGE_TO_PP(pg); 4432 pp = VM_PAGE_TO_PP(pg);
4433 pa = VM_PAGE_TO_PHYS(pg); 4433 pa = VM_PAGE_TO_PHYS(pg);
4434 pmap_pp_remove(pp, pa); 4434 pmap_pp_remove(pp, pa);
4435} 4435}
4436 4436
4437/* 4437/*
4438 * pmap_pv_remove: remove an unmanaged pv-tracked page from all pmaps 4438 * pmap_pv_remove: remove an unmanaged pv-tracked page from all pmaps
4439 * that map it 4439 * that map it
4440 */ 4440 */
4441void 4441void
4442pmap_pv_remove(paddr_t pa) 4442pmap_pv_remove(paddr_t pa)
4443{ 4443{
4444 struct pmap_page *pp; 4444 struct pmap_page *pp;
4445 4445
4446 pp = pmap_pv_tracked(pa); 4446 pp = pmap_pv_tracked(pa);
4447 if (pp == NULL) 4447 if (pp == NULL)
4448 panic("%s: page not pv-tracked: %#"PRIxPADDR, __func__, pa); 4448 panic("%s: page not pv-tracked: %#"PRIxPADDR, __func__, pa);
4449 pmap_pp_remove(pp, pa); 4449 pmap_pp_remove(pp, pa);
4450} 4450}
4451 4451
4452/* 4452/*
4453 * p m a p a t t r i b u t e f u n c t i o n s 4453 * p m a p a t t r i b u t e f u n c t i o n s
4454 * functions that test/change managed page's attributes 4454 * functions that test/change managed page's attributes
4455 * since a page can be mapped multiple times we must check each PTE that 4455 * since a page can be mapped multiple times we must check each PTE that
4456 * maps it by going down the pv lists. 4456 * maps it by going down the pv lists.
4457 */ 4457 */
4458 4458
4459/* 4459/*
4460 * pmap_test_attrs: test a page's attributes 4460 * pmap_test_attrs: test a page's attributes
4461 */ 4461 */
4462bool 4462bool
4463pmap_test_attrs(struct vm_page *pg, unsigned testbits) 4463pmap_test_attrs(struct vm_page *pg, unsigned testbits)
4464{ 4464{
4465 struct pmap_page *pp; 4465 struct pmap_page *pp;
4466 struct pv_pte *pvpte; 4466 struct pv_pte *pvpte;
4467 struct pmap *pmap; 4467 struct pmap *pmap;
4468 uint8_t oattrs; 4468 uint8_t oattrs;
4469 u_int result; 4469 u_int result;
4470 paddr_t pa; 4470 paddr_t pa;
4471 4471
4472 pp = VM_PAGE_TO_PP(pg); 4472 pp = VM_PAGE_TO_PP(pg);
4473 if ((pp->pp_attrs & testbits) != 0) { 4473 if ((pp->pp_attrs & testbits) != 0) {
4474 return true; 4474 return true;
4475 } 4475 }
4476 pa = VM_PAGE_TO_PHYS(pg); 4476 pa = VM_PAGE_TO_PHYS(pg);
4477 startover: 4477 startover:
4478 mutex_spin_enter(&pp->pp_lock); 4478 mutex_spin_enter(&pp->pp_lock);
4479 for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) { 4479 for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) {
4480 if ((pp->pp_attrs & testbits) != 0) { 4480 if ((pp->pp_attrs & testbits) != 0) {
4481 break; 4481 break;
4482 } 4482 }
4483 if (pmap_sync_pv(pvpte, pa, 0, &oattrs, NULL)) { 4483 if (pmap_sync_pv(pvpte, pa, 0, &oattrs, NULL)) {
4484 /* 4484 /*
4485 * raced with a V->P operation. wait for the other 4485 * raced with a V->P operation. wait for the other
 4486 * side to finish by acquiring pmap's lock. if no 4486 * side to finish by acquiring pmap's lock. if no
4487 * wait, updates to pp_attrs by the other side may 4487 * wait, updates to pp_attrs by the other side may
4488 * go unseen. 4488 * go unseen.
4489 */ 4489 */
4490 pmap = ptp_to_pmap(pvpte->pte_ptp); 4490 pmap = ptp_to_pmap(pvpte->pte_ptp);
4491 pmap_reference(pmap); 4491 pmap_reference(pmap);
4492 mutex_spin_exit(&pp->pp_lock); 4492 mutex_spin_exit(&pp->pp_lock);
4493 mutex_enter(&pmap->pm_lock); 4493 mutex_enter(&pmap->pm_lock);
4494 /* nothing. */ 4494 /* nothing. */
4495 mutex_exit(&pmap->pm_lock); 4495 mutex_exit(&pmap->pm_lock);
4496 pmap_destroy(pmap); 4496 pmap_destroy(pmap);
4497 goto startover; 4497 goto startover;
4498 } 4498 }
4499 pp->pp_attrs |= oattrs; 4499 pp->pp_attrs |= oattrs;
4500 } 4500 }
4501 result = pp->pp_attrs & testbits; 4501 result = pp->pp_attrs & testbits;
4502 mutex_spin_exit(&pp->pp_lock); 4502 mutex_spin_exit(&pp->pp_lock);
4503 4503
4504 /* 4504 /*
 4505 * note that we will exit the for loop early (via break) if 4505 * note that we will exit the for loop early (via break) if
4506 * we have found the bits we are testing for. 4506 * we have found the bits we are testing for.
4507 */ 4507 */
4508 4508
4509 return result != 0; 4509 return result != 0;
4510} 4510}
4511 4511
4512static bool 4512static bool
4513pmap_pp_clear_attrs(struct pmap_page *pp, paddr_t pa, unsigned clearbits) 4513pmap_pp_clear_attrs(struct pmap_page *pp, paddr_t pa, unsigned clearbits)
4514{ 4514{
4515 struct pv_pte *pvpte; 4515 struct pv_pte *pvpte;
4516 struct pmap *pmap; 4516 struct pmap *pmap;
4517 uint8_t oattrs; 4517 uint8_t oattrs;
4518 u_int result; 4518 u_int result;
4519 4519
4520startover: 4520startover:
4521 mutex_spin_enter(&pp->pp_lock); 4521 mutex_spin_enter(&pp->pp_lock);
4522 for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) { 4522 for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) {
4523 if (pmap_sync_pv(pvpte, pa, clearbits, &oattrs, NULL)) { 4523 if (pmap_sync_pv(pvpte, pa, clearbits, &oattrs, NULL)) {
4524 /* 4524 /*
4525 * raced with a V->P operation. wait for the other 4525 * raced with a V->P operation. wait for the other
 4526 * side to finish by acquiring pmap's lock. it is 4526 * side to finish by acquiring pmap's lock. it is
4527 * probably unmapping the page, and it will be gone 4527 * probably unmapping the page, and it will be gone
4528 * when the loop is restarted. 4528 * when the loop is restarted.
4529 */ 4529 */
4530 pmap = ptp_to_pmap(pvpte->pte_ptp); 4530 pmap = ptp_to_pmap(pvpte->pte_ptp);
4531 pmap_reference(pmap); 4531 pmap_reference(pmap);
4532 mutex_spin_exit(&pp->pp_lock); 4532 mutex_spin_exit(&pp->pp_lock);
4533 mutex_enter(&pmap->pm_lock); 4533 mutex_enter(&pmap->pm_lock);
4534 /* nothing. */ 4534 /* nothing. */
4535 mutex_exit(&pmap->pm_lock); 4535 mutex_exit(&pmap->pm_lock);
4536 pmap_destroy(pmap); 4536 pmap_destroy(pmap);
4537 goto startover; 4537 goto startover;
4538 } 4538 }
4539 pp->pp_attrs |= oattrs; 4539 pp->pp_attrs |= oattrs;
4540 } 4540 }
4541 result = pp->pp_attrs & clearbits; 4541 result = pp->pp_attrs & clearbits;
4542 pp->pp_attrs &= ~clearbits; 4542 pp->pp_attrs &= ~clearbits;
4543 pmap_tlb_shootnow(); 4543 pmap_tlb_shootnow();
4544 mutex_spin_exit(&pp->pp_lock); 4544 mutex_spin_exit(&pp->pp_lock);
4545 4545
4546 return result != 0; 4546 return result != 0;
4547} 4547}
4548 4548
4549/* 4549/*
4550 * pmap_clear_attrs: clear the specified attribute for a page. 4550 * pmap_clear_attrs: clear the specified attribute for a page.
4551 * 4551 *
4552 * => we return true if we cleared one of the bits we were asked to 4552 * => we return true if we cleared one of the bits we were asked to
4553 */ 4553 */
4554bool 4554bool
4555pmap_clear_attrs(struct vm_page *pg, unsigned clearbits) 4555pmap_clear_attrs(struct vm_page *pg, unsigned clearbits)
4556{ 4556{
4557 struct pmap_page *pp; 4557 struct pmap_page *pp;
4558 paddr_t pa; 4558 paddr_t pa;
4559 4559
4560 pp = VM_PAGE_TO_PP(pg); 4560 pp = VM_PAGE_TO_PP(pg);
4561 pa = VM_PAGE_TO_PHYS(pg); 4561 pa = VM_PAGE_TO_PHYS(pg);
4562 4562
4563 /* 4563 /*
4564 * If this is a new page, assert it has no mappings and simply zap 4564 * If this is a new page, assert it has no mappings and simply zap
4565 * the stored attributes without taking any locks. 4565 * the stored attributes without taking any locks.
4566 */ 4566 */
4567 if ((pg->flags & PG_FAKE) != 0) { 4567 if ((pg->flags & PG_FAKE) != 0) {
4568 KASSERT(atomic_load_relaxed(&pp->pp_pte.pte_va) == 0); 4568 KASSERT(atomic_load_relaxed(&pp->pp_pte.pte_va) == 0);
4569 KASSERT(atomic_load_relaxed(&pp->pp_pte.pte_ptp) == NULL); 4569 KASSERT(atomic_load_relaxed(&pp->pp_pte.pte_ptp) == NULL);
4570 KASSERT(atomic_load_relaxed(&pp->pp_pvlist.lh_first) == NULL); 4570 KASSERT(atomic_load_relaxed(&pp->pp_pvlist.lh_first) == NULL);
4571 atomic_store_relaxed(&pp->pp_attrs, 0); 4571 atomic_store_relaxed(&pp->pp_attrs, 0);
4572 return false; 4572 return false;
4573 } else { 4573 } else {
4574 return pmap_pp_clear_attrs(pp, pa, clearbits); 4574 return pmap_pp_clear_attrs(pp, pa, clearbits);
4575 } 4575 }
4576} 4576}
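
pmap_test_attrs()/pmap_clear_attrs() are the P->V half of referenced/modified tracking: pmap_sync_pv() folds PTE_A/PTE_D from every live mapping back into pp_attrs, and these callers read or clear the accumulated bits. A hedged sketch of polling and resetting the modified state of a page (PP_ATTRS_D as used in pmap_sync_pv() above; pg is a placeholder):

	if (pmap_test_attrs(pg, PP_ATTRS_D)) {
		/* The page was written since the last scan; reset the state. */
		(void)pmap_clear_attrs(pg, PP_ATTRS_D);
	}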
4577 4577
4578/* 4578/*
4579 * pmap_pv_clear_attrs: clear the specified attributes for an unmanaged 4579 * pmap_pv_clear_attrs: clear the specified attributes for an unmanaged
4580 * pv-tracked page. 4580 * pv-tracked page.
4581 */ 4581 */
4582bool 4582bool
4583pmap_pv_clear_attrs(paddr_t pa, unsigned clearbits) 4583pmap_pv_clear_attrs(paddr_t pa, unsigned clearbits)
4584{ 4584{
4585 struct pmap_page *pp; 4585 struct pmap_page *pp;
4586 4586
4587 pp = pmap_pv_tracked(pa); 4587 pp = pmap_pv_tracked(pa);
4588 if (pp == NULL) 4588 if (pp == NULL)
4589 panic("%s: page not pv-tracked: %#"PRIxPADDR, __func__, pa); 4589 panic("%s: page not pv-tracked: %#"PRIxPADDR, __func__, pa);
4590 4590
4591 return pmap_pp_clear_attrs(pp, pa, clearbits); 4591 return pmap_pp_clear_attrs(pp, pa, clearbits);
4592} 4592}
4593 4593
4594/* 4594/*
4595 * p m a p p r o t e c t i o n f u n c t i o n s 4595 * p m a p p r o t e c t i o n f u n c t i o n s
4596 */ 4596 */
4597 4597
4598/* 4598/*
4599 * pmap_page_protect: change the protection of all recorded mappings 4599 * pmap_page_protect: change the protection of all recorded mappings
4600 * of a managed page 4600 * of a managed page
4601 * 4601 *
4602 * => NOTE: this is an inline function in pmap.h 4602 * => NOTE: this is an inline function in pmap.h
4603 */ 4603 */
4604 4604
4605/* see pmap.h */ 4605/* see pmap.h */
4606 4606
4607/* 4607/*
4608 * pmap_pv_protect: change the protection of all recorded mappings 4608 * pmap_pv_protect: change the protection of all recorded mappings
4609 * of an unmanaged pv-tracked page 4609 * of an unmanaged pv-tracked page
4610 * 4610 *
4611 * => NOTE: this is an inline function in pmap.h 4611 * => NOTE: this is an inline function in pmap.h
4612 */ 4612 */
4613 4613
4614/* see pmap.h */ 4614/* see pmap.h */
4615 4615
4616/* 4616/*
 4617 * pmap_protect: set the protection of the pages in a pmap 4617 * pmap_protect: set the protection of the pages in a pmap
4618 * 4618 *
4619 * => NOTE: this is an inline function in pmap.h 4619 * => NOTE: this is an inline function in pmap.h
4620 */ 4620 */
4621 4621
4622/* see pmap.h */ 4622/* see pmap.h */
4623 4623
4624/* 4624/*
4625 * pmap_write_protect: write-protect pages in a pmap. 4625 * pmap_write_protect: write-protect pages in a pmap.
4626 * 4626 *
4627 * Note for Xen-amd64. Xen automatically adds PTE_U to the kernel pages, but we 4627 * Note for Xen-amd64. Xen automatically adds PTE_U to the kernel pages, but we
4628 * don't need to remove this bit when re-entering the PTEs here: Xen tracks the 4628 * don't need to remove this bit when re-entering the PTEs here: Xen tracks the
4629 * kernel pages with a reserved bit (_PAGE_GUEST_KERNEL), so even if PTE_U is 4629 * kernel pages with a reserved bit (_PAGE_GUEST_KERNEL), so even if PTE_U is
4630 * present the page will still be considered as a kernel page, and the privilege 4630 * present the page will still be considered as a kernel page, and the privilege
4631 * separation will be enforced correctly. 4631 * separation will be enforced correctly.
4632 */ 4632 */
4633void 4633void
4634pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) 4634pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
4635{ 4635{
4636 pt_entry_t bit_rem, bit_put; 4636 pt_entry_t bit_rem, bit_put;
4637 pt_entry_t *ptes; 4637 pt_entry_t *ptes;
4638 pt_entry_t * const *pdes; 4638 pt_entry_t * const *pdes;
4639 struct pmap *pmap2; 4639 struct pmap *pmap2;
4640 vaddr_t blockend, va; 4640 vaddr_t blockend, va;
4641 int lvl, i; 4641 int lvl, i;
4642 4642
4643 if (__predict_false(pmap->pm_write_protect != NULL)) { 4643 if (__predict_false(pmap->pm_write_protect != NULL)) {
4644 (*pmap->pm_write_protect)(pmap, sva, eva, prot); 4644 (*pmap->pm_write_protect)(pmap, sva, eva, prot);
4645 return; 4645 return;
4646 } 4646 }
4647 4647
4648 bit_rem = 0; 4648 bit_rem = 0;
4649 if (!(prot & VM_PROT_WRITE)) 4649 if (!(prot & VM_PROT_WRITE))
4650 bit_rem = PTE_W; 4650 bit_rem = PTE_W;
4651 4651
4652 bit_put = 0; 4652 bit_put = 0;
4653 if (!(prot & VM_PROT_EXECUTE)) 4653 if (!(prot & VM_PROT_EXECUTE))
4654 bit_put = pmap_pg_nx; 4654 bit_put = pmap_pg_nx;
4655 4655
4656 sva &= ~PAGE_MASK; 4656 sva &= ~PAGE_MASK;
4657 eva &= ~PAGE_MASK; 4657 eva &= ~PAGE_MASK;
4658 4658
4659 /* 4659 /*
4660 * Acquire pmap. No need to lock the kernel pmap as we won't 4660 * Acquire pmap. No need to lock the kernel pmap as we won't
4661 * be touching PV entries nor stats and kernel PDEs aren't 4661 * be touching PV entries nor stats and kernel PDEs aren't
4662 * freed. 4662 * freed.
4663 */ 4663 */
4664 if (pmap != pmap_kernel()) { 4664 if (pmap != pmap_kernel()) {
4665 mutex_enter(&pmap->pm_lock); 4665 mutex_enter(&pmap->pm_lock);
4666 } 4666 }
4667 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); 4667 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
4668 4668
4669 for (va = sva ; va < eva; va = blockend) { 4669 for (va = sva ; va < eva; va = blockend) {
4670 pt_entry_t *spte, *epte; 4670 pt_entry_t *spte, *epte;
4671 4671
4672 blockend = x86_round_pdr(va + 1); 4672 blockend = x86_round_pdr(va + 1);
4673 if (blockend > eva) 4673 if (blockend > eva)
4674 blockend = eva; 4674 blockend = eva;
4675 4675
4676 /* Is it a valid block? */ 4676 /* Is it a valid block? */
4677 if (!pmap_pdes_valid(va, pdes, NULL, &lvl)) { 4677 if (!pmap_pdes_valid(va, pdes, NULL, &lvl)) {
4678 continue; 4678 continue;
4679 } 4679 }
4680 KASSERT(va < VM_MAXUSER_ADDRESS || va >= VM_MAX_ADDRESS); 4680 KASSERT(va < VM_MAXUSER_ADDRESS || va >= VM_MAX_ADDRESS);
4681 KASSERT(lvl == 1); 4681 KASSERT(lvl == 1);
4682 4682
4683 spte = &ptes[pl1_i(va)]; 4683 spte = &ptes[pl1_i(va)];
4684 epte = &ptes[pl1_i(blockend)]; 4684 epte = &ptes[pl1_i(blockend)];
4685 4685
4686 for (i = 0; spte < epte; spte++, i++) { 4686 for (i = 0; spte < epte; spte++, i++) {
4687 pt_entry_t opte, npte; 4687 pt_entry_t opte, npte;
4688 4688
4689 do { 4689 do {
4690 opte = *spte; 4690 opte = *spte;
4691 if (!pmap_valid_entry(opte)) { 4691 if (!pmap_valid_entry(opte)) {
4692 goto next; 4692 goto next;
4693 } 4693 }
4694 npte = (opte & ~bit_rem) | bit_put; 4694 npte = (opte & ~bit_rem) | bit_put;
4695 } while (pmap_pte_cas(spte, opte, npte) != opte); 4695 } while (pmap_pte_cas(spte, opte, npte) != opte);
4696 4696
4697 if ((opte & PTE_D) != 0) { 4697 if ((opte & PTE_D) != 0) {
4698 vaddr_t tva = va + x86_ptob(i); 4698 vaddr_t tva = va + x86_ptob(i);
4699 pmap_tlb_shootdown(pmap, tva, opte, 4699 pmap_tlb_shootdown(pmap, tva, opte,
4700 TLBSHOOT_WRITE_PROTECT); 4700 TLBSHOOT_WRITE_PROTECT);
4701 } 4701 }
4702next:; 4702next:;
4703 } 4703 }
4704 } 4704 }
4705 4705
4706 /* Release pmap. */ 4706 /* Release pmap. */
4707 pmap_unmap_ptes(pmap, pmap2); 4707 pmap_unmap_ptes(pmap, pmap2);
4708 if (pmap != pmap_kernel()) { 4708 if (pmap != pmap_kernel()) {
4709 mutex_exit(&pmap->pm_lock); 4709 mutex_exit(&pmap->pm_lock);
4710 } 4710 }
4711} 4711}
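
pmap_write_protect() only revokes: it can clear PTE_W and set the NX bit, but never grants anything back, so calling it over ranges that are already read-only or non-executable is harmless. An illustrative call that leaves a kernel range readable and executable but not writable (sva/eva are placeholders):

	pmap_write_protect(pmap_kernel(), sva, eva,
	    VM_PROT_READ | VM_PROT_EXECUTE);
	pmap_update(pmap_kernel());	/* flush the queued shootdowns */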
4712 4712
4713/* 4713/*
4714 * pmap_unwire: clear the wired bit in the PTE. 4714 * pmap_unwire: clear the wired bit in the PTE.
4715 * 4715 *
4716 * => Mapping should already be present. 4716 * => Mapping should already be present.
4717 */ 4717 */
4718void 4718void
4719pmap_unwire(struct pmap *pmap, vaddr_t va) 4719pmap_unwire(struct pmap *pmap, vaddr_t va)
4720{ 4720{
4721 pt_entry_t *ptes, *ptep, opte; 4721 pt_entry_t *ptes, *ptep, opte;
4722 pd_entry_t * const *pdes; 4722 pd_entry_t * const *pdes;
4723 struct pmap *pmap2; 4723 struct pmap *pmap2;
4724 int lvl; 4724 int lvl;
4725 4725
4726 if (__predict_false(pmap->pm_unwire != NULL)) { 4726 if (__predict_false(pmap->pm_unwire != NULL)) {
4727 (*pmap->pm_unwire)(pmap, va); 4727 (*pmap->pm_unwire)(pmap, va);
4728 return; 4728 return;
4729 } 4729 }
4730 4730
4731 /* 4731 /*
4732 * Acquire pmap. Need to lock the kernel pmap only to protect the 4732 * Acquire pmap. Need to lock the kernel pmap only to protect the
4733 * statistics. 4733 * statistics.
4734 */ 4734 */
4735 mutex_enter(&pmap->pm_lock); 4735 mutex_enter(&pmap->pm_lock);
4736 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); 4736 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
4737 4737
4738 if (!pmap_pdes_valid(va, pdes, NULL, &lvl)) { 4738 if (!pmap_pdes_valid(va, pdes, NULL, &lvl)) {
4739 panic("%s: invalid PDE va=%#" PRIxVADDR, __func__, va); 4739 panic("%s: invalid PDE va=%#" PRIxVADDR, __func__, va);
4740 } 4740 }
4741 KASSERT(lvl == 1); 4741 KASSERT(lvl == 1);
4742 4742
4743 ptep = &ptes[pl1_i(va)]; 4743 ptep = &ptes[pl1_i(va)];
4744 opte = *ptep; 4744 opte = *ptep;
4745 KASSERT(pmap_valid_entry(opte)); 4745 KASSERT(pmap_valid_entry(opte));
4746 4746
4747 if (opte & PTE_WIRED) { 4747 if (opte & PTE_WIRED) {
4748 pt_entry_t npte = opte & ~PTE_WIRED; 4748 pt_entry_t npte = opte & ~PTE_WIRED;
4749 4749
4750 opte = pmap_pte_testset(ptep, npte); 4750 opte = pmap_pte_testset(ptep, npte);
4751 pmap_stats_update_bypte(pmap, npte, opte); 4751 pmap_stats_update_bypte(pmap, npte, opte);
4752 } else { 4752 } else {
4753 printf("%s: wiring for pmap %p va %#" PRIxVADDR 4753 printf("%s: wiring for pmap %p va %#" PRIxVADDR
4754 " did not change!\n", __func__, pmap, va); 4754 " did not change!\n", __func__, pmap, va);
4755 } 4755 }
4756 4756
4757 /* Release pmap. */ 4757 /* Release pmap. */
4758 pmap_unmap_ptes(pmap, pmap2); 4758 pmap_unmap_ptes(pmap, pmap2);
4759 mutex_exit(&pmap->pm_lock); 4759 mutex_exit(&pmap->pm_lock);
4760} 4760}
4761 4761
4762/* 4762/*
4763 * pmap_copy: copy mappings from one pmap to another 4763 * pmap_copy: copy mappings from one pmap to another
4764 * 4764 *
4765 * => optional function 4765 * => optional function
4766 * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) 4766 * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
4767 */ 4767 */
4768 4768
4769/* 4769/*
4770 * defined as macro in pmap.h 4770 * defined as macro in pmap.h
4771 */ 4771 */
4772 4772
4773__strict_weak_alias(pmap_enter, pmap_enter_default); 4773__strict_weak_alias(pmap_enter, pmap_enter_default);
4774 4774
4775int 4775int
4776pmap_enter_default(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, 4776pmap_enter_default(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot,
4777 u_int flags) 4777 u_int flags)
4778{ 4778{
4779 if (__predict_false(pmap->pm_enter != NULL)) { 4779 if (__predict_false(pmap->pm_enter != NULL)) {
4780 return (*pmap->pm_enter)(pmap, va, pa, prot, flags); 4780 return (*pmap->pm_enter)(pmap, va, pa, prot, flags);
4781 } 4781 }
4782 4782
4783 return pmap_enter_ma(pmap, va, pa, pa, prot, flags, 0); 4783 return pmap_enter_ma(pmap, va, pa, pa, prot, flags, 0);
4784} 4784}
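
pmap_enter() (the weak alias above) is the managed path; its flags argument carries both the access-type bits that pmap_enter_ma() checks against VM_PROT_ALL and modifiers such as PMAP_WIRED and PMAP_CANFAIL. A hedged example of a fallible, wired entry (pmap, va and pa are placeholders):

	int error;

	error = pmap_enter(pmap, va, pa, VM_PROT_READ | VM_PROT_WRITE,
	    VM_PROT_WRITE | PMAP_WIRED | PMAP_CANFAIL);
	if (error != 0) {
		/* With PMAP_CANFAIL no panic occurs; the caller backs off. */
	}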
4785 4785
4786/* 4786/*
4787 * pmap_enter: enter a mapping into a pmap 4787 * pmap_enter: enter a mapping into a pmap
4788 * 4788 *
4789 * => must be done "now" ... no lazy-evaluation 4789 * => must be done "now" ... no lazy-evaluation
4790 */ 4790 */
4791int 4791int
4792pmap_enter_ma(struct pmap *pmap, vaddr_t va, paddr_t ma, paddr_t pa, 4792pmap_enter_ma(struct pmap *pmap, vaddr_t va, paddr_t ma, paddr_t pa,
4793 vm_prot_t prot, u_int flags, int domid) 4793 vm_prot_t prot, u_int flags, int domid)
4794{ 4794{
4795 pt_entry_t *ptes, opte, npte; 4795 pt_entry_t *ptes, opte, npte;
4796 pt_entry_t *ptep; 4796 pt_entry_t *ptep;
4797 pd_entry_t * const *pdes; 4797 pd_entry_t * const *pdes;
4798 struct vm_page *ptp; 4798 struct vm_page *ptp;
4799 struct vm_page *new_pg, *old_pg; 4799 struct vm_page *new_pg, *old_pg;
4800 struct pmap_page *new_pp, *old_pp; 4800 struct pmap_page *new_pp, *old_pp;
4801 struct pv_entry *old_pve, *new_pve; 4801 struct pv_entry *old_pve, *new_pve;
4802 bool wired = (flags & PMAP_WIRED) != 0; 4802 bool wired = (flags & PMAP_WIRED) != 0;
4803 struct pmap *pmap2; 4803 struct pmap *pmap2;
4804 struct pmap_ptparray pt; 4804 struct pmap_ptparray pt;
4805 int error; 4805 int error;
4806 bool getptp, samepage, new_embedded; 4806 bool getptp, samepage, new_embedded;
4807 rb_tree_t *tree; 4807 rb_tree_t *tree;
4808 4808
4809 KASSERT(pmap_initialized); 4809 KASSERT(pmap_initialized);
4810 KASSERT(va < VM_MAX_KERNEL_ADDRESS); 4810 KASSERT(va < VM_MAX_KERNEL_ADDRESS);
4811 KASSERTMSG(va != (vaddr_t)PDP_BASE, "%s: trying to map va=%#" 4811 KASSERTMSG(va != (vaddr_t)PDP_BASE, "%s: trying to map va=%#"
4812 PRIxVADDR " over PDP!", __func__, va); 4812 PRIxVADDR " over PDP!", __func__, va);
4813 KASSERTMSG(va < VM_MIN_KERNEL_ADDRESS || 4813 KASSERTMSG(va < VM_MIN_KERNEL_ADDRESS ||
4814 pmap_valid_entry(pmap->pm_pdir[pl_i(va, PTP_LEVELS)]), 4814 pmap_valid_entry(pmap->pm_pdir[pl_i(va, PTP_LEVELS)]),
4815 "%s: missing kernel PTP for va=%#" PRIxVADDR, __func__, va); 4815 "%s: missing kernel PTP for va=%#" PRIxVADDR, __func__, va);
4816 4816
4817#ifdef XENPV 4817#ifdef XENPV
4818 KASSERT(domid == DOMID_SELF || pa == 0); 4818 KASSERT(domid == DOMID_SELF || pa == 0);
4819#endif 4819#endif
4820 4820
4821 npte = ma | protection_codes[prot] | PTE_P; 4821 npte = ma | protection_codes[prot] | PTE_P;
4822 npte |= pmap_pat_flags(flags); 4822 npte |= pmap_pat_flags(flags);
4823 if (wired) 4823 if (wired)
4824 npte |= PTE_WIRED; 4824 npte |= PTE_WIRED;
4825 if (va < VM_MAXUSER_ADDRESS) 4825 if (va < VM_MAXUSER_ADDRESS)
4826 npte |= PTE_U; 4826 npte |= PTE_U;
4827 4827
4828 if (pmap == pmap_kernel()) 4828 if (pmap == pmap_kernel())
4829 npte |= pmap_pg_g; 4829 npte |= pmap_pg_g;
4830 if (flags & VM_PROT_ALL) { 4830 if (flags & VM_PROT_ALL) {
4831 npte |= PTE_A; 4831 npte |= PTE_A;
4832 if (flags & VM_PROT_WRITE) { 4832 if (flags & VM_PROT_WRITE) {
4833 KASSERT((npte & PTE_W) != 0); 4833 KASSERT((npte & PTE_W) != 0);
4834 npte |= PTE_D; 4834 npte |= PTE_D;
4835 } 4835 }
4836 } 4836 }
4837 4837
4838#ifdef XENPV 4838#ifdef XENPV
4839 if (domid != DOMID_SELF) 4839 if (domid != DOMID_SELF)
4840 new_pg = NULL; 4840 new_pg = NULL;
4841 else 4841 else
4842#endif 4842#endif
4843 new_pg = PHYS_TO_VM_PAGE(pa); 4843 new_pg = PHYS_TO_VM_PAGE(pa);
4844 4844
4845 if (new_pg != NULL) { 4845 if (new_pg != NULL) {
4846 /* This is a managed page */ 4846 /* This is a managed page */
4847 npte |= PTE_PVLIST; 4847 npte |= PTE_PVLIST;
4848 new_pp = VM_PAGE_TO_PP(new_pg); 4848 new_pp = VM_PAGE_TO_PP(new_pg);
4849 PMAP_CHECK_PP(new_pp); 4849 PMAP_CHECK_PP(new_pp);
4850 } else if ((new_pp = pmap_pv_tracked(pa)) != NULL) { 4850 } else if ((new_pp = pmap_pv_tracked(pa)) != NULL) {
4851 /* This is an unmanaged pv-tracked page */ 4851 /* This is an unmanaged pv-tracked page */
4852 npte |= PTE_PVLIST; 4852 npte |= PTE_PVLIST;
4853 PMAP_CHECK_PP(new_pp); 4853 PMAP_CHECK_PP(new_pp);
4854 } else { 4854 } else {
4855 new_pp = NULL; 4855 new_pp = NULL;
4856 } 4856 }
4857 4857
4858 /* Begin by locking the pmap. */ 4858 /* Begin by locking the pmap. */
4859 mutex_enter(&pmap->pm_lock); 4859 mutex_enter(&pmap->pm_lock);
4860 4860
4861 /* Look up the PTP. Allocate if none present. */ 4861 /* Look up the PTP. Allocate if none present. */
4862 ptp = NULL; 4862 ptp = NULL;
4863 getptp = false; 4863 getptp = false;
4864 if (pmap != pmap_kernel()) { 4864 if (pmap != pmap_kernel()) {
4865 ptp = pmap_find_ptp(pmap, va, 1); 4865 ptp = pmap_find_ptp(pmap, va, 1);
4866 if (ptp == NULL) { 4866 if (ptp == NULL) {
4867 getptp = true; 4867 getptp = true;
4868 error = pmap_get_ptp(pmap, &pt, va, flags, &ptp); 4868 error = pmap_get_ptp(pmap, &pt, va, flags, &ptp);
4869 if (error != 0) { 4869 if (error != 0) {
4870 if (flags & PMAP_CANFAIL) { 4870 if (flags & PMAP_CANFAIL) {
4871 mutex_exit(&pmap->pm_lock); 4871 mutex_exit(&pmap->pm_lock);
4872 return error; 4872 return error;
4873 } 4873 }
4874 panic("%s: get ptp failed, error=%d", __func__, 4874 panic("%s: get ptp failed, error=%d", __func__,
4875 error); 4875 error);
4876 } 4876 }
4877 } 4877 }
4878 tree = &VM_PAGE_TO_PP(ptp)->pp_rb; 4878 tree = &VM_PAGE_TO_PP(ptp)->pp_rb;
4879 } else { 4879 } else {
4880 /* Embedded PV entries rely on this. */ 4880 /* Embedded PV entries rely on this. */
4881 KASSERT(va != 0); 4881 KASSERT(va != 0);
4882 tree = &pmap_kernel_rb; 4882 tree = &pmap_kernel_rb;
4883 } 4883 }
4884 4884
4885 /* 4885 /*
4886 * Look up the old PV entry at this VA (if any), and insert a new PV 4886 * Look up the old PV entry at this VA (if any), and insert a new PV
4887 * entry if required for the new mapping. Temporarily track the old 4887 * entry if required for the new mapping. Temporarily track the old
4888 * and new mappings concurrently. Only after the old mapping is 4888 * and new mappings concurrently. Only after the old mapping is
4889 * evicted from the pmap will we remove its PV entry. Otherwise, 4889 * evicted from the pmap will we remove its PV entry. Otherwise,
4890 * our picture of modified/accessed state for either page could get 4890 * our picture of modified/accessed state for either page could get
4891 * out of sync (we need any P->V operation for either page to stall 4891 * out of sync (we need any P->V operation for either page to stall
4892 * on pmap->pm_lock until done here). 4892 * on pmap->pm_lock until done here).
4893 */ 4893 */
4894 new_pve = NULL; 4894 new_pve = NULL;
4895 old_pve = NULL; 4895 old_pve = NULL;
4896 samepage = false; 4896 samepage = false;
4897 new_embedded = false; 4897 new_embedded = false;
4898 4898
4899 if (new_pp != NULL) { 4899 if (new_pp != NULL) {
4900 error = pmap_enter_pv(pmap, new_pp, ptp, va, &new_pve, 4900 error = pmap_enter_pv(pmap, new_pp, ptp, va, &new_pve,
4901 &old_pve, &samepage, &new_embedded, tree); 4901 &old_pve, &samepage, &new_embedded, tree);
4902 4902
4903 /* 4903 /*
4904 * If a new pv_entry was needed and none was available, we 4904 * If a new pv_entry was needed and none was available, we
4905 * can go no further. 4905 * can go no further.
4906 */ 4906 */
4907 if (error != 0) { 4907 if (error != 0) {
4908 if (flags & PMAP_CANFAIL) { 4908 if (flags & PMAP_CANFAIL) {
4909 if (getptp) { 4909 if (getptp) {
4910 pmap_unget_ptp(pmap, &pt); 4910 pmap_unget_ptp(pmap, &pt);
4911 } 4911 }
4912 mutex_exit(&pmap->pm_lock); 4912 mutex_exit(&pmap->pm_lock);
4913 return error; 4913 return error;
4914 } 4914 }
4915 panic("%s: alloc pve failed", __func__); 4915 panic("%s: alloc pve failed", __func__);
4916 } 4916 }
4917 } else { 4917 } else {
4918 old_pve = pmap_treelookup_pv(pmap, ptp, tree, va); 4918 old_pve = pmap_treelookup_pv(pmap, ptp, tree, va);
4919 } 4919 }
4920 4920
4921 /* Map PTEs into address space. */ 4921 /* Map PTEs into address space. */
4922 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); 4922 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
4923 4923
4924 /* Install any newly allocated PTPs. */ 4924 /* Install any newly allocated PTPs. */
4925 if (getptp) { 4925 if (getptp) {
4926 pmap_install_ptp(pmap, &pt, va, pdes); 4926 pmap_install_ptp(pmap, &pt, va, pdes);
4927 } 4927 }
4928 4928
4929 /* Check if there is an existing mapping. */ 4929 /* Check if there is an existing mapping. */
4930 ptep = &ptes[pl1_i(va)]; 4930 ptep = &ptes[pl1_i(va)];
4931 opte = *ptep; 4931 opte = *ptep;
4932 bool have_oldpa = pmap_valid_entry(opte); 4932 bool have_oldpa = pmap_valid_entry(opte);
4933 paddr_t oldpa = pmap_pte2pa(opte); 4933 paddr_t oldpa = pmap_pte2pa(opte);
4934 4934
4935 /* 4935 /*
4936 * Update the pte. 4936 * Update the pte.
4937 */ 4937 */
4938 do { 4938 do {
4939 opte = *ptep; 4939 opte = *ptep;
4940 4940
4941 /* 4941 /*
4942 * if the same page, inherit PTE_A and PTE_D. 4942 * if the same page, inherit PTE_A and PTE_D.
4943 */ 4943 */
4944 if (((opte ^ npte) & (PTE_FRAME | PTE_P)) == 0) { 4944 if (((opte ^ npte) & (PTE_FRAME | PTE_P)) == 0) {
4945 npte |= opte & (PTE_A | PTE_D); 4945 npte |= opte & (PTE_A | PTE_D);
4946 } 4946 }
4947#if defined(XENPV) 4947#if defined(XENPV)
4948 if (domid != DOMID_SELF) { 4948 if (domid != DOMID_SELF) {
4949 /* pmap_pte_cas with error handling */ 4949 /* pmap_pte_cas with error handling */
4950 int s = splvm(); 4950 int s = splvm();
4951 if (opte != *ptep) { 4951 if (opte != *ptep) {
4952 splx(s); 4952 splx(s);
4953 continue; 4953 continue;
4954 } 4954 }
4955 error = xpq_update_foreign( 4955 error = xpq_update_foreign(
4956 vtomach((vaddr_t)ptep), npte, domid, flags); 4956 vtomach((vaddr_t)ptep), npte, domid, flags);
4957 splx(s); 4957 splx(s);
4958 if (error) { 4958 if (error) {
4959 /* Undo pv_entry tracking - oof. */ 4959 /* Undo pv_entry tracking - oof. */
4960 if (new_pp != NULL) { 4960 if (new_pp != NULL) {
4961 mutex_spin_enter(&new_pp->pp_lock); 4961 mutex_spin_enter(&new_pp->pp_lock);
4962 if (new_pve != NULL) { 4962 if (new_pve != NULL) {
4963 LIST_REMOVE(new_pve, pve_list); 4963 LIST_REMOVE(new_pve, pve_list);
4964 KASSERT(pmap->pm_pve == NULL); 4964 KASSERT(pmap->pm_pve == NULL);
4965 pmap->pm_pve = new_pve; 4965 pmap->pm_pve = new_pve;
4966 } else if (new_embedded) { 4966 } else if (new_embedded) {
4967 new_pp->pp_pte.pte_ptp = NULL; 4967 new_pp->pp_pte.pte_ptp = NULL;
4968 new_pp->pp_pte.pte_va = 0; 4968 new_pp->pp_pte.pte_va = 0;
4969 } 4969 }
4970 mutex_spin_exit(&new_pp->pp_lock); 4970 mutex_spin_exit(&new_pp->pp_lock);
4971 } 4971 }
4972 pmap_unmap_ptes(pmap, pmap2); 4972 pmap_unmap_ptes(pmap, pmap2);
4973 /* Free new PTP. */ 4973 /* Free new PTP. */
4974 if (ptp != NULL && ptp->wire_count <= 1) { 4974 if (ptp != NULL && ptp->wire_count <= 1) {
4975 pmap_free_ptp(pmap, ptp, va, ptes, 4975 pmap_free_ptp(pmap, ptp, va, ptes,
4976 pdes); 4976 pdes);
4977 } 4977 }
4978 mutex_exit(&pmap->pm_lock); 4978 mutex_exit(&pmap->pm_lock);
4979 return error; 4979 return error;
4980 } 4980 }
4981 break; 4981 break;
4982 } 4982 }
4983#endif /* defined(XENPV) */ 4983#endif /* defined(XENPV) */
4984 } while (pmap_pte_cas(ptep, opte, npte) != opte); 4984 } while (pmap_pte_cas(ptep, opte, npte) != opte);
4985 4985
4986 /* 4986 /*
4987 * Done with the PTEs: they can now be unmapped. 4987 * Done with the PTEs: they can now be unmapped.
4988 */ 4988 */
4989 pmap_unmap_ptes(pmap, pmap2); 4989 pmap_unmap_ptes(pmap, pmap2);
4990 4990
4991 /* 4991 /*
4992 * Update statistics and PTP's reference count. 4992 * Update statistics and PTP's reference count.
4993 */ 4993 */
4994 pmap_stats_update_bypte(pmap, npte, opte); 4994 pmap_stats_update_bypte(pmap, npte, opte);
4995 if (ptp != NULL) { 4995 if (ptp != NULL) {
4996 if (!have_oldpa) { 4996 if (!have_oldpa) {
4997 ptp->wire_count++; 4997 ptp->wire_count++;
4998 } 4998 }
4999 /* Remember minimum VA in PTP. */ 4999 /* Remember minimum VA in PTP. */
5000 pmap_ptp_range_set(ptp, va); 5000 pmap_ptp_range_set(ptp, va);
5001 } 5001 }
5002 KASSERT(ptp == NULL || ptp->wire_count > 1); 5002 KASSERT(ptp == NULL || ptp->wire_count > 1);
5003 5003
5004 /* 5004 /*
5005 * If the same page, we can skip pv_entry handling. 5005 * If the same page, we can skip pv_entry handling.
5006 */ 5006 */
5007 if (((opte ^ npte) & (PTE_FRAME | PTE_P)) == 0) { 5007 if (((opte ^ npte) & (PTE_FRAME | PTE_P)) == 0) {
5008 KASSERT(((opte ^ npte) & PTE_PVLIST) == 0); 5008 KASSERT(((opte ^ npte) & PTE_PVLIST) == 0);
5009 if ((npte & PTE_PVLIST) != 0) { 5009 if ((npte & PTE_PVLIST) != 0) {
5010 KASSERT(samepage); 5010 KASSERT(samepage);
5011 pmap_check_pv(pmap, ptp, new_pp, va, true); 5011 pmap_check_pv(pmap, ptp, new_pp, va, true);
5012 } 5012 }
5013 goto same_pa; 5013 goto same_pa;
5014 } else if ((npte & PTE_PVLIST) != 0) { 5014 } else if ((npte & PTE_PVLIST) != 0) {
5015 KASSERT(!samepage); 5015 KASSERT(!samepage);
5016 } 5016 }
5017 5017
5018 /* 5018 /*
5019 * If old page is pv-tracked, remove pv_entry from its list. 5019 * If old page is pv-tracked, remove pv_entry from its list.
5020 */ 5020 */
5021 if ((~opte & (PTE_P | PTE_PVLIST)) == 0) { 5021 if ((~opte & (PTE_P | PTE_PVLIST)) == 0) {
5022 if ((old_pg = PHYS_TO_VM_PAGE(oldpa)) != NULL) { 5022 if ((old_pg = PHYS_TO_VM_PAGE(oldpa)) != NULL) {
5023 old_pp = VM_PAGE_TO_PP(old_pg); 5023 old_pp = VM_PAGE_TO_PP(old_pg);
5024 } else if ((old_pp = pmap_pv_tracked(oldpa)) == NULL) { 5024 } else if ((old_pp = pmap_pv_tracked(oldpa)) == NULL) {
5025 panic("%s: PTE_PVLIST with pv-untracked page" 5025 panic("%s: PTE_PVLIST with pv-untracked page"
5026 " va = %#"PRIxVADDR 5026 " va = %#"PRIxVADDR
5027 " pa = %#" PRIxPADDR " (%#" PRIxPADDR ")", 5027 " pa = %#" PRIxPADDR " (%#" PRIxPADDR ")",
5028 __func__, va, oldpa, atop(pa)); 5028 __func__, va, oldpa, atop(pa));
5029 } 5029 }
5030 5030
5031 pmap_remove_pv(pmap, old_pp, ptp, va, old_pve, 5031 pmap_remove_pv(pmap, old_pp, ptp, va, old_pve,
5032 pmap_pte_to_pp_attrs(opte)); 5032 pmap_pte_to_pp_attrs(opte));
5033 } else { 5033 } else {
5034 KASSERT(old_pve == NULL); 5034 KASSERT(old_pve == NULL);
5035 KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL); 5035 KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL);
5036 } 5036 }
5037 5037
5038 /* 5038 /*
5039 * If new page is dynamically PV tracked, insert to tree. 5039 * If new page is dynamically PV tracked, insert to tree.
5040 */ 5040 */
5041 if (new_pve != NULL) { 5041 if (new_pve != NULL) {
5042 KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL); 5042 KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL);
5043 old_pve = rb_tree_insert_node(tree, new_pve); 5043 old_pve = rb_tree_insert_node(tree, new_pve);
5044 KASSERT(old_pve == new_pve); 5044 KASSERT(old_pve == new_pve);
5045 pmap_check_pv(pmap, ptp, new_pp, va, true); 5045 pmap_check_pv(pmap, ptp, new_pp, va, true);
5046 } 5046 }
5047 5047
5048same_pa: 5048same_pa:
5049 /* 5049 /*
5050 * shootdown tlb if necessary. 5050 * shootdown tlb if necessary.
5051 */ 5051 */
5052 5052
5053 if ((~opte & (PTE_P | PTE_A)) == 0 && 5053 if ((~opte & (PTE_P | PTE_A)) == 0 &&
5054 ((opte ^ npte) & (PTE_FRAME | PTE_W)) != 0) { 5054 ((opte ^ npte) & (PTE_FRAME | PTE_W)) != 0) {
5055 pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_ENTER); 5055 pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_ENTER);
5056 } 5056 }
5057 pmap_drain_pv(pmap); 5057 pmap_drain_pv(pmap);
5058 mutex_exit(&pmap->pm_lock); 5058 mutex_exit(&pmap->pm_lock);
5059 return 0; 5059 return 0;
5060} 5060}
5061 5061
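The PTE write in pmap_enter_ma() above is a compare-and-swap retry loop, so accessed/dirty bits the MMU may set between the read and the write are never lost (the real code only inherits PTE_A/PTE_D when the frame is unchanged; the sketch keeps just the read/merge/CAS shape). A minimal user-space sketch, assuming hypothetical pte_cas()/pte_update() helpers standing in for pmap_pte_cas() and the loop above:

#include <stdatomic.h>
#include <stdint.h>

typedef uint64_t xpte_t;			/* hypothetical PTE type */
#define XPTE_A	0x20				/* accessed, like PTE_A */
#define XPTE_D	0x40				/* dirty, like PTE_D */

/* Return the value seen in *ptep before the swap, like pmap_pte_cas(). */
static xpte_t
pte_cas(_Atomic xpte_t *ptep, xpte_t expect, xpte_t npte)
{
	atomic_compare_exchange_strong(ptep, &expect, npte);
	return expect;
}

static void
pte_update(_Atomic xpte_t *ptep, xpte_t npte)
{
	xpte_t opte;

	do {
		/* Re-read and re-merge A/D on every retry. */
		opte = atomic_load(ptep);
		npte |= opte & (XPTE_A | XPTE_D);
	} while (pte_cas(ptep, opte, npte) != opte);
}

int
main(void)
{
	_Atomic xpte_t pte = 0x1000 | XPTE_A;	/* old mapping, already accessed */

	pte_update(&pte, 0x2000 | XPTE_D);	/* new mapping keeps the A bit */
	return atomic_load(&pte) == (0x2000 | XPTE_A | XPTE_D) ? 0 : 1;
}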
5062#if defined(XEN) && defined(DOM0OPS) 5062#if defined(XEN) && defined(DOM0OPS)
5063 5063
5064struct pmap_data_gnt { 5064struct pmap_data_gnt {
5065 SLIST_ENTRY(pmap_data_gnt) pd_gnt_list; 5065 SLIST_ENTRY(pmap_data_gnt) pd_gnt_list;
5066 vaddr_t pd_gnt_sva; 5066 vaddr_t pd_gnt_sva;
5067 vaddr_t pd_gnt_eva; /* range covered by this gnt */ 5067 vaddr_t pd_gnt_eva; /* range covered by this gnt */
5068 int pd_gnt_refs; /* ref counter */ 5068 int pd_gnt_refs; /* ref counter */
5069 struct gnttab_map_grant_ref pd_gnt_ops[1]; /* variable length */ 5069 struct gnttab_map_grant_ref pd_gnt_ops[1]; /* variable length */
5070}; 5070};
5071SLIST_HEAD(pmap_data_gnt_head, pmap_data_gnt); 5071SLIST_HEAD(pmap_data_gnt_head, pmap_data_gnt);
5072 5072
5073static void pmap_remove_gnt(struct pmap *, vaddr_t, vaddr_t); 5073static void pmap_remove_gnt(struct pmap *, vaddr_t, vaddr_t);
5074 5074
5075static struct pmap_data_gnt * 5075static struct pmap_data_gnt *
5076pmap_find_gnt(struct pmap *pmap, vaddr_t sva, vaddr_t eva) 5076pmap_find_gnt(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
5077{ 5077{
5078 struct pmap_data_gnt_head *headp; 5078 struct pmap_data_gnt_head *headp;
5079 struct pmap_data_gnt *pgnt; 5079 struct pmap_data_gnt *pgnt;
5080 5080
5081 KASSERT(mutex_owned(&pmap->pm_lock)); 5081 KASSERT(mutex_owned(&pmap->pm_lock));
5082 headp = pmap->pm_data; 5082 headp = pmap->pm_data;
5083 KASSERT(headp != NULL); 5083 KASSERT(headp != NULL);
5084 SLIST_FOREACH(pgnt, headp, pd_gnt_list) { 5084 SLIST_FOREACH(pgnt, headp, pd_gnt_list) {
5085 if (pgnt->pd_gnt_sva >= sva && pgnt->pd_gnt_sva <= eva) 5085 if (pgnt->pd_gnt_sva <= sva && eva <= pgnt->pd_gnt_eva)
5086 return pgnt; 5086 return pgnt;
5087 /* check that we're not overlapping part of a region */ 5087 /* check that we're not overlapping part of a region */
5088 KASSERT(pgnt->pd_gnt_sva >= eva || pgnt->pd_gnt_eva <= sva); 5088 KASSERT(pgnt->pd_gnt_sva >= eva || pgnt->pd_gnt_eva <= sva);
5089 } 5089 }
5090 return NULL; 5090 return NULL;
5091} 5091}
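This test is the one the commit fixes: the old predicate matched any region whose start merely fell inside [sva, eva], which for a lookup could return the entry beginning just after the range instead of the one covering it (tripping the KASSERT in pmap_remove_gnt()). A small stand-alone sketch of the two predicates, with hypothetical region/old_match/new_match names not taken from pmap.c:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef uintptr_t va_t;

struct region { va_t sva, eva; };

static bool
old_match(const struct region *r, va_t sva, va_t eva)
{
	return r->sva >= sva && r->sva <= eva;	/* "starts within the range" */
}

static bool
new_match(const struct region *r, va_t sva, va_t eva)
{
	return r->sva <= sva && eva <= r->eva;	/* "covers the range" */
}

int
main(void)
{
	struct region a = { 0x1000, 0x3000 }, b = { 0x3000, 0x5000 };

	assert(new_match(&a, 0x2000, 0x3000));	/* correct hit */
	assert(!new_match(&b, 0x2000, 0x3000));	/* adjacent region rejected */
	assert(!old_match(&a, 0x2000, 0x3000));	/* old test missed the covering one */
	assert(old_match(&b, 0x2000, 0x3000));	/* ...and accepted the one just after */
	return 0;
}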
5092 5092
5093static void 5093static void
5094pmap_alloc_gnt(struct pmap *pmap, vaddr_t sva, int nentries, 5094pmap_alloc_gnt(struct pmap *pmap, vaddr_t sva, int nentries,
5095 const struct gnttab_map_grant_ref *ops) 5095 const struct gnttab_map_grant_ref *ops)
5096{ 5096{
5097 struct pmap_data_gnt_head *headp; 5097 struct pmap_data_gnt_head *headp;
5098 struct pmap_data_gnt *pgnt; 5098 struct pmap_data_gnt *pgnt;
5099 vaddr_t eva = sva + nentries * PAGE_SIZE; 5099 vaddr_t eva = sva + nentries * PAGE_SIZE;
5100 KASSERT(mutex_owned(&pmap->pm_lock)); 5100 KASSERT(mutex_owned(&pmap->pm_lock));
5101 KASSERT(nentries >= 1); 5101 KASSERT(nentries >= 1);
5102 if (pmap->pm_remove == NULL) { 5102 if (pmap->pm_remove == NULL) {
5103 pmap->pm_remove = pmap_remove_gnt; 5103 pmap->pm_remove = pmap_remove_gnt;
5104 KASSERT(pmap->pm_data == NULL); 5104 KASSERT(pmap->pm_data == NULL);
5105 headp = kmem_alloc(sizeof(*headp), KM_SLEEP); 5105 headp = kmem_alloc(sizeof(*headp), KM_SLEEP);
5106 SLIST_INIT(headp); 5106 SLIST_INIT(headp);
5107 pmap->pm_data = headp; 5107 pmap->pm_data = headp;
5108 } else { 5108 } else {
5109 KASSERT(pmap->pm_remove == pmap_remove_gnt); 5109 KASSERT(pmap->pm_remove == pmap_remove_gnt);
5110 KASSERT(pmap->pm_data != NULL); 5110 KASSERT(pmap->pm_data != NULL);
5111 headp = pmap->pm_data; 5111 headp = pmap->pm_data;
5112 } 5112 }
5113 5113
5114 pgnt = pmap_find_gnt(pmap, sva, eva); 5114 pgnt = pmap_find_gnt(pmap, sva, eva);
5115 if (pgnt != NULL) { 5115 if (pgnt != NULL) {
5116 KASSERT(pgnt->pd_gnt_sva == sva); 5116 KASSERT(pgnt->pd_gnt_sva == sva);
5117 KASSERT(pgnt->pd_gnt_eva == eva); 5117 KASSERT(pgnt->pd_gnt_eva == eva);
5118 return; 5118 return;
5119 } 5119 }
5120 5120
5121 /* new entry */ 5121 /* new entry */
5122 pgnt = kmem_alloc(sizeof(*pgnt) + 5122 pgnt = kmem_alloc(sizeof(*pgnt) +
5123 (nentries - 1) * sizeof(struct gnttab_map_grant_ref), KM_SLEEP); 5123 (nentries - 1) * sizeof(struct gnttab_map_grant_ref), KM_SLEEP);
5124 pgnt->pd_gnt_sva = sva; 5124 pgnt->pd_gnt_sva = sva;
5125 pgnt->pd_gnt_eva = eva; 5125 pgnt->pd_gnt_eva = eva;
5126 pgnt->pd_gnt_refs = 0; 5126 pgnt->pd_gnt_refs = 0;
5127 memcpy(pgnt->pd_gnt_ops, ops, 5127 memcpy(pgnt->pd_gnt_ops, ops,
5128 sizeof(struct gnttab_map_grant_ref) * nentries); 5128 sizeof(struct gnttab_map_grant_ref) * nentries);
5129 SLIST_INSERT_HEAD(headp, pgnt, pd_gnt_list); 5129 SLIST_INSERT_HEAD(headp, pgnt, pd_gnt_list);
5130} 5130}
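pmap_alloc_gnt() sizes its allocation as sizeof(*pgnt) plus (nentries - 1) extra ops because struct pmap_data_gnt already declares a one-element pd_gnt_ops[] trailer. A minimal sketch of that variable-length-trailer pattern, using hypothetical gnt_region/gnt_op names and malloc in place of kmem_alloc:

#include <stdlib.h>
#include <string.h>

struct gnt_op { int dummy; };			/* stand-in for the real grant op */

struct gnt_region {
	int nops;
	struct gnt_op ops[1];			/* variable-length trailer */
};

static struct gnt_region *
gnt_region_alloc(int n, const struct gnt_op *src)	/* n >= 1 */
{
	struct gnt_region *r;

	/* ops[1] already accounts for one element, so add only n - 1 more. */
	r = malloc(sizeof(*r) + (n - 1) * sizeof(struct gnt_op));
	if (r == NULL)
		return NULL;
	r->nops = n;
	memcpy(r->ops, src, n * sizeof(struct gnt_op));
	return r;
}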
5131 5131
5132static void 5132static void
5133pmap_free_gnt(struct pmap *pmap, struct pmap_data_gnt *pgnt) 5133pmap_free_gnt(struct pmap *pmap, struct pmap_data_gnt *pgnt)
5134{ 5134{
5135 struct pmap_data_gnt_head *headp = pmap->pm_data; 5135 struct pmap_data_gnt_head *headp = pmap->pm_data;
5136 int nentries = (pgnt->pd_gnt_eva - pgnt->pd_gnt_sva) / PAGE_SIZE; 5136 int nentries = (pgnt->pd_gnt_eva - pgnt->pd_gnt_sva) / PAGE_SIZE;
5137 KASSERT(nentries >= 1); 5137 KASSERT(nentries >= 1);
5138 KASSERT(mutex_owned(&pmap->pm_lock)); 5138 KASSERT(mutex_owned(&pmap->pm_lock));
5139 KASSERT(pgnt->pd_gnt_refs == 0); 5139 KASSERT(pgnt->pd_gnt_refs == 0);
5140 SLIST_REMOVE(headp, pgnt, pmap_data_gnt, pd_gnt_list); 5140 SLIST_REMOVE(headp, pgnt, pmap_data_gnt, pd_gnt_list);
5141 kmem_free(pgnt, sizeof(*pgnt) + 5141 kmem_free(pgnt, sizeof(*pgnt) +
5142 (nentries - 1) * sizeof(struct gnttab_map_grant_ref)); 5142 (nentries - 1) * sizeof(struct gnttab_map_grant_ref));
5143 if (SLIST_EMPTY(headp)) { 5143 if (SLIST_EMPTY(headp)) {
5144 kmem_free(headp, sizeof(*headp)); 5144 kmem_free(headp, sizeof(*headp));
5145 pmap->pm_data = NULL; 5145 pmap->pm_data = NULL;
5146 pmap->pm_remove = NULL; 5146 pmap->pm_remove = NULL;
5147 } 5147 }
5148} 5148}
5149 5149
5150/* 5150/*
5151 * pmap_enter_gnt: enter a grant entry into a pmap 5151 * pmap_enter_gnt: enter a grant entry into a pmap
5152 * 5152 *
5153 * => must be done "now" ... no lazy-evaluation 5153 * => must be done "now" ... no lazy-evaluation
5154 */ 5154 */
5155int 5155int
5156pmap_enter_gnt(struct pmap *pmap, vaddr_t va, vaddr_t sva, int nentries, 5156pmap_enter_gnt(struct pmap *pmap, vaddr_t va, vaddr_t sva, int nentries,
5157 const struct gnttab_map_grant_ref *oops) 5157 const struct gnttab_map_grant_ref *oops)
5158{ 5158{
5159 struct pmap_data_gnt *pgnt; 5159 struct pmap_data_gnt *pgnt;
5160 pt_entry_t *ptes, opte; 5160 pt_entry_t *ptes, opte;
5161 pt_entry_t *ptep; 5161 pt_entry_t *ptep;
5162 pd_entry_t * const *pdes; 5162 pd_entry_t * const *pdes;
5163 struct vm_page *ptp; 5163 struct vm_page *ptp;
5164 struct vm_page *old_pg; 5164 struct vm_page *old_pg;
5165 struct pmap_page *old_pp; 5165 struct pmap_page *old_pp;
5166 struct pv_entry *old_pve; 5166 struct pv_entry *old_pve;
5167 struct pmap *pmap2; 5167 struct pmap *pmap2;
5168 struct pmap_ptparray pt; 5168 struct pmap_ptparray pt;
5169 int error; 5169 int error;
5170 bool getptp; 5170 bool getptp;
5171 rb_tree_t *tree; 5171 rb_tree_t *tree;
5172 struct gnttab_map_grant_ref *op; 5172 struct gnttab_map_grant_ref *op;
5173 int ret; 5173 int ret;
5174 int idx; 5174 int idx;
5175 5175
5176 KASSERT(pmap_initialized); 5176 KASSERT(pmap_initialized);
5177 KASSERT(va < VM_MAX_KERNEL_ADDRESS); 5177 KASSERT(va < VM_MAX_KERNEL_ADDRESS);
5178 KASSERTMSG(va != (vaddr_t)PDP_BASE, "%s: trying to map va=%#" 5178 KASSERTMSG(va != (vaddr_t)PDP_BASE, "%s: trying to map va=%#"
5179 PRIxVADDR " over PDP!", __func__, va); 5179 PRIxVADDR " over PDP!", __func__, va);
5180 KASSERT(pmap != pmap_kernel()); 5180 KASSERT(pmap != pmap_kernel());
5181 5181
5182 /* Begin by locking the pmap. */ 5182 /* Begin by locking the pmap. */
5183 mutex_enter(&pmap->pm_lock); 5183 mutex_enter(&pmap->pm_lock);
5184 pmap_alloc_gnt(pmap, sva, nentries, oops); 5184 pmap_alloc_gnt(pmap, sva, nentries, oops);
5185 5185
5186 pgnt = pmap_find_gnt(pmap, va, va + PAGE_SIZE); 5186 pgnt = pmap_find_gnt(pmap, va, va + PAGE_SIZE);
5187 KASSERT(pgnt != NULL); 5187 KASSERT(pgnt != NULL);
5188 5188
5189 /* Look up the PTP. Allocate if none present. */ 5189 /* Look up the PTP. Allocate if none present. */
5190 ptp = NULL; 5190 ptp = NULL;
5191 getptp = false; 5191 getptp = false;
5192 ptp = pmap_find_ptp(pmap, va, 1); 5192 ptp = pmap_find_ptp(pmap, va, 1);
5193 if (ptp == NULL) { 5193 if (ptp == NULL) {
5194 getptp = true; 5194 getptp = true;
5195 error = pmap_get_ptp(pmap, &pt, va, PMAP_CANFAIL, &ptp); 5195 error = pmap_get_ptp(pmap, &pt, va, PMAP_CANFAIL, &ptp);
5196 if (error != 0) { 5196 if (error != 0) {
5197 mutex_exit(&pmap->pm_lock); 5197 mutex_exit(&pmap->pm_lock);
5198 return error; 5198 return error;
5199 } 5199 }
5200 } 5200 }
5201 tree = &VM_PAGE_TO_PP(ptp)->pp_rb; 5201 tree = &VM_PAGE_TO_PP(ptp)->pp_rb;
5202 5202
5203 /* 5203 /*
5204 * Look up the old PV entry at this VA (if any), and insert a new PV 5204 * Look up the old PV entry at this VA (if any), and insert a new PV
5205 * entry if required for the new mapping. Temporarily track the old 5205 * entry if required for the new mapping. Temporarily track the old
5206 * and new mappings concurrently. Only after the old mapping is 5206 * and new mappings concurrently. Only after the old mapping is
5207 * evicted from the pmap will we remove its PV entry. Otherwise, 5207 * evicted from the pmap will we remove its PV entry. Otherwise,
5208 * our picture of modified/accessed state for either page could get 5208 * our picture of modified/accessed state for either page could get
5209 * out of sync (we need any P->V operation for either page to stall 5209 * out of sync (we need any P->V operation for either page to stall
5210 * on pmap->pm_lock until done here). 5210 * on pmap->pm_lock until done here).
5211 */ 5211 */
5212 old_pve = NULL; 5212 old_pve = NULL;
5213 5213
5214 old_pve = pmap_treelookup_pv(pmap, ptp, tree, va); 5214 old_pve = pmap_treelookup_pv(pmap, ptp, tree, va);
5215 5215
5216 /* Map PTEs into address space. */ 5216 /* Map PTEs into address space. */
5217 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); 5217 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
5218 5218
5219 /* Install any newly allocated PTPs. */ 5219 /* Install any newly allocated PTPs. */
5220 if (getptp) { 5220 if (getptp) {
5221 pmap_install_ptp(pmap, &pt, va, pdes); 5221 pmap_install_ptp(pmap, &pt, va, pdes);
5222 } 5222 }
5223 5223
5224 /* Check if there is an existing mapping. */ 5224 /* Check if there is an existing mapping. */
5225 ptep = &ptes[pl1_i(va)]; 5225 ptep = &ptes[pl1_i(va)];
5226 opte = *ptep; 5226 opte = *ptep;
5227 bool have_oldpa = pmap_valid_entry(opte); 5227 bool have_oldpa = pmap_valid_entry(opte);
5228 paddr_t oldpa = pmap_pte2pa(opte); 5228 paddr_t oldpa = pmap_pte2pa(opte);
5229 5229
5230 /* 5230 /*
5231 * Update the pte. 5231 * Update the pte.
5232 */ 5232 */
5233 5233
5234 idx = (va - pgnt->pd_gnt_sva) / PAGE_SIZE; 5234 idx = (va - pgnt->pd_gnt_sva) / PAGE_SIZE;
5235 op = &pgnt->pd_gnt_ops[idx]; 5235 op = &pgnt->pd_gnt_ops[idx];
5236 5236
5237 op->host_addr = xpmap_ptetomach(ptep); 5237 op->host_addr = xpmap_ptetomach(ptep);
5238 op->dev_bus_addr = 0; 5238 op->dev_bus_addr = 0;
5239 op->status = GNTST_general_error; 5239 op->status = GNTST_general_error;
5240 ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, 1); 5240 ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, 1);
5241 if (__predict_false(ret)) { 5241 if (__predict_false(ret)) {
5242 printf("%s: GNTTABOP_map_grant_ref failed: %d\n", 5242 printf("%s: GNTTABOP_map_grant_ref failed: %d\n",
5243 __func__, ret); 5243 __func__, ret);
5244 op->status = GNTST_general_error; 5244 op->status = GNTST_general_error;
5245 } 5245 }
5246 for (int d = 0; d < 256 && op->status == GNTST_eagain; d++) { 5246 for (int d = 0; d < 256 && op->status == GNTST_eagain; d++) {
5247 kpause("gntmap", false, mstohz(1), NULL); 5247 kpause("gntmap", false, mstohz(1), NULL);
5248 ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, 1); 5248 ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, 1);
5249 if (__predict_false(ret)) { 5249 if (__predict_false(ret)) {
5250 printf("%s: GNTTABOP_map_grant_ref failed: %d\n", 5250 printf("%s: GNTTABOP_map_grant_ref failed: %d\n",
5251 __func__, ret); 5251 __func__, ret);
5252 op->status = GNTST_general_error; 5252 op->status = GNTST_general_error;
5253 } 5253 }
5254 } 5254 }
5255 if (__predict_false(op->status != GNTST_okay)) { 5255 if (__predict_false(op->status != GNTST_okay)) {
5256 printf("%s: GNTTABOP_map_grant_ref status: %d\n", 5256 printf("%s: GNTTABOP_map_grant_ref status: %d\n",
5257 __func__, op->status); 5257 __func__, op->status);
5258 if (ptp != NULL) { 5258 if (ptp != NULL) {
5259 if (have_oldpa) { 5259 if (have_oldpa) {
5260 ptp->wire_count--; 5260 ptp->wire_count--;
5261 } 5261 }
5262 } 5262 }
5263 } else { 5263 } else {
5264 pgnt->pd_gnt_refs++; 5264 pgnt->pd_gnt_refs++;
5265 if (ptp != NULL) { 5265 if (ptp != NULL) {
5266 if (!have_oldpa) { 5266 if (!have_oldpa) {
5267 ptp->wire_count++; 5267 ptp->wire_count++;
5268 } 5268 }
5269 /* Remember minimum VA in PTP. */ 5269 /* Remember minimum VA in PTP. */
5270 pmap_ptp_range_set(ptp, va); 5270 pmap_ptp_range_set(ptp, va);
5271 } 5271 }
5272 } 5272 }
5273 5273
5274 /* 5274 /*
5275 * Done with the PTEs: they can now be unmapped. 5275 * Done with the PTEs: they can now be unmapped.
5276 */ 5276 */
5277 pmap_unmap_ptes(pmap, pmap2); 5277 pmap_unmap_ptes(pmap, pmap2);
5278 5278
5279 /* 5279 /*
5280 * Update statistics and PTP's reference count. 5280 * Update statistics and PTP's reference count.
5281 */ 5281 */
5282 pmap_stats_update_bypte(pmap, 0, opte); 5282 pmap_stats_update_bypte(pmap, 0, opte);
5283 KASSERT(ptp == NULL || ptp->wire_count >= 1); 5283 KASSERT(ptp == NULL || ptp->wire_count >= 1);
5284 5284
5285 /* 5285 /*
5286 * If old page is pv-tracked, remove pv_entry from its list. 5286 * If old page is pv-tracked, remove pv_entry from its list.
5287 */ 5287 */
5288 if ((~opte & (PTE_P | PTE_PVLIST)) == 0) { 5288 if ((~opte & (PTE_P | PTE_PVLIST)) == 0) {
5289 if ((old_pg = PHYS_TO_VM_PAGE(oldpa)) != NULL) { 5289 if ((old_pg = PHYS_TO_VM_PAGE(oldpa)) != NULL) {
5290 old_pp = VM_PAGE_TO_PP(old_pg); 5290 old_pp = VM_PAGE_TO_PP(old_pg);
5291 } else if ((old_pp = pmap_pv_tracked(oldpa)) == NULL) { 5291 } else if ((old_pp = pmap_pv_tracked(oldpa)) == NULL) {
5292 panic("%s: PTE_PVLIST with pv-untracked page" 5292 panic("%s: PTE_PVLIST with pv-untracked page"
5293 " va = %#"PRIxVADDR " pa = %#" PRIxPADDR, 5293 " va = %#"PRIxVADDR " pa = %#" PRIxPADDR,
5294 __func__, va, oldpa); 5294 __func__, va, oldpa);
5295 } 5295 }
5296 5296
5297 pmap_remove_pv(pmap, old_pp, ptp, va, old_pve, 5297 pmap_remove_pv(pmap, old_pp, ptp, va, old_pve,
5298 pmap_pte_to_pp_attrs(opte)); 5298 pmap_pte_to_pp_attrs(opte));
5299 } else { 5299 } else {
5300 KASSERT(old_pve == NULL); 5300 KASSERT(old_pve == NULL);
5301 KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL); 5301 KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL);
5302 } 5302 }
5303 5303
5304 pmap_drain_pv(pmap); 5304 pmap_drain_pv(pmap);
5305 mutex_exit(&pmap->pm_lock); 5305 mutex_exit(&pmap->pm_lock);
5306 return op->status; 5306 return op->status;
5307} 5307}
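The hypervisor can transiently answer GNTST_eagain, so pmap_enter_gnt() sleeps roughly a tick (mstohz(1)) and retries up to 256 times before treating the result as a hard error. A user-space sketch of that bounded-retry shape, with a hypothetical try_map() standing in for the GNTTABOP_map_grant_ref hypercall:

#include <errno.h>
#include <stdio.h>
#include <time.h>

#define MAX_RETRIES 256

static int transient_left = 3;		/* pretend the first tries report EAGAIN */

/* Hypothetical stand-in for the grant-table map operation. */
static int
try_map(void)
{
	return (transient_left-- > 0) ? EAGAIN : 0;
}

int
main(void)
{
	struct timespec tick = { 0, 1000000 };	/* ~1 ms, like mstohz(1) */
	int st = try_map();

	for (int d = 0; d < MAX_RETRIES && st == EAGAIN; d++) {
		nanosleep(&tick, NULL);
		st = try_map();
	}
	printf("final status: %d\n", st);
	return st != 0;
}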
5308 5308
5309/* 5309/*
5310 * pmap_remove_gnt: grant mapping removal function. 5310 * pmap_remove_gnt: grant mapping removal function.
5311 * 5311 *
5312 * => caller should not be holding any pmap locks 5312 * => caller should not be holding any pmap locks
5313 */ 5313 */
5314static void 5314static void
5315pmap_remove_gnt(struct pmap *pmap, vaddr_t sva, vaddr_t eva) 5315pmap_remove_gnt(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
5316{ 5316{
5317 struct pmap_data_gnt *pgnt; 5317 struct pmap_data_gnt *pgnt;
5318 pt_entry_t *ptes; 5318 pt_entry_t *ptes;
5319 pd_entry_t pde; 5319 pd_entry_t pde;
5320 pd_entry_t * const *pdes; 5320 pd_entry_t * const *pdes;
5321 struct vm_page *ptp; 5321 struct vm_page *ptp;
5322 struct pmap *pmap2; 5322 struct pmap *pmap2;
5323 vaddr_t va; 5323 vaddr_t va;
5324 int lvl; 5324 int lvl;
5325 int idx; 5325 int idx;
5326 struct gnttab_map_grant_ref *op; 5326 struct gnttab_map_grant_ref *op;
5327 struct gnttab_unmap_grant_ref unmap_op; 5327 struct gnttab_unmap_grant_ref unmap_op;
5328 int ret; 5328 int ret;
5329 5329
5330 KASSERT(pmap != pmap_kernel()); 5330 KASSERT(pmap != pmap_kernel());
5331 KASSERT(pmap->pm_remove == pmap_remove_gnt); 5331 KASSERT(pmap->pm_remove == pmap_remove_gnt);
5332 5332
5333 mutex_enter(&pmap->pm_lock); 5333 mutex_enter(&pmap->pm_lock);
5334 for (va = sva; va < eva; va += PAGE_SIZE) { 5334 for (va = sva; va < eva; va += PAGE_SIZE) {
5335 pgnt = pmap_find_gnt(pmap, va, va + PAGE_SIZE); 5335 pgnt = pmap_find_gnt(pmap, va, va + PAGE_SIZE);
5336 if (pgnt == NULL) { 5336 if (pgnt == NULL) {
5337 pmap_remove_locked(pmap, sva, eva); 5337 pmap_remove_locked(pmap, sva, eva);
5338 continue; 5338 continue;
5339 } 5339 }
5340 5340
5341 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); 5341 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
5342 if (!pmap_pdes_valid(va, pdes, &pde, &lvl)) { 5342 if (!pmap_pdes_valid(va, pdes, &pde, &lvl)) {
5343 panic("pmap_remove_gnt pdes not valid"); 5343 panic("pmap_remove_gnt pdes not valid");
5344 } 5344 }
5345 5345
5346 idx = (va - pgnt->pd_gnt_sva) / PAGE_SIZE; 5346 idx = (va - pgnt->pd_gnt_sva) / PAGE_SIZE;
5347 op = &pgnt->pd_gnt_ops[idx]; 5347 op = &pgnt->pd_gnt_ops[idx];
5348 KASSERT(lvl == 1); 5348 KASSERT(lvl == 1);
5349 KASSERT(op->status == GNTST_okay); 5349 KASSERT(op->status == GNTST_okay);
5350 5350
5351 /* Get PTP if non-kernel mapping. */ 5351 /* Get PTP if non-kernel mapping. */
5352 ptp = pmap_find_ptp(pmap, va, 1); 5352 ptp = pmap_find_ptp(pmap, va, 1);
5353 KASSERTMSG(ptp != NULL, 5353 KASSERTMSG(ptp != NULL,
5354 "%s: unmanaged PTP detected", __func__); 5354 "%s: unmanaged PTP detected", __func__);
5355 5355
5356 if (op->status == GNTST_okay) { 5356 if (op->status == GNTST_okay) {
5357 KASSERT(pmap_valid_entry(ptes[pl1_i(va)])); 5357 KASSERT(pmap_valid_entry(ptes[pl1_i(va)]));
5358 unmap_op.handle = op->handle; 5358 unmap_op.handle = op->handle;
5359 unmap_op.dev_bus_addr = 0; 5359 unmap_op.dev_bus_addr = 0;
5360 unmap_op.host_addr = xpmap_ptetomach(&ptes[pl1_i(va)]); 5360 unmap_op.host_addr = xpmap_ptetomach(&ptes[pl1_i(va)]);
5361 ret = HYPERVISOR_grant_table_op( 5361 ret = HYPERVISOR_grant_table_op(
5362 GNTTABOP_unmap_grant_ref, &unmap_op, 1); 5362 GNTTABOP_unmap_grant_ref, &unmap_op, 1);
5363 if (ret) { 5363 if (ret) {
5364 printf("%s: GNTTABOP_unmap_grant_ref " 5364 printf("%s: GNTTABOP_unmap_grant_ref "
5365 "failed: %d\n", __func__, ret); 5365 "failed: %d\n", __func__, ret);
5366 } 5366 }
5367 5367
5368 ptp->wire_count--; 5368 ptp->wire_count--;
5369 pgnt->pd_gnt_refs--; 5369 pgnt->pd_gnt_refs--;
5370 if (pgnt->pd_gnt_refs == 0) { 5370 if (pgnt->pd_gnt_refs == 0) {
5371 pmap_free_gnt(pmap, pgnt); 5371 pmap_free_gnt(pmap, pgnt);
5372 } 5372 }
5373 } 5373 }
5374 /* 5374 /*
5375 * if mapping removed and the PTP is no longer 5375 * if mapping removed and the PTP is no longer
5376 * being used, free it! 5376 * being used, free it!
5377 */ 5377 */
5378 5378
5379 if (ptp && ptp->wire_count <= 1) 5379 if (ptp && ptp->wire_count <= 1)
5380 pmap_free_ptp(pmap, ptp, va, ptes, pdes); 5380 pmap_free_ptp(pmap, ptp, va, ptes, pdes);
5381 pmap_unmap_ptes(pmap, pmap2); 5381 pmap_unmap_ptes(pmap, pmap2);
5382 } 5382 }
5383 mutex_exit(&pmap->pm_lock); 5383 mutex_exit(&pmap->pm_lock);
5384} 5384}
5385#endif /* XEN && DOM0OPS */ 5385#endif /* XEN && DOM0OPS */
5386 5386
5387paddr_t 5387paddr_t
5388pmap_get_physpage(void) 5388pmap_get_physpage(void)
5389{ 5389{
5390 struct vm_page *ptp; 5390 struct vm_page *ptp;
5391 struct pmap *kpm = pmap_kernel(); 5391 struct pmap *kpm = pmap_kernel();
5392 paddr_t pa; 5392 paddr_t pa;
5393 5393
5394 if (!uvm.page_init_done) { 5394 if (!uvm.page_init_done) {
5395 /* 5395 /*
5396 * We're growing the kernel pmap early (from 5396 * We're growing the kernel pmap early (from
5397 * uvm_pageboot_alloc()). This case must be 5397 * uvm_pageboot_alloc()). This case must be
5398 * handled a little differently. 5398 * handled a little differently.
5399 */ 5399 */
5400 5400
5401 if (!uvm_page_physget(&pa)) 5401 if (!uvm_page_physget(&pa))
5402 panic("%s: out of memory", __func__); 5402 panic("%s: out of memory", __func__);
5403#if defined(__HAVE_DIRECT_MAP) 5403#if defined(__HAVE_DIRECT_MAP)
5404 memset((void *)PMAP_DIRECT_MAP(pa), 0, PAGE_SIZE); 5404 memset((void *)PMAP_DIRECT_MAP(pa), 0, PAGE_SIZE);
5405#else 5405#else
5406#if defined(XENPV) 5406#if defined(XENPV)
5407 if (XEN_VERSION_SUPPORTED(3, 4)) { 5407 if (XEN_VERSION_SUPPORTED(3, 4)) {
5408 xen_pagezero(pa); 5408 xen_pagezero(pa);
5409 return pa; 5409 return pa;
5410 } 5410 }
5411#endif 5411#endif
5412 kpreempt_disable(); 5412 kpreempt_disable();
5413 pmap_pte_set(early_zero_pte, pmap_pa2pte(pa) | PTE_P | 5413 pmap_pte_set(early_zero_pte, pmap_pa2pte(pa) | PTE_P |
5414 PTE_W | pmap_pg_nx); 5414 PTE_W | pmap_pg_nx);
5415 pmap_pte_flush(); 5415 pmap_pte_flush();
5416 pmap_update_pg((vaddr_t)early_zerop); 5416 pmap_update_pg((vaddr_t)early_zerop);
5417 memset(early_zerop, 0, PAGE_SIZE); 5417 memset(early_zerop, 0, PAGE_SIZE);
5418#if defined(DIAGNOSTIC) || defined(XENPV) 5418#if defined(DIAGNOSTIC) || defined(XENPV)
5419 pmap_pte_set(early_zero_pte, 0); 5419 pmap_pte_set(early_zero_pte, 0);
5420 pmap_pte_flush(); 5420 pmap_pte_flush();
5421#endif /* defined(DIAGNOSTIC) || defined(XENPV) */ 5421#endif /* defined(DIAGNOSTIC) || defined(XENPV) */
5422 kpreempt_enable(); 5422 kpreempt_enable();
5423#endif /* defined(__HAVE_DIRECT_MAP) */ 5423#endif /* defined(__HAVE_DIRECT_MAP) */
5424 } else { 5424 } else {
5425 /* XXX */ 5425 /* XXX */
5426 ptp = uvm_pagealloc(NULL, 0, NULL, 5426 ptp = uvm_pagealloc(NULL, 0, NULL,
5427 UVM_PGA_USERESERVE|UVM_PGA_ZERO); 5427 UVM_PGA_USERESERVE|UVM_PGA_ZERO);
5428 if (ptp == NULL) 5428 if (ptp == NULL)
5429 panic("%s: out of memory", __func__); 5429 panic("%s: out of memory", __func__);
5430 ptp->flags &= ~PG_BUSY; 5430 ptp->flags &= ~PG_BUSY;
5431 ptp->wire_count = 1; 5431 ptp->wire_count = 1;
5432 pa = VM_PAGE_TO_PHYS(ptp); 5432 pa = VM_PAGE_TO_PHYS(ptp);
5433 } 5433 }
5434 pmap_stats_update(kpm, 1, 0); 5434 pmap_stats_update(kpm, 1, 0);
5435 5435
5436 return pa; 5436 return pa;
5437} 5437}
5438 5438
5439/* 5439/*
5440 * Expand the page tree with the specified amount of PTPs, mapping virtual 5440 * Expand the page tree with the specified amount of PTPs, mapping virtual
5441 * addresses starting at kva. We populate all the levels but the last one 5441 * addresses starting at kva. We populate all the levels but the last one
5442 * (L1). The nodes of the tree are created as RW, but the pages covered 5442 * (L1). The nodes of the tree are created as RW, but the pages covered
5443 * will be kentered in L1, with proper permissions. 5443 * will be kentered in L1, with proper permissions.
5444 * 5444 *
5445 * Used only by pmap_growkernel. 5445 * Used only by pmap_growkernel.
5446 */ 5446 */
5447static void 5447static void
5448pmap_alloc_level(struct pmap *cpm, vaddr_t kva, long *needed_ptps) 5448pmap_alloc_level(struct pmap *cpm, vaddr_t kva, long *needed_ptps)
5449{ 5449{
5450 unsigned long i; 5450 unsigned long i;
5451 paddr_t pa; 5451 paddr_t pa;
5452 unsigned long index, endindex; 5452 unsigned long index, endindex;
5453 int level; 5453 int level;
5454 pd_entry_t *pdep; 5454 pd_entry_t *pdep;
5455#ifdef XENPV 5455#ifdef XENPV
5456 int s = splvm(); /* protect xpq_* */ 5456 int s = splvm(); /* protect xpq_* */
5457#endif 5457#endif
5458 5458
5459 for (level = PTP_LEVELS; level > 1; level--) { 5459 for (level = PTP_LEVELS; level > 1; level--) {
5460 if (level == PTP_LEVELS) 5460 if (level == PTP_LEVELS)
5461 pdep = cpm->pm_pdir; 5461 pdep = cpm->pm_pdir;
5462 else 5462 else
5463 pdep = normal_pdes[level - 2]; 5463 pdep = normal_pdes[level - 2];
5464 index = pl_i_roundup(kva, level); 5464 index = pl_i_roundup(kva, level);
5465 endindex = index + needed_ptps[level - 1] - 1; 5465 endindex = index + needed_ptps[level - 1] - 1;
5466 5466
5467 for (i = index; i <= endindex; i++) { 5467 for (i = index; i <= endindex; i++) {
5468 pt_entry_t pte; 5468 pt_entry_t pte;
5469 5469
5470 KASSERT(!pmap_valid_entry(pdep[i])); 5470 KASSERT(!pmap_valid_entry(pdep[i]));
5471 pa = pmap_get_physpage(); 5471 pa = pmap_get_physpage();
5472 pte = pmap_pa2pte(pa) | PTE_P | PTE_W; 5472 pte = pmap_pa2pte(pa) | PTE_P | PTE_W;
5473#ifdef __x86_64__ 5473#ifdef __x86_64__
5474 pte |= pmap_pg_nx; 5474 pte |= pmap_pg_nx;
5475#endif 5475#endif
5476 pmap_pte_set(&pdep[i], pte); 5476 pmap_pte_set(&pdep[i], pte);
5477 5477
5478#ifdef XENPV 5478#ifdef XENPV
5479 if (level == PTP_LEVELS && i >= PDIR_SLOT_KERN) { 5479 if (level == PTP_LEVELS && i >= PDIR_SLOT_KERN) {
5480 if (__predict_true( 5480 if (__predict_true(
5481 cpu_info_primary.ci_flags & CPUF_PRESENT)) { 5481 cpu_info_primary.ci_flags & CPUF_PRESENT)) {
5482 /* update per-cpu PMDs on all cpus */ 5482 /* update per-cpu PMDs on all cpus */
5483 xen_kpm_sync(pmap_kernel(), i); 5483 xen_kpm_sync(pmap_kernel(), i);
5484 } else { 5484 } else {
5485 /* 5485 /*
5486 * too early; update primary CPU 5486 * too early; update primary CPU
5487 * PMD only (without locks) 5487 * PMD only (without locks)
5488 */ 5488 */
5489#ifdef __x86_64__ 5489#ifdef __x86_64__
5490 pd_entry_t *cpu_pdep = 5490 pd_entry_t *cpu_pdep =
5491 &cpu_info_primary.ci_kpm_pdir[i]; 5491 &cpu_info_primary.ci_kpm_pdir[i];
5492#else 5492#else
5493 pd_entry_t *cpu_pdep = 5493 pd_entry_t *cpu_pdep =
5494 &cpu_info_primary.ci_kpm_pdir[l2tol2(i)]; 5494 &cpu_info_primary.ci_kpm_pdir[l2tol2(i)];
5495#endif 5495#endif
5496 pmap_pte_set(cpu_pdep, pte); 5496 pmap_pte_set(cpu_pdep, pte);
5497 } 5497 }
5498 } 5498 }
5499#endif 5499#endif
5500 5500
5501 KASSERT(level != PTP_LEVELS || nkptp[level - 1] + 5501 KASSERT(level != PTP_LEVELS || nkptp[level - 1] +
5502 pl_i(VM_MIN_KERNEL_ADDRESS, level) == i); 5502 pl_i(VM_MIN_KERNEL_ADDRESS, level) == i);
5503 nkptp[level - 1]++; 5503 nkptp[level - 1]++;
5504 } 5504 }
5505 pmap_pte_flush(); 5505 pmap_pte_flush();
5506 } 5506 }
5507#ifdef XENPV 5507#ifdef XENPV
5508 splx(s); 5508 splx(s);
5509#endif 5509#endif
5510} 5510}
5511 5511
5512/* 5512/*
5513 * pmap_growkernel: increase usage of KVM space. 5513 * pmap_growkernel: increase usage of KVM space.
5514 * 5514 *
5515 * => we allocate new PTPs for the kernel and install them in all 5515 * => we allocate new PTPs for the kernel and install them in all
5516 * the pmaps on the system. 5516 * the pmaps on the system.
5517 */ 5517 */
5518vaddr_t 5518vaddr_t
5519pmap_growkernel(vaddr_t maxkvaddr) 5519pmap_growkernel(vaddr_t maxkvaddr)
5520{ 5520{
5521 struct pmap *kpm = pmap_kernel(); 5521 struct pmap *kpm = pmap_kernel();
5522 struct pmap *cpm; 5522 struct pmap *cpm;
5523#if !defined(XENPV) || !defined(__x86_64__) 5523#if !defined(XENPV) || !defined(__x86_64__)
5524 struct pmap *pm; 5524 struct pmap *pm;
5525 long old; 5525 long old;
5526#endif 5526#endif
5527 int s, i; 5527 int s, i;
5528 long needed_kptp[PTP_LEVELS], target_nptp; 5528 long needed_kptp[PTP_LEVELS], target_nptp;
5529 bool invalidate = false; 5529 bool invalidate = false;
5530 5530
5531 s = splvm(); /* to be safe */ 5531 s = splvm(); /* to be safe */
5532 mutex_enter(&kpm->pm_lock); 5532 mutex_enter(&kpm->pm_lock);
5533 5533
5534 if (maxkvaddr <= pmap_maxkvaddr) { 5534 if (maxkvaddr <= pmap_maxkvaddr) {
5535 mutex_exit(&kpm->pm_lock); 5535 mutex_exit(&kpm->pm_lock);
5536 splx(s); 5536 splx(s);
5537 return pmap_maxkvaddr; 5537 return pmap_maxkvaddr;
5538 } 5538 }
5539 5539
5540 maxkvaddr = x86_round_pdr(maxkvaddr); 5540 maxkvaddr = x86_round_pdr(maxkvaddr);
5541#if !defined(XENPV) || !defined(__x86_64__) 5541#if !defined(XENPV) || !defined(__x86_64__)
5542 old = nkptp[PTP_LEVELS - 1]; 5542 old = nkptp[PTP_LEVELS - 1];
5543#endif 5543#endif
5544 5544
5545 /* Initialize needed_kptp. */ 5545 /* Initialize needed_kptp. */
5546 for (i = PTP_LEVELS - 1; i >= 1; i--) { 5546 for (i = PTP_LEVELS - 1; i >= 1; i--) {
5547 target_nptp = pl_i_roundup(maxkvaddr, i + 1) - 5547 target_nptp = pl_i_roundup(maxkvaddr, i + 1) -
5548 pl_i_roundup(VM_MIN_KERNEL_ADDRESS, i + 1); 5548 pl_i_roundup(VM_MIN_KERNEL_ADDRESS, i + 1);
5549 5549
5550 if (target_nptp > nkptpmax[i]) 5550 if (target_nptp > nkptpmax[i])
5551 panic("out of KVA space"); 5551 panic("out of KVA space");
5552 KASSERT(target_nptp >= nkptp[i]); 5552 KASSERT(target_nptp >= nkptp[i]);
5553 needed_kptp[i] = target_nptp - nkptp[i]; 5553 needed_kptp[i] = target_nptp - nkptp[i];
5554 } 5554 }
5555 5555
5556#ifdef XENPV 5556#ifdef XENPV
5557 /* only pmap_kernel(), or the per-cpu map, has kernel entries */ 5557 /* only pmap_kernel(), or the per-cpu map, has kernel entries */
5558 cpm = kpm; 5558 cpm = kpm;
5559#else 5559#else
5560 /* Get the current pmap */ 5560 /* Get the current pmap */
5561 if (__predict_true(cpu_info_primary.ci_flags & CPUF_PRESENT)) { 5561 if (__predict_true(cpu_info_primary.ci_flags & CPUF_PRESENT)) {
5562 cpm = curcpu()->ci_pmap; 5562 cpm = curcpu()->ci_pmap;
5563 } else { 5563 } else {
5564 cpm = kpm; 5564 cpm = kpm;
5565 } 5565 }
5566#endif 5566#endif
5567 5567
5568 kasan_shadow_map((void *)pmap_maxkvaddr, 5568 kasan_shadow_map((void *)pmap_maxkvaddr,
5569 (size_t)(maxkvaddr - pmap_maxkvaddr)); 5569 (size_t)(maxkvaddr - pmap_maxkvaddr));
5570 kmsan_shadow_map((void *)pmap_maxkvaddr, 5570 kmsan_shadow_map((void *)pmap_maxkvaddr,
5571 (size_t)(maxkvaddr - pmap_maxkvaddr)); 5571 (size_t)(maxkvaddr - pmap_maxkvaddr));
5572 5572
5573 pmap_alloc_level(cpm, pmap_maxkvaddr, needed_kptp); 5573 pmap_alloc_level(cpm, pmap_maxkvaddr, needed_kptp);
5574 5574
5575 /* 5575 /*
5576 * If the number of top level entries changed, update all pmaps. 5576 * If the number of top level entries changed, update all pmaps.
5577 */ 5577 */
5578 if (needed_kptp[PTP_LEVELS - 1] != 0) { 5578 if (needed_kptp[PTP_LEVELS - 1] != 0) {
5579#ifdef XENPV 5579#ifdef XENPV
5580#ifdef __x86_64__ 5580#ifdef __x86_64__
5581 /* nothing, kernel entries are never entered in user pmap */ 5581 /* nothing, kernel entries are never entered in user pmap */
5582#else 5582#else
5583 int pdkidx; 5583 int pdkidx;
5584 5584
5585 mutex_enter(&pmaps_lock); 5585 mutex_enter(&pmaps_lock);
5586 LIST_FOREACH(pm, &pmaps, pm_list) { 5586 LIST_FOREACH(pm, &pmaps, pm_list) {
5587 for (pdkidx = PDIR_SLOT_KERN + old; 5587 for (pdkidx = PDIR_SLOT_KERN + old;
5588 pdkidx < PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1]; 5588 pdkidx < PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1];
5589 pdkidx++) { 5589 pdkidx++) {
5590 pmap_pte_set(&pm->pm_pdir[pdkidx], 5590 pmap_pte_set(&pm->pm_pdir[pdkidx],
5591 kpm->pm_pdir[pdkidx]); 5591 kpm->pm_pdir[pdkidx]);
5592 } 5592 }
5593 pmap_pte_flush(); 5593 pmap_pte_flush();
5594 } 5594 }
5595 mutex_exit(&pmaps_lock); 5595 mutex_exit(&pmaps_lock);
5596#endif /* __x86_64__ */ 5596#endif /* __x86_64__ */
5597#else /* XENPV */ 5597#else /* XENPV */
5598 size_t newpdes; 5598 size_t newpdes;
5599 newpdes = nkptp[PTP_LEVELS - 1] - old; 5599 newpdes = nkptp[PTP_LEVELS - 1] - old;
5600 if (cpm != kpm) { 5600 if (cpm != kpm) {
5601 memcpy(&kpm->pm_pdir[PDIR_SLOT_KERN + old], 5601 memcpy(&kpm->pm_pdir[PDIR_SLOT_KERN + old],
5602 &cpm->pm_pdir[PDIR_SLOT_KERN + old], 5602 &cpm->pm_pdir[PDIR_SLOT_KERN + old],
5603 newpdes * sizeof(pd_entry_t)); 5603 newpdes * sizeof(pd_entry_t));
5604 } 5604 }
5605 5605
5606 mutex_enter(&pmaps_lock); 5606 mutex_enter(&pmaps_lock);
5607 LIST_FOREACH(pm, &pmaps, pm_list) { 5607 LIST_FOREACH(pm, &pmaps, pm_list) {
5608 if (__predict_false(pm->pm_enter != NULL)) { 5608 if (__predict_false(pm->pm_enter != NULL)) {
5609 /* 5609 /*
5610 * Not a native pmap, the kernel is not mapped, 5610 * Not a native pmap, the kernel is not mapped,
5611 * so nothing to synchronize. 5611 * so nothing to synchronize.
5612 */ 5612 */
5613 continue; 5613 continue;
5614 } 5614 }
5615 memcpy(&pm->pm_pdir[PDIR_SLOT_KERN + old], 5615 memcpy(&pm->pm_pdir[PDIR_SLOT_KERN + old],
5616 &kpm->pm_pdir[PDIR_SLOT_KERN + old], 5616 &kpm->pm_pdir[PDIR_SLOT_KERN + old],
5617 newpdes * sizeof(pd_entry_t)); 5617 newpdes * sizeof(pd_entry_t));
5618 } 5618 }
5619 mutex_exit(&pmaps_lock); 5619 mutex_exit(&pmaps_lock);
5620#endif 5620#endif
5621 invalidate = true; 5621 invalidate = true;
5622 } 5622 }
5623 pmap_maxkvaddr = maxkvaddr; 5623 pmap_maxkvaddr = maxkvaddr;
5624 mutex_exit(&kpm->pm_lock); 5624 mutex_exit(&kpm->pm_lock);
5625 splx(s); 5625 splx(s);
5626 5626
5627 if (invalidate && pmap_initialized) { 5627 if (invalidate && pmap_initialized) {
5628 /* Invalidate the pmap cache. */ 5628 /* Invalidate the pmap cache. */
5629 pool_cache_invalidate(&pmap_cache); 5629 pool_cache_invalidate(&pmap_cache);
5630 } 5630 }
5631 5631
5632 return maxkvaddr; 5632 return maxkvaddr;
5633} 5633}
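pmap_growkernel() sizes the expansion per level by subtracting rounded-up slot indices (pl_i_roundup() of the new and old limits). A sketch of that arithmetic for a hypothetical 4-level, 512-entry-per-level layout; roundup_index() is an illustrative stand-in, not the real pl_i_roundup():

#include <stdint.h>
#include <stdio.h>

#define PGSHIFT		12
#define LVL_SHIFT(lvl)	(PGSHIFT + 9 * (lvl))	/* bytes spanned by one level-lvl PTP */

/* Rounded-up count of level-lvl PTPs needed to reach va. */
static long long
roundup_index(uint64_t va, int lvl)
{
	return (long long)((va + (1ULL << LVL_SHIFT(lvl)) - 1) >> LVL_SHIFT(lvl));
}

int
main(void)
{
	uint64_t kva_min = 0xffffffff80000000ULL;	/* hypothetical kernel base */
	uint64_t old_max = kva_min + (64ULL << 20);	/* currently mapped: 64 MiB */
	uint64_t new_max = kva_min + (256ULL << 20);	/* requested maxkvaddr: 256 MiB */

	for (int lvl = 1; lvl < 4; lvl++) {
		long long need = roundup_index(new_max, lvl) -
		    roundup_index(old_max, lvl);
		printf("level %d: %lld new PTP(s)\n", lvl, need);
	}
	return 0;
}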
5634 5634
5635#ifdef DEBUG 5635#ifdef DEBUG
5636void pmap_dump(struct pmap *, vaddr_t, vaddr_t); 5636void pmap_dump(struct pmap *, vaddr_t, vaddr_t);
5637 5637
5638/* 5638/*
5639 * pmap_dump: dump all the mappings from a pmap 5639 * pmap_dump: dump all the mappings from a pmap
5640 * 5640 *
5641 * => caller should not be holding any pmap locks 5641 * => caller should not be holding any pmap locks
5642 */ 5642 */
5643void 5643void
5644pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva) 5644pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
5645{ 5645{
5646 pt_entry_t *ptes, *pte; 5646 pt_entry_t *ptes, *pte;
5647 pd_entry_t * const *pdes; 5647 pd_entry_t * const *pdes;
5648 struct pmap *pmap2; 5648 struct pmap *pmap2;
5649 vaddr_t blkendva; 5649 vaddr_t blkendva;
5650 int lvl; 5650 int lvl;
5651 5651
5652 /* 5652 /*
5653 * if end is out of range truncate. 5653 * if end is out of range truncate.
5654 * if (end <= start) update to max. 5654 * if (end <= start) update to max.
5655 */ 5655 */
5656 5656
5657 if (eva > VM_MAXUSER_ADDRESS || eva <= sva) 5657 if (eva > VM_MAXUSER_ADDRESS || eva <= sva)
5658 eva = VM_MAXUSER_ADDRESS; 5658 eva = VM_MAXUSER_ADDRESS;
5659 5659
5660 mutex_enter(&pmap->pm_lock); 5660 mutex_enter(&pmap->pm_lock);
5661 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); 5661 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
5662 5662
5663 /* 5663 /*
5664 * dumping a range of pages: we dump in PTP sized blocks (2MB/4MB) 5664 * dumping a range of pages: we dump in PTP sized blocks (2MB/4MB)
5665 */ 5665 */
5666 5666
5667 for (/* null */ ; sva < eva ; sva = blkendva) { 5667 for (/* null */ ; sva < eva ; sva = blkendva) {
5668 5668
5669 /* determine range of block */ 5669 /* determine range of block */
5670 blkendva = x86_round_pdr(sva+1); 5670 blkendva = x86_round_pdr(sva+1);
5671 if (blkendva > eva) 5671 if (blkendva > eva)
5672 blkendva = eva; 5672 blkendva = eva;
5673 5673
5674 /* valid block? */ 5674 /* valid block? */
5675 if (!pmap_pdes_valid(sva, pdes, NULL, &lvl)) 5675 if (!pmap_pdes_valid(sva, pdes, NULL, &lvl))
5676 continue; 5676 continue;
5677 KASSERT(lvl == 1); 5677 KASSERT(lvl == 1);
5678 5678
5679 pte = &ptes[pl1_i(sva)]; 5679 pte = &ptes[pl1_i(sva)];
5680 for (/* null */; sva < blkendva ; sva += PAGE_SIZE, pte++) { 5680 for (/* null */; sva < blkendva ; sva += PAGE_SIZE, pte++) {
5681 if (!pmap_valid_entry(*pte)) 5681 if (!pmap_valid_entry(*pte))
5682 continue; 5682 continue;
5683 printf("va %#" PRIxVADDR " -> pa %#" PRIxPADDR 5683 printf("va %#" PRIxVADDR " -> pa %#" PRIxPADDR
5684 " (pte=%#" PRIxPADDR ")\n", 5684 " (pte=%#" PRIxPADDR ")\n",
5685 sva, (paddr_t)pmap_pte2pa(*pte), (paddr_t)*pte); 5685 sva, (paddr_t)pmap_pte2pa(*pte), (paddr_t)*pte);
5686 } 5686 }
5687 } 5687 }
5688 pmap_unmap_ptes(pmap, pmap2); 5688 pmap_unmap_ptes(pmap, pmap2);
5689 mutex_exit(&pmap->pm_lock); 5689 mutex_exit(&pmap->pm_lock);
5690} 5690}
5691#endif 5691#endif
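pmap_dump() walks the range one PTP-sized block at a time, rounding sva + 1 up to the next block boundary on each pass. A tiny sketch of that loop with a hypothetical round_pdr() helper and a 2 MiB block size:

#include <stdint.h>
#include <stdio.h>

#define BLKSIZE (1UL << 21)			/* pretend one PTP covers 2 MiB */

/* Round up to a block boundary, in the style of x86_round_pdr(). */
static uintptr_t
round_pdr(uintptr_t va)
{
	return (va + BLKSIZE - 1) & ~(uintptr_t)(BLKSIZE - 1);
}

int
main(void)
{
	uintptr_t sva = 0x1ff000, eva = 0x600000, blkendva;

	for (/* null */; sva < eva; sva = blkendva) {
		blkendva = round_pdr(sva + 1);	/* end of the block holding sva */
		if (blkendva > eva)
			blkendva = eva;
		printf("block [%#lx, %#lx)\n", (unsigned long)sva,
		    (unsigned long)blkendva);
	}
	return 0;
}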
5692 5692
5693/* 5693/*
5694 * pmap_update: process deferred invalidations and frees. 5694 * pmap_update: process deferred invalidations and frees.
5695 */ 5695 */
5696void 5696void
5697pmap_update(struct pmap *pmap) 5697pmap_update(struct pmap *pmap)
5698{ 5698{
5699 struct pmap_page *pp; 5699 struct pmap_page *pp;
5700 struct vm_page *ptp; 5700 struct vm_page *ptp;
5701 5701
5702 /* 5702 /*
5703 * Initiate any pending TLB shootdowns. Wait for them to 5703 * Initiate any pending TLB shootdowns. Wait for them to
5704 * complete before returning control to the caller. 5704 * complete before returning control to the caller.
5705 */ 5705 */
5706 kpreempt_disable(); 5706 kpreempt_disable();
5707 pmap_tlb_shootnow(); 5707 pmap_tlb_shootnow();
5708 kpreempt_enable(); 5708 kpreempt_enable();
5709 5709
5710 /* 5710 /*
5711 * Now that shootdowns are complete, process deferred frees. This 5711 * Now that shootdowns are complete, process deferred frees. This
5712 * is an unlocked check, but is safe as we're only interested in 5712 * is an unlocked check, but is safe as we're only interested in
5713 * work done in this LWP - we won't get a false negative. 5713 * work done in this LWP - we won't get a false negative.
5714 */ 5714 */
5715 if (atomic_load_relaxed(&pmap->pm_gc_ptp.lh_first) == NULL) { 5715 if (atomic_load_relaxed(&pmap->pm_gc_ptp.lh_first) == NULL) {
5716 return; 5716 return;
5717 } 5717 }
5718 5718
5719 mutex_enter(&pmap->pm_lock); 5719 mutex_enter(&pmap->pm_lock);
5720 while ((ptp = LIST_FIRST(&pmap->pm_gc_ptp)) != NULL) { 5720 while ((ptp = LIST_FIRST(&pmap->pm_gc_ptp)) != NULL) {
5721 KASSERT(ptp->wire_count == 0); 5721 KASSERT(ptp->wire_count == 0);
5722 KASSERT(ptp->uanon == NULL); 5722 KASSERT(ptp->uanon == NULL);
5723 LIST_REMOVE(ptp, mdpage.mp_pp.pp_link); 5723 LIST_REMOVE(ptp, mdpage.mp_pp.pp_link);
5724 pp = VM_PAGE_TO_PP(ptp); 5724 pp = VM_PAGE_TO_PP(ptp);
5725 LIST_INIT(&pp->pp_pvlist); 5725 LIST_INIT(&pp->pp_pvlist);
5726 pp->pp_attrs = 0; 5726 pp->pp_attrs = 0;
5727 pp->pp_pte.pte_ptp = NULL; 5727 pp->pp_pte.pte_ptp = NULL;
5728 pp->pp_pte.pte_va = 0; 5728 pp->pp_pte.pte_va = 0;
5729 PMAP_CHECK_PP(VM_PAGE_TO_PP(ptp)); 5729 PMAP_CHECK_PP(VM_PAGE_TO_PP(ptp));
5730 5730
5731 /* 5731 /*
5732 * XXX Hack to avoid extra locking, and lock 5732 * XXX Hack to avoid extra locking, and lock
5733 * assertions in uvm_pagefree(). Despite uobject 5733 * assertions in uvm_pagefree(). Despite uobject
5734 * being set, this isn't a managed page. 5734 * being set, this isn't a managed page.
5735 */ 5735 */
5736 PMAP_DUMMY_LOCK(pmap); 5736 PMAP_DUMMY_LOCK(pmap);
5737 uvm_pagerealloc(ptp, NULL, 0); 5737 uvm_pagerealloc(ptp, NULL, 0);
5738 PMAP_DUMMY_UNLOCK(pmap); 5738 PMAP_DUMMY_UNLOCK(pmap);
5739 uvm_pagefree(ptp); 5739 uvm_pagefree(ptp);
5740 } 5740 }
5741 mutex_exit(&pmap->pm_lock); 5741 mutex_exit(&pmap->pm_lock);
5742} 5742}
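pmap_update() peeks at the deferred-free list without the lock and only takes pm_lock when there is work to drain; the relaxed check is safe because the caller only cares about work it queued itself. A sketch of that "unlocked peek, then locked drain" shape using pthreads and a hypothetical node list (not the kernel's LIST_* macros):

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct node {
	struct node *next;
};

static _Atomic(struct node *) gc_head;
static pthread_mutex_t gc_lock = PTHREAD_MUTEX_INITIALIZER;

static void
drain_deferred(void)
{
	struct node *n;

	/* Cheap unlocked peek: nothing queued means nothing to do. */
	if (atomic_load_explicit(&gc_head, memory_order_relaxed) == NULL)
		return;

	pthread_mutex_lock(&gc_lock);
	while ((n = atomic_load(&gc_head)) != NULL) {
		atomic_store(&gc_head, n->next);
		free(n);			/* the deferred work */
	}
	pthread_mutex_unlock(&gc_lock);
}

int
main(void)
{
	struct node *n = calloc(1, sizeof(*n));

	atomic_store(&gc_head, n);		/* queue one deferred item */
	drain_deferred();
	return 0;
}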
5743 5743
5744#if PTP_LEVELS > 4 5744#if PTP_LEVELS > 4
5745#error "Unsupported number of page table mappings" 5745#error "Unsupported number of page table mappings"
5746#endif 5746#endif
5747 5747
5748paddr_t 5748paddr_t
5749pmap_init_tmp_pgtbl(paddr_t pg) 5749pmap_init_tmp_pgtbl(paddr_t pg)
5750{ 5750{
5751 static bool maps_loaded; 5751 static bool maps_loaded;
5752 static const paddr_t x86_tmp_pml_paddr[] = { 5752 static const paddr_t x86_tmp_pml_paddr[] = {
5753 4 * PAGE_SIZE, /* L1 */ 5753 4 * PAGE_SIZE, /* L1 */
5754 5 * PAGE_SIZE, /* L2 */ 5754 5 * PAGE_SIZE, /* L2 */
5755 6 * PAGE_SIZE, /* L3 */ 5755 6 * PAGE_SIZE, /* L3 */
5756 7 * PAGE_SIZE /* L4 */ 5756 7 * PAGE_SIZE /* L4 */
5757 }; 5757 };
5758 static vaddr_t x86_tmp_pml_vaddr[] = { 0, 0, 0, 0 }; 5758 static vaddr_t x86_tmp_pml_vaddr[] = { 0, 0, 0, 0 };
5759 5759
5760 pd_entry_t *tmp_pml, *kernel_pml; 5760 pd_entry_t *tmp_pml, *kernel_pml;
5761 5761
5762 int level; 5762 int level;
5763 5763
5764 if (!maps_loaded) { 5764 if (!maps_loaded) {
5765 for (level = 0; level < PTP_LEVELS; ++level) { 5765 for (level = 0; level < PTP_LEVELS; ++level) {
5766 x86_tmp_pml_vaddr[level] = 5766 x86_tmp_pml_vaddr[level] =
5767 uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 5767 uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
5768 UVM_KMF_VAONLY); 5768 UVM_KMF_VAONLY);
5769 5769
5770 if (x86_tmp_pml_vaddr[level] == 0) 5770 if (x86_tmp_pml_vaddr[level] == 0)
5771 panic("mapping of real mode PML failed\n"); 5771 panic("mapping of real mode PML failed\n");
5772 pmap_kenter_pa(x86_tmp_pml_vaddr[level], 5772 pmap_kenter_pa(x86_tmp_pml_vaddr[level],
5773 x86_tmp_pml_paddr[level], 5773 x86_tmp_pml_paddr[level],
5774 VM_PROT_READ | VM_PROT_WRITE, 0); 5774 VM_PROT_READ | VM_PROT_WRITE, 0);
5775 } 5775 }
5776 pmap_update(pmap_kernel()); 5776 pmap_update(pmap_kernel());
5777 maps_loaded = true; 5777 maps_loaded = true;
5778 } 5778 }
5779 5779
5780 /* Zero levels 1-3 */ 5780 /* Zero levels 1-3 */
5781 for (level = 0; level < PTP_LEVELS - 1; ++level) { 5781 for (level = 0; level < PTP_LEVELS - 1; ++level) {
5782 tmp_pml = (void *)x86_tmp_pml_vaddr[level]; 5782 tmp_pml = (void *)x86_tmp_pml_vaddr[level];
5783 memset(tmp_pml, 0, PAGE_SIZE); 5783 memset(tmp_pml, 0, PAGE_SIZE);
5784 } 5784 }
5785 5785
5786 /* Copy PML4 */ 5786 /* Copy PML4 */
5787 kernel_pml = pmap_kernel()->pm_pdir; 5787 kernel_pml = pmap_kernel()->pm_pdir;
5788 tmp_pml = (void *)x86_tmp_pml_vaddr[PTP_LEVELS - 1]; 5788 tmp_pml = (void *)x86_tmp_pml_vaddr[PTP_LEVELS - 1];
5789 memcpy(tmp_pml, kernel_pml, PAGE_SIZE); 5789 memcpy(tmp_pml, kernel_pml, PAGE_SIZE);
5790 5790
5791#ifdef PAE 5791#ifdef PAE
5792 /* 5792 /*
5793 * Use the last 4 entries of the L2 page as L3 PD entries. These 5793 * Use the last 4 entries of the L2 page as L3 PD entries. These
5794 * last entries are unlikely to be used for temporary mappings. 5794 * last entries are unlikely to be used for temporary mappings.
5795 * 508: maps 0->1GB (userland) 5795 * 508: maps 0->1GB (userland)
5796 * 509: unused 5796 * 509: unused
5797 * 510: unused 5797 * 510: unused
5798 * 511: maps 3->4GB (kernel) 5798 * 511: maps 3->4GB (kernel)
5799 */ 5799 */
5800 tmp_pml[508] = x86_tmp_pml_paddr[PTP_LEVELS - 1] | PTE_P; 5800 tmp_pml[508] = x86_tmp_pml_paddr[PTP_LEVELS - 1] | PTE_P;
5801 tmp_pml[509] = 0; 5801 tmp_pml[509] = 0;
5802 tmp_pml[510] = 0; 5802 tmp_pml[510] = 0;
5803 tmp_pml[511] = pmap_pdirpa(pmap_kernel(), PDIR_SLOT_KERN) | PTE_P; 5803 tmp_pml[511] = pmap_pdirpa(pmap_kernel(), PDIR_SLOT_KERN) | PTE_P;
5804#endif 5804#endif
5805 5805
5806 for (level = PTP_LEVELS - 1; level > 0; --level) { 5806 for (level = PTP_LEVELS - 1; level > 0; --level) {
5807 tmp_pml = (void *)x86_tmp_pml_vaddr[level]; 5807 tmp_pml = (void *)x86_tmp_pml_vaddr[level];
5808 5808
5809 tmp_pml[pl_i(pg, level + 1)] = 5809 tmp_pml[pl_i(pg, level + 1)] =
5810 (x86_tmp_pml_paddr[level - 1] & PTE_FRAME) | PTE_W | PTE_P; 5810 (x86_tmp_pml_paddr[level - 1] & PTE_FRAME) | PTE_W | PTE_P;
5811 } 5811 }
5812 5812
5813 tmp_pml = (void *)x86_tmp_pml_vaddr[0]; 5813 tmp_pml = (void *)x86_tmp_pml_vaddr[0];
5814 tmp_pml[pl_i(pg, 1)] = (pg & PTE_FRAME) | PTE_W | PTE_P; 5814 tmp_pml[pl_i(pg, 1)] = (pg & PTE_FRAME) | PTE_W | PTE_P;
5815 5815
5816#ifdef PAE 5816#ifdef PAE
5817 /* Return the PA of the L3 page (entry 508 of the L2 page) */ 5817 /* Return the PA of the L3 page (entry 508 of the L2 page) */
5818 return x86_tmp_pml_paddr[PTP_LEVELS - 1] + 508 * sizeof(pd_entry_t); 5818 return x86_tmp_pml_paddr[PTP_LEVELS - 1] + 508 * sizeof(pd_entry_t);
5819#endif 5819#endif
5820 5820
5821 return x86_tmp_pml_paddr[PTP_LEVELS - 1]; 5821 return x86_tmp_pml_paddr[PTP_LEVELS - 1];
5822} 5822}
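Each pass of the loop above stores the physical address of the next-lower table into the slot selected by pl_i(pg, level + 1). A sketch of how an address picks a slot at each level for a hypothetical 4-level, 512-entry layout; slot_index() is an illustrative stand-in for pl_i():

#include <stdint.h>
#include <stdio.h>

/* Level 1 selects the leaf PTE, level 4 the topmost directory slot. */
static unsigned
slot_index(uint64_t va, int level)
{
	return (unsigned)((va >> (12 + 9 * (level - 1))) & 0x1ff);
}

int
main(void)
{
	uint64_t pg = 0x7000;			/* hypothetical low-memory page */

	for (int level = 4; level >= 1; level--)
		printf("level %d slot: %u\n", level, slot_index(pg, level));
	return 0;
}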
5823 5823
5824u_int 5824u_int
5825x86_mmap_flags(paddr_t mdpgno) 5825x86_mmap_flags(paddr_t mdpgno)
5826{ 5826{
5827 u_int nflag = (mdpgno >> X86_MMAP_FLAG_SHIFT) & X86_MMAP_FLAG_MASK; 5827 u_int nflag = (mdpgno >> X86_MMAP_FLAG_SHIFT) & X86_MMAP_FLAG_MASK;
5828 u_int pflag = 0; 5828 u_int pflag = 0;
5829 5829
5830 if (nflag & X86_MMAP_FLAG_PREFETCH) 5830 if (nflag & X86_MMAP_FLAG_PREFETCH)
5831 pflag |= PMAP_WRITE_COMBINE; 5831 pflag |= PMAP_WRITE_COMBINE;
5832 5832
5833 return pflag; 5833 return pflag;
5834} 5834}
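x86_mmap_flags() recovers flag bits that were packed into the high bits of a page-number cookie. A small sketch of that pack/unpack convention using hypothetical FLAG_SHIFT/FLAG_MASK values (the real X86_MMAP_FLAG_* constants live in the x86 headers):

#include <assert.h>
#include <stdint.h>

#define FLAG_SHIFT	56			/* hypothetical, not the real value */
#define FLAG_MASK	0xffu
#define FLAG_PREFETCH	0x01u

int
main(void)
{
	uint64_t pgno = 0x1234;
	uint64_t cookie = pgno | ((uint64_t)FLAG_PREFETCH << FLAG_SHIFT);
	unsigned flags = (unsigned)((cookie >> FLAG_SHIFT) & FLAG_MASK);

	assert(flags & FLAG_PREFETCH);		/* would map to PMAP_WRITE_COMBINE */
	assert((cookie & ((1ULL << FLAG_SHIFT) - 1)) == pgno);
	return 0;
}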

#if defined(__HAVE_DIRECT_MAP) && defined(__x86_64__) && !defined(XENPV)

/*
 * -----------------------------------------------------------------------------
 * *****************************************************************************
 * *****************************************************************************
 * *****************************************************************************
 * *****************************************************************************
 * **************** HERE BEGINS THE EPT CODE, USED BY INTEL-VMX ****************
 * *****************************************************************************
 * *****************************************************************************
 * *****************************************************************************
 * *****************************************************************************
 * -----------------------------------------------------------------------------
 *
 * These functions are invoked as callbacks from the code above. Contrary to
 * native, EPT does not have a recursive slot; therefore, it is not possible
 * to call pmap_map_ptes(). Instead, we use the direct map and walk down the
 * tree manually.
 *
 * Apart from that, the logic is mostly the same as native. Once a pmap has
 * been created, NVMM calls pmap_ept_transform() to make it an EPT pmap.
 * After that we're good, and the callbacks will handle the translations
 * for us.
 *
 * -----------------------------------------------------------------------------
 */
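
Concretely, "walk down the tree manually" means starting from pm_pdirpa[0], mapping each level's page through the direct map, and indexing it with pl_pi(), as the helpers below do. A condensed sketch of that lookup (illustrative only; it assumes every intermediate level is already present, which the real code checks):

/*
 * Illustrative walk from the EPT root to the L1 PTE for 'va', using the
 * direct map instead of a recursive slot.  Mirrors the loops in
 * pmap_ept_get_tree()/pmap_ept_install_ptp() below; error handling elided.
 */
static pt_entry_t *
ept_lookup_pte_sketch(struct pmap *pmap, vaddr_t va)
{
	paddr_t pa = pmap->pm_pdirpa[0];
	pt_entry_t *lvl;
	int i;

	for (i = PTP_LEVELS; i > 1; i--) {
		lvl = (pt_entry_t *)PMAP_DIRECT_MAP(pa);
		pa = pmap_pte2pa(lvl[pl_pi(va, i)]);	/* descend one level */
	}
	lvl = (pt_entry_t *)PMAP_DIRECT_MAP(pa);
	return &lvl[pl_pi(va, 1)];			/* L1 slot for va */
}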

/* Hardware bits. */
#define EPT_R		__BIT(0)	/* read */
#define EPT_W		__BIT(1)	/* write */
#define EPT_X		__BIT(2)	/* execute */
#define EPT_T		__BITS(5,3)	/* type */
#define TYPE_UC		0
#define TYPE_WC		1
#define TYPE_WT		4
#define TYPE_WP		5
#define TYPE_WB		6
#define EPT_NOPAT	__BIT(6)
#define EPT_L		__BIT(7)	/* large */
#define EPT_A		__BIT(8)	/* accessed */
#define EPT_D		__BIT(9)	/* dirty */
/* Software bits. */
#define EPT_PVLIST	__BIT(60)
#define EPT_WIRED	__BIT(61)

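Putting the bits together, a leaf EPT entry is the physical frame plus permissions, a memory type, and the ignore-PAT bit. A minimal sketch of a writable, executable, write-back 4 KB entry built from the macros above (illustrative; the real entries are assembled in pmap_ept_enter() further below):

/*
 * Illustrative composition of a leaf EPT entry: frame address, RWX
 * permissions, write-back memory type, ignore-PAT.  Not a new API --
 * only the macros defined above.
 */
static pt_entry_t
ept_leaf_example(paddr_t pa)
{
	return (pa & PTE_FRAME) | EPT_R | EPT_W | EPT_X |
	    __SHIFTIN(TYPE_WB, EPT_T) | EPT_NOPAT;
}
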
#define pmap_ept_valid_entry(pte)	(pte & EPT_R)

bool pmap_ept_has_ad __read_mostly;

static inline void
pmap_ept_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte)
{
	int resid_diff = ((npte & EPT_R) ? 1 : 0) - ((opte & EPT_R) ? 1 : 0);
	int wired_diff = ((npte & EPT_WIRED) ? 1 : 0) - ((opte & EPT_WIRED) ? 1 : 0);

	KASSERT((npte & (EPT_R | EPT_WIRED)) != EPT_WIRED);
	KASSERT((opte & (EPT_R | EPT_WIRED)) != EPT_WIRED);

	pmap_stats_update(pmap, resid_diff, wired_diff);
}

static pt_entry_t
pmap_ept_type(u_int flags)
{
	u_int cacheflags = (flags & PMAP_CACHE_MASK);
	pt_entry_t ret;

	switch (cacheflags) {
	case PMAP_NOCACHE:
	case PMAP_NOCACHE_OVR:
		ret = __SHIFTIN(TYPE_UC, EPT_T);
		break;
	case PMAP_WRITE_COMBINE:
		ret = __SHIFTIN(TYPE_WC, EPT_T);
		break;
	case PMAP_WRITE_BACK:
	default:
		ret = __SHIFTIN(TYPE_WB, EPT_T);
		break;
	}

	ret |= EPT_NOPAT;
	return ret;
}
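
In other words, the PMAP cache flags select the EPT memory-type field directly, and EPT_NOPAT is always set. A small illustrative check of the write-combine case (not part of pmap.c):

/*
 * Illustrative check: PMAP_WRITE_COMBINE selects the WC memory type and
 * always sets the ignore-PAT bit.
 */
static void
ept_type_example(void)
{
	pt_entry_t t = pmap_ept_type(PMAP_WRITE_COMBINE);

	KASSERT(__SHIFTOUT(t, EPT_T) == TYPE_WC);
	KASSERT((t & EPT_NOPAT) != 0);
}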

static inline pt_entry_t
pmap_ept_prot(vm_prot_t prot)
{
	pt_entry_t res = 0;

	if (prot & VM_PROT_READ)
		res |= EPT_R;
	if (prot & VM_PROT_WRITE)
		res |= EPT_W;
	if (prot & VM_PROT_EXECUTE)
		res |= EPT_X;

	return res;
}

static inline uint8_t
pmap_ept_to_pp_attrs(pt_entry_t ept)
{
	uint8_t ret = 0;
	if (pmap_ept_has_ad) {
		if (ept & EPT_D)
			ret |= PP_ATTRS_D;
		if (ept & EPT_A)
			ret |= PP_ATTRS_A;
	} else {
		ret |= (PP_ATTRS_D|PP_ATTRS_A);
	}
	if (ept & EPT_W)
		ret |= PP_ATTRS_W;
	return ret;
}

static inline pt_entry_t
pmap_pp_attrs_to_ept(uint8_t attrs)
{
	pt_entry_t ept = 0;
	if (attrs & PP_ATTRS_D)
		ept |= EPT_D;
	if (attrs & PP_ATTRS_A)
		ept |= EPT_A;
	if (attrs & PP_ATTRS_W)
		ept |= EPT_W;
	return ept;
}
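
These two helpers translate between the EPT A/D/W bits and the generic PP_ATTRS_* flags; when the CPU lacks EPT access/dirty tracking, every valid entry is conservatively reported as both accessed and dirty. A short illustrative check (not part of pmap.c):

/*
 * Illustrative: without hardware A/D tracking (pmap_ept_has_ad == false),
 * a valid writable entry is reported as accessed, dirty, and writable,
 * so modified/referenced queries err on the safe side.
 */
static void
ept_attrs_example(void)
{
	pt_entry_t pte = EPT_R | EPT_W;		/* no EPT_A/EPT_D set */
	uint8_t attrs = pmap_ept_to_pp_attrs(pte);

	if (!pmap_ept_has_ad)
		KASSERT((attrs & (PP_ATTRS_A | PP_ATTRS_D)) ==
		    (PP_ATTRS_A | PP_ATTRS_D));
	KASSERT((attrs & PP_ATTRS_W) != 0);	/* writability recorded */
}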

/*
 * Helper for pmap_ept_free_ptp.
 * tree[0] = &L2[L2idx]
 * tree[1] = &L3[L3idx]
 * tree[2] = &L4[L4idx]
 */
static void
pmap_ept_get_tree(struct pmap *pmap, vaddr_t va, pd_entry_t **tree)
{
	pt_entry_t *pteva;
	paddr_t ptepa;
	int i, index;

	ptepa = pmap->pm_pdirpa[0];
	for (i = PTP_LEVELS; i > 1; i--) {
		index = pl_pi(va, i);
		pteva = (pt_entry_t *)PMAP_DIRECT_MAP(ptepa);
		KASSERT(pmap_ept_valid_entry(pteva[index]));
		tree[i - 2] = &pteva[index];
		ptepa = pmap_pte2pa(pteva[index]);
	}
}
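
Note the indexing convention: tree[level - 1] points at the entry in the parent of the level-'level' page-table page, which is exactly the slot pmap_ept_free_ptp() clears below. A minimal usage sketch (illustrative; it assumes the mapping exists, as the KASSERTs above require):

/*
 * Illustrative use of the tree[] convention: clear the L2 entry that maps
 * the L1 page covering 'va' (the parent slot of the level-1 PTP), the
 * same step pmap_ept_free_ptp() performs for level == 1.
 */
static void
ept_clear_l1_slot_sketch(struct pmap *pmap, vaddr_t va)
{
	pd_entry_t *tree[3];

	pmap_ept_get_tree(pmap, va, tree);
	(void)pmap_pte_testset(tree[0], 0);	/* tree[0] == &L2[L2idx] */
	pmap_pte_flush();
}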

static void
pmap_ept_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va)
{
	pd_entry_t *tree[3];
	int level;

	KASSERT(pmap != pmap_kernel());
	KASSERT(mutex_owned(&pmap->pm_lock));
	KASSERT(kpreempt_disabled());

	pmap_ept_get_tree(pmap, va, tree);

	level = 1;
	do {
		(void)pmap_pte_testset(tree[level - 1], 0);

		pmap_freepage(pmap, ptp, level);
		if (level < PTP_LEVELS - 1) {
			ptp = pmap_find_ptp(pmap, va, level + 1);
			ptp->wire_count--;
			if (ptp->wire_count > 1)
				break;
		}
	} while (++level < PTP_LEVELS);
	pmap_pte_flush();
}

/* Allocate L4->L3->L2. Return L2. */
static void
pmap_ept_install_ptp(struct pmap *pmap, struct pmap_ptparray *pt, vaddr_t va)
{
	struct vm_page *ptp;
	unsigned long index;
	pd_entry_t *pteva;
	paddr_t ptepa;
	int i;

	KASSERT(pmap != pmap_kernel());
	KASSERT(mutex_owned(&pmap->pm_lock));
	KASSERT(kpreempt_disabled());

	/*
	 * Now that we have all the pages looked up or allocated,
	 * loop through again installing any new ones into the tree.
	 */
	ptepa = pmap->pm_pdirpa[0];
	for (i = PTP_LEVELS; i > 1; i--) {
		index = pl_pi(va, i);
		pteva = (pt_entry_t *)PMAP_DIRECT_MAP(ptepa);

		if (pmap_ept_valid_entry(pteva[index])) {
			KASSERT(!pt->alloced[i]);
			ptepa = pmap_pte2pa(pteva[index]);
			continue;
		}

		ptp = pt->pg[i];
		ptp->flags &= ~PG_BUSY; /* never busy */
		ptp->wire_count = 1;
		pmap->pm_ptphint[i - 2] = ptp;
		ptepa = VM_PAGE_TO_PHYS(ptp);
		pmap_pte_set(&pteva[index], ptepa | EPT_R | EPT_W | EPT_X);

		pmap_pte_flush();
		pmap_stats_update(pmap, 1, 0);

		/*
		 * If we're not in the top level, increase the
		 * wire count of the parent page.
		 */
		if (i < PTP_LEVELS) {
			pt->pg[i + 1]->wire_count++;
		}
	}
}

static int
pmap_ept_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot,
    u_int flags)
{
	pt_entry_t *ptes, opte, npte;
	pt_entry_t *ptep;
	struct vm_page *ptp;
	struct vm_page *new_pg, *old_pg;
	struct pmap_page *new_pp, *old_pp;
	struct pv_entry *old_pve, *new_pve;
	bool wired = (flags & PMAP_WIRED) != 0;
	bool accessed;
	struct pmap_ptparray pt;
	int error;
	bool getptp, samepage, new_embedded;
	rb_tree_t *tree;

	KASSERT(pmap_initialized);
	KASSERT(va < VM_MAXUSER_ADDRESS);