Sun Aug 28 00:51:22 2011 UTC (dyoung)

Use __strict_weak_alias().
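
For context: broadly, __strict_weak_alias() keeps the convenience of __weak_alias() while also having the compiler check that the alias and its target were declared with the same type, so the two prototypes cannot silently drift apart. The sketch below illustrates that idea with plain GCC attributes and a simplified, hypothetical pmap_enter() signature; it is not the <sys/cdefs.h> definition, nor the exact change recorded in this diff.

/*
 * Illustrative sketch only -- simplified types, not the NetBSD macros.
 */
#include <stdint.h>

/* The default implementation that callers normally reach. */
int
pmap_enter_default(uintptr_t va, uintptr_t pa, unsigned int flags)
{
	/* ... establish the mapping ... */
	return 0;
}

/*
 * Plain weak alias: pmap_enter resolves to pmap_enter_default unless a
 * strong pmap_enter is linked in.  Nothing verifies that the prototype
 * written here matches the target's.
 */
int pmap_enter(uintptr_t, uintptr_t, unsigned int)
    __attribute__((__weak__, __alias__("pmap_enter_default")));

/*
 * The "strict" idea: derive the alias's declaration from the target via
 * __typeof__, so a conflicting declaration of the alias elsewhere becomes
 * a compile-time error instead of a silently mismatched symbol.
 */
__typeof__(pmap_enter_default) pmap_enter2
    __attribute__((__weak__, __alias__("pmap_enter_default")));
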
diff -r1.128 -r1.129 src/sys/arch/x86/x86/pmap.c


--- src/sys/arch/x86/x86/pmap.c 2011/08/14 02:31:08 1.128
+++ src/sys/arch/x86/x86/pmap.c 2011/08/28 00:51:21 1.129
@@ -1,4389 +1,4389 @@
 1 - /* $NetBSD: pmap.c,v 1.128 2011/08/14 02:31:08 rmind Exp $ */
 1 + /* $NetBSD: pmap.c,v 1.129 2011/08/28 00:51:21 dyoung Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc. 4 * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran. 8 * by Andrew Doran.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * Copyright (c) 2007 Manuel Bouyer. 33 * Copyright (c) 2007 Manuel Bouyer.
34 * 34 *
35 * Redistribution and use in source and binary forms, with or without 35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions 36 * modification, are permitted provided that the following conditions
37 * are met: 37 * are met:
38 * 1. Redistributions of source code must retain the above copyright 38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer. 39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright 40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the 41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution. 42 * documentation and/or other materials provided with the distribution.
43 * 43 *
44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
47 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 47 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
49 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 49 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
50 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 50 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
51 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 51 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
52 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 52 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
53 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 53 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
54 * 54 *
55 */ 55 */
56 56
57/* 57/*
58 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr> 58 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
59 * 59 *
60 * Permission to use, copy, modify, and distribute this software for any 60 * Permission to use, copy, modify, and distribute this software for any
61 * purpose with or without fee is hereby granted, provided that the above 61 * purpose with or without fee is hereby granted, provided that the above
62 * copyright notice and this permission notice appear in all copies. 62 * copyright notice and this permission notice appear in all copies.
63 * 63 *
64 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 64 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
65 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 65 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
66 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 66 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
67 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 67 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
68 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 68 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
69 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 69 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
70 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 70 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
71 */ 71 */
72 72
73/* 73/*
74 * Copyright (c) 1997 Charles D. Cranor and Washington University. 74 * Copyright (c) 1997 Charles D. Cranor and Washington University.
75 * All rights reserved. 75 * All rights reserved.
76 * 76 *
77 * Redistribution and use in source and binary forms, with or without 77 * Redistribution and use in source and binary forms, with or without
78 * modification, are permitted provided that the following conditions 78 * modification, are permitted provided that the following conditions
79 * are met: 79 * are met:
80 * 1. Redistributions of source code must retain the above copyright 80 * 1. Redistributions of source code must retain the above copyright
81 * notice, this list of conditions and the following disclaimer. 81 * notice, this list of conditions and the following disclaimer.
82 * 2. Redistributions in binary form must reproduce the above copyright 82 * 2. Redistributions in binary form must reproduce the above copyright
83 * notice, this list of conditions and the following disclaimer in the 83 * notice, this list of conditions and the following disclaimer in the
84 * documentation and/or other materials provided with the distribution. 84 * documentation and/or other materials provided with the distribution.
85 * 85 *
86 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 86 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
87 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 87 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
88 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 88 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
89 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 89 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
90 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 90 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
91 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 91 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
92 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 92 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
93 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 93 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
94 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 94 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
95 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 95 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
96 */ 96 */
97 97
98/* 98/*
99 * Copyright 2001 (c) Wasabi Systems, Inc. 99 * Copyright 2001 (c) Wasabi Systems, Inc.
100 * All rights reserved. 100 * All rights reserved.
101 * 101 *
102 * Written by Frank van der Linden for Wasabi Systems, Inc. 102 * Written by Frank van der Linden for Wasabi Systems, Inc.
103 * 103 *
104 * Redistribution and use in source and binary forms, with or without 104 * Redistribution and use in source and binary forms, with or without
105 * modification, are permitted provided that the following conditions 105 * modification, are permitted provided that the following conditions
106 * are met: 106 * are met:
107 * 1. Redistributions of source code must retain the above copyright 107 * 1. Redistributions of source code must retain the above copyright
108 * notice, this list of conditions and the following disclaimer. 108 * notice, this list of conditions and the following disclaimer.
109 * 2. Redistributions in binary form must reproduce the above copyright 109 * 2. Redistributions in binary form must reproduce the above copyright
110 * notice, this list of conditions and the following disclaimer in the 110 * notice, this list of conditions and the following disclaimer in the
111 * documentation and/or other materials provided with the distribution. 111 * documentation and/or other materials provided with the distribution.
112 * 3. All advertising materials mentioning features or use of this software 112 * 3. All advertising materials mentioning features or use of this software
113 * must display the following acknowledgement: 113 * must display the following acknowledgement:
114 * This product includes software developed for the NetBSD Project by 114 * This product includes software developed for the NetBSD Project by
115 * Wasabi Systems, Inc. 115 * Wasabi Systems, Inc.
116 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 116 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
117 * or promote products derived from this software without specific prior 117 * or promote products derived from this software without specific prior
118 * written permission. 118 * written permission.
119 * 119 *
120 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 120 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
121 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 121 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
122 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 122 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
123 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 123 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
124 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 124 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
125 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 125 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
126 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 126 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
127 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 127 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
128 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 128 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
129 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 129 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
130 * POSSIBILITY OF SUCH DAMAGE. 130 * POSSIBILITY OF SUCH DAMAGE.
131 */ 131 */
132 132
133/* 133/*
134 * This is the i386 pmap modified and generalized to support x86-64 134 * This is the i386 pmap modified and generalized to support x86-64
135 * as well. The idea is to hide the upper N levels of the page tables 135 * as well. The idea is to hide the upper N levels of the page tables
136 * inside pmap_get_ptp, pmap_free_ptp and pmap_growkernel. The rest 136 * inside pmap_get_ptp, pmap_free_ptp and pmap_growkernel. The rest
137 * is mostly untouched, except that it uses some more generalized 137 * is mostly untouched, except that it uses some more generalized
138 * macros and interfaces. 138 * macros and interfaces.
139 * 139 *
140 * This pmap has been tested on the i386 as well, and it can be easily 140 * This pmap has been tested on the i386 as well, and it can be easily
141 * adapted to PAE. 141 * adapted to PAE.
142 * 142 *
143 * fvdl@wasabisystems.com 18-Jun-2001 143 * fvdl@wasabisystems.com 18-Jun-2001
144 */ 144 */
145 145
146/* 146/*
147 * pmap.c: i386 pmap module rewrite 147 * pmap.c: i386 pmap module rewrite
148 * Chuck Cranor <chuck@netbsd> 148 * Chuck Cranor <chuck@netbsd>
149 * 11-Aug-97 149 * 11-Aug-97
150 * 150 *
151 * history of this pmap module: in addition to my own input, i used 151 * history of this pmap module: in addition to my own input, i used
152 * the following references for this rewrite of the i386 pmap: 152 * the following references for this rewrite of the i386 pmap:
153 * 153 *
154 * [1] the NetBSD i386 pmap. this pmap appears to be based on the 154 * [1] the NetBSD i386 pmap. this pmap appears to be based on the
155 * BSD hp300 pmap done by Mike Hibler at University of Utah. 155 * BSD hp300 pmap done by Mike Hibler at University of Utah.
156 * it was then ported to the i386 by William Jolitz of UUNET 156 * it was then ported to the i386 by William Jolitz of UUNET
157 * Technologies, Inc. Then Charles M. Hannum of the NetBSD 157 * Technologies, Inc. Then Charles M. Hannum of the NetBSD
158 * project fixed some bugs and provided some speed ups. 158 * project fixed some bugs and provided some speed ups.
159 * 159 *
160 * [2] the FreeBSD i386 pmap. this pmap seems to be the 160 * [2] the FreeBSD i386 pmap. this pmap seems to be the
161 * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson 161 * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson
162 * and David Greenman. 162 * and David Greenman.
163 * 163 *
164 * [3] the Mach pmap. this pmap, from CMU, seems to have migrated 164 * [3] the Mach pmap. this pmap, from CMU, seems to have migrated
165 * between several processors. the VAX version was done by 165 * between several processors. the VAX version was done by
166 * Avadis Tevanian, Jr., and Michael Wayne Young. the i386 166 * Avadis Tevanian, Jr., and Michael Wayne Young. the i386
167 * version was done by Lance Berc, Mike Kupfer, Bob Baron, 167 * version was done by Lance Berc, Mike Kupfer, Bob Baron,
168 * David Golub, and Richard Draves. the alpha version was 168 * David Golub, and Richard Draves. the alpha version was
169 * done by Alessandro Forin (CMU/Mach) and Chris Demetriou 169 * done by Alessandro Forin (CMU/Mach) and Chris Demetriou
170 * (NetBSD/alpha). 170 * (NetBSD/alpha).
171 */ 171 */
172 172
173#include <sys/cdefs.h> 173#include <sys/cdefs.h>
 174 - __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.128 2011/08/14 02:31:08 rmind Exp $");
 174 + __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.129 2011/08/28 00:51:21 dyoung Exp $");
175 175
176#include "opt_user_ldt.h" 176#include "opt_user_ldt.h"
177#include "opt_lockdebug.h" 177#include "opt_lockdebug.h"
178#include "opt_multiprocessor.h" 178#include "opt_multiprocessor.h"
179#include "opt_xen.h" 179#include "opt_xen.h"
180#if !defined(__x86_64__) 180#if !defined(__x86_64__)
181#include "opt_kstack_dr0.h" 181#include "opt_kstack_dr0.h"
182#endif /* !defined(__x86_64__) */ 182#endif /* !defined(__x86_64__) */
183 183
184#include <sys/param.h> 184#include <sys/param.h>
185#include <sys/systm.h> 185#include <sys/systm.h>
186#include <sys/proc.h> 186#include <sys/proc.h>
187#include <sys/pool.h> 187#include <sys/pool.h>
188#include <sys/kernel.h> 188#include <sys/kernel.h>
189#include <sys/atomic.h> 189#include <sys/atomic.h>
190#include <sys/cpu.h> 190#include <sys/cpu.h>
191#include <sys/intr.h> 191#include <sys/intr.h>
192#include <sys/xcall.h> 192#include <sys/xcall.h>
193 193
194#include <uvm/uvm.h> 194#include <uvm/uvm.h>
195 195
196#include <dev/isa/isareg.h> 196#include <dev/isa/isareg.h>
197 197
198#include <machine/specialreg.h> 198#include <machine/specialreg.h>
199#include <machine/gdt.h> 199#include <machine/gdt.h>
200#include <machine/isa_machdep.h> 200#include <machine/isa_machdep.h>
201#include <machine/cpuvar.h> 201#include <machine/cpuvar.h>
202 202
203#include <x86/pmap.h> 203#include <x86/pmap.h>
204#include <x86/pmap_pv.h> 204#include <x86/pmap_pv.h>
205 205
206#include <x86/i82489reg.h> 206#include <x86/i82489reg.h>
207#include <x86/i82489var.h> 207#include <x86/i82489var.h>
208 208
209#ifdef XEN 209#ifdef XEN
210#include <xen/xen3-public/xen.h> 210#include <xen/xen3-public/xen.h>
211#include <xen/hypervisor.h> 211#include <xen/hypervisor.h>
212#endif 212#endif
213 213
214/* flag to be used for kernel mappings: PG_u on Xen/amd64, 0 otherwise */ 214/* flag to be used for kernel mappings: PG_u on Xen/amd64, 0 otherwise */
215#if defined(XEN) && defined(__x86_64__) 215#if defined(XEN) && defined(__x86_64__)
216#define PG_k PG_u 216#define PG_k PG_u
217#else 217#else
218#define PG_k 0 218#define PG_k 0
219#endif 219#endif
220 220
221/* 221/*
222 * general info: 222 * general info:
223 * 223 *
224 * - for an explanation of how the i386 MMU hardware works see 224 * - for an explanation of how the i386 MMU hardware works see
225 * the comments in <machine/pte.h>. 225 * the comments in <machine/pte.h>.
226 * 226 *
227 * - for an explanation of the general memory structure used by 227 * - for an explanation of the general memory structure used by
228 * this pmap (including the recursive mapping), see the comments 228 * this pmap (including the recursive mapping), see the comments
229 * in <machine/pmap.h>. 229 * in <machine/pmap.h>.
230 * 230 *
231 * this file contains the code for the "pmap module." the module's 231 * this file contains the code for the "pmap module." the module's
232 * job is to manage the hardware's virtual to physical address mappings. 232 * job is to manage the hardware's virtual to physical address mappings.
233 * note that there are two levels of mapping in the VM system: 233 * note that there are two levels of mapping in the VM system:
234 * 234 *
235 * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's 235 * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's
236 * to map ranges of virtual address space to objects/files. for 236 * to map ranges of virtual address space to objects/files. for
237 * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only 237 * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only
238 * to the file /bin/ls starting at offset zero." note that 238 * to the file /bin/ls starting at offset zero." note that
239 * the upper layer mapping is not concerned with how individual 239 * the upper layer mapping is not concerned with how individual
240 * vm_pages are mapped. 240 * vm_pages are mapped.
241 * 241 *
242 * [2] the lower layer of the VM system (the pmap) maintains the mappings 242 * [2] the lower layer of the VM system (the pmap) maintains the mappings
243 * from virtual addresses. it is concerned with which vm_page is 243 * from virtual addresses. it is concerned with which vm_page is
244 * mapped where. for example, when you run /bin/ls and start 244 * mapped where. for example, when you run /bin/ls and start
245 * at page 0x1000 the fault routine may lookup the correct page 245 * at page 0x1000 the fault routine may lookup the correct page
246 * of the /bin/ls file and then ask the pmap layer to establish 246 * of the /bin/ls file and then ask the pmap layer to establish
247 * a mapping for it. 247 * a mapping for it.
248 * 248 *
249 * note that information in the lower layer of the VM system can be 249 * note that information in the lower layer of the VM system can be
250 * thrown away since it can easily be reconstructed from the info 250 * thrown away since it can easily be reconstructed from the info
251 * in the upper layer. 251 * in the upper layer.
252 * 252 *
253 * data structures we use include: 253 * data structures we use include:
254 * 254 *
255 * - struct pmap: describes the address space of one thread 255 * - struct pmap: describes the address space of one thread
256 * - struct pv_entry: describes one <PMAP,VA> mapping of a PA 256 * - struct pv_entry: describes one <PMAP,VA> mapping of a PA
257 * - struct pv_head: there is one pv_head per managed page of 257 * - struct pv_head: there is one pv_head per managed page of
258 * physical memory. the pv_head points to a list of pv_entry 258 * physical memory. the pv_head points to a list of pv_entry
259 * structures which describe all the <PMAP,VA> pairs that this 259 * structures which describe all the <PMAP,VA> pairs that this
260 * page is mapped in. this is critical for page based operations 260 * page is mapped in. this is critical for page based operations
261 * such as pmap_page_protect() [change protection on _all_ mappings 261 * such as pmap_page_protect() [change protection on _all_ mappings
262 * of a page] 262 * of a page]
263 */ 263 */
264 264
265/* 265/*
266 * memory allocation 266 * memory allocation
267 * 267 *
268 * - there are three data structures that we must dynamically allocate: 268 * - there are three data structures that we must dynamically allocate:
269 * 269 *
270 * [A] new process' page directory page (PDP) 270 * [A] new process' page directory page (PDP)
271 * - plan 1: done at pmap_create() we use 271 * - plan 1: done at pmap_create() we use
272 * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this 272 * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this
273 * allocation. 273 * allocation.
274 * 274 *
275 * if we are low in free physical memory then we sleep in 275 * if we are low in free physical memory then we sleep in
276 * uvm_km_alloc -- in this case this is ok since we are creating 276 * uvm_km_alloc -- in this case this is ok since we are creating
277 * a new pmap and should not be holding any locks. 277 * a new pmap and should not be holding any locks.
278 * 278 *
279 * if the kernel is totally out of virtual space 279 * if the kernel is totally out of virtual space
280 * (i.e. uvm_km_alloc returns NULL), then we panic. 280 * (i.e. uvm_km_alloc returns NULL), then we panic.
281 * 281 *
282 * [B] new page tables pages (PTP) 282 * [B] new page tables pages (PTP)
283 * - call uvm_pagealloc() 283 * - call uvm_pagealloc()
284 * => success: zero page, add to pm_pdir 284 * => success: zero page, add to pm_pdir
285 * => failure: we are out of free vm_pages, let pmap_enter() 285 * => failure: we are out of free vm_pages, let pmap_enter()
286 * tell UVM about it. 286 * tell UVM about it.
287 * 287 *
288 * note: for kernel PTPs, we start with NKPTP of them. as we map 288 * note: for kernel PTPs, we start with NKPTP of them. as we map
289 * kernel memory (at uvm_map time) we check to see if we've grown 289 * kernel memory (at uvm_map time) we check to see if we've grown
290 * the kernel pmap. if so, we call the optional function 290 * the kernel pmap. if so, we call the optional function
291 * pmap_growkernel() to grow the kernel PTPs in advance. 291 * pmap_growkernel() to grow the kernel PTPs in advance.
292 * 292 *
293 * [C] pv_entry structures 293 * [C] pv_entry structures
294 */ 294 */
295 295
296/* 296/*
297 * locking 297 * locking
298 * 298 *
299 * we have the following locks that we must contend with: 299 * we have the following locks that we must contend with:
300 * 300 *
301 * mutexes: 301 * mutexes:
302 * 302 *
303 * - pmap lock (per pmap, part of uvm_object) 303 * - pmap lock (per pmap, part of uvm_object)
304 * this lock protects the fields in the pmap structure including 304 * this lock protects the fields in the pmap structure including
305 * the non-kernel PDEs in the PDP, and the PTEs. it also locks 305 * the non-kernel PDEs in the PDP, and the PTEs. it also locks
306 * in the alternate PTE space (since that is determined by the 306 * in the alternate PTE space (since that is determined by the
307 * entry in the PDP). 307 * entry in the PDP).
308 * 308 *
309 * - pvh_lock (per pv_head) 309 * - pvh_lock (per pv_head)
310 * this lock protects the pv_entry list which is chained off the 310 * this lock protects the pv_entry list which is chained off the
311 * pv_head structure for a specific managed PA. it is locked 311 * pv_head structure for a specific managed PA. it is locked
312 * when traversing the list (e.g. adding/removing mappings, 312 * when traversing the list (e.g. adding/removing mappings,
313 * syncing R/M bits, etc.) 313 * syncing R/M bits, etc.)
314 * 314 *
315 * - pmaps_lock 315 * - pmaps_lock
316 * this lock protects the list of active pmaps (headed by "pmaps"). 316 * this lock protects the list of active pmaps (headed by "pmaps").
317 * we lock it when adding or removing pmaps from this list. 317 * we lock it when adding or removing pmaps from this list.
318 */ 318 */
319 319
320const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER; 320const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER;
321const int ptp_shifts[] = PTP_SHIFT_INITIALIZER; 321const int ptp_shifts[] = PTP_SHIFT_INITIALIZER;
322const long nkptpmax[] = NKPTPMAX_INITIALIZER; 322const long nkptpmax[] = NKPTPMAX_INITIALIZER;
323const long nbpd[] = NBPD_INITIALIZER; 323const long nbpd[] = NBPD_INITIALIZER;
324pd_entry_t * const normal_pdes[] = PDES_INITIALIZER; 324pd_entry_t * const normal_pdes[] = PDES_INITIALIZER;
325 325
326long nkptp[] = NKPTP_INITIALIZER; 326long nkptp[] = NKPTP_INITIALIZER;
327 327
328static kmutex_t pmaps_lock; 328static kmutex_t pmaps_lock;
329 329
330static vaddr_t pmap_maxkvaddr; 330static vaddr_t pmap_maxkvaddr;
331 331
332/* 332/*
333 * XXX kludge: dummy locking to make KASSERTs in uvm_page.c comfortable. 333 * XXX kludge: dummy locking to make KASSERTs in uvm_page.c comfortable.
334 * actual locking is done by pm_lock. 334 * actual locking is done by pm_lock.
335 */ 335 */
336#if defined(DIAGNOSTIC) 336#if defined(DIAGNOSTIC)
337#define PMAP_SUBOBJ_LOCK(pm, idx) \ 337#define PMAP_SUBOBJ_LOCK(pm, idx) \
338 KASSERT(mutex_owned((pm)->pm_lock)); \ 338 KASSERT(mutex_owned((pm)->pm_lock)); \
339 if ((idx) != 0) \ 339 if ((idx) != 0) \
340 mutex_enter((pm)->pm_obj[(idx)].vmobjlock) 340 mutex_enter((pm)->pm_obj[(idx)].vmobjlock)
341#define PMAP_SUBOBJ_UNLOCK(pm, idx) \ 341#define PMAP_SUBOBJ_UNLOCK(pm, idx) \
342 KASSERT(mutex_owned((pm)->pm_lock)); \ 342 KASSERT(mutex_owned((pm)->pm_lock)); \
343 if ((idx) != 0) \ 343 if ((idx) != 0) \
344 mutex_exit((pm)->pm_obj[(idx)].vmobjlock) 344 mutex_exit((pm)->pm_obj[(idx)].vmobjlock)
345#else /* defined(DIAGNOSTIC) */ 345#else /* defined(DIAGNOSTIC) */
346#define PMAP_SUBOBJ_LOCK(pm, idx) /* nothing */ 346#define PMAP_SUBOBJ_LOCK(pm, idx) /* nothing */
347#define PMAP_SUBOBJ_UNLOCK(pm, idx) /* nothing */ 347#define PMAP_SUBOBJ_UNLOCK(pm, idx) /* nothing */
348#endif /* defined(DIAGNOSTIC) */ 348#endif /* defined(DIAGNOSTIC) */
349 349
350/* 350/*
351 * Misc. event counters. 351 * Misc. event counters.
352 */ 352 */
353struct evcnt pmap_iobmp_evcnt; 353struct evcnt pmap_iobmp_evcnt;
354struct evcnt pmap_ldt_evcnt; 354struct evcnt pmap_ldt_evcnt;
355 355
356/* 356/*
357 * PAT 357 * PAT
358 */ 358 */
359#define PATENTRY(n, type) (type << ((n) * 8)) 359#define PATENTRY(n, type) (type << ((n) * 8))
360#define PAT_UC 0x0ULL 360#define PAT_UC 0x0ULL
361#define PAT_WC 0x1ULL 361#define PAT_WC 0x1ULL
362#define PAT_WT 0x4ULL 362#define PAT_WT 0x4ULL
363#define PAT_WP 0x5ULL 363#define PAT_WP 0x5ULL
364#define PAT_WB 0x6ULL 364#define PAT_WB 0x6ULL
365#define PAT_UCMINUS 0x7ULL 365#define PAT_UCMINUS 0x7ULL
366 366
367static bool cpu_pat_enabled = false; 367static bool cpu_pat_enabled = false;
368 368
369/* 369/*
370 * global data structures 370 * global data structures
371 */ 371 */
372 372
373static struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ 373static struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */
374struct pmap *const kernel_pmap_ptr = &kernel_pmap_store; 374struct pmap *const kernel_pmap_ptr = &kernel_pmap_store;
375 375
376/* 376/*
377 * pmap_pg_g: if our processor supports PG_G in the PTE then we 377 * pmap_pg_g: if our processor supports PG_G in the PTE then we
378 * set pmap_pg_g to PG_G (otherwise it is zero). 378 * set pmap_pg_g to PG_G (otherwise it is zero).
379 */ 379 */
380 380
381int pmap_pg_g = 0; 381int pmap_pg_g = 0;
382 382
383/* 383/*
384 * pmap_largepages: if our processor supports PG_PS and we are 384 * pmap_largepages: if our processor supports PG_PS and we are
385 * using it, this is set to true. 385 * using it, this is set to true.
386 */ 386 */
387 387
388int pmap_largepages; 388int pmap_largepages;
389 389
390/* 390/*
391 * i386 physical memory comes in a big contig chunk with a small 391 * i386 physical memory comes in a big contig chunk with a small
392 * hole toward the front of it... the following two paddr_t's 392 * hole toward the front of it... the following two paddr_t's
393 * (shared with machdep.c) describe the physical address space 393 * (shared with machdep.c) describe the physical address space
394 * of this machine. 394 * of this machine.
395 */ 395 */
396paddr_t avail_start; /* PA of first available physical page */ 396paddr_t avail_start; /* PA of first available physical page */
397paddr_t avail_end; /* PA of last available physical page */ 397paddr_t avail_end; /* PA of last available physical page */
398 398
399#ifdef XEN 399#ifdef XEN
400#ifdef __x86_64__ 400#ifdef __x86_64__
401/* Dummy PGD for user cr3, used between pmap_deactivate() and pmap_activate() */ 401/* Dummy PGD for user cr3, used between pmap_deactivate() and pmap_activate() */
402static paddr_t xen_dummy_user_pgd; 402static paddr_t xen_dummy_user_pgd;
403#endif /* __x86_64__ */ 403#endif /* __x86_64__ */
404paddr_t pmap_pa_start; /* PA of first physical page for this domain */ 404paddr_t pmap_pa_start; /* PA of first physical page for this domain */
405paddr_t pmap_pa_end; /* PA of last physical page for this domain */ 405paddr_t pmap_pa_end; /* PA of last physical page for this domain */
406#endif /* XEN */ 406#endif /* XEN */
407 407
408#define VM_PAGE_TO_PP(pg) (&(pg)->mdpage.mp_pp) 408#define VM_PAGE_TO_PP(pg) (&(pg)->mdpage.mp_pp)
409 409
410#define PV_HASH_SIZE 32768 410#define PV_HASH_SIZE 32768
411#define PV_HASH_LOCK_CNT 32 411#define PV_HASH_LOCK_CNT 32
412 412
413struct pv_hash_lock { 413struct pv_hash_lock {
414 kmutex_t lock; 414 kmutex_t lock;
415} __aligned(CACHE_LINE_SIZE) pv_hash_locks[PV_HASH_LOCK_CNT] 415} __aligned(CACHE_LINE_SIZE) pv_hash_locks[PV_HASH_LOCK_CNT]
416 __aligned(CACHE_LINE_SIZE); 416 __aligned(CACHE_LINE_SIZE);
417 417
418struct pv_hash_head { 418struct pv_hash_head {
419 SLIST_HEAD(, pv_entry) hh_list; 419 SLIST_HEAD(, pv_entry) hh_list;
420} pv_hash_heads[PV_HASH_SIZE]; 420} pv_hash_heads[PV_HASH_SIZE];
421 421
422static u_int 422static u_int
423pvhash_hash(struct vm_page *ptp, vaddr_t va) 423pvhash_hash(struct vm_page *ptp, vaddr_t va)
424{ 424{
425 425
426 return (uintptr_t)ptp / sizeof(*ptp) + (va >> PAGE_SHIFT); 426 return (uintptr_t)ptp / sizeof(*ptp) + (va >> PAGE_SHIFT);
427} 427}
428 428
429static struct pv_hash_head * 429static struct pv_hash_head *
430pvhash_head(u_int hash) 430pvhash_head(u_int hash)
431{ 431{
432 432
433 return &pv_hash_heads[hash % PV_HASH_SIZE]; 433 return &pv_hash_heads[hash % PV_HASH_SIZE];
434} 434}
435 435
436static kmutex_t * 436static kmutex_t *
437pvhash_lock(u_int hash) 437pvhash_lock(u_int hash)
438{ 438{
439 439
440 return &pv_hash_locks[hash % PV_HASH_LOCK_CNT].lock; 440 return &pv_hash_locks[hash % PV_HASH_LOCK_CNT].lock;
441} 441}
442 442
443static struct pv_entry * 443static struct pv_entry *
444pvhash_remove(struct pv_hash_head *hh, struct vm_page *ptp, vaddr_t va) 444pvhash_remove(struct pv_hash_head *hh, struct vm_page *ptp, vaddr_t va)
445{ 445{
446 struct pv_entry *pve; 446 struct pv_entry *pve;
447 struct pv_entry *prev; 447 struct pv_entry *prev;
448 448
449 prev = NULL; 449 prev = NULL;
450 SLIST_FOREACH(pve, &hh->hh_list, pve_hash) { 450 SLIST_FOREACH(pve, &hh->hh_list, pve_hash) {
451 if (pve->pve_pte.pte_ptp == ptp && 451 if (pve->pve_pte.pte_ptp == ptp &&
452 pve->pve_pte.pte_va == va) { 452 pve->pve_pte.pte_va == va) {
453 if (prev != NULL) { 453 if (prev != NULL) {
454 SLIST_REMOVE_AFTER(prev, pve_hash); 454 SLIST_REMOVE_AFTER(prev, pve_hash);
455 } else { 455 } else {
456 SLIST_REMOVE_HEAD(&hh->hh_list, pve_hash); 456 SLIST_REMOVE_HEAD(&hh->hh_list, pve_hash);
457 } 457 }
458 break; 458 break;
459 } 459 }
460 prev = pve; 460 prev = pve;
461 } 461 }
462 return pve; 462 return pve;
463} 463}
464 464
465/* 465/*
466 * other data structures 466 * other data structures
467 */ 467 */
468 468
469static pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */ 469static pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */
470static bool pmap_initialized = false; /* pmap_init done yet? */ 470static bool pmap_initialized = false; /* pmap_init done yet? */
471 471
472/* 472/*
473 * the following two vaddr_t's are used during system startup 473 * the following two vaddr_t's are used during system startup
474 * to keep track of how much of the kernel's VM space we have used. 474 * to keep track of how much of the kernel's VM space we have used.
475 * once the system is started, the management of the remaining kernel 475 * once the system is started, the management of the remaining kernel
476 * VM space is turned over to the kernel_map vm_map. 476 * VM space is turned over to the kernel_map vm_map.
477 */ 477 */
478 478
479static vaddr_t virtual_avail; /* VA of first free KVA */ 479static vaddr_t virtual_avail; /* VA of first free KVA */
480static vaddr_t virtual_end; /* VA of last free KVA */ 480static vaddr_t virtual_end; /* VA of last free KVA */
481 481
482/* 482/*
483 * linked list of all non-kernel pmaps 483 * linked list of all non-kernel pmaps
484 */ 484 */
485 485
486static struct pmap_head pmaps; 486static struct pmap_head pmaps;
487 487
488/* 488/*
489 * pool that pmap structures are allocated from 489 * pool that pmap structures are allocated from
490 */ 490 */
491 491
492static struct pool_cache pmap_cache; 492static struct pool_cache pmap_cache;
493 493
494/* 494/*
495 * pv_entry cache 495 * pv_entry cache
496 */ 496 */
497 497
498static struct pool_cache pmap_pv_cache; 498static struct pool_cache pmap_pv_cache;
499 499
500/* 500/*
501 * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a 501 * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a
502 * maxcpus*NPTECL array of PTE's, to avoid cache line thrashing 502 * maxcpus*NPTECL array of PTE's, to avoid cache line thrashing
503 * due to false sharing. 503 * due to false sharing.
504 */ 504 */
505 505
506#ifdef MULTIPROCESSOR 506#ifdef MULTIPROCESSOR
507#define PTESLEW(pte, id) ((pte)+(id)*NPTECL) 507#define PTESLEW(pte, id) ((pte)+(id)*NPTECL)
508#define VASLEW(va,id) ((va)+(id)*NPTECL*PAGE_SIZE) 508#define VASLEW(va,id) ((va)+(id)*NPTECL*PAGE_SIZE)
509#else 509#else
510#define PTESLEW(pte, id) (pte) 510#define PTESLEW(pte, id) (pte)
511#define VASLEW(va,id) (va) 511#define VASLEW(va,id) (va)
512#endif 512#endif
513 513
514/* 514/*
515 * special VAs and the PTEs that map them 515 * special VAs and the PTEs that map them
516 */ 516 */
517static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte, *early_zero_pte; 517static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte, *early_zero_pte;
518static char *csrcp, *cdstp, *zerop, *ptpp, *early_zerop; 518static char *csrcp, *cdstp, *zerop, *ptpp, *early_zerop;
519 519
520int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, u_int); 520int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, u_int);
521 521
522/* 522/*
523 * pool and cache that PDPs are allocated from 523 * pool and cache that PDPs are allocated from
524 */ 524 */
525 525
526static struct pool_cache pmap_pdp_cache; 526static struct pool_cache pmap_pdp_cache;
527int pmap_pdp_ctor(void *, void *, int); 527int pmap_pdp_ctor(void *, void *, int);
528void pmap_pdp_dtor(void *, void *); 528void pmap_pdp_dtor(void *, void *);
529#ifdef PAE 529#ifdef PAE
530/* need to allocate items of 4 pages */ 530/* need to allocate items of 4 pages */
531void *pmap_pdp_alloc(struct pool *, int); 531void *pmap_pdp_alloc(struct pool *, int);
532void pmap_pdp_free(struct pool *, void *); 532void pmap_pdp_free(struct pool *, void *);
533static struct pool_allocator pmap_pdp_allocator = { 533static struct pool_allocator pmap_pdp_allocator = {
534 .pa_alloc = pmap_pdp_alloc, 534 .pa_alloc = pmap_pdp_alloc,
535 .pa_free = pmap_pdp_free, 535 .pa_free = pmap_pdp_free,
536 .pa_pagesz = PAGE_SIZE * PDP_SIZE, 536 .pa_pagesz = PAGE_SIZE * PDP_SIZE,
537}; 537};
538#endif /* PAE */ 538#endif /* PAE */
539 539
540extern vaddr_t idt_vaddr; /* we allocate IDT early */ 540extern vaddr_t idt_vaddr; /* we allocate IDT early */
541extern paddr_t idt_paddr; 541extern paddr_t idt_paddr;
542 542
543#ifdef _LP64 543#ifdef _LP64
544extern vaddr_t lo32_vaddr; 544extern vaddr_t lo32_vaddr;
545extern vaddr_t lo32_paddr; 545extern vaddr_t lo32_paddr;
546#endif 546#endif
547 547
548extern int end; 548extern int end;
549 549
550#ifdef i386 550#ifdef i386
551/* stuff to fix the pentium f00f bug */ 551/* stuff to fix the pentium f00f bug */
552extern vaddr_t pentium_idt_vaddr; 552extern vaddr_t pentium_idt_vaddr;
553#endif 553#endif
554 554
555 555
556/* 556/*
557 * local prototypes 557 * local prototypes
558 */ 558 */
559 559
560static struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t, 560static struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t,
561 pd_entry_t * const *); 561 pd_entry_t * const *);
562static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int); 562static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int);
563static void pmap_freepage(struct pmap *, struct vm_page *, int); 563static void pmap_freepage(struct pmap *, struct vm_page *, int);
564static void pmap_free_ptp(struct pmap *, struct vm_page *, 564static void pmap_free_ptp(struct pmap *, struct vm_page *,
565 vaddr_t, pt_entry_t *, 565 vaddr_t, pt_entry_t *,
566 pd_entry_t * const *); 566 pd_entry_t * const *);
567static bool pmap_is_active(struct pmap *, struct cpu_info *, bool); 567static bool pmap_is_active(struct pmap *, struct cpu_info *, bool);
568static bool pmap_remove_pte(struct pmap *, struct vm_page *, 568static bool pmap_remove_pte(struct pmap *, struct vm_page *,
569 pt_entry_t *, vaddr_t, 569 pt_entry_t *, vaddr_t,
570 struct pv_entry **); 570 struct pv_entry **);
571static void pmap_remove_ptes(struct pmap *, struct vm_page *, 571static void pmap_remove_ptes(struct pmap *, struct vm_page *,
572 vaddr_t, vaddr_t, vaddr_t, 572 vaddr_t, vaddr_t, vaddr_t,
573 struct pv_entry **); 573 struct pv_entry **);
574 574
575static bool pmap_get_physpage(vaddr_t, int, paddr_t *); 575static bool pmap_get_physpage(vaddr_t, int, paddr_t *);
576static void pmap_alloc_level(pd_entry_t * const *, vaddr_t, int, 576static void pmap_alloc_level(pd_entry_t * const *, vaddr_t, int,
577 long *); 577 long *);
578 578
579static bool pmap_reactivate(struct pmap *); 579static bool pmap_reactivate(struct pmap *);
580 580
581/* 581/*
582 * p m a p h e l p e r f u n c t i o n s 582 * p m a p h e l p e r f u n c t i o n s
583 */ 583 */
584 584
585static inline void 585static inline void
586pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff) 586pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff)
587{ 587{
588 588
589 if (pmap == pmap_kernel()) { 589 if (pmap == pmap_kernel()) {
590 atomic_add_long(&pmap->pm_stats.resident_count, resid_diff); 590 atomic_add_long(&pmap->pm_stats.resident_count, resid_diff);
591 atomic_add_long(&pmap->pm_stats.wired_count, wired_diff); 591 atomic_add_long(&pmap->pm_stats.wired_count, wired_diff);
592 } else { 592 } else {
593 KASSERT(mutex_owned(pmap->pm_lock)); 593 KASSERT(mutex_owned(pmap->pm_lock));
594 pmap->pm_stats.resident_count += resid_diff; 594 pmap->pm_stats.resident_count += resid_diff;
595 pmap->pm_stats.wired_count += wired_diff; 595 pmap->pm_stats.wired_count += wired_diff;
596 } 596 }
597} 597}
598 598
599static inline void 599static inline void
600pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte) 600pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte)
601{ 601{
602 int resid_diff = ((npte & PG_V) ? 1 : 0) - ((opte & PG_V) ? 1 : 0); 602 int resid_diff = ((npte & PG_V) ? 1 : 0) - ((opte & PG_V) ? 1 : 0);
603 int wired_diff = ((npte & PG_W) ? 1 : 0) - ((opte & PG_W) ? 1 : 0); 603 int wired_diff = ((npte & PG_W) ? 1 : 0) - ((opte & PG_W) ? 1 : 0);
604 604
605 KASSERT((npte & (PG_V | PG_W)) != PG_W); 605 KASSERT((npte & (PG_V | PG_W)) != PG_W);
606 KASSERT((opte & (PG_V | PG_W)) != PG_W); 606 KASSERT((opte & (PG_V | PG_W)) != PG_W);
607 607
608 pmap_stats_update(pmap, resid_diff, wired_diff); 608 pmap_stats_update(pmap, resid_diff, wired_diff);
609} 609}
610 610
611/* 611/*
612 * ptp_to_pmap: lookup pmap by ptp 612 * ptp_to_pmap: lookup pmap by ptp
613 */ 613 */
614 614
615static struct pmap * 615static struct pmap *
616ptp_to_pmap(struct vm_page *ptp) 616ptp_to_pmap(struct vm_page *ptp)
617{ 617{
618 struct pmap *pmap; 618 struct pmap *pmap;
619 619
620 if (ptp == NULL) { 620 if (ptp == NULL) {
621 return pmap_kernel(); 621 return pmap_kernel();
622 } 622 }
623 pmap = (struct pmap *)ptp->uobject; 623 pmap = (struct pmap *)ptp->uobject;
624 KASSERT(pmap != NULL); 624 KASSERT(pmap != NULL);
625 KASSERT(&pmap->pm_obj[0] == ptp->uobject); 625 KASSERT(&pmap->pm_obj[0] == ptp->uobject);
626 return pmap; 626 return pmap;
627} 627}
628 628
629static inline struct pv_pte * 629static inline struct pv_pte *
630pve_to_pvpte(struct pv_entry *pve) 630pve_to_pvpte(struct pv_entry *pve)
631{ 631{
632 632
633 KASSERT((void *)&pve->pve_pte == (void *)pve); 633 KASSERT((void *)&pve->pve_pte == (void *)pve);
634 return &pve->pve_pte; 634 return &pve->pve_pte;
635} 635}
636 636
637static inline struct pv_entry * 637static inline struct pv_entry *
638pvpte_to_pve(struct pv_pte *pvpte) 638pvpte_to_pve(struct pv_pte *pvpte)
639{ 639{
640 struct pv_entry *pve = (void *)pvpte; 640 struct pv_entry *pve = (void *)pvpte;
641 641
642 KASSERT(pve_to_pvpte(pve) == pvpte); 642 KASSERT(pve_to_pvpte(pve) == pvpte);
643 return pve; 643 return pve;
644} 644}
645 645
646/* 646/*
647 * pv_pte_first, pv_pte_next: PV list iterator. 647 * pv_pte_first, pv_pte_next: PV list iterator.
648 */ 648 */
649 649
650static struct pv_pte * 650static struct pv_pte *
651pv_pte_first(struct pmap_page *pp) 651pv_pte_first(struct pmap_page *pp)
652{ 652{
653 653
654 if ((pp->pp_flags & PP_EMBEDDED) != 0) { 654 if ((pp->pp_flags & PP_EMBEDDED) != 0) {
655 return &pp->pp_pte; 655 return &pp->pp_pte;
656 } 656 }
657 return pve_to_pvpte(LIST_FIRST(&pp->pp_head.pvh_list)); 657 return pve_to_pvpte(LIST_FIRST(&pp->pp_head.pvh_list));
658} 658}
659 659
660static struct pv_pte * 660static struct pv_pte *
661pv_pte_next(struct pmap_page *pp, struct pv_pte *pvpte) 661pv_pte_next(struct pmap_page *pp, struct pv_pte *pvpte)
662{ 662{
663 663
664 KASSERT(pvpte != NULL); 664 KASSERT(pvpte != NULL);
665 if (pvpte == &pp->pp_pte) { 665 if (pvpte == &pp->pp_pte) {
666 KASSERT((pp->pp_flags & PP_EMBEDDED) != 0); 666 KASSERT((pp->pp_flags & PP_EMBEDDED) != 0);
667 return NULL; 667 return NULL;
668 } 668 }
669 KASSERT((pp->pp_flags & PP_EMBEDDED) == 0); 669 KASSERT((pp->pp_flags & PP_EMBEDDED) == 0);
670 return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list)); 670 return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list));
671} 671}
672 672
673/* 673/*
674 * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]? 674 * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]?
675 * of course the kernel is always loaded 675 * of course the kernel is always loaded
676 */ 676 */
677 677
678bool 678bool
679pmap_is_curpmap(struct pmap *pmap) 679pmap_is_curpmap(struct pmap *pmap)
680{ 680{
681#if defined(XEN) && defined(__x86_64__) 681#if defined(XEN) && defined(__x86_64__)
682 /* 682 /*
683 * Only kernel pmap is physically loaded. 683 * Only kernel pmap is physically loaded.
684 * User PGD may be active, but TLB will be flushed 684 * User PGD may be active, but TLB will be flushed
685 * with HYPERVISOR_iret anyway, so let's say no 685 * with HYPERVISOR_iret anyway, so let's say no
686 */ 686 */
687 return(pmap == pmap_kernel()); 687 return(pmap == pmap_kernel());
688#else /* XEN && __x86_64__*/ 688#else /* XEN && __x86_64__*/
689 return((pmap == pmap_kernel()) || 689 return((pmap == pmap_kernel()) ||
690 (pmap == curcpu()->ci_pmap)); 690 (pmap == curcpu()->ci_pmap));
691#endif 691#endif
692} 692}
693 693
694/* 694/*
695 * pmap_is_active: is this pmap loaded into the specified processor's %cr3? 695 * pmap_is_active: is this pmap loaded into the specified processor's %cr3?
696 */ 696 */
697 697
698inline static bool 698inline static bool
699pmap_is_active(struct pmap *pmap, struct cpu_info *ci, bool kernel) 699pmap_is_active(struct pmap *pmap, struct cpu_info *ci, bool kernel)
700{ 700{
701 701
702 return (pmap == pmap_kernel() || 702 return (pmap == pmap_kernel() ||
703 (pmap->pm_cpus & ci->ci_cpumask) != 0 || 703 (pmap->pm_cpus & ci->ci_cpumask) != 0 ||
704 (kernel && (pmap->pm_kernel_cpus & ci->ci_cpumask) != 0)); 704 (kernel && (pmap->pm_kernel_cpus & ci->ci_cpumask) != 0));
705} 705}
706 706
707/* 707/*
708 * Add a reference to the specified pmap. 708 * Add a reference to the specified pmap.
709 */ 709 */
710 710
711void 711void
712pmap_reference(struct pmap *pmap) 712pmap_reference(struct pmap *pmap)
713{ 713{
714 714
715 atomic_inc_uint(&pmap->pm_obj[0].uo_refs); 715 atomic_inc_uint(&pmap->pm_obj[0].uo_refs);
716} 716}
717 717
718#ifndef XEN 718#ifndef XEN
719 719
720/* 720/*
721 * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in 721 * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
722 * 722 *
723 * => we lock enough pmaps to keep things locked in 723 * => we lock enough pmaps to keep things locked in
724 * => must be undone with pmap_unmap_ptes before returning 724 * => must be undone with pmap_unmap_ptes before returning
725 */ 725 */
726 726
727void 727void
728pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2, 728pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2,
729 pd_entry_t **ptepp, pd_entry_t * const **pdeppp) 729 pd_entry_t **ptepp, pd_entry_t * const **pdeppp)
730{ 730{
731 struct pmap *curpmap; 731 struct pmap *curpmap;
732 struct cpu_info *ci; 732 struct cpu_info *ci;
733 uint32_t cpumask; 733 uint32_t cpumask;
734 lwp_t *l; 734 lwp_t *l;
735 735
736 /* The kernel's pmap is always accessible. */ 736 /* The kernel's pmap is always accessible. */
737 if (pmap == pmap_kernel()) { 737 if (pmap == pmap_kernel()) {
738 *pmap2 = NULL; 738 *pmap2 = NULL;
739 *ptepp = PTE_BASE; 739 *ptepp = PTE_BASE;
740 *pdeppp = normal_pdes; 740 *pdeppp = normal_pdes;
741 return; 741 return;
742 } 742 }
743 KASSERT(kpreempt_disabled()); 743 KASSERT(kpreempt_disabled());
744 744
745 l = curlwp; 745 l = curlwp;
746 retry: 746 retry:
747 mutex_enter(pmap->pm_lock); 747 mutex_enter(pmap->pm_lock);
748 ci = curcpu(); 748 ci = curcpu();
749 curpmap = ci->ci_pmap; 749 curpmap = ci->ci_pmap;
750 if (vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) { 750 if (vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) {
751 /* Our own pmap so just load it: easy. */ 751 /* Our own pmap so just load it: easy. */
752 if (__predict_false(ci->ci_want_pmapload)) { 752 if (__predict_false(ci->ci_want_pmapload)) {
753 mutex_exit(pmap->pm_lock); 753 mutex_exit(pmap->pm_lock);
754 pmap_load(); 754 pmap_load();
755 goto retry; 755 goto retry;
756 } 756 }
757 KASSERT(pmap == curpmap); 757 KASSERT(pmap == curpmap);
758 } else if (pmap == curpmap) { 758 } else if (pmap == curpmap) {
759 /* 759 /*
760 * Already on the CPU: make it valid. This is very 760 * Already on the CPU: make it valid. This is very
761 * often the case during exit(), when we have switched 761 * often the case during exit(), when we have switched
762 * to the kernel pmap in order to destroy a user pmap. 762 * to the kernel pmap in order to destroy a user pmap.
763 */ 763 */
764 if (!pmap_reactivate(pmap)) { 764 if (!pmap_reactivate(pmap)) {
765 u_int gen = uvm_emap_gen_return(); 765 u_int gen = uvm_emap_gen_return();
766 tlbflush(); 766 tlbflush();
767 uvm_emap_update(gen); 767 uvm_emap_update(gen);
768 } 768 }
769 } else { 769 } else {
770 /* 770 /*
771 * Toss current pmap from CPU, but keep ref to it. 771 * Toss current pmap from CPU, but keep ref to it.
772 * Can happen if we block during exit(). 772 * Can happen if we block during exit().
773 */ 773 */
774 cpumask = ci->ci_cpumask; 774 cpumask = ci->ci_cpumask;
775 atomic_and_32(&curpmap->pm_cpus, ~cpumask); 775 atomic_and_32(&curpmap->pm_cpus, ~cpumask);
776 atomic_and_32(&curpmap->pm_kernel_cpus, ~cpumask); 776 atomic_and_32(&curpmap->pm_kernel_cpus, ~cpumask);
777 ci->ci_pmap = pmap; 777 ci->ci_pmap = pmap;
778 ci->ci_tlbstate = TLBSTATE_VALID; 778 ci->ci_tlbstate = TLBSTATE_VALID;
779 atomic_or_32(&pmap->pm_cpus, cpumask); 779 atomic_or_32(&pmap->pm_cpus, cpumask);
780 atomic_or_32(&pmap->pm_kernel_cpus, cpumask); 780 atomic_or_32(&pmap->pm_kernel_cpus, cpumask);
781 cpu_load_pmap(pmap); 781 cpu_load_pmap(pmap);
782 } 782 }
783 pmap->pm_ncsw = l->l_ncsw; 783 pmap->pm_ncsw = l->l_ncsw;
784 *pmap2 = curpmap; 784 *pmap2 = curpmap;
785 *ptepp = PTE_BASE; 785 *ptepp = PTE_BASE;
786 *pdeppp = normal_pdes; 786 *pdeppp = normal_pdes;
787} 787}
788 788
789/* 789/*
790 * pmap_unmap_ptes: unlock the PTE mapping of "pmap" 790 * pmap_unmap_ptes: unlock the PTE mapping of "pmap"
791 */ 791 */
792 792
793void 793void
794pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2) 794pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2)
795{ 795{
796 struct cpu_info *ci; 796 struct cpu_info *ci;
797 struct pmap *mypmap; 797 struct pmap *mypmap;
798 798
799 KASSERT(kpreempt_disabled()); 799 KASSERT(kpreempt_disabled());
800 800
801 /* The kernel's pmap is always accessible. */ 801 /* The kernel's pmap is always accessible. */
802 if (pmap == pmap_kernel()) { 802 if (pmap == pmap_kernel()) {
803 return; 803 return;
804 } 804 }
805 805
806 /* 806 /*
807 * We cannot tolerate context switches while mapped in. 807 * We cannot tolerate context switches while mapped in.
808 * If it is our own pmap all we have to do is unlock. 808 * If it is our own pmap all we have to do is unlock.
809 */ 809 */
810 KASSERT(pmap->pm_ncsw == curlwp->l_ncsw); 810 KASSERT(pmap->pm_ncsw == curlwp->l_ncsw);
811 mypmap = vm_map_pmap(&curproc->p_vmspace->vm_map); 811 mypmap = vm_map_pmap(&curproc->p_vmspace->vm_map);
812 if (pmap == mypmap) { 812 if (pmap == mypmap) {
813 mutex_exit(pmap->pm_lock); 813 mutex_exit(pmap->pm_lock);
814 return; 814 return;
815 } 815 }
816 816
817 /* 817 /*
818 * Mark whatever's on the CPU now as lazy and unlock. 818 * Mark whatever's on the CPU now as lazy and unlock.
819 * If the pmap was already installed, we are done. 819 * If the pmap was already installed, we are done.
820 */ 820 */
821 ci = curcpu(); 821 ci = curcpu();
822 ci->ci_tlbstate = TLBSTATE_LAZY; 822 ci->ci_tlbstate = TLBSTATE_LAZY;
823 ci->ci_want_pmapload = (mypmap != pmap_kernel()); 823 ci->ci_want_pmapload = (mypmap != pmap_kernel());
824 mutex_exit(pmap->pm_lock); 824 mutex_exit(pmap->pm_lock);
825 if (pmap == pmap2) { 825 if (pmap == pmap2) {
826 return; 826 return;
827 } 827 }
828 828
829 /* 829 /*
830 * We installed another pmap on the CPU. Grab a reference to 830 * We installed another pmap on the CPU. Grab a reference to
831 * it and leave in place. Toss the evicted pmap (can block). 831 * it and leave in place. Toss the evicted pmap (can block).
832 */ 832 */
833 pmap_reference(pmap); 833 pmap_reference(pmap);
834 pmap_destroy(pmap2); 834 pmap_destroy(pmap2);
835} 835}
836 836
837#endif 837#endif
838 838
839inline static void 839inline static void
840pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte) 840pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte)
841{ 841{
842 842
843#if !defined(__x86_64__) 843#if !defined(__x86_64__)
844 if (curproc == NULL || curproc->p_vmspace == NULL || 844 if (curproc == NULL || curproc->p_vmspace == NULL ||
845 pm != vm_map_pmap(&curproc->p_vmspace->vm_map)) 845 pm != vm_map_pmap(&curproc->p_vmspace->vm_map))
846 return; 846 return;
847 847
848 if ((opte ^ npte) & PG_X) 848 if ((opte ^ npte) & PG_X)
849 pmap_update_pg(va); 849 pmap_update_pg(va);
850 850
851 /* 851 /*
852 * Executability was removed on the last executable change. 852 * Executability was removed on the last executable change.
853 * Reset the code segment to something conservative and 853 * Reset the code segment to something conservative and
854 * let the trap handler deal with setting the right limit. 854 * let the trap handler deal with setting the right limit.
855 * We can't do that because of locking constraints on the vm map. 855 * We can't do that because of locking constraints on the vm map.
856 */ 856 */
857 857
858 if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) { 858 if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) {
859 struct trapframe *tf = curlwp->l_md.md_regs; 859 struct trapframe *tf = curlwp->l_md.md_regs;
860 860
861 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 861 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
862 pm->pm_hiexec = I386_MAX_EXE_ADDR; 862 pm->pm_hiexec = I386_MAX_EXE_ADDR;
863 } 863 }
864#endif /* !defined(__x86_64__) */ 864#endif /* !defined(__x86_64__) */
865} 865}
866 866
867#if !defined(__x86_64__) 867#if !defined(__x86_64__)
868/* 868/*
869 * Fixup the code segment to cover all potential executable mappings. 869 * Fixup the code segment to cover all potential executable mappings.
870 * returns 0 if no changes to the code segment were made. 870 * returns 0 if no changes to the code segment were made.
871 */ 871 */
872 872
873int 873int
874pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb) 874pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb)
875{ 875{
876 struct vm_map_entry *ent; 876 struct vm_map_entry *ent;
877 struct pmap *pm = vm_map_pmap(map); 877 struct pmap *pm = vm_map_pmap(map);
878 vaddr_t va = 0; 878 vaddr_t va = 0;
879 879
880 vm_map_lock_read(map); 880 vm_map_lock_read(map);
881 for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) { 881 for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) {
882 882
883 /* 883 /*
884 * This entry has greater va than the entries before. 884 * This entry has greater va than the entries before.
885 * We need to make it point to the last page, not past it. 885 * We need to make it point to the last page, not past it.
886 */ 886 */
887 887
888 if (ent->protection & VM_PROT_EXECUTE) 888 if (ent->protection & VM_PROT_EXECUTE)
889 va = trunc_page(ent->end) - PAGE_SIZE; 889 va = trunc_page(ent->end) - PAGE_SIZE;
890 } 890 }
891 vm_map_unlock_read(map); 891 vm_map_unlock_read(map);
892 if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL)) 892 if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL))
893 return (0); 893 return (0);
894 894
895 pm->pm_hiexec = va; 895 pm->pm_hiexec = va;
896 if (pm->pm_hiexec > I386_MAX_EXE_ADDR) { 896 if (pm->pm_hiexec > I386_MAX_EXE_ADDR) {
897 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); 897 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
898 } else { 898 } else {
899 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 899 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
900 return (0); 900 return (0);
901 } 901 }
902 return (1); 902 return (1);
903} 903}
904#endif /* !defined(__x86_64__) */ 904#endif /* !defined(__x86_64__) */
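/*
 * Editor's note (not part of the diff): how the two routines above fit
 * together on i386, as far as the code in this section shows.  pm_hiexec
 * tracks the highest executable mapping; pmap_exec_account() drops %cs back
 * to the conservative GUCODE_SEL selector when that top mapping loses PG_X,
 * and pmap_exec_fixup() later rescans the map and switches to GUCODEBIG_SEL
 * only when pm_hiexec exceeds I386_MAX_EXE_ADDR.  The actual segment limits
 * behind GUCODE_SEL/GUCODEBIG_SEL are defined elsewhere and not restated
 * here.
 */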
905 905
906void 906void
907pat_init(struct cpu_info *ci) 907pat_init(struct cpu_info *ci)
908{ 908{
909 uint64_t pat; 909 uint64_t pat;
910 910
911 if (!(ci->ci_feat_val[0] & CPUID_PAT)) 911 if (!(ci->ci_feat_val[0] & CPUID_PAT))
912 return; 912 return;
913 913
914 /* We change WT to WC. Leave all other entries at their default values. */ 914 /* We change WT to WC. Leave all other entries at their default values. */
915 pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) | 915 pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) |
916 PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) | 916 PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) |
917 PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) | 917 PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) |
918 PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC); 918 PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC);
919 919
920 wrmsr(MSR_CR_PAT, pat); 920 wrmsr(MSR_CR_PAT, pat);
921 cpu_pat_enabled = true; 921 cpu_pat_enabled = true;
922 aprint_debug_dev(ci->ci_dev, "PAT enabled\n"); 922 aprint_debug_dev(ci->ci_dev, "PAT enabled\n");
923} 923}
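/*
 * Editor's sketch (not part of the diff): a small userland program that
 * composes the same 64-bit PAT value as pat_init() above.  It assumes
 * PATENTRY(n, type) places `type' in byte n of the MSR and uses the
 * architectural memory-type encodings (UC=0, WC=1, WB=6, UC-=7); the SK_*
 * names are local stand-ins, not the kernel's definitions.
 */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define SK_PAT_UC	0x0ULL
#define SK_PAT_WC	0x1ULL
#define SK_PAT_WB	0x6ULL
#define SK_PAT_UCMINUS	0x7ULL
#define SK_PATENTRY(n, type)	((uint64_t)(type) << ((n) * 8))

int
main(void)
{
	uint64_t pat = SK_PATENTRY(0, SK_PAT_WB) | SK_PATENTRY(1, SK_PAT_WC) |
	    SK_PATENTRY(2, SK_PAT_UCMINUS) | SK_PATENTRY(3, SK_PAT_UC) |
	    SK_PATENTRY(4, SK_PAT_WB) | SK_PATENTRY(5, SK_PAT_WC) |
	    SK_PATENTRY(6, SK_PAT_UCMINUS) | SK_PATENTRY(7, SK_PAT_UC);

	/* Entry 1 is WC rather than the power-on default of WT. */
	printf("PAT = %#018" PRIx64 "\n", pat);	/* 0x0007010600070106 */
	return 0;
}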
924 924
925static pt_entry_t 925static pt_entry_t
926pmap_pat_flags(u_int flags) 926pmap_pat_flags(u_int flags)
927{ 927{
928 u_int cacheflags = (flags & PMAP_CACHE_MASK); 928 u_int cacheflags = (flags & PMAP_CACHE_MASK);
929 929
930 if (!cpu_pat_enabled) { 930 if (!cpu_pat_enabled) {
931 switch (cacheflags) { 931 switch (cacheflags) {
932 case PMAP_NOCACHE: 932 case PMAP_NOCACHE:
933 case PMAP_NOCACHE_OVR: 933 case PMAP_NOCACHE_OVR:
934 /* results in PGC_UCMINUS on CPUs that have PAT 934 /* results in PGC_UCMINUS on CPUs that have PAT
935 * in cpuid but with PAT left "disabled" here 935 * in cpuid but with PAT left "disabled" here
936 */ 936 */
937 return PG_N; 937 return PG_N;
938 default: 938 default:
939 return 0; 939 return 0;
940 } 940 }
941 } 941 }
942 942
943 switch (cacheflags) { 943 switch (cacheflags) {
944 case PMAP_NOCACHE: 944 case PMAP_NOCACHE:
945 return PGC_UC; 945 return PGC_UC;
946 case PMAP_WRITE_COMBINE: 946 case PMAP_WRITE_COMBINE:
947 return PGC_WC; 947 return PGC_WC;
948 case PMAP_WRITE_BACK: 948 case PMAP_WRITE_BACK:
949 return PGC_WB; 949 return PGC_WB;
950 case PMAP_NOCACHE_OVR: 950 case PMAP_NOCACHE_OVR:
951 return PGC_UCMINUS; 951 return PGC_UCMINUS;
952 } 952 }
953 953
954 return 0; 954 return 0;
955} 955}
956 956
957/* 957/*
958 * p m a p k e n t e r f u n c t i o n s 958 * p m a p k e n t e r f u n c t i o n s
959 * 959 *
960 * functions to quickly enter/remove pages from the kernel address 960 * functions to quickly enter/remove pages from the kernel address
961 * space. pmap_kremove is exported to MI kernel. we make use of 961 * space. pmap_kremove is exported to MI kernel. we make use of
962 * the recursive PTE mappings. 962 * the recursive PTE mappings.
963 */ 963 */
964 964
965/* 965/*
966 * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking 966 * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking
967 * 967 *
968 * => no need to lock anything, assume va is already allocated 968 * => no need to lock anything, assume va is already allocated
969 * => should be faster than normal pmap enter function 969 * => should be faster than normal pmap enter function
970 */ 970 */
971 971
972void 972void
973pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 973pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
974{ 974{
975 pt_entry_t *pte, opte, npte; 975 pt_entry_t *pte, opte, npte;
976 976
977 KASSERT(!(prot & ~VM_PROT_ALL)); 977 KASSERT(!(prot & ~VM_PROT_ALL));
978 978
979 if (va < VM_MIN_KERNEL_ADDRESS) 979 if (va < VM_MIN_KERNEL_ADDRESS)
980 pte = vtopte(va); 980 pte = vtopte(va);
981 else 981 else
982 pte = kvtopte(va); 982 pte = kvtopte(va);
983#ifdef DOM0OPS 983#ifdef DOM0OPS
984 if (pa < pmap_pa_start || pa >= pmap_pa_end) { 984 if (pa < pmap_pa_start || pa >= pmap_pa_end) {
985#ifdef DEBUG 985#ifdef DEBUG
986 printk("pmap_kenter_pa: pa 0x%" PRIx64 " for va 0x%" PRIx64 986 printk("pmap_kenter_pa: pa 0x%" PRIx64 " for va 0x%" PRIx64
987 " outside range\n", (int64_t)pa, (int64_t)va); 987 " outside range\n", (int64_t)pa, (int64_t)va);
988#endif /* DEBUG */ 988#endif /* DEBUG */
989 npte = pa; 989 npte = pa;
990 } else 990 } else
991#endif /* DOM0OPS */ 991#endif /* DOM0OPS */
992 npte = pmap_pa2pte(pa); 992 npte = pmap_pa2pte(pa);
993 npte |= protection_codes[prot] | PG_k | PG_V | pmap_pg_g; 993 npte |= protection_codes[prot] | PG_k | PG_V | pmap_pg_g;
994 npte |= pmap_pat_flags(flags); 994 npte |= pmap_pat_flags(flags);
995 opte = pmap_pte_testset(pte, npte); /* zap! */ 995 opte = pmap_pte_testset(pte, npte); /* zap! */
996#if defined(DIAGNOSTIC) 996#if defined(DIAGNOSTIC)
997 /* XXX For now... */ 997 /* XXX For now... */
998 if (opte & PG_PS) 998 if (opte & PG_PS)
999 panic("pmap_kenter_pa: PG_PS"); 999 panic("pmap_kenter_pa: PG_PS");
1000#endif 1000#endif
1001 if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) { 1001 if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) {
1002#if defined(DIAGNOSTIC) 1002#if defined(DIAGNOSTIC)
1003 printf("pmap_kenter_pa: mapping already present\n"); 1003 printf("pmap_kenter_pa: mapping already present\n");
1004#endif 1004#endif
1005 /* This should not happen. */ 1005 /* This should not happen. */
1006 kpreempt_disable(); 1006 kpreempt_disable();
1007 pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER); 1007 pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER);
1008 kpreempt_enable(); 1008 kpreempt_enable();
1009 } 1009 }
1010} 1010}
1011 1011
1012void 1012void
1013pmap_emap_enter(vaddr_t va, paddr_t pa, vm_prot_t prot) 1013pmap_emap_enter(vaddr_t va, paddr_t pa, vm_prot_t prot)
1014{ 1014{
1015 pt_entry_t *pte, opte, npte; 1015 pt_entry_t *pte, opte, npte;
1016 1016
1017 KASSERT((prot & ~VM_PROT_ALL) == 0); 1017 KASSERT((prot & ~VM_PROT_ALL) == 0);
1018 pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va); 1018 pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va);
1019 1019
1020#ifdef DOM0OPS 1020#ifdef DOM0OPS
1021 if (pa < pmap_pa_start || pa >= pmap_pa_end) { 1021 if (pa < pmap_pa_start || pa >= pmap_pa_end) {
1022 npte = pa; 1022 npte = pa;
1023 } else 1023 } else
1024#endif 1024#endif
1025 npte = pmap_pa2pte(pa); 1025 npte = pmap_pa2pte(pa);
1026 1026
1028 npte |= protection_codes[prot] | PG_k | PG_V; 1028 npte |= protection_codes[prot] | PG_k | PG_V;
1029 opte = pmap_pte_testset(pte, npte); 1029 opte = pmap_pte_testset(pte, npte);
1030} 1030}
1031 1031
1032/* 1032/*
1033 * pmap_emap_sync: perform TLB flush or pmap load, if it was deferred. 1033 * pmap_emap_sync: perform TLB flush or pmap load, if it was deferred.
1034 */ 1034 */
1035void 1035void
1036pmap_emap_sync(bool canload) 1036pmap_emap_sync(bool canload)
1037{ 1037{
1038 struct cpu_info *ci = curcpu(); 1038 struct cpu_info *ci = curcpu();
1039 struct pmap *pmap; 1039 struct pmap *pmap;
1040 1040
1041 KASSERT(kpreempt_disabled()); 1041 KASSERT(kpreempt_disabled());
1042 if (__predict_true(ci->ci_want_pmapload && canload)) { 1042 if (__predict_true(ci->ci_want_pmapload && canload)) {
1043 /* 1043 /*
1044 * XXX: Hint for pmap_reactivate(), which might suggest to 1044 * XXX: Hint for pmap_reactivate(), which might suggest to
1045 * not perform TLB flush, if state has not changed. 1045 * not perform TLB flush, if state has not changed.
1046 */ 1046 */
1047 pmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map); 1047 pmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map);
1048 if (__predict_false(pmap == ci->ci_pmap)) { 1048 if (__predict_false(pmap == ci->ci_pmap)) {
1049 const uint32_t cpumask = ci->ci_cpumask; 1049 const uint32_t cpumask = ci->ci_cpumask;
1050 atomic_and_32(&pmap->pm_cpus, ~cpumask); 1050 atomic_and_32(&pmap->pm_cpus, ~cpumask);
1051 } 1051 }
1052 pmap_load(); 1052 pmap_load();
1053 KASSERT(ci->ci_want_pmapload == 0); 1053 KASSERT(ci->ci_want_pmapload == 0);
1054 } else { 1054 } else {
1055 tlbflush(); 1055 tlbflush();
1056 } 1056 }
1057 1057
1058} 1058}
1059 1059
1060void 1060void
1061pmap_emap_remove(vaddr_t sva, vsize_t len) 1061pmap_emap_remove(vaddr_t sva, vsize_t len)
1062{ 1062{
1063 pt_entry_t *pte, xpte = 0; 1063 pt_entry_t *pte, xpte = 0;
1064 vaddr_t va, eva = sva + len; 1064 vaddr_t va, eva = sva + len;
1065 1065
1066 for (va = sva; va < eva; va += PAGE_SIZE) { 1066 for (va = sva; va < eva; va += PAGE_SIZE) {
1067 pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va); 1067 pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va);
1068 xpte |= pmap_pte_testset(pte, 0); 1068 xpte |= pmap_pte_testset(pte, 0);
1069 } 1069 }
1070} 1070}
1071 1071
1072__weak_alias(pmap_kenter_ma, pmap_kenter_pa); 1072__strict_weak_alias(pmap_kenter_ma, pmap_kenter_pa);
1073 1073
1074#if defined(__x86_64__) 1074#if defined(__x86_64__)
1075/* 1075/*
1076 * Change protection for a virtual address. Local for a CPU only, don't 1076 * Change protection for a virtual address. Local for a CPU only, don't
1077 * care about TLB shootdowns. 1077 * care about TLB shootdowns.
1078 * 1078 *
1079 * => must be called with preemption disabled 1079 * => must be called with preemption disabled
1080 */ 1080 */
1081void 1081void
1082pmap_changeprot_local(vaddr_t va, vm_prot_t prot) 1082pmap_changeprot_local(vaddr_t va, vm_prot_t prot)
1083{ 1083{
1084 pt_entry_t *pte, opte, npte; 1084 pt_entry_t *pte, opte, npte;
1085 1085
1086 KASSERT(kpreempt_disabled()); 1086 KASSERT(kpreempt_disabled());
1087 1087
1088 if (va < VM_MIN_KERNEL_ADDRESS) 1088 if (va < VM_MIN_KERNEL_ADDRESS)
1089 pte = vtopte(va); 1089 pte = vtopte(va);
1090 else 1090 else
1091 pte = kvtopte(va); 1091 pte = kvtopte(va);
1092 1092
1093 npte = opte = *pte; 1093 npte = opte = *pte;
1094 1094
1095 if ((prot & VM_PROT_WRITE) != 0) 1095 if ((prot & VM_PROT_WRITE) != 0)
1096 npte |= PG_RW; 1096 npte |= PG_RW;
1097 else 1097 else
1098 npte &= ~PG_RW; 1098 npte &= ~PG_RW;
1099 1099
1100 if (opte != npte) { 1100 if (opte != npte) {
1101 pmap_pte_set(pte, npte); 1101 pmap_pte_set(pte, npte);
1102 pmap_pte_flush(); 1102 pmap_pte_flush();
1103 invlpg(va); 1103 invlpg(va);
1104 } 1104 }
1105} 1105}
1106#endif /* defined(__x86_64__) */ 1106#endif /* defined(__x86_64__) */
1107 1107
1108/* 1108/*
1109 * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking 1109 * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking
1110 * 1110 *
1111 * => no need to lock anything 1111 * => no need to lock anything
1112 * => caller must dispose of any vm_page mapped in the va range 1112 * => caller must dispose of any vm_page mapped in the va range
1113 * => note: not an inline function 1113 * => note: not an inline function
1114 * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE 1114 * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE
1115 * => we assume kernel only unmaps valid addresses and thus don't bother 1115 * => we assume kernel only unmaps valid addresses and thus don't bother
1116 * checking the valid bit before doing TLB flushing 1116 * checking the valid bit before doing TLB flushing
1117 * => must be followed by call to pmap_update() before reuse of page 1117 * => must be followed by call to pmap_update() before reuse of page
1118 */ 1118 */
1119 1119
1120void 1120void
1121pmap_kremove(vaddr_t sva, vsize_t len) 1121pmap_kremove(vaddr_t sva, vsize_t len)
1122{ 1122{
1123 pt_entry_t *pte, opte; 1123 pt_entry_t *pte, opte;
1124 vaddr_t va, eva; 1124 vaddr_t va, eva;
1125 1125
1126 eva = sva + len; 1126 eva = sva + len;
1127 1127
1128 kpreempt_disable(); 1128 kpreempt_disable();
1129 for (va = sva; va < eva; va += PAGE_SIZE) { 1129 for (va = sva; va < eva; va += PAGE_SIZE) {
1130 if (va < VM_MIN_KERNEL_ADDRESS) 1130 if (va < VM_MIN_KERNEL_ADDRESS)
1131 pte = vtopte(va); 1131 pte = vtopte(va);
1132 else 1132 else
1133 pte = kvtopte(va); 1133 pte = kvtopte(va);
1134 opte = pmap_pte_testset(pte, 0); /* zap! */ 1134 opte = pmap_pte_testset(pte, 0); /* zap! */
1135 if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) { 1135 if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) {
1136 pmap_tlb_shootdown(pmap_kernel(), va, opte, 1136 pmap_tlb_shootdown(pmap_kernel(), va, opte,
1137 TLBSHOOT_KREMOVE); 1137 TLBSHOOT_KREMOVE);
1138 } 1138 }
1139 KASSERT((opte & PG_PS) == 0); 1139 KASSERT((opte & PG_PS) == 0);
1140 KASSERT((opte & PG_PVLIST) == 0); 1140 KASSERT((opte & PG_PVLIST) == 0);
1141 } 1141 }
1142 kpreempt_enable(); 1142 kpreempt_enable();
1143} 1143}
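/*
 * Editor's sketch (not part of the diff): the intended calling pattern for
 * the pmap_kenter_pa()/pmap_kremove() pair above, per their comments, as it
 * would look inside the kernel (the usual pmap/uvm headers are assumed).
 * The VA and PA are supplied by the caller, with the VA already allocated;
 * flags is 0 here, but a caller could instead pass e.g. PMAP_WRITE_COMBINE,
 * which pmap_pat_flags() above translates when PAT is enabled.
 */
static void
example_kenter_kremove(vaddr_t kva, paddr_t pa)
{
	/* Enter an unmanaged (no pv-tracking) kernel mapping. */
	pmap_kenter_pa(kva, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
	pmap_update(pmap_kernel());

	/* ... use the mapping at kva ... */

	/* Tear it down; pmap_update() must run before the page is reused. */
	pmap_kremove(kva, PAGE_SIZE);
	pmap_update(pmap_kernel());
}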
1144 1144
1145/* 1145/*
1146 * p m a p i n i t f u n c t i o n s 1146 * p m a p i n i t f u n c t i o n s
1147 * 1147 *
1148 * pmap_bootstrap and pmap_init are called during system startup 1148 * pmap_bootstrap and pmap_init are called during system startup
1149 * to init the pmap module. pmap_bootstrap() does a low level 1149 * to init the pmap module. pmap_bootstrap() does a low level
1150 * init just to get things rolling. pmap_init() finishes the job. 1150 * init just to get things rolling. pmap_init() finishes the job.
1151 */ 1151 */
1152 1152
1153/* 1153/*
1154 * pmap_bootstrap: get the system in a state where it can run with VM 1154 * pmap_bootstrap: get the system in a state where it can run with VM
1155 * properly enabled (called before main()). the VM system is 1155 * properly enabled (called before main()). the VM system is
1156 * fully init'd later... 1156 * fully init'd later...
1157 * 1157 *
1158 * => on i386, locore.s has already enabled the MMU by allocating 1158 * => on i386, locore.s has already enabled the MMU by allocating
1159 * a PDP for the kernel, and nkpde PTP's for the kernel. 1159 * a PDP for the kernel, and nkpde PTP's for the kernel.
1160 * => kva_start is the first free virtual address in kernel space 1160 * => kva_start is the first free virtual address in kernel space
1161 */ 1161 */
1162 1162
1163void 1163void
1164pmap_bootstrap(vaddr_t kva_start) 1164pmap_bootstrap(vaddr_t kva_start)
1165{ 1165{
1166 struct pmap *kpm; 1166 struct pmap *kpm;
1167 pt_entry_t *pte; 1167 pt_entry_t *pte;
1168 int i; 1168 int i;
1169 vaddr_t kva; 1169 vaddr_t kva;
1170#ifndef XEN 1170#ifndef XEN
1171 unsigned long p1i; 1171 unsigned long p1i;
1172 vaddr_t kva_end; 1172 vaddr_t kva_end;
1173#endif 1173#endif
1174 1174
1175 pt_entry_t pg_nx = (cpu_feature[2] & CPUID_NOX ? PG_NX : 0); 1175 pt_entry_t pg_nx = (cpu_feature[2] & CPUID_NOX ? PG_NX : 0);
1176 1176
1177 /* 1177 /*
1178 * set up our local static global vars that keep track of the 1178 * set up our local static global vars that keep track of the
1179 * usage of KVM before kernel_map is set up 1179 * usage of KVM before kernel_map is set up
1180 */ 1180 */
1181 1181
1182 virtual_avail = kva_start; /* first free KVA */ 1182 virtual_avail = kva_start; /* first free KVA */
1183 virtual_end = VM_MAX_KERNEL_ADDRESS; /* last KVA */ 1183 virtual_end = VM_MAX_KERNEL_ADDRESS; /* last KVA */
1184 1184
1185 /* 1185 /*
1186 * set up protection_codes: we need to be able to convert from 1186 * set up protection_codes: we need to be able to convert from
1187 * a MI protection code (some combo of VM_PROT...) to something 1187 * a MI protection code (some combo of VM_PROT...) to something
1188 * we can jam into an i386 PTE. 1188 * we can jam into an i386 PTE.
1189 */ 1189 */
1190 1190
1191 protection_codes[VM_PROT_NONE] = pg_nx; /* --- */ 1191 protection_codes[VM_PROT_NONE] = pg_nx; /* --- */
1192 protection_codes[VM_PROT_EXECUTE] = PG_RO | PG_X; /* --x */ 1192 protection_codes[VM_PROT_EXECUTE] = PG_RO | PG_X; /* --x */
1193 protection_codes[VM_PROT_READ] = PG_RO | pg_nx; /* -r- */ 1193 protection_codes[VM_PROT_READ] = PG_RO | pg_nx; /* -r- */
1194 protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO | PG_X;/* -rx */ 1194 protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO | PG_X;/* -rx */
1195 protection_codes[VM_PROT_WRITE] = PG_RW | pg_nx; /* w-- */ 1195 protection_codes[VM_PROT_WRITE] = PG_RW | pg_nx; /* w-- */
1196 protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW | PG_X;/* w-x */ 1196 protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW | PG_X;/* w-x */
1197 protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW | pg_nx; 1197 protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW | pg_nx;
1198 /* wr- */ 1198 /* wr- */
1199 protection_codes[VM_PROT_ALL] = PG_RW | PG_X; /* wrx */ 1199 protection_codes[VM_PROT_ALL] = PG_RW | PG_X; /* wrx */
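	/*
	 * Editor's note (not part of the diff): this table is what
	 * pmap_kenter_pa() above indexes, e.g. a read/write kernel
	 * mapping ends up as
	 *
	 *	npte = pmap_pa2pte(pa) |
	 *	    protection_codes[VM_PROT_READ | VM_PROT_WRITE] |
	 *	    PG_k | PG_V;
	 *
	 * i.e. PG_RW, plus PG_NX on CPUs that advertise no-execute.
	 */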
1200 1200
1201 /* 1201 /*
1202 * now we init the kernel's pmap 1202 * now we init the kernel's pmap
1203 * 1203 *
1204 * the kernel pmap's pm_obj is not used for much. however, in 1204 * the kernel pmap's pm_obj is not used for much. however, in
1205 * user pmaps the pm_obj contains the list of active PTPs. 1205 * user pmaps the pm_obj contains the list of active PTPs.
1206 * the pm_obj currently does not have a pager. it might be possible 1206 * the pm_obj currently does not have a pager. it might be possible
1207 * to add a pager that would allow a process to read-only mmap its 1207 * to add a pager that would allow a process to read-only mmap its
1208 * own page tables (fast user level vtophys?). this may or may not 1208 * own page tables (fast user level vtophys?). this may or may not
1209 * be useful. 1209 * be useful.
1210 */ 1210 */
1211 1211
1212 kpm = pmap_kernel(); 1212 kpm = pmap_kernel();
1213 for (i = 0; i < PTP_LEVELS - 1; i++) { 1213 for (i = 0; i < PTP_LEVELS - 1; i++) {
1214 mutex_init(&kpm->pm_obj_lock[i], MUTEX_DEFAULT, IPL_NONE); 1214 mutex_init(&kpm->pm_obj_lock[i], MUTEX_DEFAULT, IPL_NONE);
1215 uvm_obj_init(&kpm->pm_obj[i], NULL, false, 1); 1215 uvm_obj_init(&kpm->pm_obj[i], NULL, false, 1);
1216 uvm_obj_setlock(&kpm->pm_obj[i], &kpm->pm_obj_lock[i]); 1216 uvm_obj_setlock(&kpm->pm_obj[i], &kpm->pm_obj_lock[i]);
1217 kpm->pm_ptphint[i] = NULL; 1217 kpm->pm_ptphint[i] = NULL;
1218 } 1218 }
1219 memset(&kpm->pm_list, 0, sizeof(kpm->pm_list)); /* pm_list not used */ 1219 memset(&kpm->pm_list, 0, sizeof(kpm->pm_list)); /* pm_list not used */
1220 1220
1221 kpm->pm_pdir = (pd_entry_t *)(PDPpaddr + KERNBASE); 1221 kpm->pm_pdir = (pd_entry_t *)(PDPpaddr + KERNBASE);
1222 for (i = 0; i < PDP_SIZE; i++) 1222 for (i = 0; i < PDP_SIZE; i++)
1223 kpm->pm_pdirpa[i] = PDPpaddr + PAGE_SIZE * i; 1223 kpm->pm_pdirpa[i] = PDPpaddr + PAGE_SIZE * i;
1224 1224
1225 kpm->pm_stats.wired_count = kpm->pm_stats.resident_count = 1225 kpm->pm_stats.wired_count = kpm->pm_stats.resident_count =
1226 x86_btop(kva_start - VM_MIN_KERNEL_ADDRESS); 1226 x86_btop(kva_start - VM_MIN_KERNEL_ADDRESS);
1227 1227
1228 /* 1228 /*
1229 * the above is just a rough estimate and not critical to the proper 1229 * the above is just a rough estimate and not critical to the proper
1230 * operation of the system. 1230 * operation of the system.
1231 */ 1231 */
1232 1232
1233#ifndef XEN 1233#ifndef XEN
1234 /* 1234 /*
1235 * Begin to enable global TLB entries if they are supported. 1235 * Begin to enable global TLB entries if they are supported.
1236 * The G bit has no effect until the CR4_PGE bit is set in CR4, 1236 * The G bit has no effect until the CR4_PGE bit is set in CR4,
1237 * which happens in cpu_init(), which is run on each cpu 1237 * which happens in cpu_init(), which is run on each cpu
1238 * (and happens later) 1238 * (and happens later)
1239 */ 1239 */
1240 1240
1241 if (cpu_feature[0] & CPUID_PGE) { 1241 if (cpu_feature[0] & CPUID_PGE) {
1242 pmap_pg_g = PG_G; /* enable software */ 1242 pmap_pg_g = PG_G; /* enable software */
1243 1243
1244 /* add PG_G attribute to already mapped kernel pages */ 1244 /* add PG_G attribute to already mapped kernel pages */
1245 if (KERNBASE == VM_MIN_KERNEL_ADDRESS) { 1245 if (KERNBASE == VM_MIN_KERNEL_ADDRESS) {
1246 kva_end = virtual_avail; 1246 kva_end = virtual_avail;
1247 } else { 1247 } else {
1248 extern vaddr_t eblob, esym; 1248 extern vaddr_t eblob, esym;
1249 kva_end = (vaddr_t)&end; 1249 kva_end = (vaddr_t)&end;
1250 if (esym > kva_end) 1250 if (esym > kva_end)
1251 kva_end = esym; 1251 kva_end = esym;
1252 if (eblob > kva_end) 1252 if (eblob > kva_end)
1253 kva_end = eblob; 1253 kva_end = eblob;
1254 kva_end = roundup(kva_end, PAGE_SIZE); 1254 kva_end = roundup(kva_end, PAGE_SIZE);
1255 } 1255 }
1256 for (kva = KERNBASE; kva < kva_end; kva += PAGE_SIZE) { 1256 for (kva = KERNBASE; kva < kva_end; kva += PAGE_SIZE) {
1257 p1i = pl1_i(kva); 1257 p1i = pl1_i(kva);
1258 if (pmap_valid_entry(PTE_BASE[p1i])) 1258 if (pmap_valid_entry(PTE_BASE[p1i]))
1259 PTE_BASE[p1i] |= PG_G; 1259 PTE_BASE[p1i] |= PG_G;
1260 } 1260 }
1261 } 1261 }
1262 1262
1263 /* 1263 /*
1264 * enable large pages if they are supported. 1264 * enable large pages if they are supported.
1265 */ 1265 */
1266 1266
1267 if (cpu_feature[0] & CPUID_PSE) { 1267 if (cpu_feature[0] & CPUID_PSE) {
1268 paddr_t pa; 1268 paddr_t pa;
1269 pd_entry_t *pde; 1269 pd_entry_t *pde;
1270 extern char __data_start; 1270 extern char __data_start;
1271 1271
1272 lcr4(rcr4() | CR4_PSE); /* enable hardware (via %cr4) */ 1272 lcr4(rcr4() | CR4_PSE); /* enable hardware (via %cr4) */
1273 pmap_largepages = 1; /* enable software */ 1273 pmap_largepages = 1; /* enable software */
1274 1274
1275 /* 1275 /*
1276 * the TLB must be flushed after enabling large pages 1276 * the TLB must be flushed after enabling large pages
1277 * on Pentium CPUs, according to section 3.6.2.2 of 1277 * on Pentium CPUs, according to section 3.6.2.2 of
1278 * "Intel Architecture Software Developer's Manual, 1278 * "Intel Architecture Software Developer's Manual,
1279 * Volume 3: System Programming". 1279 * Volume 3: System Programming".
1280 */ 1280 */
1281 tlbflushg(); 1281 tlbflushg();
1282 1282
1283 /* 1283 /*
1284 * now, remap the kernel text using large pages. we 1284 * now, remap the kernel text using large pages. we
1285 * assume that the linker has properly aligned the 1285 * assume that the linker has properly aligned the
1286 * .data segment to a NBPD_L2 boundary. 1286 * .data segment to a NBPD_L2 boundary.
1287 */ 1287 */
1288 kva_end = rounddown((vaddr_t)&__data_start, NBPD_L1); 1288 kva_end = rounddown((vaddr_t)&__data_start, NBPD_L1);
1289 for (pa = 0, kva = KERNBASE; kva + NBPD_L2 <= kva_end; 1289 for (pa = 0, kva = KERNBASE; kva + NBPD_L2 <= kva_end;
1290 kva += NBPD_L2, pa += NBPD_L2) { 1290 kva += NBPD_L2, pa += NBPD_L2) {
1291 pde = &L2_BASE[pl2_i(kva)]; 1291 pde = &L2_BASE[pl2_i(kva)];
1292 *pde = pa | pmap_pg_g | PG_PS | 1292 *pde = pa | pmap_pg_g | PG_PS |
1293 PG_KR | PG_V; /* zap! */ 1293 PG_KR | PG_V; /* zap! */
1294 tlbflushg(); 1294 tlbflushg();
1295 } 1295 }
1296#if defined(DEBUG) 1296#if defined(DEBUG)
1297 aprint_normal("kernel text is mapped with %" PRIuPSIZE " large " 1297 aprint_normal("kernel text is mapped with %" PRIuPSIZE " large "
1298 "pages and %" PRIuPSIZE " normal pages\n", 1298 "pages and %" PRIuPSIZE " normal pages\n",
1299 howmany(kva - KERNBASE, NBPD_L2), 1299 howmany(kva - KERNBASE, NBPD_L2),
1300 howmany((vaddr_t)&__data_start - kva, NBPD_L1)); 1300 howmany((vaddr_t)&__data_start - kva, NBPD_L1));
1301#endif /* defined(DEBUG) */ 1301#endif /* defined(DEBUG) */
1302 } 1302 }
1303#endif /* !XEN */ 1303#endif /* !XEN */
1304 1304
1305 if (VM_MIN_KERNEL_ADDRESS != KERNBASE) { 1305 if (VM_MIN_KERNEL_ADDRESS != KERNBASE) {
1306 /* 1306 /*
1307 * zero_pte is stuck at the end of mapped space for the kernel 1307 * zero_pte is stuck at the end of mapped space for the kernel
1308 * image (disjunct from kva space). This is done so that it 1308 * image (disjunct from kva space). This is done so that it
1309 * can safely be used in pmap_growkernel (pmap_get_physpage), 1309 * can safely be used in pmap_growkernel (pmap_get_physpage),
1310 * when it's called for the first time. 1310 * when it's called for the first time.
1311 * XXXfvdl fix this for MULTIPROCESSOR later. 1311 * XXXfvdl fix this for MULTIPROCESSOR later.
1312 */ 1312 */
1313 1313
1314 early_zerop = (void *)(KERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2); 1314 early_zerop = (void *)(KERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);
1315 early_zero_pte = PTE_BASE + pl1_i((vaddr_t)early_zerop); 1315 early_zero_pte = PTE_BASE + pl1_i((vaddr_t)early_zerop);
1316 } 1316 }
1317 1317
1318 /* 1318 /*
1319 * now we allocate the "special" VAs which are used for tmp mappings 1319 * now we allocate the "special" VAs which are used for tmp mappings
1320 * by the pmap (and other modules). we allocate the VAs by advancing 1320 * by the pmap (and other modules). we allocate the VAs by advancing
1321 * virtual_avail (note that there are no pages mapped at these VAs). 1321 * virtual_avail (note that there are no pages mapped at these VAs).
1322 * we find the PTE that maps the allocated VA via the linear PTE 1322 * we find the PTE that maps the allocated VA via the linear PTE
1323 * mapping. 1323 * mapping.
1324 */ 1324 */
1325 1325
1326 pte = PTE_BASE + pl1_i(virtual_avail); 1326 pte = PTE_BASE + pl1_i(virtual_avail);
1327 1327
1328#ifdef MULTIPROCESSOR 1328#ifdef MULTIPROCESSOR
1329 /* 1329 /*
1330 * Waste some VA space to avoid false sharing of cache lines 1330 * Waste some VA space to avoid false sharing of cache lines
1331 * for page table pages: Give each possible CPU a cache line 1331 * for page table pages: Give each possible CPU a cache line
1332 * of PTE's (8) to play with, though we only need 4. We could 1332 * of PTE's (8) to play with, though we only need 4. We could
1333 * recycle some of this waste by putting the idle stacks here 1333 * recycle some of this waste by putting the idle stacks here
1334 * as well; we could waste less space if we knew the largest 1334 * as well; we could waste less space if we knew the largest
1335 * CPU ID beforehand. 1335 * CPU ID beforehand.
1336 */ 1336 */
1337 csrcp = (char *) virtual_avail; csrc_pte = pte; 1337 csrcp = (char *) virtual_avail; csrc_pte = pte;
1338 1338
1339 cdstp = (char *) virtual_avail+PAGE_SIZE; cdst_pte = pte+1; 1339 cdstp = (char *) virtual_avail+PAGE_SIZE; cdst_pte = pte+1;
1340 1340
1341 zerop = (char *) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2; 1341 zerop = (char *) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2;
1342 1342
1343 ptpp = (char *) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3; 1343 ptpp = (char *) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3;
1344 1344
1345 virtual_avail += PAGE_SIZE * maxcpus * NPTECL; 1345 virtual_avail += PAGE_SIZE * maxcpus * NPTECL;
1346 pte += maxcpus * NPTECL; 1346 pte += maxcpus * NPTECL;
1347#else 1347#else
1348 csrcp = (void *) virtual_avail; csrc_pte = pte; /* allocate */ 1348 csrcp = (void *) virtual_avail; csrc_pte = pte; /* allocate */
1349 virtual_avail += PAGE_SIZE; pte++; /* advance */ 1349 virtual_avail += PAGE_SIZE; pte++; /* advance */
1350 1350
1351 cdstp = (void *) virtual_avail; cdst_pte = pte; 1351 cdstp = (void *) virtual_avail; cdst_pte = pte;
1352 virtual_avail += PAGE_SIZE; pte++; 1352 virtual_avail += PAGE_SIZE; pte++;
1353 1353
1354 zerop = (void *) virtual_avail; zero_pte = pte; 1354 zerop = (void *) virtual_avail; zero_pte = pte;
1355 virtual_avail += PAGE_SIZE; pte++; 1355 virtual_avail += PAGE_SIZE; pte++;
1356 1356
1357 ptpp = (void *) virtual_avail; ptp_pte = pte; 1357 ptpp = (void *) virtual_avail; ptp_pte = pte;
1358 virtual_avail += PAGE_SIZE; pte++; 1358 virtual_avail += PAGE_SIZE; pte++;
1359#endif 1359#endif
1360 1360
1361 if (VM_MIN_KERNEL_ADDRESS == KERNBASE) { 1361 if (VM_MIN_KERNEL_ADDRESS == KERNBASE) {
1362 early_zerop = zerop; 1362 early_zerop = zerop;
1363 early_zero_pte = zero_pte; 1363 early_zero_pte = zero_pte;
1364 } 1364 }
1365 1365
1366 /* 1366 /*
1367 * Nothing after this point actually needs pte; 1367 * Nothing after this point actually needs pte;
1368 */ 1368 */
1369 pte = (void *)0xdeadbeef; 1369 pte = (void *)0xdeadbeef;
1370 1370
1371#ifdef XEN 1371#ifdef XEN
1372#ifdef __x86_64__ 1372#ifdef __x86_64__
1373 /* 1373 /*
1374 * We want a dummy page directory for Xen: 1374 * We want a dummy page directory for Xen:
1375 * when deactivate a pmap, Xen will still consider it active. 1375 * when deactivate a pmap, Xen will still consider it active.
1376 * So we set user PGD to this one to lift all protection on 1376 * So we set user PGD to this one to lift all protection on
1377 * the now inactive page tables set. 1377 * the now inactive page tables set.
1378 */ 1378 */
1379 xen_dummy_user_pgd = avail_start; 1379 xen_dummy_user_pgd = avail_start;
1380 avail_start += PAGE_SIZE; 1380 avail_start += PAGE_SIZE;
1381  1381
1382 /* Zero-fill it; the fewer checks Xen has to make, the better. */ 1382 /* Zero-fill it; the fewer checks Xen has to make, the better. */
1383 memset((void *) (xen_dummy_user_pgd + KERNBASE), 0, PAGE_SIZE); 1383 memset((void *) (xen_dummy_user_pgd + KERNBASE), 0, PAGE_SIZE);
1384 /* Mark read-only */ 1384 /* Mark read-only */
1385 HYPERVISOR_update_va_mapping(xen_dummy_user_pgd + KERNBASE, 1385 HYPERVISOR_update_va_mapping(xen_dummy_user_pgd + KERNBASE,
1386 pmap_pa2pte(xen_dummy_user_pgd) | PG_u | PG_V, UVMF_INVLPG); 1386 pmap_pa2pte(xen_dummy_user_pgd) | PG_u | PG_V, UVMF_INVLPG);
1387 /* Pin as L4 */ 1387 /* Pin as L4 */
1388 xpq_queue_pin_l4_table(xpmap_ptom_masked(xen_dummy_user_pgd)); 1388 xpq_queue_pin_l4_table(xpmap_ptom_masked(xen_dummy_user_pgd));
1389#endif /* __x86_64__ */ 1389#endif /* __x86_64__ */
1390 idt_vaddr = virtual_avail; /* don't need pte */ 1390 idt_vaddr = virtual_avail; /* don't need pte */
1391 idt_paddr = avail_start; /* steal a page */ 1391 idt_paddr = avail_start; /* steal a page */
1392 /* 1392 /*
1393 * Xen requires one more page, as we can't store the 1393 * Xen requires one more page, as we can't store the
1394 * GDT and LDT on the same page. 1394 * GDT and LDT on the same page.
1395 */ 1395 */
1396 virtual_avail += 3 * PAGE_SIZE; 1396 virtual_avail += 3 * PAGE_SIZE;
1397 avail_start += 3 * PAGE_SIZE; 1397 avail_start += 3 * PAGE_SIZE;
1398#else /* XEN */ 1398#else /* XEN */
1399 idt_vaddr = virtual_avail; /* don't need pte */ 1399 idt_vaddr = virtual_avail; /* don't need pte */
1400 idt_paddr = avail_start; /* steal a page */ 1400 idt_paddr = avail_start; /* steal a page */
1401#if defined(__x86_64__) 1401#if defined(__x86_64__)
1402 virtual_avail += 2 * PAGE_SIZE; pte += 2; 1402 virtual_avail += 2 * PAGE_SIZE; pte += 2;
1403 avail_start += 2 * PAGE_SIZE; 1403 avail_start += 2 * PAGE_SIZE;
1404#else /* defined(__x86_64__) */ 1404#else /* defined(__x86_64__) */
1405 virtual_avail += PAGE_SIZE; pte++; 1405 virtual_avail += PAGE_SIZE; pte++;
1406 avail_start += PAGE_SIZE; 1406 avail_start += PAGE_SIZE;
1407 /* pentium f00f bug stuff */ 1407 /* pentium f00f bug stuff */
1408 pentium_idt_vaddr = virtual_avail; /* don't need pte */ 1408 pentium_idt_vaddr = virtual_avail; /* don't need pte */
1409 virtual_avail += PAGE_SIZE; pte++; 1409 virtual_avail += PAGE_SIZE; pte++;
1410#endif /* defined(__x86_64__) */ 1410#endif /* defined(__x86_64__) */
1411#endif /* XEN */ 1411#endif /* XEN */
1412 1412
1413#ifdef _LP64 1413#ifdef _LP64
1414 /* 1414 /*
1415 * Grab a page below 4G for things that need it (i.e. 1415 * Grab a page below 4G for things that need it (i.e.
1416 * having an initial %cr3 for the MP trampoline). 1416 * having an initial %cr3 for the MP trampoline).
1417 */ 1417 */
1418 lo32_vaddr = virtual_avail; 1418 lo32_vaddr = virtual_avail;
1419 virtual_avail += PAGE_SIZE; pte++; 1419 virtual_avail += PAGE_SIZE; pte++;
1420 lo32_paddr = avail_start; 1420 lo32_paddr = avail_start;
1421 avail_start += PAGE_SIZE; 1421 avail_start += PAGE_SIZE;
1422#endif 1422#endif
1423 1423
1424 /* 1424 /*
1425 * now we reserve some VM for mapping pages when doing a crash dump 1425 * now we reserve some VM for mapping pages when doing a crash dump
1426 */ 1426 */
1427 1427
1428 virtual_avail = reserve_dumppages(virtual_avail); 1428 virtual_avail = reserve_dumppages(virtual_avail);
1429 1429
1430 /* 1430 /*
1431 * init the static-global locks and global lists. 1431 * init the static-global locks and global lists.
1432 * 1432 *
1433 * => pventry::pvh_lock (initialized elsewhere) must also be 1433 * => pventry::pvh_lock (initialized elsewhere) must also be
1434 * a spin lock, at IPL_VM to prevent deadlock, and it 1434 * a spin lock, at IPL_VM to prevent deadlock, and it
1435 * is never taken from interrupt context. 1435 * is never taken from interrupt context.
1436 */ 1436 */
1437 1437
1438 mutex_init(&pmaps_lock, MUTEX_DEFAULT, IPL_NONE); 1438 mutex_init(&pmaps_lock, MUTEX_DEFAULT, IPL_NONE);
1439 LIST_INIT(&pmaps); 1439 LIST_INIT(&pmaps);
1440 1440
1441 /* 1441 /*
1442 * initialize caches. 1442 * initialize caches.
1443 */ 1443 */
1444 1444
1445 pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap), 0, 0, 0, 1445 pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap), 0, 0, 0,
1446 "pmappl", NULL, IPL_NONE, NULL, NULL, NULL); 1446 "pmappl", NULL, IPL_NONE, NULL, NULL, NULL);
1447#ifdef PAE 1447#ifdef PAE
1448 pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE * PDP_SIZE, 0, 0, 0, 1448 pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE * PDP_SIZE, 0, 0, 0,
1449 "pdppl", &pmap_pdp_allocator, IPL_NONE, 1449 "pdppl", &pmap_pdp_allocator, IPL_NONE,
1450 pmap_pdp_ctor, pmap_pdp_dtor, NULL); 1450 pmap_pdp_ctor, pmap_pdp_dtor, NULL);
1451#else /* PAE */ 1451#else /* PAE */
1452 pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE, 0, 0, 0, 1452 pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE, 0, 0, 0,
1453 "pdppl", NULL, IPL_NONE, pmap_pdp_ctor, pmap_pdp_dtor, NULL); 1453 "pdppl", NULL, IPL_NONE, pmap_pdp_ctor, pmap_pdp_dtor, NULL);
1454#endif /* PAE */ 1454#endif /* PAE */
1455 pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0, 1455 pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0,
1456 PR_LARGECACHE, "pvpl", &pool_allocator_meta, IPL_NONE, NULL, 1456 PR_LARGECACHE, "pvpl", &pool_allocator_meta, IPL_NONE, NULL,
1457 NULL, NULL); 1457 NULL, NULL);
1458 1458
1459 /* 1459 /*
1460 * ensure the TLB is sync'd with reality by flushing it... 1460 * ensure the TLB is sync'd with reality by flushing it...
1461 */ 1461 */
1462 1462
1463 tlbflushg(); 1463 tlbflushg();
1464 1464
1465 /* 1465 /*
1466 * calculate pmap_maxkvaddr from nkptp[]. 1466 * calculate pmap_maxkvaddr from nkptp[].
1467 */ 1467 */
1468 1468
1469 kva = VM_MIN_KERNEL_ADDRESS; 1469 kva = VM_MIN_KERNEL_ADDRESS;
1470 for (i = PTP_LEVELS - 1; i >= 1; i--) { 1470 for (i = PTP_LEVELS - 1; i >= 1; i--) {
1471 kva += nkptp[i] * nbpd[i]; 1471 kva += nkptp[i] * nbpd[i];
1472 } 1472 }
1473 pmap_maxkvaddr = kva; 1473 pmap_maxkvaddr = kva;
1474} 1474}
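/*
 * Editor's note (not part of the diff): the loop just above leaves
 * pmap_maxkvaddr at VM_MIN_KERNEL_ADDRESS plus the kernel VA already
 * covered by the bootstrap page tables, i.e. nkptp[i] entries of nbpd[i]
 * bytes at each level; growing the kernel map beyond that is handled later
 * by pmap_growkernel() (not shown in this section).
 */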
1475 1475
1476#if defined(__x86_64__) 1476#if defined(__x86_64__)
1477/* 1477/*
1478 * Pre-allocate PTPs for low memory, so that 1:1 mappings for various 1478 * Pre-allocate PTPs for low memory, so that 1:1 mappings for various
1479 * trampoline code can be entered. 1479 * trampoline code can be entered.
1480 */ 1480 */
1481void 1481void
1482pmap_prealloc_lowmem_ptps(void) 1482pmap_prealloc_lowmem_ptps(void)
1483{ 1483{
1484 int level; 1484 int level;
1485 paddr_t newp; 1485 paddr_t newp;
1486#ifdef XEN 1486#ifdef XEN
1487 paddr_t pdes_pa; 1487 paddr_t pdes_pa;
1488 1488
1489 pdes_pa = pmap_pdirpa(pmap_kernel(), 0); 1489 pdes_pa = pmap_pdirpa(pmap_kernel(), 0);
1490 level = PTP_LEVELS; 1490 level = PTP_LEVELS;
1491 for (;;) { 1491 for (;;) {
1492 newp = avail_start; 1492 newp = avail_start;
1493 avail_start += PAGE_SIZE; 1493 avail_start += PAGE_SIZE;
1494 HYPERVISOR_update_va_mapping ((vaddr_t)early_zerop, 1494 HYPERVISOR_update_va_mapping ((vaddr_t)early_zerop,
1495 xpmap_ptom_masked(newp) | PG_u | PG_V | PG_RW, UVMF_INVLPG); 1495 xpmap_ptom_masked(newp) | PG_u | PG_V | PG_RW, UVMF_INVLPG);
1496 memset(early_zerop, 0, PAGE_SIZE); 1496 memset(early_zerop, 0, PAGE_SIZE);
1497 /* Mark R/O before installing */ 1497 /* Mark R/O before installing */
1498 HYPERVISOR_update_va_mapping ((vaddr_t)early_zerop, 1498 HYPERVISOR_update_va_mapping ((vaddr_t)early_zerop,
1499 xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG); 1499 xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG);
1500 if (newp < (NKL2_KIMG_ENTRIES * NBPD_L2)) 1500 if (newp < (NKL2_KIMG_ENTRIES * NBPD_L2))
1501 HYPERVISOR_update_va_mapping (newp + KERNBASE, 1501 HYPERVISOR_update_va_mapping (newp + KERNBASE,
1502 xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG); 1502 xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG);
1503 xpq_queue_pte_update ( 1503 xpq_queue_pte_update (
1504 xpmap_ptom_masked(pdes_pa) 1504 xpmap_ptom_masked(pdes_pa)
1505 + (pl_i(0, level) * sizeof (pd_entry_t)), 1505 + (pl_i(0, level) * sizeof (pd_entry_t)),
1506 xpmap_ptom_masked(newp) | PG_RW | PG_u | PG_V); 1506 xpmap_ptom_masked(newp) | PG_RW | PG_u | PG_V);
1507 pmap_pte_flush(); 1507 pmap_pte_flush();
1508 level--; 1508 level--;
1509 if (level <= 1) 1509 if (level <= 1)
1510 break; 1510 break;
1511 pdes_pa = newp; 1511 pdes_pa = newp;
1512 } 1512 }
1513#else /* XEN */ 1513#else /* XEN */
1514 pd_entry_t *pdes; 1514 pd_entry_t *pdes;
1515 1515
1516 pdes = pmap_kernel()->pm_pdir; 1516 pdes = pmap_kernel()->pm_pdir;
1517 level = PTP_LEVELS; 1517 level = PTP_LEVELS;
1518 for (;;) { 1518 for (;;) {
1519 newp = avail_start; 1519 newp = avail_start;
1520 avail_start += PAGE_SIZE; 1520 avail_start += PAGE_SIZE;
1521 pmap_pte_set(early_zero_pte, (newp & PG_FRAME) | PG_V | PG_RW); 1521 pmap_pte_set(early_zero_pte, (newp & PG_FRAME) | PG_V | PG_RW);
1522 pmap_pte_flush(); 1522 pmap_pte_flush();
1523 pmap_update_pg((vaddr_t)early_zerop); 1523 pmap_update_pg((vaddr_t)early_zerop);
1524 memset(early_zerop, 0, PAGE_SIZE); 1524 memset(early_zerop, 0, PAGE_SIZE);
1525 pdes[pl_i(0, level)] = (newp & PG_FRAME) | PG_V | PG_RW; 1525 pdes[pl_i(0, level)] = (newp & PG_FRAME) | PG_V | PG_RW;
1526 level--; 1526 level--;
1527 if (level <= 1) 1527 if (level <= 1)
1528 break; 1528 break;
1529 pdes = normal_pdes[level - 2]; 1529 pdes = normal_pdes[level - 2];
1530 } 1530 }
1531#endif /* XEN */ 1531#endif /* XEN */
1532} 1532}
1533#endif /* defined(__x86_64__) */ 1533#endif /* defined(__x86_64__) */
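/*
 * Editor's note (not part of the diff): in the non-Xen branch above, the
 * loop walks the paging hierarchy from the top level down, steals one page
 * from avail_start per level, zeroes it through early_zerop/early_zero_pte,
 * and wires it into pdes[pl_i(0, level)], so that virtual address 0 has a
 * complete PTP chain for the low-memory trampoline mappings.  The Xen
 * branch does the same through hypervisor calls, keeping the new PTPs
 * read-only as Xen requires for page-table pages.
 */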
1534 1534
1535/* 1535/*
1536 * pmap_init: called from uvm_init, our job is to get the pmap 1536 * pmap_init: called from uvm_init, our job is to get the pmap
1537 * system ready to manage mappings... 1537 * system ready to manage mappings...
1538 */ 1538 */
1539 1539
1540void 1540void
1541pmap_init(void) 1541pmap_init(void)
1542{ 1542{
1543 int i; 1543 int i;
1544 1544
1545 for (i = 0; i < PV_HASH_SIZE; i++) { 1545 for (i = 0; i < PV_HASH_SIZE; i++) {
1546 SLIST_INIT(&pv_hash_heads[i].hh_list); 1546 SLIST_INIT(&pv_hash_heads[i].hh_list);
1547 } 1547 }
1548 for (i = 0; i < PV_HASH_LOCK_CNT; i++) { 1548 for (i = 0; i < PV_HASH_LOCK_CNT; i++) {
1549 mutex_init(&pv_hash_locks[i].lock, MUTEX_NODEBUG, IPL_VM); 1549 mutex_init(&pv_hash_locks[i].lock, MUTEX_NODEBUG, IPL_VM);
1550 } 1550 }
1551 1551
1552 pmap_tlb_init(); 1552 pmap_tlb_init();
1553 1553
1554 evcnt_attach_dynamic(&pmap_iobmp_evcnt, EVCNT_TYPE_MISC, 1554 evcnt_attach_dynamic(&pmap_iobmp_evcnt, EVCNT_TYPE_MISC,
1555 NULL, "x86", "io bitmap copy"); 1555 NULL, "x86", "io bitmap copy");
1556 evcnt_attach_dynamic(&pmap_ldt_evcnt, EVCNT_TYPE_MISC, 1556 evcnt_attach_dynamic(&pmap_ldt_evcnt, EVCNT_TYPE_MISC,
1557 NULL, "x86", "ldt sync"); 1557 NULL, "x86", "ldt sync");
1558 1558
1559 /* 1559 /*
1560 * done: pmap module is up (and ready for business) 1560 * done: pmap module is up (and ready for business)
1561 */ 1561 */
1562 1562
1563 pmap_initialized = true; 1563 pmap_initialized = true;
1564} 1564}
1565 1565
1566/* 1566/*
1567 * pmap_cpu_init_late: perform late per-CPU initialization. 1567 * pmap_cpu_init_late: perform late per-CPU initialization.
1568 */ 1568 */
1569 1569
1570void 1570void
1571pmap_cpu_init_late(struct cpu_info *ci) 1571pmap_cpu_init_late(struct cpu_info *ci)
1572{ 1572{
1573#ifdef PAE 1573#ifdef PAE
1574 int ret; 1574 int ret;
1575 struct pglist pg; 1575 struct pglist pg;
1576 struct vm_page *vmap; 1576 struct vm_page *vmap;
1577 1577
1578 /* The BP already has its own L3 page allocated in locore.S. */ 1578 /* The BP already has its own L3 page allocated in locore.S. */
1579 if (ci == &cpu_info_primary) 1579 if (ci == &cpu_info_primary)
1580 return; 1580 return;
1581 1581
1582 /* 1582 /*
1583 * Allocate a page for the per-CPU L3 PD. cr3 being 32 bits, the PA must 1583 * Allocate a page for the per-CPU L3 PD. cr3 being 32 bits, the PA must
1584 * reside below the 4GB boundary. 1584 * reside below the 4GB boundary.
1585 */ 1585 */
1586 ret = uvm_pglistalloc(PAGE_SIZE, 0, 0x100000000ULL, 32, 0, &pg, 1, 0); 1586 ret = uvm_pglistalloc(PAGE_SIZE, 0, 0x100000000ULL, 32, 0, &pg, 1, 0);
1587 vmap = TAILQ_FIRST(&pg); 1587 vmap = TAILQ_FIRST(&pg);
1588 1588
1589 if (ret != 0 || vmap == NULL) 1589 if (ret != 0 || vmap == NULL)
1590 panic("%s: failed to allocate L3 pglist for CPU %d (ret %d)\n", 1590 panic("%s: failed to allocate L3 pglist for CPU %d (ret %d)\n",
1591 __func__, cpu_index(ci), ret); 1591 __func__, cpu_index(ci), ret);
1592 1592
1593 ci->ci_pae_l3_pdirpa = vmap->phys_addr; 1593 ci->ci_pae_l3_pdirpa = vmap->phys_addr;
1594 1594
1595 ci->ci_pae_l3_pdir = (paddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 1595 ci->ci_pae_l3_pdir = (paddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
1596 UVM_KMF_VAONLY | UVM_KMF_NOWAIT); 1596 UVM_KMF_VAONLY | UVM_KMF_NOWAIT);
1597 if (ci->ci_pae_l3_pdir == NULL) 1597 if (ci->ci_pae_l3_pdir == NULL)
1598 panic("%s: failed to allocate L3 PD for CPU %d\n", 1598 panic("%s: failed to allocate L3 PD for CPU %d\n",
1599 __func__, cpu_index(ci)); 1599 __func__, cpu_index(ci));
1600 1600
1601 pmap_kenter_pa((vaddr_t)ci->ci_pae_l3_pdir, ci->ci_pae_l3_pdirpa, 1601 pmap_kenter_pa((vaddr_t)ci->ci_pae_l3_pdir, ci->ci_pae_l3_pdirpa,
1602 VM_PROT_READ | VM_PROT_WRITE, 0); 1602 VM_PROT_READ | VM_PROT_WRITE, 0);
1603 1603
1604 pmap_update(pmap_kernel()); 1604 pmap_update(pmap_kernel());
1605#endif 1605#endif
1606} 1606}
1607 1607
1608/* 1608/*
1609 * p v _ e n t r y f u n c t i o n s 1609 * p v _ e n t r y f u n c t i o n s
1610 */ 1610 */
1611 1611
1612/* 1612/*
1613 * pmap_free_pvs: free a list of pv_entrys 1613 * pmap_free_pvs: free a list of pv_entrys
1614 */ 1614 */
1615 1615
1616static void 1616static void
1617pmap_free_pvs(struct pv_entry *pve) 1617pmap_free_pvs(struct pv_entry *pve)
1618{ 1618{
1619 struct pv_entry *next; 1619 struct pv_entry *next;
1620 1620
1621 for ( /* null */ ; pve != NULL ; pve = next) { 1621 for ( /* null */ ; pve != NULL ; pve = next) {
1622 next = pve->pve_next; 1622 next = pve->pve_next;
1623 pool_cache_put(&pmap_pv_cache, pve); 1623 pool_cache_put(&pmap_pv_cache, pve);
1624 } 1624 }
1625} 1625}
1626 1626
1627/* 1627/*
1628 * main pv_entry manipulation functions: 1628 * main pv_entry manipulation functions:
1629 * pmap_enter_pv: enter a mapping onto a pv_head list 1629 * pmap_enter_pv: enter a mapping onto a pv_head list
1630 * pmap_remove_pv: remove a mapping from a pv_head list 1630 * pmap_remove_pv: remove a mapping from a pv_head list
1631 * 1631 *
1632 * NOTE: Both pmap_enter_pv and pmap_remove_pv expect the caller to lock  1632 * NOTE: Both pmap_enter_pv and pmap_remove_pv expect the caller to lock
1633 * the pvh before calling 1633 * the pvh before calling
1634 */ 1634 */
1635 1635
1636/* 1636/*
1637 * insert_pv: a helper of pmap_enter_pv 1637 * insert_pv: a helper of pmap_enter_pv
1638 */ 1638 */
1639 1639
1640static void 1640static void
1641insert_pv(struct pmap_page *pp, struct pv_entry *pve) 1641insert_pv(struct pmap_page *pp, struct pv_entry *pve)
1642{ 1642{
1643 struct pv_hash_head *hh; 1643 struct pv_hash_head *hh;
1644 kmutex_t *lock; 1644 kmutex_t *lock;
1645 u_int hash; 1645 u_int hash;
1646 1646
1647 hash = pvhash_hash(pve->pve_pte.pte_ptp, pve->pve_pte.pte_va); 1647 hash = pvhash_hash(pve->pve_pte.pte_ptp, pve->pve_pte.pte_va);
1648 lock = pvhash_lock(hash); 1648 lock = pvhash_lock(hash);
1649 hh = pvhash_head(hash); 1649 hh = pvhash_head(hash);
1650 mutex_spin_enter(lock); 1650 mutex_spin_enter(lock);
1651 SLIST_INSERT_HEAD(&hh->hh_list, pve, pve_hash); 1651 SLIST_INSERT_HEAD(&hh->hh_list, pve, pve_hash);
1652 mutex_spin_exit(lock); 1652 mutex_spin_exit(lock);
1653 1653
1654 LIST_INSERT_HEAD(&pp->pp_head.pvh_list, pve, pve_list); 1654 LIST_INSERT_HEAD(&pp->pp_head.pvh_list, pve, pve_list);
1655} 1655}
1656 1656
1657/* 1657/*
1658 * pmap_enter_pv: enter a mapping onto a pv_head list 1658 * pmap_enter_pv: enter a mapping onto a pv_head list
1659 * 1659 *
1660 * => caller should adjust ptp's wire_count before calling 1660 * => caller should adjust ptp's wire_count before calling
1661 */ 1661 */
1662 1662
1663static struct pv_entry * 1663static struct pv_entry *
1664pmap_enter_pv(struct pmap_page *pp, 1664pmap_enter_pv(struct pmap_page *pp,
1665 struct pv_entry *pve, /* preallocated pve for us to use */ 1665 struct pv_entry *pve, /* preallocated pve for us to use */
1666 struct pv_entry **sparepve, 1666 struct pv_entry **sparepve,
1667 struct vm_page *ptp, 1667 struct vm_page *ptp,
1668 vaddr_t va) 1668 vaddr_t va)
1669{ 1669{
1670 1670
1671 KASSERT(ptp == NULL || ptp->wire_count >= 2); 1671 KASSERT(ptp == NULL || ptp->wire_count >= 2);
1672 KASSERT(ptp == NULL || ptp->uobject != NULL); 1672 KASSERT(ptp == NULL || ptp->uobject != NULL);
1673 KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset); 1673 KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
1674 1674
1675 if ((pp->pp_flags & PP_EMBEDDED) == 0) { 1675 if ((pp->pp_flags & PP_EMBEDDED) == 0) {
1676 if (LIST_EMPTY(&pp->pp_head.pvh_list)) { 1676 if (LIST_EMPTY(&pp->pp_head.pvh_list)) {
1677 pp->pp_flags |= PP_EMBEDDED; 1677 pp->pp_flags |= PP_EMBEDDED;
1678 pp->pp_pte.pte_ptp = ptp; 1678 pp->pp_pte.pte_ptp = ptp;
1679 pp->pp_pte.pte_va = va; 1679 pp->pp_pte.pte_va = va;
1680 1680
1681 return pve; 1681 return pve;
1682 } 1682 }
1683 } else { 1683 } else {
1684 struct pv_entry *pve2; 1684 struct pv_entry *pve2;
1685 1685
1686 pve2 = *sparepve; 1686 pve2 = *sparepve;
1687 *sparepve = NULL; 1687 *sparepve = NULL;
1688 1688
1689 pve2->pve_pte = pp->pp_pte; 1689 pve2->pve_pte = pp->pp_pte;
1690 pp->pp_flags &= ~PP_EMBEDDED; 1690 pp->pp_flags &= ~PP_EMBEDDED;
1691 LIST_INIT(&pp->pp_head.pvh_list); 1691 LIST_INIT(&pp->pp_head.pvh_list);
1692 insert_pv(pp, pve2); 1692 insert_pv(pp, pve2);
1693 } 1693 }
1694 1694
1695 pve->pve_pte.pte_ptp = ptp; 1695 pve->pve_pte.pte_ptp = ptp;
1696 pve->pve_pte.pte_va = va; 1696 pve->pve_pte.pte_va = va;
1697 insert_pv(pp, pve); 1697 insert_pv(pp, pve);
1698 1698
1699 return NULL; 1699 return NULL;
1700} 1700}
1701 1701
1702/* 1702/*
1703 * pmap_remove_pv: try to remove a mapping from a pv_list 1703 * pmap_remove_pv: try to remove a mapping from a pv_list
1704 * 1704 *
1705 * => caller should adjust ptp's wire_count and free PTP if needed 1705 * => caller should adjust ptp's wire_count and free PTP if needed
1706 * => we return the removed pve 1706 * => we return the removed pve
1707 */ 1707 */
1708 1708
1709static struct pv_entry * 1709static struct pv_entry *
1710pmap_remove_pv(struct pmap_page *pp, struct vm_page *ptp, vaddr_t va) 1710pmap_remove_pv(struct pmap_page *pp, struct vm_page *ptp, vaddr_t va)
1711{ 1711{
1712 struct pv_hash_head *hh; 1712 struct pv_hash_head *hh;
1713 struct pv_entry *pve; 1713 struct pv_entry *pve;
1714 kmutex_t *lock; 1714 kmutex_t *lock;
1715 u_int hash; 1715 u_int hash;
1716 1716
1717 KASSERT(ptp == NULL || ptp->uobject != NULL); 1717 KASSERT(ptp == NULL || ptp->uobject != NULL);
1718 KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset); 1718 KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
1719 1719
1720 if ((pp->pp_flags & PP_EMBEDDED) != 0) { 1720 if ((pp->pp_flags & PP_EMBEDDED) != 0) {
1721 KASSERT(pp->pp_pte.pte_ptp == ptp); 1721 KASSERT(pp->pp_pte.pte_ptp == ptp);
1722 KASSERT(pp->pp_pte.pte_va == va); 1722 KASSERT(pp->pp_pte.pte_va == va);
1723 1723
1724 pp->pp_flags &= ~PP_EMBEDDED; 1724 pp->pp_flags &= ~PP_EMBEDDED;
1725 LIST_INIT(&pp->pp_head.pvh_list); 1725 LIST_INIT(&pp->pp_head.pvh_list);
1726 1726
1727 return NULL; 1727 return NULL;
1728 } 1728 }
1729 1729
1730 hash = pvhash_hash(ptp, va); 1730 hash = pvhash_hash(ptp, va);
1731 lock = pvhash_lock(hash); 1731 lock = pvhash_lock(hash);
1732 hh = pvhash_head(hash); 1732 hh = pvhash_head(hash);
1733 mutex_spin_enter(lock); 1733 mutex_spin_enter(lock);
1734 pve = pvhash_remove(hh, ptp, va); 1734 pve = pvhash_remove(hh, ptp, va);
1735 mutex_spin_exit(lock); 1735 mutex_spin_exit(lock);
1736 1736
1737 LIST_REMOVE(pve, pve_list); 1737 LIST_REMOVE(pve, pve_list);
1738 1738
1739 return pve; 1739 return pve;
1740} 1740}
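/*
 * Editor's sketch (not part of the diff): a simplified userland model of
 * the PP_EMBEDDED scheme used by pmap_enter_pv()/pmap_remove_pv() above.
 * The first mapping of a page is kept inline in the pmap_page; only when a
 * second mapping arrives is that inline entry spilled into a real pv_entry
 * (the preallocated "spare").  Only the enter path is modelled, the hash
 * and locking steps are omitted, and all sk_* structures and names are
 * illustrative stand-ins, not the kernel's definitions.
 */
#include <stdbool.h>
#include <stddef.h>

struct sk_pte_ref { void *ptp; unsigned long va; };

struct sk_pv_entry {
	struct sk_pv_entry *next;
	struct sk_pte_ref pte;
};

struct sk_pmap_page {
	bool embedded;			/* PP_EMBEDDED analogue */
	struct sk_pte_ref pte;		/* valid only while embedded */
	struct sk_pv_entry *list;	/* spilled pv entries */
};

/* Returns the unused preallocated pve (for the caller to free), or NULL. */
static struct sk_pv_entry *
sk_enter_pv(struct sk_pmap_page *pp, struct sk_pv_entry *pve,
    struct sk_pv_entry **sparepve, void *ptp, unsigned long va)
{
	if (!pp->embedded) {
		if (pp->list == NULL) {
			/* First mapping: embed it, pve stays unused. */
			pp->embedded = true;
			pp->pte.ptp = ptp;
			pp->pte.va = va;
			return pve;
		}
	} else {
		/* Second mapping: spill the embedded one into the spare. */
		struct sk_pv_entry *pve2 = *sparepve;

		*sparepve = NULL;
		pve2->pte = pp->pte;
		pp->embedded = false;
		pve2->next = pp->list;
		pp->list = pve2;
	}

	/* Track the new mapping with the preallocated pv_entry. */
	pve->pte.ptp = ptp;
	pve->pte.va = va;
	pve->next = pp->list;
	pp->list = pve;
	return NULL;
}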
1741 1741
1742/* 1742/*
1743 * p t p f u n c t i o n s 1743 * p t p f u n c t i o n s
1744 */ 1744 */
1745 1745
1746static inline struct vm_page * 1746static inline struct vm_page *
1747pmap_find_ptp(struct pmap *pmap, vaddr_t va, paddr_t pa, int level) 1747pmap_find_ptp(struct pmap *pmap, vaddr_t va, paddr_t pa, int level)
1748{ 1748{
1749 int lidx = level - 1; 1749 int lidx = level - 1;
1750 struct vm_page *pg; 1750 struct vm_page *pg;
1751 1751
1752 KASSERT(mutex_owned(pmap->pm_lock)); 1752 KASSERT(mutex_owned(pmap->pm_lock));
1753 1753
1754 if (pa != (paddr_t)-1 && pmap->pm_ptphint[lidx] && 1754 if (pa != (paddr_t)-1 && pmap->pm_ptphint[lidx] &&
1755 pa == VM_PAGE_TO_PHYS(pmap->pm_ptphint[lidx])) { 1755 pa == VM_PAGE_TO_PHYS(pmap->pm_ptphint[lidx])) {
1756 return (pmap->pm_ptphint[lidx]); 1756 return (pmap->pm_ptphint[lidx]);
1757 } 1757 }
1758 PMAP_SUBOBJ_LOCK(pmap, lidx); 1758 PMAP_SUBOBJ_LOCK(pmap, lidx);
1759 pg = uvm_pagelookup(&pmap->pm_obj[lidx], ptp_va2o(va, level)); 1759 pg = uvm_pagelookup(&pmap->pm_obj[lidx], ptp_va2o(va, level));
1760 PMAP_SUBOBJ_UNLOCK(pmap, lidx); 1760 PMAP_SUBOBJ_UNLOCK(pmap, lidx);
1761 1761
1762 KASSERT(pg == NULL || pg->wire_count >= 1); 1762 KASSERT(pg == NULL || pg->wire_count >= 1);
1763 return pg; 1763 return pg;
1764} 1764}
1765 1765
1766static inline void 1766static inline void
1767pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level) 1767pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level)
1768{ 1768{
1769 lwp_t *l; 1769 lwp_t *l;
1770 int lidx; 1770 int lidx;
1771 struct uvm_object *obj; 1771 struct uvm_object *obj;
1772 1772
1773 KASSERT(ptp->wire_count == 1); 1773 KASSERT(ptp->wire_count == 1);
1774 1774
1775 lidx = level - 1; 1775 lidx = level - 1;
1776 1776
1777 obj = &pmap->pm_obj[lidx]; 1777 obj = &pmap->pm_obj[lidx];
1778 pmap_stats_update(pmap, -1, 0); 1778 pmap_stats_update(pmap, -1, 0);
1779 if (lidx != 0) 1779 if (lidx != 0)
1780 mutex_enter(obj->vmobjlock); 1780 mutex_enter(obj->vmobjlock);
1781 if (pmap->pm_ptphint[lidx] == ptp) 1781 if (pmap->pm_ptphint[lidx] == ptp)
1782 pmap->pm_ptphint[lidx] = TAILQ_FIRST(&obj->memq); 1782 pmap->pm_ptphint[lidx] = TAILQ_FIRST(&obj->memq);
1783 ptp->wire_count = 0; 1783 ptp->wire_count = 0;
1784 uvm_pagerealloc(ptp, NULL, 0); 1784 uvm_pagerealloc(ptp, NULL, 0);
1785 l = curlwp; 1785 l = curlwp;
1786 KASSERT((l->l_pflag & LP_INTR) == 0); 1786 KASSERT((l->l_pflag & LP_INTR) == 0);
1787 VM_PAGE_TO_PP(ptp)->pp_link = l->l_md.md_gc_ptp; 1787 VM_PAGE_TO_PP(ptp)->pp_link = l->l_md.md_gc_ptp;
1788 l->l_md.md_gc_ptp = ptp; 1788 l->l_md.md_gc_ptp = ptp;
1789 if (lidx != 0) 1789 if (lidx != 0)
1790 mutex_exit(obj->vmobjlock); 1790 mutex_exit(obj->vmobjlock);
1791} 1791}
1792 1792
1793static void 1793static void
1794pmap_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va, 1794pmap_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va,
1795 pt_entry_t *ptes, pd_entry_t * const *pdes) 1795 pt_entry_t *ptes, pd_entry_t * const *pdes)
1796{ 1796{
1797 unsigned long index; 1797 unsigned long index;
1798 int level; 1798 int level;
1799 vaddr_t invaladdr; 1799 vaddr_t invaladdr;
1800 pd_entry_t opde; 1800 pd_entry_t opde;
1801#ifdef XEN 1801#ifdef XEN
1802 struct pmap *curpmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map); 1802 struct pmap *curpmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map);
1803#ifdef MULTIPROCESSOR 1803#ifdef MULTIPROCESSOR
1804 vaddr_t invaladdr2; 1804 vaddr_t invaladdr2;
1805#endif 1805#endif
1806#endif 1806#endif
1807 1807
1808 KASSERT(pmap != pmap_kernel()); 1808 KASSERT(pmap != pmap_kernel());
1809 KASSERT(mutex_owned(pmap->pm_lock)); 1809 KASSERT(mutex_owned(pmap->pm_lock));
1810 KASSERT(kpreempt_disabled()); 1810 KASSERT(kpreempt_disabled());
1811 1811
1812 level = 1; 1812 level = 1;
1813 do { 1813 do {
1814 index = pl_i(va, level + 1); 1814 index = pl_i(va, level + 1);
1815 opde = pmap_pte_testset(&pdes[level - 1][index], 0); 1815 opde = pmap_pte_testset(&pdes[level - 1][index], 0);
1816#if defined(XEN) 1816#if defined(XEN)
1817# if defined(__x86_64__) 1817# if defined(__x86_64__)
1818 /* 1818 /*
1819 * If ptp is a L3 currently mapped in kernel space, 1819 * If ptp is a L3 currently mapped in kernel space,
1820 * clear it before freeing 1820 * clear it before freeing
1821 */ 1821 */
1822 if (pmap_pdirpa(pmap, 0) == curcpu()->ci_xen_current_user_pgd 1822 if (pmap_pdirpa(pmap, 0) == curcpu()->ci_xen_current_user_pgd
1823 && level == PTP_LEVELS - 1) 1823 && level == PTP_LEVELS - 1)
1824 pmap_pte_set(&pmap_kernel()->pm_pdir[index], 0); 1824 pmap_pte_set(&pmap_kernel()->pm_pdir[index], 0);
1825# endif /*__x86_64__ */ 1825# endif /*__x86_64__ */
1826 invaladdr = level == 1 ? (vaddr_t)ptes : 1826 invaladdr = level == 1 ? (vaddr_t)ptes :
1827 (vaddr_t)pdes[level - 2]; 1827 (vaddr_t)pdes[level - 2];
1828 pmap_tlb_shootdown(curpmap, invaladdr + index * PAGE_SIZE, 1828 pmap_tlb_shootdown(curpmap, invaladdr + index * PAGE_SIZE,
1829 opde, TLBSHOOT_FREE_PTP1); 1829 opde, TLBSHOOT_FREE_PTP1);
1830# if defined(MULTIPROCESSOR) 1830# if defined(MULTIPROCESSOR)
1831 invaladdr2 = level == 1 ? (vaddr_t)PTE_BASE : 1831 invaladdr2 = level == 1 ? (vaddr_t)PTE_BASE :
1832 (vaddr_t)normal_pdes[level - 2]; 1832 (vaddr_t)normal_pdes[level - 2];
1833 if (pmap != curpmap || invaladdr != invaladdr2) { 1833 if (pmap != curpmap || invaladdr != invaladdr2) {
1834 pmap_tlb_shootdown(pmap, invaladdr2 + index * PAGE_SIZE, 1834 pmap_tlb_shootdown(pmap, invaladdr2 + index * PAGE_SIZE,
1835 opde, TLBSHOOT_FREE_PTP2); 1835 opde, TLBSHOOT_FREE_PTP2);
1836 } 1836 }
1837# endif /* MULTIPROCESSOR */ 1837# endif /* MULTIPROCESSOR */
1838#else /* XEN */ 1838#else /* XEN */
1839 invaladdr = level == 1 ? (vaddr_t)ptes : 1839 invaladdr = level == 1 ? (vaddr_t)ptes :
1840 (vaddr_t)pdes[level - 2]; 1840 (vaddr_t)pdes[level - 2];
1841 pmap_tlb_shootdown(pmap, invaladdr + index * PAGE_SIZE, 1841 pmap_tlb_shootdown(pmap, invaladdr + index * PAGE_SIZE,
1842 opde, TLBSHOOT_FREE_PTP1); 1842 opde, TLBSHOOT_FREE_PTP1);
1843#endif /* XEN */ 1843#endif /* XEN */
1844 pmap_freepage(pmap, ptp, level); 1844 pmap_freepage(pmap, ptp, level);
1845 if (level < PTP_LEVELS - 1) { 1845 if (level < PTP_LEVELS - 1) {
1846 ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1); 1846 ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1);
1847 ptp->wire_count--; 1847 ptp->wire_count--;
1848 if (ptp->wire_count > 1) 1848 if (ptp->wire_count > 1)
1849 break; 1849 break;
1850 } 1850 }
1851 } while (++level < PTP_LEVELS); 1851 } while (++level < PTP_LEVELS);
1852 pmap_pte_flush(); 1852 pmap_pte_flush();
1853} 1853}
1854 1854
1855/* 1855/*
1856 * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one) 1856 * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one)
1857 * 1857 *
1858 * => pmap should NOT be pmap_kernel() 1858 * => pmap should NOT be pmap_kernel()
1859 * => pmap should be locked 1859 * => pmap should be locked
1860 * => preemption should be disabled 1860 * => preemption should be disabled
1861 */ 1861 */
1862 1862
1863static struct vm_page * 1863static struct vm_page *
1864pmap_get_ptp(struct pmap *pmap, vaddr_t va, pd_entry_t * const *pdes) 1864pmap_get_ptp(struct pmap *pmap, vaddr_t va, pd_entry_t * const *pdes)
1865{ 1865{
1866 struct vm_page *ptp, *pptp; 1866 struct vm_page *ptp, *pptp;
1867 int i; 1867 int i;
1868 unsigned long index; 1868 unsigned long index;
1869 pd_entry_t *pva; 1869 pd_entry_t *pva;
1870 paddr_t ppa, pa; 1870 paddr_t ppa, pa;
1871 struct uvm_object *obj; 1871 struct uvm_object *obj;
1872 1872
1873 KASSERT(pmap != pmap_kernel()); 1873 KASSERT(pmap != pmap_kernel());
1874 KASSERT(mutex_owned(pmap->pm_lock)); 1874 KASSERT(mutex_owned(pmap->pm_lock));
1875 KASSERT(kpreempt_disabled()); 1875 KASSERT(kpreempt_disabled());
1876 1876
1877 ptp = NULL; 1877 ptp = NULL;
1878 pa = (paddr_t)-1; 1878 pa = (paddr_t)-1;
1879 1879
1880 /* 1880 /*
1881 * Loop through all page table levels seeing if we need to 1881 * Loop through all page table levels seeing if we need to
1882 * add a new page to that level. 1882 * add a new page to that level.
1883 */ 1883 */
1884 for (i = PTP_LEVELS; i > 1; i--) { 1884 for (i = PTP_LEVELS; i > 1; i--) {
1885 /* 1885 /*
1886 * Save values from previous round. 1886 * Save values from previous round.
1887 */ 1887 */
1888 pptp = ptp; 1888 pptp = ptp;
1889 ppa = pa; 1889 ppa = pa;
1890 1890
1891 index = pl_i(va, i); 1891 index = pl_i(va, i);
1892 pva = pdes[i - 2]; 1892 pva = pdes[i - 2];
1893 1893
1894 if (pmap_valid_entry(pva[index])) { 1894 if (pmap_valid_entry(pva[index])) {
1895 ppa = pmap_pte2pa(pva[index]); 1895 ppa = pmap_pte2pa(pva[index]);
1896 ptp = NULL; 1896 ptp = NULL;
1897 continue; 1897 continue;
1898 } 1898 }
1899 1899
1900 obj = &pmap->pm_obj[i-2]; 1900 obj = &pmap->pm_obj[i-2];
1901 PMAP_SUBOBJ_LOCK(pmap, i - 2); 1901 PMAP_SUBOBJ_LOCK(pmap, i - 2);
1902 ptp = uvm_pagealloc(obj, ptp_va2o(va, i - 1), NULL, 1902 ptp = uvm_pagealloc(obj, ptp_va2o(va, i - 1), NULL,
1903 UVM_PGA_USERESERVE|UVM_PGA_ZERO); 1903 UVM_PGA_USERESERVE|UVM_PGA_ZERO);
1904 PMAP_SUBOBJ_UNLOCK(pmap, i - 2); 1904 PMAP_SUBOBJ_UNLOCK(pmap, i - 2);
1905 1905
1906 if (ptp == NULL) 1906 if (ptp == NULL)
1907 return NULL; 1907 return NULL;
1908 1908
1909 ptp->flags &= ~PG_BUSY; /* never busy */ 1909 ptp->flags &= ~PG_BUSY; /* never busy */
1910 ptp->wire_count = 1; 1910 ptp->wire_count = 1;
1911 pmap->pm_ptphint[i - 2] = ptp; 1911 pmap->pm_ptphint[i - 2] = ptp;
1912 pa = VM_PAGE_TO_PHYS(ptp); 1912 pa = VM_PAGE_TO_PHYS(ptp);
1913 pmap_pte_set(&pva[index], (pd_entry_t) 1913 pmap_pte_set(&pva[index], (pd_entry_t)
1914 (pmap_pa2pte(pa) | PG_u | PG_RW | PG_V)); 1914 (pmap_pa2pte(pa) | PG_u | PG_RW | PG_V));
1915#if defined(XEN) && defined(__x86_64__) 1915#if defined(XEN) && defined(__x86_64__)
1916 /* 1916 /*
1917 * In Xen we must enter the mapping in kernel map too 1917 * In Xen we must enter the mapping in kernel map too
1918 * if pmap is curmap and modifying top level (PGD) 1918 * if pmap is curmap and modifying top level (PGD)
1919 */ 1919 */
1920 if(i == PTP_LEVELS && pmap != pmap_kernel()) { 1920 if(i == PTP_LEVELS && pmap != pmap_kernel()) {
1921 pmap_pte_set(&pmap_kernel()->pm_pdir[index], 1921 pmap_pte_set(&pmap_kernel()->pm_pdir[index],
1922 (pd_entry_t) (pmap_pa2pte(pa) 1922 (pd_entry_t) (pmap_pa2pte(pa)
1923 | PG_u | PG_RW | PG_V)); 1923 | PG_u | PG_RW | PG_V));
1924 } 1924 }
1925#endif /* XEN && __x86_64__ */ 1925#endif /* XEN && __x86_64__ */
1926 pmap_pte_flush(); 1926 pmap_pte_flush();
1927 pmap_stats_update(pmap, 1, 0); 1927 pmap_stats_update(pmap, 1, 0);
1928 /* 1928 /*
1929 * If we're not in the top level, increase the 1929 * If we're not in the top level, increase the
1930 * wire count of the parent page. 1930 * wire count of the parent page.
1931 */ 1931 */
1932 if (i < PTP_LEVELS) { 1932 if (i < PTP_LEVELS) {
1933 if (pptp == NULL) 1933 if (pptp == NULL)
1934 pptp = pmap_find_ptp(pmap, va, ppa, i); 1934 pptp = pmap_find_ptp(pmap, va, ppa, i);
1935#ifdef DIAGNOSTIC 1935#ifdef DIAGNOSTIC
1936 if (pptp == NULL) 1936 if (pptp == NULL)
1937 panic("pde page disappeared"); 1937 panic("pde page disappeared");
1938#endif 1938#endif
1939 pptp->wire_count++; 1939 pptp->wire_count++;
1940 } 1940 }
1941 } 1941 }
1942 1942
1943 /* 1943 /*
1944 * ptp is not NULL if we just allocated a new ptp. If it's 1944 * ptp is not NULL if we just allocated a new ptp. If it's
1945 * still NULL, we must look up the existing one. 1945 * still NULL, we must look up the existing one.
1946 */ 1946 */
1947 if (ptp == NULL) { 1947 if (ptp == NULL) {
1948 ptp = pmap_find_ptp(pmap, va, ppa, 1); 1948 ptp = pmap_find_ptp(pmap, va, ppa, 1);
1949#ifdef DIAGNOSTIC 1949#ifdef DIAGNOSTIC
1950 if (ptp == NULL) { 1950 if (ptp == NULL) {
1951 printf("va %" PRIxVADDR " ppa %" PRIxPADDR "\n", 1951 printf("va %" PRIxVADDR " ppa %" PRIxPADDR "\n",
1952 va, ppa); 1952 va, ppa);
1953 panic("pmap_get_ptp: unmanaged user PTP"); 1953 panic("pmap_get_ptp: unmanaged user PTP");
1954 } 1954 }
1955#endif 1955#endif
1956 } 1956 }
1957 1957
1958 pmap->pm_ptphint[0] = ptp; 1958 pmap->pm_ptphint[0] = ptp;
1959 return(ptp); 1959 return(ptp);
1960} 1960}
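pmap_get_ptp() walks from the top level down to level 2, allocating any missing intermediate page table page, installing it in its parent, and bumping the parent's wire count so the parent cannot be reclaimed while it still has children. A rough userspace analogue of that get-or-allocate walk over a toy radix tree (not kernel code; the 9 index bits per level, as on x86-64, are an assumption of the sketch):

#include <stdint.h>
#include <stdlib.h>

#define LEVELS		3
#define SLOTS_PER_NODE	512		/* 9 index bits per level */

struct node {
	struct node	*slot[SLOTS_PER_NODE];
	unsigned	 wire_count;	/* itself + number of live children */
};

static unsigned
index_at_level(uint64_t va, int level)
{
	return (va >> (12 + 9 * (level - 1))) & (SLOTS_PER_NODE - 1);
}

/* return the level-1 table covering va, allocating intermediate levels */
static struct node *
get_leaf_table(struct node *root, uint64_t va)
{
	struct node *parent = root;

	for (int level = LEVELS; level > 1; level--) {
		unsigned idx = index_at_level(va, level);

		if (parent->slot[idx] == NULL) {
			struct node *n = calloc(1, sizeof(*n));

			if (n == NULL)
				return NULL;
			n->wire_count = 1;
			parent->slot[idx] = n;
			parent->wire_count++;	/* new child wires the parent */
		}
		parent = parent->slot[idx];
	}
	return parent;
}

int
main(void)
{
	struct node root = { .wire_count = 1 };

	return get_leaf_table(&root, 0x7f0012345000ULL) != NULL ? 0 : 1;
}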
1961 1961
1962/* 1962/*
1963 * p m a p l i f e c y c l e f u n c t i o n s 1963 * p m a p l i f e c y c l e f u n c t i o n s
1964 */ 1964 */
1965 1965
1966/* 1966/*
1967 * pmap_pdp_ctor: constructor for the PDP cache. 1967 * pmap_pdp_ctor: constructor for the PDP cache.
1968 */ 1968 */
1969int 1969int
1970pmap_pdp_ctor(void *arg, void *v, int flags) 1970pmap_pdp_ctor(void *arg, void *v, int flags)
1971{ 1971{
1972 pd_entry_t *pdir = v; 1972 pd_entry_t *pdir = v;
1973 paddr_t pdirpa = 0; /* XXX: GCC */ 1973 paddr_t pdirpa = 0; /* XXX: GCC */
1974 vaddr_t object; 1974 vaddr_t object;
1975 int i; 1975 int i;
1976 1976
1977#if !defined(XEN) || !defined(__x86_64__) 1977#if !defined(XEN) || !defined(__x86_64__)
1978 int npde; 1978 int npde;
1979#endif 1979#endif
1980#ifdef XEN 1980#ifdef XEN
1981 int s; 1981 int s;
1982#endif 1982#endif
1983 1983
1984 /* 1984 /*
1985 * NOTE: The `pmap_lock' is held when the PDP is allocated. 1985 * NOTE: The `pmap_lock' is held when the PDP is allocated.
1986 */ 1986 */
1987 1987
1988#if defined(XEN) && defined(__x86_64__) 1988#if defined(XEN) && defined(__x86_64__)
1989 /* fetch the physical address of the page directory. */ 1989 /* fetch the physical address of the page directory. */
1990 (void) pmap_extract(pmap_kernel(), (vaddr_t) pdir, &pdirpa); 1990 (void) pmap_extract(pmap_kernel(), (vaddr_t) pdir, &pdirpa);
1991 1991
1992 /* zero init area */ 1992 /* zero init area */
1993 memset (pdir, 0, PAGE_SIZE); /* Xen wants a clean page */ 1993 memset (pdir, 0, PAGE_SIZE); /* Xen wants a clean page */
1994 /* 1994 /*
1995 * this pdir will NEVER be active in kernel mode 1995 * this pdir will NEVER be active in kernel mode
1996 * so mark recursive entry invalid 1996 * so mark recursive entry invalid
1997 */ 1997 */
1998 pdir[PDIR_SLOT_PTE] = pmap_pa2pte(pdirpa) | PG_u; 1998 pdir[PDIR_SLOT_PTE] = pmap_pa2pte(pdirpa) | PG_u;
1999 /* 1999 /*
2000 * PDP constructed this way won't be for kernel, 2000 * PDP constructed this way won't be for kernel,
2001 * hence we don't put kernel mappings on Xen. 2001 * hence we don't put kernel mappings on Xen.
2002 * But we need to make pmap_create() happy, so put a dummy (without 2002 * But we need to make pmap_create() happy, so put a dummy (without
2003 * PG_V) value at the right place. 2003 * PG_V) value at the right place.
2004 */ 2004 */
2005 pdir[PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1] - 1] = 2005 pdir[PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1] - 1] =
2006 (pd_entry_t)-1 & PG_FRAME; 2006 (pd_entry_t)-1 & PG_FRAME;
2007#else /* XEN && __x86_64__*/ 2007#else /* XEN && __x86_64__*/
2008 /* zero init area */ 2008 /* zero init area */
2009 memset(pdir, 0, PDIR_SLOT_PTE * sizeof(pd_entry_t)); 2009 memset(pdir, 0, PDIR_SLOT_PTE * sizeof(pd_entry_t));
2010 2010
2011 object = (vaddr_t)v; 2011 object = (vaddr_t)v;
2012 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) { 2012 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) {
2013 /* fetch the physical address of the page directory. */ 2013 /* fetch the physical address of the page directory. */
2014 (void) pmap_extract(pmap_kernel(), object, &pdirpa); 2014 (void) pmap_extract(pmap_kernel(), object, &pdirpa);
2015 /* put in recursive PDE to map the PTEs */ 2015 /* put in recursive PDE to map the PTEs */
2016 pdir[PDIR_SLOT_PTE + i] = pmap_pa2pte(pdirpa) | PG_V; 2016 pdir[PDIR_SLOT_PTE + i] = pmap_pa2pte(pdirpa) | PG_V;
2017#ifndef XEN 2017#ifndef XEN
2018 pdir[PDIR_SLOT_PTE + i] |= PG_KW; 2018 pdir[PDIR_SLOT_PTE + i] |= PG_KW;
2019#endif 2019#endif
2020 } 2020 }
2021 2021
2022 /* copy kernel's PDE */ 2022 /* copy kernel's PDE */
2023 npde = nkptp[PTP_LEVELS - 1]; 2023 npde = nkptp[PTP_LEVELS - 1];
2024 2024
2025 memcpy(&pdir[PDIR_SLOT_KERN], &PDP_BASE[PDIR_SLOT_KERN], 2025 memcpy(&pdir[PDIR_SLOT_KERN], &PDP_BASE[PDIR_SLOT_KERN],
2026 npde * sizeof(pd_entry_t)); 2026 npde * sizeof(pd_entry_t));
2027 2027
2028 /* zero the rest */ 2028 /* zero the rest */
2029 memset(&pdir[PDIR_SLOT_KERN + npde], 0, 2029 memset(&pdir[PDIR_SLOT_KERN + npde], 0,
2030 (NTOPLEVEL_PDES - (PDIR_SLOT_KERN + npde)) * sizeof(pd_entry_t)); 2030 (NTOPLEVEL_PDES - (PDIR_SLOT_KERN + npde)) * sizeof(pd_entry_t));
2031 2031
2032 if (VM_MIN_KERNEL_ADDRESS != KERNBASE) { 2032 if (VM_MIN_KERNEL_ADDRESS != KERNBASE) {
2033 int idx = pl_i(KERNBASE, PTP_LEVELS); 2033 int idx = pl_i(KERNBASE, PTP_LEVELS);
2034 2034
2035 pdir[idx] = PDP_BASE[idx]; 2035 pdir[idx] = PDP_BASE[idx];
2036 } 2036 }
2037#endif /* XEN && __x86_64__*/ 2037#endif /* XEN && __x86_64__*/
2038#ifdef XEN 2038#ifdef XEN
2039 s = splvm(); 2039 s = splvm();
2040 object = (vaddr_t)v; 2040 object = (vaddr_t)v;
2041 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) { 2041 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) {
2042 (void) pmap_extract(pmap_kernel(), object, &pdirpa); 2042 (void) pmap_extract(pmap_kernel(), object, &pdirpa);
2043 /* FIXME: This should use pmap_protect() .. */ 2043 /* FIXME: This should use pmap_protect() .. */
2044 pmap_kenter_pa(object, pdirpa, VM_PROT_READ, 0); 2044 pmap_kenter_pa(object, pdirpa, VM_PROT_READ, 0);
2045 pmap_update(pmap_kernel()); 2045 pmap_update(pmap_kernel());
2046 /* 2046 /*
2047 * pin as L2/L4 page, we have to do the page with the 2047 * pin as L2/L4 page, we have to do the page with the
2048 * PDIR_SLOT_PTE entries last 2048 * PDIR_SLOT_PTE entries last
2049 */ 2049 */
2050#ifdef PAE 2050#ifdef PAE
2051 if (i == l2tol3(PDIR_SLOT_PTE)) 2051 if (i == l2tol3(PDIR_SLOT_PTE))
2052 continue; 2052 continue;
2053#endif 2053#endif
2054 2054
2055#ifdef __x86_64__ 2055#ifdef __x86_64__
2056 xpq_queue_pin_l4_table(xpmap_ptom_masked(pdirpa)); 2056 xpq_queue_pin_l4_table(xpmap_ptom_masked(pdirpa));
2057#else 2057#else
2058 xpq_queue_pin_l2_table(xpmap_ptom_masked(pdirpa)); 2058 xpq_queue_pin_l2_table(xpmap_ptom_masked(pdirpa));
2059#endif 2059#endif
2060 } 2060 }
2061#ifdef PAE 2061#ifdef PAE
2062 object = ((vaddr_t)pdir) + PAGE_SIZE * l2tol3(PDIR_SLOT_PTE); 2062 object = ((vaddr_t)pdir) + PAGE_SIZE * l2tol3(PDIR_SLOT_PTE);
2063 (void)pmap_extract(pmap_kernel(), object, &pdirpa); 2063 (void)pmap_extract(pmap_kernel(), object, &pdirpa);
2064 xpq_queue_pin_l2_table(xpmap_ptom_masked(pdirpa)); 2064 xpq_queue_pin_l2_table(xpmap_ptom_masked(pdirpa));
2065#endif 2065#endif
2066 splx(s); 2066 splx(s);
2067#endif /* XEN */ 2067#endif /* XEN */
2068 2068
2069 return (0); 2069 return (0);
2070} 2070}
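The PDIR_SLOT_PTE entries installed by the constructor are the recursive (self-referencing) mapping: a top-level slot that points back at the page directory itself, which makes every PTE of the pmap visible at a fixed virtual window without any explicit mapping work. For a 4-level MMU with 4 KiB pages, the resulting address arithmetic looks like the sketch below (the slot number and the example VA are illustrative assumptions, not the kernel's constants):

#include <stdint.h>
#include <stdio.h>

#define PGSHIFT		12
#define L4_SHIFT	39			/* 12 + 3 levels of 9 bits */
#define SLOT_PTE	255ULL			/* assumed recursive slot */
#define PTE_BASE	(SLOT_PTE << L4_SHIFT)	/* where all PTEs appear */

/* virtual address of the level-1 PTE that maps 'va' */
static uint64_t
pte_va(uint64_t va)
{
	return PTE_BASE + ((va >> PGSHIFT) & 0xfffffffffULL) * sizeof(uint64_t);
}

int
main(void)
{
	uint64_t va = 0x7f0000123000ULL;

	printf("PTE for va 0x%llx lives at 0x%llx\n",
	    (unsigned long long)va, (unsigned long long)pte_va(va));
	return 0;
}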
2071 2071
2072/* 2072/*
2073 * pmap_pdp_dtor: destructor for the PDP cache. 2073 * pmap_pdp_dtor: destructor for the PDP cache.
2074 */ 2074 */
2075 2075
2076void 2076void
2077pmap_pdp_dtor(void *arg, void *v) 2077pmap_pdp_dtor(void *arg, void *v)
2078{ 2078{
2079#ifdef XEN 2079#ifdef XEN
2080 paddr_t pdirpa = 0; /* XXX: GCC */ 2080 paddr_t pdirpa = 0; /* XXX: GCC */
2081 vaddr_t object = (vaddr_t)v; 2081 vaddr_t object = (vaddr_t)v;
2082 int i; 2082 int i;
2083 int s = splvm(); 2083 int s = splvm();
2084 pt_entry_t *pte; 2084 pt_entry_t *pte;
2085 2085
2086 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) { 2086 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) {
2087 /* fetch the physical address of the page directory. */ 2087 /* fetch the physical address of the page directory. */
2088 (void) pmap_extract(pmap_kernel(), object, &pdirpa); 2088 (void) pmap_extract(pmap_kernel(), object, &pdirpa);
2089 /* unpin page table */ 2089 /* unpin page table */
2090 xpq_queue_unpin_table(xpmap_ptom_masked(pdirpa)); 2090 xpq_queue_unpin_table(xpmap_ptom_masked(pdirpa));
2091 } 2091 }
2092 object = (vaddr_t)v; 2092 object = (vaddr_t)v;
2093 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) { 2093 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) {
2094 /* Set page RW again */ 2094 /* Set page RW again */
2095 pte = kvtopte(object); 2095 pte = kvtopte(object);
2096 xpq_queue_pte_update(xpmap_ptetomach(pte), *pte | PG_RW); 2096 xpq_queue_pte_update(xpmap_ptetomach(pte), *pte | PG_RW);
2097 xpq_queue_invlpg((vaddr_t)object); 2097 xpq_queue_invlpg((vaddr_t)object);
2098 } 2098 }
2099 splx(s); 2099 splx(s);
2100#endif /* XEN */ 2100#endif /* XEN */
2101} 2101}
2102 2102
2103#ifdef PAE 2103#ifdef PAE
2104 2104
2105/* pmap_pdp_alloc: Allocate a page for the pdp memory pool. */ 2105/* pmap_pdp_alloc: Allocate a page for the pdp memory pool. */
2106 2106
2107void * 2107void *
2108pmap_pdp_alloc(struct pool *pp, int flags) 2108pmap_pdp_alloc(struct pool *pp, int flags)
2109{ 2109{
2110 return (void *)uvm_km_alloc(kernel_map, 2110 return (void *)uvm_km_alloc(kernel_map,
2111 PAGE_SIZE * PDP_SIZE, PAGE_SIZE * PDP_SIZE, 2111 PAGE_SIZE * PDP_SIZE, PAGE_SIZE * PDP_SIZE,
2112 ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK) 2112 ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK)
2113 | UVM_KMF_WIRED); 2113 | UVM_KMF_WIRED);
2114} 2114}
2115 2115
2116/* 2116/*
2117 * pmap_pdp_free: free a PDP 2117 * pmap_pdp_free: free a PDP
2118 */ 2118 */
2119 2119
2120void 2120void
2121pmap_pdp_free(struct pool *pp, void *v) 2121pmap_pdp_free(struct pool *pp, void *v)
2122{ 2122{
2123 uvm_km_free(kernel_map, (vaddr_t)v, PAGE_SIZE * PDP_SIZE, 2123 uvm_km_free(kernel_map, (vaddr_t)v, PAGE_SIZE * PDP_SIZE,
2124 UVM_KMF_WIRED); 2124 UVM_KMF_WIRED);
2125} 2125}
2126#endif /* PAE */ 2126#endif /* PAE */
2127 2127
2128/* 2128/*
2129 * pmap_create: create a pmap 2129 * pmap_create: create a pmap
2130 * 2130 *
2131 * => note: old pmap interface took a "size" arg which allowed for 2131 * => note: old pmap interface took a "size" arg which allowed for
2132 * the creation of "software only" pmaps (not in bsd). 2132 * the creation of "software only" pmaps (not in bsd).
2133 */ 2133 */
2134 2134
2135struct pmap * 2135struct pmap *
2136pmap_create(void) 2136pmap_create(void)
2137{ 2137{
2138 struct pmap *pmap; 2138 struct pmap *pmap;
2139 int i; 2139 int i;
2140 2140
2141 pmap = pool_cache_get(&pmap_cache, PR_WAITOK); 2141 pmap = pool_cache_get(&pmap_cache, PR_WAITOK);
2142 2142
2143 /* init uvm_object */ 2143 /* init uvm_object */
2144 for (i = 0; i < PTP_LEVELS - 1; i++) { 2144 for (i = 0; i < PTP_LEVELS - 1; i++) {
2145 mutex_init(&pmap->pm_obj_lock[i], MUTEX_DEFAULT, IPL_NONE); 2145 mutex_init(&pmap->pm_obj_lock[i], MUTEX_DEFAULT, IPL_NONE);
2146 uvm_obj_init(&pmap->pm_obj[i], NULL, false, 1); 2146 uvm_obj_init(&pmap->pm_obj[i], NULL, false, 1);
2147 uvm_obj_setlock(&pmap->pm_obj[i], &pmap->pm_obj_lock[i]); 2147 uvm_obj_setlock(&pmap->pm_obj[i], &pmap->pm_obj_lock[i]);
2148 pmap->pm_ptphint[i] = NULL; 2148 pmap->pm_ptphint[i] = NULL;
2149 } 2149 }
2150 pmap->pm_stats.wired_count = 0; 2150 pmap->pm_stats.wired_count = 0;
2151 /* count the PDP alloc'd below */ 2151 /* count the PDP alloc'd below */
2152 pmap->pm_stats.resident_count = PDP_SIZE; 2152 pmap->pm_stats.resident_count = PDP_SIZE;
2153#if !defined(__x86_64__) 2153#if !defined(__x86_64__)
2154 pmap->pm_hiexec = 0; 2154 pmap->pm_hiexec = 0;
2155#endif /* !defined(__x86_64__) */ 2155#endif /* !defined(__x86_64__) */
2156 pmap->pm_flags = 0; 2156 pmap->pm_flags = 0;
2157 pmap->pm_cpus = 0; 2157 pmap->pm_cpus = 0;
2158 pmap->pm_kernel_cpus = 0; 2158 pmap->pm_kernel_cpus = 0;
2159 pmap->pm_gc_ptp = NULL; 2159 pmap->pm_gc_ptp = NULL;
2160 2160
2161 /* init the LDT */ 2161 /* init the LDT */
2162 pmap->pm_ldt = NULL; 2162 pmap->pm_ldt = NULL;
2163 pmap->pm_ldt_len = 0; 2163 pmap->pm_ldt_len = 0;
2164 pmap->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL); 2164 pmap->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL);
2165 2165
2166 /* allocate PDP */ 2166 /* allocate PDP */
2167 try_again: 2167 try_again:
2168 pmap->pm_pdir = pool_cache_get(&pmap_pdp_cache, PR_WAITOK); 2168 pmap->pm_pdir = pool_cache_get(&pmap_pdp_cache, PR_WAITOK);
2169 2169
2170 mutex_enter(&pmaps_lock); 2170 mutex_enter(&pmaps_lock);
2171 2171
2172 if (pmap->pm_pdir[PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1] - 1] == 0) { 2172 if (pmap->pm_pdir[PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1] - 1] == 0) {
2173 mutex_exit(&pmaps_lock); 2173 mutex_exit(&pmaps_lock);
2174 pool_cache_destruct_object(&pmap_pdp_cache, pmap->pm_pdir); 2174 pool_cache_destruct_object(&pmap_pdp_cache, pmap->pm_pdir);
2175 goto try_again; 2175 goto try_again;
2176 } 2176 }
2177 2177
2178 for (i = 0; i < PDP_SIZE; i++) 2178 for (i = 0; i < PDP_SIZE; i++)
2179 pmap->pm_pdirpa[i] = 2179 pmap->pm_pdirpa[i] =
2180 pmap_pte2pa(pmap->pm_pdir[PDIR_SLOT_PTE + i]); 2180 pmap_pte2pa(pmap->pm_pdir[PDIR_SLOT_PTE + i]);
2181 2181
2182 LIST_INSERT_HEAD(&pmaps, pmap, pm_list); 2182 LIST_INSERT_HEAD(&pmaps, pmap, pm_list);
2183 2183
2184 mutex_exit(&pmaps_lock); 2184 mutex_exit(&pmaps_lock);
2185 2185
2186 return (pmap); 2186 return (pmap);
2187} 2187}
2188 2188
2189/* 2189/*
2190 * pmap_free_ptps: put a list of ptps back to the freelist. 2190 * pmap_free_ptps: put a list of ptps back to the freelist.
2191 */ 2191 */
2192 2192
2193static void 2193static void
2194pmap_free_ptps(struct vm_page *empty_ptps) 2194pmap_free_ptps(struct vm_page *empty_ptps)
2195{ 2195{
2196 struct vm_page *ptp; 2196 struct vm_page *ptp;
2197 struct pmap_page *pp; 2197 struct pmap_page *pp;
2198 2198
2199 while ((ptp = empty_ptps) != NULL) { 2199 while ((ptp = empty_ptps) != NULL) {
2200 pp = VM_PAGE_TO_PP(ptp); 2200 pp = VM_PAGE_TO_PP(ptp);
2201 empty_ptps = pp->pp_link; 2201 empty_ptps = pp->pp_link;
2202 LIST_INIT(&pp->pp_head.pvh_list); 2202 LIST_INIT(&pp->pp_head.pvh_list);
2203 uvm_pagefree(ptp); 2203 uvm_pagefree(ptp);
2204 } 2204 }
2205} 2205}
2206 2206
2207/* 2207/*
2208 * pmap_destroy: drop reference count on pmap. free pmap if 2208 * pmap_destroy: drop reference count on pmap. free pmap if
2209 * reference count goes to zero. 2209 * reference count goes to zero.
2210 */ 2210 */
2211 2211
2212void 2212void
2213pmap_destroy(struct pmap *pmap) 2213pmap_destroy(struct pmap *pmap)
2214{ 2214{
2215 int i; 2215 int i;
2216#ifdef DIAGNOSTIC 2216#ifdef DIAGNOSTIC
2217 struct cpu_info *ci; 2217 struct cpu_info *ci;
2218 CPU_INFO_ITERATOR cii; 2218 CPU_INFO_ITERATOR cii;
2219#endif /* DIAGNOSTIC */ 2219#endif /* DIAGNOSTIC */
2220 lwp_t *l; 2220 lwp_t *l;
2221 2221
2222 /* 2222 /*
2223 * If we have torn down this pmap, process deferred frees and 2223 * If we have torn down this pmap, process deferred frees and
2224 * invalidations. Free now if the system is low on memory. 2224 * invalidations. Free now if the system is low on memory.
2225 * Otherwise, free when the pmap is destroyed thus avoiding a 2225 * Otherwise, free when the pmap is destroyed thus avoiding a
2226 * TLB shootdown. 2226 * TLB shootdown.
2227 */ 2227 */
2228 l = curlwp; 2228 l = curlwp;
2229 if (__predict_false(l->l_md.md_gc_pmap == pmap)) { 2229 if (__predict_false(l->l_md.md_gc_pmap == pmap)) {
2230 if (uvmexp.free < uvmexp.freetarg) { 2230 if (uvmexp.free < uvmexp.freetarg) {
2231 pmap_update(pmap); 2231 pmap_update(pmap);
2232 } else { 2232 } else {
2233 KASSERT(pmap->pm_gc_ptp == NULL); 2233 KASSERT(pmap->pm_gc_ptp == NULL);
2234 pmap->pm_gc_ptp = l->l_md.md_gc_ptp; 2234 pmap->pm_gc_ptp = l->l_md.md_gc_ptp;
2235 l->l_md.md_gc_ptp = NULL; 2235 l->l_md.md_gc_ptp = NULL;
2236 l->l_md.md_gc_pmap = NULL; 2236 l->l_md.md_gc_pmap = NULL;
2237 } 2237 }
2238 } 2238 }
2239 2239
2240 /* 2240 /*
2241 * drop reference count 2241 * drop reference count
2242 */ 2242 */
2243 2243
2244 if (atomic_dec_uint_nv(&pmap->pm_obj[0].uo_refs) > 0) { 2244 if (atomic_dec_uint_nv(&pmap->pm_obj[0].uo_refs) > 0) {
2245 return; 2245 return;
2246 } 2246 }
2247 2247
2248#ifdef DIAGNOSTIC 2248#ifdef DIAGNOSTIC
2249 for (CPU_INFO_FOREACH(cii, ci)) 2249 for (CPU_INFO_FOREACH(cii, ci))
2250 if (ci->ci_pmap == pmap) 2250 if (ci->ci_pmap == pmap)
2251 panic("destroying pmap being used"); 2251 panic("destroying pmap being used");
2252#endif /* DIAGNOSTIC */ 2252#endif /* DIAGNOSTIC */
2253 2253
2254 /* 2254 /*
2255 * reference count is zero, free pmap resources and then free pmap. 2255 * reference count is zero, free pmap resources and then free pmap.
2256 */ 2256 */
2257#ifdef XEN 2257#ifdef XEN
2258 /* 2258 /*
2259 * Xen lazy APDP handling: 2259 * Xen lazy APDP handling:
2260 * clear APDP_PDE if pmap is the currently mapped 2260 * clear APDP_PDE if pmap is the currently mapped
2261 */ 2261 */
2262 if (xpmap_ptom_masked(pmap_pdirpa(pmap, 0)) == (*APDP_PDE & PG_FRAME)) { 2262 if (xpmap_ptom_masked(pmap_pdirpa(pmap, 0)) == (*APDP_PDE & PG_FRAME)) {
2263 kpreempt_disable(); 2263 kpreempt_disable();
2264 pmap_unmap_apdp(); 2264 pmap_unmap_apdp();
2265 pmap_pte_flush(); 2265 pmap_pte_flush();
2266 pmap_apte_flush(pmap_kernel()); 2266 pmap_apte_flush(pmap_kernel());
2267 kpreempt_enable(); 2267 kpreempt_enable();
2268 } 2268 }
2269#endif 2269#endif
2270 2270
2271 /* 2271 /*
2272 * remove it from global list of pmaps 2272 * remove it from global list of pmaps
2273 */ 2273 */
2274 2274
2275 mutex_enter(&pmaps_lock); 2275 mutex_enter(&pmaps_lock);
2276 LIST_REMOVE(pmap, pm_list); 2276 LIST_REMOVE(pmap, pm_list);
2277 mutex_exit(&pmaps_lock); 2277 mutex_exit(&pmaps_lock);
2278 2278
2279 /* 2279 /*
2280 * Process deferred PTP frees. No TLB shootdown required, as the 2280 * Process deferred PTP frees. No TLB shootdown required, as the
2281 * PTP pages are no longer visible to any CPU. 2281 * PTP pages are no longer visible to any CPU.
2282 */ 2282 */
2283 2283
2284 pmap_free_ptps(pmap->pm_gc_ptp); 2284 pmap_free_ptps(pmap->pm_gc_ptp);
2285 2285
2286 /* 2286 /*
2287 * destroyed pmap shouldn't have remaining PTPs 2287 * destroyed pmap shouldn't have remaining PTPs
2288 */ 2288 */
2289 2289
2290 for (i = 0; i < PTP_LEVELS - 1; i++) { 2290 for (i = 0; i < PTP_LEVELS - 1; i++) {
2291 KASSERT(pmap->pm_obj[i].uo_npages == 0); 2291 KASSERT(pmap->pm_obj[i].uo_npages == 0);
2292 KASSERT(TAILQ_EMPTY(&pmap->pm_obj[i].memq)); 2292 KASSERT(TAILQ_EMPTY(&pmap->pm_obj[i].memq));
2293 } 2293 }
2294 2294
2295 /* 2295 /*
2296 * MULTIPROCESSOR -- no need to flush out of other processors' 2296 * MULTIPROCESSOR -- no need to flush out of other processors'
2297 * APTE space because we do that in pmap_unmap_ptes(). 2297 * APTE space because we do that in pmap_unmap_ptes().
2298 */ 2298 */
2299 pool_cache_put(&pmap_pdp_cache, pmap->pm_pdir); 2299 pool_cache_put(&pmap_pdp_cache, pmap->pm_pdir);
2300 2300
2301#ifdef USER_LDT 2301#ifdef USER_LDT
2302 if (pmap->pm_ldt != NULL) { 2302 if (pmap->pm_ldt != NULL) {
2303 /* 2303 /*
2304 * no need to switch the LDT; this address space is gone, 2304 * no need to switch the LDT; this address space is gone,
2305 * nothing is using it. 2305 * nothing is using it.
2306 * 2306 *
2307 * No need to lock the pmap for ldt_free (or anything else), 2307 * No need to lock the pmap for ldt_free (or anything else),
2308 * we're the last one to use it. 2308 * we're the last one to use it.
2309 */ 2309 */
2310 mutex_enter(&cpu_lock); 2310 mutex_enter(&cpu_lock);
2311 ldt_free(pmap->pm_ldt_sel); 2311 ldt_free(pmap->pm_ldt_sel);
2312 mutex_exit(&cpu_lock); 2312 mutex_exit(&cpu_lock);
2313 uvm_km_free(kernel_map, (vaddr_t)pmap->pm_ldt, 2313 uvm_km_free(kernel_map, (vaddr_t)pmap->pm_ldt,
2314 pmap->pm_ldt_len, UVM_KMF_WIRED); 2314 pmap->pm_ldt_len, UVM_KMF_WIRED);
2315 } 2315 }
2316#endif 2316#endif
2317 2317
2318 for (i = 0; i < PTP_LEVELS - 1; i++) { 2318 for (i = 0; i < PTP_LEVELS - 1; i++) {
2319 uvm_obj_destroy(&pmap->pm_obj[i], false); 2319 uvm_obj_destroy(&pmap->pm_obj[i], false);
2320 mutex_destroy(&pmap->pm_obj_lock[i]); 2320 mutex_destroy(&pmap->pm_obj_lock[i]);
2321 } 2321 }
2322 pool_cache_put(&pmap_cache, pmap); 2322 pool_cache_put(&pmap_cache, pmap);
2323} 2323}
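pmap_destroy() is the usual drop-a-reference-and-maybe-free pattern: atomic_dec_uint_nv() returns the new count, and only the caller that takes it to zero tears the object down. The same shape with C11 atomics (a sketch, not the kernel's atomic_ops(3) interface; note that atomic_fetch_sub returns the old value rather than the new one):

#include <stdatomic.h>
#include <stdlib.h>

struct object {
	atomic_uint	refs;
	/* ... resources owned by the object ... */
};

static void
object_destroy(struct object *o)
{
	/* old value 1 means we just dropped the last reference */
	if (atomic_fetch_sub(&o->refs, 1) > 1)
		return;
	/* last holder: release resources, then the object itself */
	free(o);
}

int
main(void)
{
	struct object *o = calloc(1, sizeof(*o));

	if (o == NULL)
		return 1;
	atomic_init(&o->refs, 2);	/* two holders */
	object_destroy(o);		/* drops to 1, object survives */
	object_destroy(o);		/* drops to 0, object is freed */
	return 0;
}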
2324 2324
2325/* 2325/*
2326 * pmap_remove_all: pmap is being torn down by the current thread. 2326 * pmap_remove_all: pmap is being torn down by the current thread.
2327 * avoid unnecessary invalidations. 2327 * avoid unnecessary invalidations.
2328 */ 2328 */
2329 2329
2330void 2330void
2331pmap_remove_all(struct pmap *pmap) 2331pmap_remove_all(struct pmap *pmap)
2332{ 2332{
2333 lwp_t *l = curlwp; 2333 lwp_t *l = curlwp;
2334 2334
2335 KASSERT(l->l_md.md_gc_pmap == NULL); 2335 KASSERT(l->l_md.md_gc_pmap == NULL);
2336 2336
2337 l->l_md.md_gc_pmap = pmap; 2337 l->l_md.md_gc_pmap = pmap;
2338} 2338}
2339 2339
2340#if defined(PMAP_FORK) 2340#if defined(PMAP_FORK)
2341/* 2341/*
2342 * pmap_fork: perform any necessary data structure manipulation when 2342 * pmap_fork: perform any necessary data structure manipulation when
2343 * a VM space is forked. 2343 * a VM space is forked.
2344 */ 2344 */
2345 2345
2346void 2346void
2347pmap_fork(struct pmap *pmap1, struct pmap *pmap2) 2347pmap_fork(struct pmap *pmap1, struct pmap *pmap2)
2348{ 2348{
2349#ifdef USER_LDT 2349#ifdef USER_LDT
2350 union descriptor *new_ldt; 2350 union descriptor *new_ldt;
2351 size_t len; 2351 size_t len;
2352 int sel; 2352 int sel;
2353 2353
2354 if (__predict_true(pmap1->pm_ldt == NULL)) { 2354 if (__predict_true(pmap1->pm_ldt == NULL)) {
2355 return; 2355 return;
2356 } 2356 }
2357 2357
2358 retry: 2358 retry:
2359 if (pmap1->pm_ldt != NULL) { 2359 if (pmap1->pm_ldt != NULL) {
2360 len = pmap1->pm_ldt_len; 2360 len = pmap1->pm_ldt_len;
2361 new_ldt = (union descriptor *)uvm_km_alloc(kernel_map, len, 0, 2361 new_ldt = (union descriptor *)uvm_km_alloc(kernel_map, len, 0,
2362 UVM_KMF_WIRED); 2362 UVM_KMF_WIRED);
2363 mutex_enter(&cpu_lock); 2363 mutex_enter(&cpu_lock);
2364 sel = ldt_alloc(new_ldt, len); 2364 sel = ldt_alloc(new_ldt, len);
2365 if (sel == -1) { 2365 if (sel == -1) {
2366 mutex_exit(&cpu_lock); 2366 mutex_exit(&cpu_lock);
2367 uvm_km_free(kernel_map, (vaddr_t)new_ldt, len, 2367 uvm_km_free(kernel_map, (vaddr_t)new_ldt, len,
2368 UVM_KMF_WIRED); 2368 UVM_KMF_WIRED);
2369 printf("WARNING: pmap_fork: unable to allocate LDT\n"); 2369 printf("WARNING: pmap_fork: unable to allocate LDT\n");
2370 return; 2370 return;
2371 } 2371 }
2372 } else { 2372 } else {
2373 len = -1; 2373 len = -1;
2374 new_ldt = NULL; 2374 new_ldt = NULL;
2375 sel = -1; 2375 sel = -1;
2376 mutex_enter(&cpu_lock); 2376 mutex_enter(&cpu_lock);
2377 } 2377 }
2378 2378
2379 /* Copy the LDT, if necessary. */ 2379 /* Copy the LDT, if necessary. */
2380 if (pmap1->pm_ldt != NULL) { 2380 if (pmap1->pm_ldt != NULL) {
2381 if (len != pmap1->pm_ldt_len) { 2381 if (len != pmap1->pm_ldt_len) {
2382 if (len != -1) { 2382 if (len != -1) {
2383 ldt_free(sel); 2383 ldt_free(sel);
2384 uvm_km_free(kernel_map, (vaddr_t)new_ldt, 2384 uvm_km_free(kernel_map, (vaddr_t)new_ldt,
2385 len, UVM_KMF_WIRED); 2385 len, UVM_KMF_WIRED);
2386 } 2386 }
2387 mutex_exit(&cpu_lock); 2387 mutex_exit(&cpu_lock);
2388 goto retry; 2388 goto retry;
2389 } 2389 }
2390  2390
2391 memcpy(new_ldt, pmap1->pm_ldt, len); 2391 memcpy(new_ldt, pmap1->pm_ldt, len);
2392 pmap2->pm_ldt = new_ldt; 2392 pmap2->pm_ldt = new_ldt;
2393 pmap2->pm_ldt_len = pmap1->pm_ldt_len; 2393 pmap2->pm_ldt_len = pmap1->pm_ldt_len;
2394 pmap2->pm_ldt_sel = sel; 2394 pmap2->pm_ldt_sel = sel;
2395 len = -1; 2395 len = -1;
2396 } 2396 }
2397 2397
2398 if (len != -1) { 2398 if (len != -1) {
2399 ldt_free(sel); 2399 ldt_free(sel);
2400 uvm_km_free(kernel_map, (vaddr_t)new_ldt, len, 2400 uvm_km_free(kernel_map, (vaddr_t)new_ldt, len,
2401 UVM_KMF_WIRED); 2401 UVM_KMF_WIRED);
2402 } 2402 }
2403 mutex_exit(&cpu_lock); 2403 mutex_exit(&cpu_lock);
2404#endif /* USER_LDT */ 2404#endif /* USER_LDT */
2405} 2405}
2406#endif /* PMAP_FORK */ 2406#endif /* PMAP_FORK */
2407 2407
2408#ifdef USER_LDT 2408#ifdef USER_LDT
2409 2409
2410/* 2410/*
2411 * pmap_ldt_xcall: cross call used by pmap_ldt_sync. if the named pmap 2411 * pmap_ldt_xcall: cross call used by pmap_ldt_sync. if the named pmap
2412 * is active, reload LDTR. 2412 * is active, reload LDTR.
2413 */ 2413 */
2414static void 2414static void
2415pmap_ldt_xcall(void *arg1, void *arg2) 2415pmap_ldt_xcall(void *arg1, void *arg2)
2416{ 2416{
2417 struct pmap *pm; 2417 struct pmap *pm;
2418 2418
2419 kpreempt_disable(); 2419 kpreempt_disable();
2420 pm = arg1; 2420 pm = arg1;
2421 if (curcpu()->ci_pmap == pm) { 2421 if (curcpu()->ci_pmap == pm) {
2422 lldt(pm->pm_ldt_sel); 2422 lldt(pm->pm_ldt_sel);
2423 } 2423 }
2424 kpreempt_enable(); 2424 kpreempt_enable();
2425} 2425}
2426 2426
2427/* 2427/*
2428 * pmap_ldt_sync: LDT selector for the named pmap is changing. swap 2428 * pmap_ldt_sync: LDT selector for the named pmap is changing. swap
2429 * in the new selector on all CPUs. 2429 * in the new selector on all CPUs.
2430 */ 2430 */
2431void 2431void
2432pmap_ldt_sync(struct pmap *pm) 2432pmap_ldt_sync(struct pmap *pm)
2433{ 2433{
2434 uint64_t where; 2434 uint64_t where;
2435 2435
2436 KASSERT(mutex_owned(&cpu_lock)); 2436 KASSERT(mutex_owned(&cpu_lock));
2437 2437
2438 pmap_ldt_evcnt.ev_count++; 2438 pmap_ldt_evcnt.ev_count++;
2439 where = xc_broadcast(0, pmap_ldt_xcall, pm, NULL); 2439 where = xc_broadcast(0, pmap_ldt_xcall, pm, NULL);
2440 xc_wait(where); 2440 xc_wait(where);
2441} 2441}
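pmap_ldt_sync() leans on the cross-call facility: xc_broadcast() schedules pmap_ldt_xcall() on every CPU and xc_wait() blocks until all of them have run it, so no CPU can keep a stale LDT selector loaded afterwards. A userspace analogue of that broadcast-and-wait rendezvous, built from POSIX threads and barriers instead of the kernel's xcall machinery:

#include <pthread.h>
#include <stdio.h>

#define NWORKERS 4

static pthread_barrier_t start_bar, done_bar;
static void (*broadcast_fn)(int);

static void
reload_ldt(int cpu)
{
	printf("cpu%d: reloading LDT selector\n", cpu);
}

static void *
worker(void *arg)
{
	int cpu = (int)(long)arg;

	pthread_barrier_wait(&start_bar);	/* wait for the broadcast */
	broadcast_fn(cpu);			/* run the cross-called function */
	pthread_barrier_wait(&done_bar);	/* report completion */
	return NULL;
}

int
main(void)
{
	pthread_t t[NWORKERS];

	pthread_barrier_init(&start_bar, NULL, NWORKERS + 1);
	pthread_barrier_init(&done_bar, NULL, NWORKERS + 1);
	for (long i = 0; i < NWORKERS; i++)
		pthread_create(&t[i], NULL, worker, (void *)i);

	broadcast_fn = reload_ldt;		/* like xc_broadcast(0, fn, pm, NULL) */
	pthread_barrier_wait(&start_bar);	/* release every "CPU" */
	pthread_barrier_wait(&done_bar);	/* like xc_wait(where) */

	for (int i = 0; i < NWORKERS; i++)
		pthread_join(t[i], NULL);
	return 0;
}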
2442 2442
2443/* 2443/*
2444 * pmap_ldt_cleanup: if the pmap has a local LDT, deallocate it, and 2444 * pmap_ldt_cleanup: if the pmap has a local LDT, deallocate it, and
2445 * restore the default. 2445 * restore the default.
2446 */ 2446 */
2447 2447
2448void 2448void
2449pmap_ldt_cleanup(struct lwp *l) 2449pmap_ldt_cleanup(struct lwp *l)
2450{ 2450{
2451 pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap; 2451 pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap;
2452 union descriptor *dp = NULL; 2452 union descriptor *dp = NULL;
2453 size_t len = 0; 2453 size_t len = 0;
2454 int sel = -1; 2454 int sel = -1;
2455 2455
2456 if (__predict_true(pmap->pm_ldt == NULL)) { 2456 if (__predict_true(pmap->pm_ldt == NULL)) {
2457 return; 2457 return;
2458 } 2458 }
2459 2459
2460 mutex_enter(&cpu_lock); 2460 mutex_enter(&cpu_lock);
2461 if (pmap->pm_ldt != NULL) { 2461 if (pmap->pm_ldt != NULL) {
2462 sel = pmap->pm_ldt_sel; 2462 sel = pmap->pm_ldt_sel;
2463 dp = pmap->pm_ldt; 2463 dp = pmap->pm_ldt;
2464 len = pmap->pm_ldt_len; 2464 len = pmap->pm_ldt_len;
2465 pmap->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL); 2465 pmap->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL);
2466 pmap->pm_ldt = NULL; 2466 pmap->pm_ldt = NULL;
2467 pmap->pm_ldt_len = 0; 2467 pmap->pm_ldt_len = 0;
2468 pmap_ldt_sync(pmap); 2468 pmap_ldt_sync(pmap);
2469 ldt_free(sel); 2469 ldt_free(sel);
2470 uvm_km_free(kernel_map, (vaddr_t)dp, len, UVM_KMF_WIRED); 2470 uvm_km_free(kernel_map, (vaddr_t)dp, len, UVM_KMF_WIRED);
2471 } 2471 }
2472 mutex_exit(&cpu_lock); 2472 mutex_exit(&cpu_lock);
2473} 2473}
2474#endif /* USER_LDT */ 2474#endif /* USER_LDT */
2475 2475
2476/* 2476/*
2477 * pmap_activate: activate a process' pmap 2477 * pmap_activate: activate a process' pmap
2478 * 2478 *
2479 * => must be called with kernel preemption disabled 2479 * => must be called with kernel preemption disabled
2480 * => if lwp is the curlwp, then set ci_want_pmapload so that 2480 * => if lwp is the curlwp, then set ci_want_pmapload so that
2481 * actual MMU context switch will be done by pmap_load() later 2481 * actual MMU context switch will be done by pmap_load() later
2482 */ 2482 */
2483 2483
2484void 2484void
2485pmap_activate(struct lwp *l) 2485pmap_activate(struct lwp *l)
2486{ 2486{
2487 struct cpu_info *ci; 2487 struct cpu_info *ci;
2488 struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); 2488 struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
2489 2489
2490 KASSERT(kpreempt_disabled()); 2490 KASSERT(kpreempt_disabled());
2491 2491
2492 ci = curcpu(); 2492 ci = curcpu();
2493 2493
2494 if (l == ci->ci_curlwp) { 2494 if (l == ci->ci_curlwp) {
2495 KASSERT(ci->ci_want_pmapload == 0); 2495 KASSERT(ci->ci_want_pmapload == 0);
2496 KASSERT(ci->ci_tlbstate != TLBSTATE_VALID); 2496 KASSERT(ci->ci_tlbstate != TLBSTATE_VALID);
2497#ifdef KSTACK_CHECK_DR0 2497#ifdef KSTACK_CHECK_DR0
2498 /* 2498 /*
2499 * setup breakpoint on the top of stack 2499 * setup breakpoint on the top of stack
2500 */ 2500 */
2501 if (l == &lwp0) 2501 if (l == &lwp0)
2502 dr0(0, 0, 0, 0); 2502 dr0(0, 0, 0, 0);
2503 else 2503 else
2504 dr0(KSTACK_LOWEST_ADDR(l), 1, 3, 1); 2504 dr0(KSTACK_LOWEST_ADDR(l), 1, 3, 1);
2505#endif 2505#endif
2506 2506
2507 /* 2507 /*
2508 * no need to switch to kernel vmspace because 2508 * no need to switch to kernel vmspace because
2509 * it's a subset of any vmspace. 2509 * it's a subset of any vmspace.
2510 */ 2510 */
2511 2511
2512 if (pmap == pmap_kernel()) { 2512 if (pmap == pmap_kernel()) {
2513 ci->ci_want_pmapload = 0; 2513 ci->ci_want_pmapload = 0;
2514 return; 2514 return;
2515 } 2515 }
2516 2516
2517 ci->ci_want_pmapload = 1; 2517 ci->ci_want_pmapload = 1;
2518 } 2518 }
2519} 2519}
2520 2520
2521/* 2521/*
2522 * pmap_reactivate: try to regain reference to the pmap. 2522 * pmap_reactivate: try to regain reference to the pmap.
2523 * 2523 *
2524 * => must be called with kernel preemption disabled 2524 * => must be called with kernel preemption disabled
2525 */ 2525 */
2526 2526
2527static bool 2527static bool
2528pmap_reactivate(struct pmap *pmap) 2528pmap_reactivate(struct pmap *pmap)
2529{ 2529{
2530 struct cpu_info *ci; 2530 struct cpu_info *ci;
2531 uint32_t cpumask; 2531 uint32_t cpumask;
2532 bool result;  2532 bool result;
2533 uint32_t oldcpus; 2533 uint32_t oldcpus;
2534 2534
2535 ci = curcpu(); 2535 ci = curcpu();
2536 cpumask = ci->ci_cpumask; 2536 cpumask = ci->ci_cpumask;
2537 2537
2538 KASSERT(kpreempt_disabled()); 2538 KASSERT(kpreempt_disabled());
2539#if defined(XEN) && defined(__x86_64__) 2539#if defined(XEN) && defined(__x86_64__)
2540 KASSERT(pmap_pdirpa(pmap, 0) == ci->ci_xen_current_user_pgd); 2540 KASSERT(pmap_pdirpa(pmap, 0) == ci->ci_xen_current_user_pgd);
2541#elif defined(PAE) 2541#elif defined(PAE)
2542 KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(ci->ci_pae_l3_pdir[0])); 2542 KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(ci->ci_pae_l3_pdir[0]));
2543#elif !defined(XEN)  2543#elif !defined(XEN)
2544 KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(rcr3())); 2544 KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(rcr3()));
2545#endif 2545#endif
2546 2546
2547 /* 2547 /*
2548 * if we still have a lazy reference to this pmap, 2548 * if we still have a lazy reference to this pmap,
2549 * we can assume that there was no tlb shootdown 2549 * we can assume that there was no tlb shootdown
2550 * for this pmap in the meantime. 2550 * for this pmap in the meantime.
2551 * 2551 *
2552 * the order of events here is important as we must 2552 * the order of events here is important as we must
2553 * synchronize with TLB shootdown interrupts. declare 2553 * synchronize with TLB shootdown interrupts. declare
2554 * interest in invalidations (TLBSTATE_VALID) and then 2554 * interest in invalidations (TLBSTATE_VALID) and then
2555 * check the cpumask, which the IPIs can change only 2555 * check the cpumask, which the IPIs can change only
2556 * when the state is TLBSTATE_LAZY. 2556 * when the state is TLBSTATE_LAZY.
2557 */ 2557 */
2558 2558
2559 ci->ci_tlbstate = TLBSTATE_VALID; 2559 ci->ci_tlbstate = TLBSTATE_VALID;
2560 oldcpus = pmap->pm_cpus; 2560 oldcpus = pmap->pm_cpus;
2561 KASSERT((pmap->pm_kernel_cpus & cpumask) != 0); 2561 KASSERT((pmap->pm_kernel_cpus & cpumask) != 0);
2562 if (oldcpus & cpumask) { 2562 if (oldcpus & cpumask) {
2563 /* got it */ 2563 /* got it */
2564 result = true; 2564 result = true;
2565 } else { 2565 } else {
2566 /* must reload */ 2566 /* must reload */
2567 atomic_or_32(&pmap->pm_cpus, cpumask); 2567 atomic_or_32(&pmap->pm_cpus, cpumask);
2568 result = false; 2568 result = false;
2569 } 2569 }
2570 2570
2571 return result; 2571 return result;
2572} 2572}
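The ordering pmap_reactivate() relies on is spelled out in its comment: declare interest first (ci_tlbstate = TLBSTATE_VALID), then look at pm_cpus, because shootdown IPIs may clear this CPU's bit only while the state is lazy. The read-then-conditional-set of the CPU bit can also be expressed as one atomic OR whose previous value says whether the TLB is still trustworthy; a small sketch with C11 atomics (not the kernel's atomic_or_32()):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* returns true if this CPU never left pm_cpus, i.e. its TLB is still valid */
static bool
reactivate(_Atomic uint32_t *pm_cpus, uint32_t cpumask)
{
	uint32_t oldcpus = atomic_fetch_or(pm_cpus, cpumask);

	return (oldcpus & cpumask) != 0;
}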
2573 2573
2574/* 2574/*
2575 * pmap_load: actually switch pmap. (fill in %cr3 and LDT info) 2575 * pmap_load: actually switch pmap. (fill in %cr3 and LDT info)
2576 */ 2576 */
2577 2577
2578void 2578void
2579pmap_load(void) 2579pmap_load(void)
2580{ 2580{
2581 struct cpu_info *ci; 2581 struct cpu_info *ci;
2582 uint32_t cpumask; 2582 uint32_t cpumask;
2583 struct pmap *pmap; 2583 struct pmap *pmap;
2584 struct pmap *oldpmap; 2584 struct pmap *oldpmap;
2585 struct lwp *l; 2585 struct lwp *l;
2586 struct pcb *pcb; 2586 struct pcb *pcb;
2587 uint64_t ncsw; 2587 uint64_t ncsw;
2588 2588
2589 kpreempt_disable(); 2589 kpreempt_disable();
2590 retry: 2590 retry:
2591 ci = curcpu(); 2591 ci = curcpu();
2592 if (!ci->ci_want_pmapload) { 2592 if (!ci->ci_want_pmapload) {
2593 kpreempt_enable(); 2593 kpreempt_enable();
2594 return; 2594 return;
2595 } 2595 }
2596 cpumask = ci->ci_cpumask; 2596 cpumask = ci->ci_cpumask;
2597 l = ci->ci_curlwp; 2597 l = ci->ci_curlwp;
2598 ncsw = l->l_ncsw; 2598 ncsw = l->l_ncsw;
2599 2599
2600 /* should be able to take ipis. */ 2600 /* should be able to take ipis. */
2601 KASSERT(ci->ci_ilevel < IPL_HIGH);  2601 KASSERT(ci->ci_ilevel < IPL_HIGH);
2602#ifdef XEN 2602#ifdef XEN
2603 /* XXX not yet KASSERT(x86_read_psl() != 0); */ 2603 /* XXX not yet KASSERT(x86_read_psl() != 0); */
2604#else 2604#else
2605 KASSERT((x86_read_psl() & PSL_I) != 0); 2605 KASSERT((x86_read_psl() & PSL_I) != 0);
2606#endif 2606#endif
2607 2607
2608 KASSERT(l != NULL); 2608 KASSERT(l != NULL);
2609 pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); 2609 pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
2610 KASSERT(pmap != pmap_kernel()); 2610 KASSERT(pmap != pmap_kernel());
2611 oldpmap = ci->ci_pmap; 2611 oldpmap = ci->ci_pmap;
2612 pcb = lwp_getpcb(l); 2612 pcb = lwp_getpcb(l);
2613 2613
2614 if (pmap == oldpmap) { 2614 if (pmap == oldpmap) {
2615 if (!pmap_reactivate(pmap)) { 2615 if (!pmap_reactivate(pmap)) {
2616 u_int gen = uvm_emap_gen_return(); 2616 u_int gen = uvm_emap_gen_return();
2617 2617
2618 /* 2618 /*
2619 * pmap has been changed while it was deactivated. 2619 * pmap has been changed while it was deactivated.
2620 * our tlb may be stale. 2620 * our tlb may be stale.
2621 */ 2621 */
2622 2622
2623 tlbflush(); 2623 tlbflush();
2624 uvm_emap_update(gen); 2624 uvm_emap_update(gen);
2625 } 2625 }
2626 2626
2627 ci->ci_want_pmapload = 0; 2627 ci->ci_want_pmapload = 0;
2628 kpreempt_enable(); 2628 kpreempt_enable();
2629 return; 2629 return;
2630 } 2630 }
2631 2631
2632 /* 2632 /*
2633 * grab a reference to the new pmap. 2633 * grab a reference to the new pmap.
2634 */ 2634 */
2635 2635
2636 pmap_reference(pmap); 2636 pmap_reference(pmap);
2637 2637
2638 /* 2638 /*
2639 * actually switch pmap. 2639 * actually switch pmap.
2640 */ 2640 */
2641 2641
2642 atomic_and_32(&oldpmap->pm_cpus, ~cpumask); 2642 atomic_and_32(&oldpmap->pm_cpus, ~cpumask);
2643 atomic_and_32(&oldpmap->pm_kernel_cpus, ~cpumask); 2643 atomic_and_32(&oldpmap->pm_kernel_cpus, ~cpumask);
2644 2644
2645#if defined(XEN) && defined(__x86_64__) 2645#if defined(XEN) && defined(__x86_64__)
2646 KASSERT(pmap_pdirpa(oldpmap, 0) == ci->ci_xen_current_user_pgd || 2646 KASSERT(pmap_pdirpa(oldpmap, 0) == ci->ci_xen_current_user_pgd ||
2647 oldpmap == pmap_kernel()); 2647 oldpmap == pmap_kernel());
2648#elif defined(PAE) 2648#elif defined(PAE)
2649 KASSERT(pmap_pdirpa(oldpmap, 0) == pmap_pte2pa(ci->ci_pae_l3_pdir[0])); 2649 KASSERT(pmap_pdirpa(oldpmap, 0) == pmap_pte2pa(ci->ci_pae_l3_pdir[0]));
2650#elif !defined(XEN) 2650#elif !defined(XEN)
2651 KASSERT(pmap_pdirpa(oldpmap, 0) == pmap_pte2pa(rcr3())); 2651 KASSERT(pmap_pdirpa(oldpmap, 0) == pmap_pte2pa(rcr3()));
2652#endif 2652#endif
2653 KASSERT((pmap->pm_cpus & cpumask) == 0); 2653 KASSERT((pmap->pm_cpus & cpumask) == 0);
2654 KASSERT((pmap->pm_kernel_cpus & cpumask) == 0); 2654 KASSERT((pmap->pm_kernel_cpus & cpumask) == 0);
2655 2655
2656 /* 2656 /*
2657 * mark the pmap in use by this processor. again we must 2657 * mark the pmap in use by this processor. again we must
2658 * synchronize with TLB shootdown interrupts, so set the 2658 * synchronize with TLB shootdown interrupts, so set the
2659 * state VALID first, then register us for shootdown events 2659 * state VALID first, then register us for shootdown events
2660 * on this pmap. 2660 * on this pmap.
2661 */ 2661 */
2662 2662
2663 ci->ci_tlbstate = TLBSTATE_VALID; 2663 ci->ci_tlbstate = TLBSTATE_VALID;
2664 atomic_or_32(&pmap->pm_cpus, cpumask); 2664 atomic_or_32(&pmap->pm_cpus, cpumask);
2665 atomic_or_32(&pmap->pm_kernel_cpus, cpumask); 2665 atomic_or_32(&pmap->pm_kernel_cpus, cpumask);
2666 ci->ci_pmap = pmap; 2666 ci->ci_pmap = pmap;
2667 2667
2668 /* 2668 /*
2669 * update tss. now that we have registered for invalidations 2669 * update tss. now that we have registered for invalidations
2670 * from other CPUs, we're good to load the page tables. 2670 * from other CPUs, we're good to load the page tables.
2671 */ 2671 */
2672#ifdef PAE 2672#ifdef PAE
2673 pcb->pcb_cr3 = ci->ci_pae_l3_pdirpa; 2673 pcb->pcb_cr3 = ci->ci_pae_l3_pdirpa;
2674#else 2674#else
2675 pcb->pcb_cr3 = pmap_pdirpa(pmap, 0); 2675 pcb->pcb_cr3 = pmap_pdirpa(pmap, 0);
2676#endif 2676#endif
2677 2677
2678#ifdef i386 2678#ifdef i386
2679#ifdef XEN 2679#ifdef XEN
2680 /* 2680 /*
2681 * clear APDP slot, in case it points to a page table that has  2681 * clear APDP slot, in case it points to a page table that has
2682 * been freed 2682 * been freed
2683 */ 2683 */
2684 if (*APDP_PDE) { 2684 if (*APDP_PDE) {
2685 pmap_unmap_apdp(); 2685 pmap_unmap_apdp();
2686 } 2686 }
2687 /* lldt() does pmap_pte_flush() */ 2687 /* lldt() does pmap_pte_flush() */
2688#endif /* XEN */ 2688#endif /* XEN */
2689 2689
2690#ifndef XEN 2690#ifndef XEN
2691 ci->ci_tss.tss_ldt = pmap->pm_ldt_sel; 2691 ci->ci_tss.tss_ldt = pmap->pm_ldt_sel;
2692 ci->ci_tss.tss_cr3 = pcb->pcb_cr3; 2692 ci->ci_tss.tss_cr3 = pcb->pcb_cr3;
2693#endif /* !XEN */ 2693#endif /* !XEN */
2694#endif /* i386 */ 2694#endif /* i386 */
2695 2695
2696 lldt(pmap->pm_ldt_sel); 2696 lldt(pmap->pm_ldt_sel);
2697 2697
2698 u_int gen = uvm_emap_gen_return(); 2698 u_int gen = uvm_emap_gen_return();
2699 cpu_load_pmap(pmap); 2699 cpu_load_pmap(pmap);
2700 uvm_emap_update(gen); 2700 uvm_emap_update(gen);
2701 2701
2702 ci->ci_want_pmapload = 0; 2702 ci->ci_want_pmapload = 0;
2703 2703
2704 /* 2704 /*
2705 * we're now running with the new pmap. drop the reference 2705 * we're now running with the new pmap. drop the reference
2706 * to the old pmap. if we block, we need to go around again. 2706 * to the old pmap. if we block, we need to go around again.
2707 */ 2707 */
2708 2708
2709 pmap_destroy(oldpmap); 2709 pmap_destroy(oldpmap);
2710 if (l->l_ncsw != ncsw) { 2710 if (l->l_ncsw != ncsw) {
2711 goto retry; 2711 goto retry;
2712 } 2712 }
2713 2713
2714 kpreempt_enable(); 2714 kpreempt_enable();
2715} 2715}
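The retry at the end of pmap_load() guards against blocking: pmap_destroy() may sleep, and if the LWP was switched away in the meantime (l_ncsw moved on) the CPU state set up above can no longer be trusted, so the whole load is redone. The idiom in isolation, as a self-contained toy (the counter and the blocking step are both simulated):

#include <stdint.h>
#include <stdio.h>

static uint64_t fake_ncsw;		/* stands in for l->l_ncsw */

static void
work_that_may_block(int attempt)
{
	if (attempt == 0)
		fake_ncsw++;		/* pretend the first pass blocked */
}

static void
load_with_retry(void)
{
	for (int attempt = 0;; attempt++) {
		uint64_t ncsw = fake_ncsw;	/* snapshot before the work */

		work_that_may_block(attempt);
		if (fake_ncsw == ncsw) {	/* did not block: state is good */
			printf("done after %d attempt(s)\n", attempt + 1);
			return;
		}
		/* counter moved: we may be on another CPU now, start over */
	}
}

int
main(void)
{
	load_with_retry();		/* prints: done after 2 attempt(s) */
	return 0;
}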
2716 2716
2717/* 2717/*
2718 * pmap_deactivate: deactivate a process' pmap. 2718 * pmap_deactivate: deactivate a process' pmap.
2719 * 2719 *
2720 * => Must be called with kernel preemption disabled (high IPL is enough). 2720 * => Must be called with kernel preemption disabled (high IPL is enough).
2721 */ 2721 */
2722void 2722void
2723pmap_deactivate(struct lwp *l) 2723pmap_deactivate(struct lwp *l)
2724{ 2724{
2725 struct pmap *pmap; 2725 struct pmap *pmap;
2726 struct cpu_info *ci; 2726 struct cpu_info *ci;
2727 2727
2728 KASSERT(kpreempt_disabled()); 2728 KASSERT(kpreempt_disabled());
2729 2729
2730 if (l != curlwp) { 2730 if (l != curlwp) {
2731 return; 2731 return;
2732 } 2732 }
2733 2733
2734 /* 2734 /*
2735 * Wait for pending TLB shootdowns to complete. Necessary because 2735 * Wait for pending TLB shootdowns to complete. Necessary because
2736 * TLB shootdown state is per-CPU, and the LWP may be coming off 2736 * TLB shootdown state is per-CPU, and the LWP may be coming off
2737 * the CPU before it has a chance to call pmap_update(), e.g. due 2737 * the CPU before it has a chance to call pmap_update(), e.g. due
2738 * to kernel preemption or blocking routine in between. 2738 * to kernel preemption or blocking routine in between.
2739 */ 2739 */
2740 pmap_tlb_shootnow(); 2740 pmap_tlb_shootnow();
2741 2741
2742 ci = curcpu(); 2742 ci = curcpu();
2743 2743
2744 if (ci->ci_want_pmapload) { 2744 if (ci->ci_want_pmapload) {
2745 KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map) 2745 KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map)
2746 != pmap_kernel()); 2746 != pmap_kernel());
2747 KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map) 2747 KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map)
2748 != ci->ci_pmap || ci->ci_tlbstate != TLBSTATE_VALID); 2748 != ci->ci_pmap || ci->ci_tlbstate != TLBSTATE_VALID);
2749 2749
2750 /* 2750 /*
2751 * userspace has not been touched. 2751 * userspace has not been touched.
2752 * nothing to do here. 2752 * nothing to do here.
2753 */ 2753 */
2754 2754
2755 ci->ci_want_pmapload = 0; 2755 ci->ci_want_pmapload = 0;
2756 return; 2756 return;
2757 } 2757 }
2758 2758
2759 pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); 2759 pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
2760 2760
2761 if (pmap == pmap_kernel()) { 2761 if (pmap == pmap_kernel()) {
2762 return; 2762 return;
2763 } 2763 }
2764 2764
2765#if defined(XEN) && defined(__x86_64__) 2765#if defined(XEN) && defined(__x86_64__)
2766 KASSERT(pmap_pdirpa(pmap, 0) == ci->ci_xen_current_user_pgd); 2766 KASSERT(pmap_pdirpa(pmap, 0) == ci->ci_xen_current_user_pgd);
2767#elif defined(PAE) 2767#elif defined(PAE)
2768 KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(ci->ci_pae_l3_pdir[0])); 2768 KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(ci->ci_pae_l3_pdir[0]));
2769#elif !defined(XEN)  2769#elif !defined(XEN)
2770 KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(rcr3())); 2770 KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(rcr3()));
2771#endif 2771#endif
2772 KASSERT(ci->ci_pmap == pmap); 2772 KASSERT(ci->ci_pmap == pmap);
2773 2773
2774 /* 2774 /*
2775 * we aren't interested in TLB invalidations for this pmap, 2775 * we aren't interested in TLB invalidations for this pmap,
2776 * at least for the time being. 2776 * at least for the time being.
2777 */ 2777 */
2778 2778
2779 KASSERT(ci->ci_tlbstate == TLBSTATE_VALID); 2779 KASSERT(ci->ci_tlbstate == TLBSTATE_VALID);
2780 ci->ci_tlbstate = TLBSTATE_LAZY; 2780 ci->ci_tlbstate = TLBSTATE_LAZY;
2781} 2781}
2782 2782
2783/* 2783/*
2784 * end of lifecycle functions 2784 * end of lifecycle functions
2785 */ 2785 */
2786 2786
2787/* 2787/*
2788 * some misc. functions 2788 * some misc. functions
2789 */ 2789 */
2790 2790
2791int 2791int
2792pmap_pdes_invalid(vaddr_t va, pd_entry_t * const *pdes, pd_entry_t *lastpde) 2792pmap_pdes_invalid(vaddr_t va, pd_entry_t * const *pdes, pd_entry_t *lastpde)
2793{ 2793{
2794 int i; 2794 int i;
2795 unsigned long index; 2795 unsigned long index;
2796 pd_entry_t pde; 2796 pd_entry_t pde;
2797 2797
2798 for (i = PTP_LEVELS; i > 1; i--) { 2798 for (i = PTP_LEVELS; i > 1; i--) {
2799 index = pl_i(va, i); 2799 index = pl_i(va, i);
2800 pde = pdes[i - 2][index]; 2800 pde = pdes[i - 2][index];
2801 if ((pde & PG_V) == 0) 2801 if ((pde & PG_V) == 0)
2802 return i; 2802 return i;
2803 } 2803 }
2804 if (lastpde != NULL) 2804 if (lastpde != NULL)
2805 *lastpde = pde; 2805 *lastpde = pde;
2806 return 0; 2806 return 0;
2807} 2807}
2808 2808
2809/* 2809/*
2810 * pmap_extract: extract a PA for the given VA 2810 * pmap_extract: extract a PA for the given VA
2811 */ 2811 */
2812 2812
2813bool 2813bool
2814pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pap) 2814pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pap)
2815{ 2815{
2816 pt_entry_t *ptes, pte; 2816 pt_entry_t *ptes, pte;
2817 pd_entry_t pde; 2817 pd_entry_t pde;
2818 pd_entry_t * const *pdes; 2818 pd_entry_t * const *pdes;
2819 struct pmap *pmap2; 2819 struct pmap *pmap2;
2820 struct cpu_info *ci; 2820 struct cpu_info *ci;
2821 paddr_t pa; 2821 paddr_t pa;
2822 lwp_t *l; 2822 lwp_t *l;
2823 bool hard, rv; 2823 bool hard, rv;
2824 2824
2825 rv = false; 2825 rv = false;
2826 pa = 0; 2826 pa = 0;
2827 l = curlwp; 2827 l = curlwp;
2828 2828
2829 KPREEMPT_DISABLE(l); 2829 KPREEMPT_DISABLE(l);
2830 ci = l->l_cpu; 2830 ci = l->l_cpu;
2831 if (__predict_true(!ci->ci_want_pmapload && ci->ci_pmap == pmap) || 2831 if (__predict_true(!ci->ci_want_pmapload && ci->ci_pmap == pmap) ||
2832 pmap == pmap_kernel()) { 2832 pmap == pmap_kernel()) {
2833 /* 2833 /*
2834 * no need to lock, because it's pmap_kernel() or our 2834 * no need to lock, because it's pmap_kernel() or our
2835 * own pmap and is active. if a user pmap, the caller 2835 * own pmap and is active. if a user pmap, the caller
2836 * will hold the vm_map write/read locked and so prevent 2836 * will hold the vm_map write/read locked and so prevent
2837 * entries from disappearing while we are here. ptps 2837 * entries from disappearing while we are here. ptps
2838 * can disappear via pmap_remove() and pmap_protect(), 2838 * can disappear via pmap_remove() and pmap_protect(),
2839 * but they are called with the vm_map write locked. 2839 * but they are called with the vm_map write locked.
2840 */ 2840 */
2841 hard = false; 2841 hard = false;
2842 ptes = PTE_BASE; 2842 ptes = PTE_BASE;
2843 pdes = normal_pdes; 2843 pdes = normal_pdes;
2844 } else { 2844 } else {
2845 /* we lose, do it the hard way. */ 2845 /* we lose, do it the hard way. */
2846 hard = true; 2846 hard = true;
2847 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); 2847 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
2848 } 2848 }
2849 if (pmap_pdes_valid(va, pdes, &pde)) { 2849 if (pmap_pdes_valid(va, pdes, &pde)) {
2850 pte = ptes[pl1_i(va)]; 2850 pte = ptes[pl1_i(va)];
2851 if (pde & PG_PS) { 2851 if (pde & PG_PS) {
2852 pa = (pde & PG_LGFRAME) | (va & (NBPD_L2 - 1)); 2852 pa = (pde & PG_LGFRAME) | (va & (NBPD_L2 - 1));
2853 rv = true; 2853 rv = true;
2854 } else if (__predict_true((pte & PG_V) != 0)) { 2854 } else if (__predict_true((pte & PG_V) != 0)) {
2855 pa = pmap_pte2pa(pte) | (va & (NBPD_L1 - 1)); 2855 pa = pmap_pte2pa(pte) | (va & (NBPD_L1 - 1));
2856 rv = true; 2856 rv = true;
2857 } 2857 }
2858 } 2858 }
2859 if (__predict_false(hard)) { 2859 if (__predict_false(hard)) {
2860 pmap_unmap_ptes(pmap, pmap2); 2860 pmap_unmap_ptes(pmap, pmap2);
2861 } 2861 }
2862 KPREEMPT_ENABLE(l); 2862 KPREEMPT_ENABLE(l);
2863 if (pap != NULL) { 2863 if (pap != NULL) {
2864 *pap = pa; 2864 *pap = pa;
2865 } 2865 }
2866 return rv; 2866 return rv;
2867} 2867}
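The translation at the heart of pmap_extract() has two cases: a superpage mapped directly by the level-2 entry (PG_PS set), where the low 21 bits of the VA are the page offset, and a normal 4 KiB page mapped by the level-1 PTE, where only the low 12 bits are. Just that arithmetic, pulled out into a standalone sketch (the masks are typical x86-64 values chosen for illustration):

#include <stdbool.h>
#include <stdint.h>

#define PG_V		0x001ULL
#define PG_PS		0x080ULL
#define PG_FRAME	0x000ffffffffff000ULL	/* 4 KiB frame bits */
#define PG_LGFRAME	0x000fffffffe00000ULL	/* 2 MiB frame bits */
#define NBPD_L1		0x1000ULL		/* 4 KiB */
#define NBPD_L2		0x200000ULL		/* 2 MiB */

static bool
pte_to_pa(uint64_t pde, uint64_t pte, uint64_t va, uint64_t *pap)
{
	if (pde & PG_PS) {			/* superpage mapped at level 2 */
		*pap = (pde & PG_LGFRAME) | (va & (NBPD_L2 - 1));
		return true;
	}
	if (pte & PG_V) {			/* ordinary 4 KiB mapping */
		*pap = (pte & PG_FRAME) | (va & (NBPD_L1 - 1));
		return true;
	}
	return false;
}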
2868 2868
2869 2869
2870/* 2870/*
2871 * vtophys: virtual address to physical address. For use by 2871 * vtophys: virtual address to physical address. For use by
2872 * machine-dependent code only. 2872 * machine-dependent code only.
2873 */ 2873 */
2874 2874
2875paddr_t 2875paddr_t
2876vtophys(vaddr_t va) 2876vtophys(vaddr_t va)
2877{ 2877{
2878 paddr_t pa; 2878 paddr_t pa;
2879 2879
2880 if (pmap_extract(pmap_kernel(), va, &pa) == true) 2880 if (pmap_extract(pmap_kernel(), va, &pa) == true)
2881 return (pa); 2881 return (pa);
2882 return (0); 2882 return (0);
2883} 2883}
2884 2884
2885__weak_alias(pmap_extract_ma, pmap_extract); 2885__strict_weak_alias(pmap_extract_ma, pmap_extract);
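This is the revision's one functional change: pmap_extract_ma is now exported with __strict_weak_alias() instead of __weak_alias(). Both macros make one symbol a weak alias for another, so a kernel that provides no separate machine-address variant resolves pmap_extract_ma to pmap_extract; the __strict_ flavor presumably declares the alias with the target's own type so that a prototype mismatch is caught at compile time. Under GCC the underlying mechanism looks roughly like the sketch below (an illustration, not the <sys/cdefs.h> definition):

#include <stdio.h>

int
real_impl(int x)
{
	return x * 2;
}

/*
 * Weak alias: alias_name resolves to real_impl unless a strong definition
 * overrides it.  Declaring it with __typeof(real_impl) keeps the two
 * prototypes in sync, which is the "strict" part of the idea.
 */
__typeof(real_impl) alias_name __attribute__((weak, alias("real_impl")));

int
main(void)
{
	printf("%d\n", alias_name(21));		/* prints 42 */
	return 0;
}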
2886 2886
2887#ifdef XEN 2887#ifdef XEN
2888 2888
2889/* 2889/*
2890 * vtomach: virtual address to machine address. For use by 2890 * vtomach: virtual address to machine address. For use by
2891 * machine-dependent code only. 2891 * machine-dependent code only.
2892 */ 2892 */
2893 2893
2894paddr_t 2894paddr_t
2895vtomach(vaddr_t va) 2895vtomach(vaddr_t va)
2896{ 2896{
2897 paddr_t pa; 2897 paddr_t pa;
2898 2898
2899 if (pmap_extract_ma(pmap_kernel(), va, &pa) == true) 2899 if (pmap_extract_ma(pmap_kernel(), va, &pa) == true)
2900 return (pa); 2900 return (pa);
2901 return (0); 2901 return (0);
2902} 2902}
2903 2903
2904#endif /* XEN */ 2904#endif /* XEN */
2905 2905
2906/* 2906/*
2907 * pmap_virtual_space: used during bootup [pmap_steal_memory] to 2907 * pmap_virtual_space: used during bootup [pmap_steal_memory] to
2908 * determine the bounds of the kernel virtual address space. 2908 * determine the bounds of the kernel virtual address space.
2909 */ 2909 */
2910 2910
2911void 2911void
2912pmap_virtual_space(vaddr_t *startp, vaddr_t *endp) 2912pmap_virtual_space(vaddr_t *startp, vaddr_t *endp)
2913{ 2913{
2914 *startp = virtual_avail; 2914 *startp = virtual_avail;
2915 *endp = virtual_end; 2915 *endp = virtual_end;
2916} 2916}
2917 2917
2918/* 2918/*
2919 * pmap_map: map a range of PAs into kvm. 2919 * pmap_map: map a range of PAs into kvm.
2920 * 2920 *
2921 * => used during crash dump 2921 * => used during crash dump
2922 * => XXX: pmap_map() should be phased out? 2922 * => XXX: pmap_map() should be phased out?
2923 */ 2923 */
2924 2924
2925vaddr_t 2925vaddr_t
2926pmap_map(vaddr_t va, paddr_t spa, paddr_t epa, vm_prot_t prot) 2926pmap_map(vaddr_t va, paddr_t spa, paddr_t epa, vm_prot_t prot)
2927{ 2927{
2928 while (spa < epa) { 2928 while (spa < epa) {
2929 pmap_kenter_pa(va, spa, prot, 0); 2929 pmap_kenter_pa(va, spa, prot, 0);
2930 va += PAGE_SIZE; 2930 va += PAGE_SIZE;
2931 spa += PAGE_SIZE; 2931 spa += PAGE_SIZE;
2932 } 2932 }
2933 pmap_update(pmap_kernel()); 2933 pmap_update(pmap_kernel());
2934 return va; 2934 return va;
2935} 2935}
2936 2936
2937/* 2937/*
2938 * pmap_zero_page: zero a page 2938 * pmap_zero_page: zero a page
2939 */ 2939 */
2940 2940
2941void 2941void
2942pmap_zero_page(paddr_t pa) 2942pmap_zero_page(paddr_t pa)
2943{ 2943{
2944 pt_entry_t *zpte; 2944 pt_entry_t *zpte;
2945 void *zerova; 2945 void *zerova;
2946 int id; 2946 int id;
2947 2947
2948 kpreempt_disable(); 2948 kpreempt_disable();
2949 id = cpu_number(); 2949 id = cpu_number();
2950 zpte = PTESLEW(zero_pte, id); 2950 zpte = PTESLEW(zero_pte, id);
2951 zerova = VASLEW(zerop, id); 2951 zerova = VASLEW(zerop, id);
2952 2952
2953#ifdef DIAGNOSTIC 2953#ifdef DIAGNOSTIC
2954 if (*zpte) 2954 if (*zpte)
2955 panic("pmap_zero_page: lock botch"); 2955 panic("pmap_zero_page: lock botch");
2956#endif 2956#endif
2957 2957
2958 pmap_pte_set(zpte, pmap_pa2pte(pa) | PG_V | PG_RW | PG_M | PG_U | PG_k); 2958 pmap_pte_set(zpte, pmap_pa2pte(pa) | PG_V | PG_RW | PG_M | PG_U | PG_k);
2959 pmap_pte_flush(); 2959 pmap_pte_flush();
2960 pmap_update_pg((vaddr_t)zerova); /* flush TLB */ 2960 pmap_update_pg((vaddr_t)zerova); /* flush TLB */
2961 2961
2962 memset(zerova, 0, PAGE_SIZE); 2962 memset(zerova, 0, PAGE_SIZE);
2963 2963
2964#if defined(DIAGNOSTIC) || defined(XEN) 2964#if defined(DIAGNOSTIC) || defined(XEN)
2965 pmap_pte_set(zpte, 0); /* zap ! */ 2965 pmap_pte_set(zpte, 0); /* zap ! */
2966 pmap_pte_flush(); 2966 pmap_pte_flush();
2967#endif 2967#endif
2968 kpreempt_enable(); 2968 kpreempt_enable();
2969} 2969}
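pmap_zero_page() (and pmap_copy_page() further down) reach an arbitrary physical page through a small per-CPU mapping window: PTESLEW() and VASLEW() simply offset a shared base PTE pointer and base VA by the CPU number, so with preemption disabled no two CPUs ever share a temporary mapping and no lock is needed. The address arithmetic, reduced to a sketch (the per-CPU stride is an invented constant):

#include <stddef.h>
#include <stdint.h>

#define PAGE_SIZE	4096
#define WINDOW_PAGES	8		/* assumed pages reserved per CPU */

/* this CPU's private virtual window: base VA + cpu * stride */
static inline void *
va_slew(void *base_va, int cpu_id)
{
	return (uint8_t *)base_va + (size_t)cpu_id * WINDOW_PAGES * PAGE_SIZE;
}

/* the matching PTE: one PTE per page of the window, laid out contiguously */
static inline uint64_t *
pte_slew(uint64_t *base_pte, int cpu_id)
{
	return base_pte + (size_t)cpu_id * WINDOW_PAGES;
}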
2970 2970
2971/* 2971/*
2972 * pmap_pageidlezero: the same, for the idle loop page zero'er. 2972 * pmap_pageidlezero: the same, for the idle loop page zero'er.
2973 * Returns true if the page was zero'd, false if we aborted for 2973 * Returns true if the page was zero'd, false if we aborted for
2974 * some reason. 2974 * some reason.
2975 */ 2975 */
2976 2976
2977bool 2977bool
2978pmap_pageidlezero(paddr_t pa) 2978pmap_pageidlezero(paddr_t pa)
2979{ 2979{
2980 pt_entry_t *zpte; 2980 pt_entry_t *zpte;
2981 void *zerova; 2981 void *zerova;
2982 bool rv; 2982 bool rv;
2983 int id; 2983 int id;
2984 2984
2985 id = cpu_number(); 2985 id = cpu_number();
2986 zpte = PTESLEW(zero_pte, id); 2986 zpte = PTESLEW(zero_pte, id);
2987 zerova = VASLEW(zerop, id); 2987 zerova = VASLEW(zerop, id);
2988 2988
2989 KASSERT(cpu_feature[0] & CPUID_SSE2); 2989 KASSERT(cpu_feature[0] & CPUID_SSE2);
2990 KASSERT(*zpte == 0); 2990 KASSERT(*zpte == 0);
2991 2991
2992 pmap_pte_set(zpte, pmap_pa2pte(pa) | PG_V | PG_RW | PG_M | PG_U | PG_k); 2992 pmap_pte_set(zpte, pmap_pa2pte(pa) | PG_V | PG_RW | PG_M | PG_U | PG_k);
2993 pmap_pte_flush(); 2993 pmap_pte_flush();
2994 pmap_update_pg((vaddr_t)zerova); /* flush TLB */ 2994 pmap_update_pg((vaddr_t)zerova); /* flush TLB */
2995 2995
2996 rv = sse2_idlezero_page(zerova); 2996 rv = sse2_idlezero_page(zerova);
2997 2997
2998#if defined(DIAGNOSTIC) || defined(XEN) 2998#if defined(DIAGNOSTIC) || defined(XEN)
2999 pmap_pte_set(zpte, 0); /* zap ! */ 2999 pmap_pte_set(zpte, 0); /* zap ! */
3000 pmap_pte_flush(); 3000 pmap_pte_flush();
3001#endif 3001#endif
3002 3002
3003 return rv; 3003 return rv;
3004} 3004}
3005 3005
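The idle-loop variant may give up part way through (sse2_idlezero_page() bails out when another CPU posts work), so the caller has to check the result. A hypothetical sketch of the idle-loop side:

        if (pmap_pageidlezero(VM_PAGE_TO_PHYS(pg))) {
                /* fully zeroed: move 'pg' to the zeroed free list */
        } else {
                /* aborted: leave 'pg' on the unzeroed list and go run work */
        }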
3006/* 3006/*
3007 * pmap_copy_page: copy a page 3007 * pmap_copy_page: copy a page
3008 */ 3008 */
3009 3009
3010void 3010void
3011pmap_copy_page(paddr_t srcpa, paddr_t dstpa) 3011pmap_copy_page(paddr_t srcpa, paddr_t dstpa)
3012{ 3012{
3013 pt_entry_t *spte; 3013 pt_entry_t *spte;
3014 pt_entry_t *dpte; 3014 pt_entry_t *dpte;
3015 void *csrcva; 3015 void *csrcva;
3016 void *cdstva; 3016 void *cdstva;
3017 int id; 3017 int id;
3018 3018
3019 kpreempt_disable(); 3019 kpreempt_disable();
3020 id = cpu_number(); 3020 id = cpu_number();
3021 spte = PTESLEW(csrc_pte,id); 3021 spte = PTESLEW(csrc_pte,id);
3022 dpte = PTESLEW(cdst_pte,id); 3022 dpte = PTESLEW(cdst_pte,id);
3023 csrcva = VASLEW(csrcp, id); 3023 csrcva = VASLEW(csrcp, id);
3024 cdstva = VASLEW(cdstp, id); 3024 cdstva = VASLEW(cdstp, id);
3025  3025
3026 KASSERT(*spte == 0 && *dpte == 0); 3026 KASSERT(*spte == 0 && *dpte == 0);
3027 3027
3028 pmap_pte_set(spte, pmap_pa2pte(srcpa) | PG_V | PG_RW | PG_U | PG_k); 3028 pmap_pte_set(spte, pmap_pa2pte(srcpa) | PG_V | PG_RW | PG_U | PG_k);
3029 pmap_pte_set(dpte, 3029 pmap_pte_set(dpte,
3030 pmap_pa2pte(dstpa) | PG_V | PG_RW | PG_M | PG_U | PG_k); 3030 pmap_pa2pte(dstpa) | PG_V | PG_RW | PG_M | PG_U | PG_k);
3031 pmap_pte_flush(); 3031 pmap_pte_flush();
3032 pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva); 3032 pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva);
3033 3033
3034 memcpy(cdstva, csrcva, PAGE_SIZE); 3034 memcpy(cdstva, csrcva, PAGE_SIZE);
3035 3035
3036#if defined(DIAGNOSTIC) || defined(XEN) 3036#if defined(DIAGNOSTIC) || defined(XEN)
3037 pmap_pte_set(spte, 0); 3037 pmap_pte_set(spte, 0);
3038 pmap_pte_set(dpte, 0); 3038 pmap_pte_set(dpte, 0);
3039 pmap_pte_flush(); 3039 pmap_pte_flush();
3040#endif 3040#endif
3041 kpreempt_enable(); 3041 kpreempt_enable();
3042} 3042}
3043 3043
3044static pt_entry_t * 3044static pt_entry_t *
3045pmap_map_ptp(struct vm_page *ptp) 3045pmap_map_ptp(struct vm_page *ptp)
3046{ 3046{
3047 pt_entry_t *ptppte; 3047 pt_entry_t *ptppte;
3048 void *ptpva; 3048 void *ptpva;
3049 int id; 3049 int id;
3050 3050
3051 KASSERT(kpreempt_disabled()); 3051 KASSERT(kpreempt_disabled());
3052 3052
3053 id = cpu_number(); 3053 id = cpu_number();
3054 ptppte = PTESLEW(ptp_pte, id); 3054 ptppte = PTESLEW(ptp_pte, id);
3055 ptpva = VASLEW(ptpp, id); 3055 ptpva = VASLEW(ptpp, id);
3056#if !defined(XEN) 3056#if !defined(XEN)
3057 pmap_pte_set(ptppte, pmap_pa2pte(VM_PAGE_TO_PHYS(ptp)) | PG_V | PG_M | 3057 pmap_pte_set(ptppte, pmap_pa2pte(VM_PAGE_TO_PHYS(ptp)) | PG_V | PG_M |
3058 PG_RW | PG_U | PG_k); 3058 PG_RW | PG_U | PG_k);
3059#else 3059#else
3060 pmap_pte_set(ptppte, pmap_pa2pte(VM_PAGE_TO_PHYS(ptp)) | PG_V | PG_M | 3060 pmap_pte_set(ptppte, pmap_pa2pte(VM_PAGE_TO_PHYS(ptp)) | PG_V | PG_M |
3061 PG_U | PG_k); 3061 PG_U | PG_k);
3062#endif 3062#endif
3063 pmap_pte_flush(); 3063 pmap_pte_flush();
3064 pmap_update_pg((vaddr_t)ptpva); 3064 pmap_update_pg((vaddr_t)ptpva);
3065 3065
3066 return (pt_entry_t *)ptpva; 3066 return (pt_entry_t *)ptpva;
3067} 3067}
3068 3068
3069static void 3069static void
3070pmap_unmap_ptp(void) 3070pmap_unmap_ptp(void)
3071{ 3071{
3072#if defined(DIAGNOSTIC) || defined(XEN) 3072#if defined(DIAGNOSTIC) || defined(XEN)
3073 pt_entry_t *pte; 3073 pt_entry_t *pte;
3074 3074
3075 KASSERT(kpreempt_disabled()); 3075 KASSERT(kpreempt_disabled());
3076 3076
3077 pte = PTESLEW(ptp_pte, cpu_number()); 3077 pte = PTESLEW(ptp_pte, cpu_number());
3078 if (*pte != 0) { 3078 if (*pte != 0) {
3079 pmap_pte_set(pte, 0); 3079 pmap_pte_set(pte, 0);
3080 pmap_pte_flush(); 3080 pmap_pte_flush();
3081 } 3081 }
3082#endif 3082#endif
3083} 3083}
3084 3084
3085static pt_entry_t * 3085static pt_entry_t *
3086pmap_map_pte(struct pmap *pmap, struct vm_page *ptp, vaddr_t va) 3086pmap_map_pte(struct pmap *pmap, struct vm_page *ptp, vaddr_t va)
3087{ 3087{
3088 3088
3089 KASSERT(kpreempt_disabled()); 3089 KASSERT(kpreempt_disabled());
3090 if (pmap_is_curpmap(pmap)) { 3090 if (pmap_is_curpmap(pmap)) {
3091 return &PTE_BASE[pl1_i(va)]; /* (k)vtopte */ 3091 return &PTE_BASE[pl1_i(va)]; /* (k)vtopte */
3092 } 3092 }
3093 KASSERT(ptp != NULL); 3093 KASSERT(ptp != NULL);
3094 return pmap_map_ptp(ptp) + pl1_pi(va); 3094 return pmap_map_ptp(ptp) + pl1_pi(va);
3095} 3095}
3096 3096
3097static void 3097static void
3098pmap_unmap_pte(void) 3098pmap_unmap_pte(void)
3099{ 3099{
3100 3100
3101 KASSERT(kpreempt_disabled()); 3101 KASSERT(kpreempt_disabled());
3102 3102
3103 pmap_unmap_ptp(); 3103 pmap_unmap_ptp();
3104} 3104}
3105 3105
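pmap_map_pte()/pmap_unmap_pte() hand the caller a pointer to a single PTE even when the pmap is not the current one, by temporarily mapping its PTP. The expected calling pattern, as pmap_sync_pv() below uses it, in sketch form:

        kpreempt_disable();
        ptep = pmap_map_pte(pmap, ptp, va); /* ptp may be NULL for curpmap/kernel */
        /* ... read or compare-and-swap *ptep ... */
        pmap_unmap_pte();                   /* undo the temporary PTP mapping */
        kpreempt_enable();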
3106/* 3106/*
3107 * p m a p r e m o v e f u n c t i o n s 3107 * p m a p r e m o v e f u n c t i o n s
3108 * 3108 *
3109 * functions that remove mappings 3109 * functions that remove mappings
3110 */ 3110 */
3111 3111
3112/* 3112/*
3113 * pmap_remove_ptes: remove PTEs from a PTP 3113 * pmap_remove_ptes: remove PTEs from a PTP
3114 * 3114 *
3115 * => caller must hold pmap's lock 3115 * => caller must hold pmap's lock
3116 * => PTP must be mapped into KVA 3116 * => PTP must be mapped into KVA
3117 * => PTP should be null if pmap == pmap_kernel() 3117 * => PTP should be null if pmap == pmap_kernel()
3118 * => must be called with kernel preemption disabled 3118 * => must be called with kernel preemption disabled
3119 * => issues TLB shootdowns as needed (via pmap_remove_pte()) 3119 * => issues TLB shootdowns as needed (via pmap_remove_pte())
3120 */ 3120 */
3121 3121
3122static void 3122static void
3123pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva, 3123pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva,
3124 vaddr_t startva, vaddr_t endva, struct pv_entry **pv_tofree) 3124 vaddr_t startva, vaddr_t endva, struct pv_entry **pv_tofree)
3125{ 3125{
3126 pt_entry_t *pte = (pt_entry_t *)ptpva; 3126 pt_entry_t *pte = (pt_entry_t *)ptpva;
3127 3127
3128 KASSERT(pmap == pmap_kernel() || mutex_owned(pmap->pm_lock)); 3128 KASSERT(pmap == pmap_kernel() || mutex_owned(pmap->pm_lock));
3129 KASSERT(kpreempt_disabled()); 3129 KASSERT(kpreempt_disabled());
3130 3130
3131 /* 3131 /*
3132 * note that ptpva points to the PTE that maps startva. this may 3132 * note that ptpva points to the PTE that maps startva. this may
3133 * or may not be the first PTE in the PTP. 3133 * or may not be the first PTE in the PTP.
3134 * 3134 *
3135 * we loop through the PTP while there are still PTEs to look at 3135 * we loop through the PTP while there are still PTEs to look at
3136 * and the wire_count is greater than 1 (because we use the wire_count 3136 * and the wire_count is greater than 1 (because we use the wire_count
3137 * to keep track of the number of real PTEs in the PTP). 3137 * to keep track of the number of real PTEs in the PTP).
3138 */ 3138 */
3139 while (startva < endva && (ptp == NULL || ptp->wire_count > 1)) { 3139 while (startva < endva && (ptp == NULL || ptp->wire_count > 1)) {
3140 (void)pmap_remove_pte(pmap, ptp, pte, startva, pv_tofree); 3140 (void)pmap_remove_pte(pmap, ptp, pte, startva, pv_tofree);
3141 startva += PAGE_SIZE; 3141 startva += PAGE_SIZE;
3142 pte++; 3142 pte++;
3143 } 3143 }
3144} 3144}
3145 3145
3146 3146
3147/* 3147/*
3148 * pmap_remove_pte: remove a single PTE from a PTP. 3148 * pmap_remove_pte: remove a single PTE from a PTP.
3149 * 3149 *
3150 * => caller must hold pmap's lock 3150 * => caller must hold pmap's lock
3151 * => PTP must be mapped into KVA 3151 * => PTP must be mapped into KVA
3152 * => PTP should be null if pmap == pmap_kernel() 3152 * => PTP should be null if pmap == pmap_kernel()
3153 * => returns true if we removed a mapping 3153 * => returns true if we removed a mapping
3154 * => must be called with kernel preemption disabled 3154 * => must be called with kernel preemption disabled
3155 */ 3155 */
3156static bool 3156static bool
3157pmap_remove_pte(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte, 3157pmap_remove_pte(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte,
3158 vaddr_t va, struct pv_entry **pv_tofree) 3158 vaddr_t va, struct pv_entry **pv_tofree)
3159{ 3159{
3160 struct pv_entry *pve; 3160 struct pv_entry *pve;
3161 struct vm_page *pg; 3161 struct vm_page *pg;
3162 struct pmap_page *pp; 3162 struct pmap_page *pp;
3163 pt_entry_t opte; 3163 pt_entry_t opte;
3164 3164
3165 KASSERT(pmap == pmap_kernel() || mutex_owned(pmap->pm_lock)); 3165 KASSERT(pmap == pmap_kernel() || mutex_owned(pmap->pm_lock));
3166 KASSERT(kpreempt_disabled()); 3166 KASSERT(kpreempt_disabled());
3167 3167
3168 if (!pmap_valid_entry(*pte)) { 3168 if (!pmap_valid_entry(*pte)) {
3169 /* VA not mapped. */ 3169 /* VA not mapped. */
3170 return false; 3170 return false;
3171 } 3171 }
3172 3172
3173 /* Atomically save the old PTE and zap it. */ 3173 /* Atomically save the old PTE and zap it. */
3174 opte = pmap_pte_testset(pte, 0); 3174 opte = pmap_pte_testset(pte, 0);
3175 if (!pmap_valid_entry(opte)) { 3175 if (!pmap_valid_entry(opte)) {
3176 return false; 3176 return false;
3177 } 3177 }
3178 3178
3179 pmap_exec_account(pmap, va, opte, 0); 3179 pmap_exec_account(pmap, va, opte, 0);
3180 pmap_stats_update_bypte(pmap, 0, opte); 3180 pmap_stats_update_bypte(pmap, 0, opte);
3181 3181
3182 if (ptp) { 3182 if (ptp) {
3183 /* 3183 /*
3184 * Dropping a PTE. Make sure that the PDE is flushed. 3184 * Dropping a PTE. Make sure that the PDE is flushed.
3185 */ 3185 */
3186 ptp->wire_count--; 3186 ptp->wire_count--;
3187 if (ptp->wire_count <= 1) { 3187 if (ptp->wire_count <= 1) {
3188 opte |= PG_U; 3188 opte |= PG_U;
3189 } 3189 }
3190 } 3190 }
3191 3191
3192 if ((opte & PG_U) != 0) { 3192 if ((opte & PG_U) != 0) {
3193 pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_REMOVE_PTE); 3193 pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_REMOVE_PTE);
3194 } 3194 }
3195 3195
3196 /* 3196 /*
3197 * If we are not on a pv_head list - we are done. 3197 * If we are not on a pv_head list - we are done.
3198 */ 3198 */
3199 if ((opte & PG_PVLIST) == 0) { 3199 if ((opte & PG_PVLIST) == 0) {
3200#if defined(DIAGNOSTIC) && !defined(DOM0OPS) 3200#if defined(DIAGNOSTIC) && !defined(DOM0OPS)
3201 if (PHYS_TO_VM_PAGE(pmap_pte2pa(opte)) != NULL) 3201 if (PHYS_TO_VM_PAGE(pmap_pte2pa(opte)) != NULL)
3202 panic("pmap_remove_pte: managed page without " 3202 panic("pmap_remove_pte: managed page without "
3203 "PG_PVLIST for %#" PRIxVADDR, va); 3203 "PG_PVLIST for %#" PRIxVADDR, va);
3204#endif 3204#endif
3205 return true; 3205 return true;
3206 } 3206 }
3207 3207
3208 pg = PHYS_TO_VM_PAGE(pmap_pte2pa(opte)); 3208 pg = PHYS_TO_VM_PAGE(pmap_pte2pa(opte));
3209 3209
3210 KASSERTMSG(pg != NULL, ("pmap_remove_pte: unmanaged page marked " 3210 KASSERTMSG(pg != NULL, ("pmap_remove_pte: unmanaged page marked "
3211 "PG_PVLIST, va = %#" PRIxVADDR ", pa = %#" PRIxPADDR, 3211 "PG_PVLIST, va = %#" PRIxVADDR ", pa = %#" PRIxPADDR,
3212 va, (paddr_t)pmap_pte2pa(opte))); 3212 va, (paddr_t)pmap_pte2pa(opte)));
3213 3213
3214 KASSERT(uvm_page_locked_p(pg)); 3214 KASSERT(uvm_page_locked_p(pg));
3215 3215
3216 /* Sync R/M bits. */ 3216 /* Sync R/M bits. */
3217 pp = VM_PAGE_TO_PP(pg); 3217 pp = VM_PAGE_TO_PP(pg);
3218 pp->pp_attrs |= opte; 3218 pp->pp_attrs |= opte;
3219 pve = pmap_remove_pv(pp, ptp, va); 3219 pve = pmap_remove_pv(pp, ptp, va);
3220 3220
3221 if (pve) {  3221 if (pve) {
3222 pve->pve_next = *pv_tofree; 3222 pve->pve_next = *pv_tofree;
3223 *pv_tofree = pve; 3223 *pv_tofree = pve;
3224 } 3224 }
3225 return true; 3225 return true;
3226} 3226}
3227 3227
3228/* 3228/*
3229 * pmap_remove: mapping removal function. 3229 * pmap_remove: mapping removal function.
3230 * 3230 *
3231 * => caller should not be holding any pmap locks 3231 * => caller should not be holding any pmap locks
3232 */ 3232 */
3233 3233
3234void 3234void
3235pmap_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva) 3235pmap_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
3236{ 3236{
3237 pt_entry_t *ptes; 3237 pt_entry_t *ptes;
3238 pd_entry_t pde; 3238 pd_entry_t pde;
3239 pd_entry_t * const *pdes; 3239 pd_entry_t * const *pdes;
3240 struct pv_entry *pv_tofree = NULL; 3240 struct pv_entry *pv_tofree = NULL;
3241 bool result; 3241 bool result;
3242 int i; 3242 int i;
3243 paddr_t ptppa; 3243 paddr_t ptppa;
3244 vaddr_t blkendva, va = sva; 3244 vaddr_t blkendva, va = sva;
3245 struct vm_page *ptp; 3245 struct vm_page *ptp;
3246 struct pmap *pmap2; 3246 struct pmap *pmap2;
3247 3247
3248 kpreempt_disable(); 3248 kpreempt_disable();
3249 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ 3249 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */
3250 3250
3251 /* 3251 /*
3252 * removing one page? take shortcut function. 3252 * removing one page? take shortcut function.
3253 */ 3253 */
3254 3254
3255 if (va + PAGE_SIZE == eva) { 3255 if (va + PAGE_SIZE == eva) {
3256 if (pmap_pdes_valid(va, pdes, &pde)) { 3256 if (pmap_pdes_valid(va, pdes, &pde)) {
3257 3257
3258 /* PA of the PTP */ 3258 /* PA of the PTP */
3259 ptppa = pmap_pte2pa(pde); 3259 ptppa = pmap_pte2pa(pde);
3260 3260
3261 /* Get PTP if non-kernel mapping. */ 3261 /* Get PTP if non-kernel mapping. */
3262 if (pmap != pmap_kernel()) { 3262 if (pmap != pmap_kernel()) {
3263 ptp = pmap_find_ptp(pmap, va, ptppa, 1); 3263 ptp = pmap_find_ptp(pmap, va, ptppa, 1);
3264 KASSERTMSG(ptp != NULL, 3264 KASSERTMSG(ptp != NULL,
3265 ("pmap_remove: unmanaged PTP detected") 3265 ("pmap_remove: unmanaged PTP detected")
3266 ); 3266 );
3267 } else { 3267 } else {
3268 /* Never free kernel PTPs. */ 3268 /* Never free kernel PTPs. */
3269 ptp = NULL; 3269 ptp = NULL;
3270 } 3270 }
3271 3271
3272 result = pmap_remove_pte(pmap, ptp, 3272 result = pmap_remove_pte(pmap, ptp,
3273 &ptes[pl1_i(va)], va, &pv_tofree); 3273 &ptes[pl1_i(va)], va, &pv_tofree);
3274 3274
3275 /* 3275 /*
3276 * if mapping removed and the PTP is no longer 3276 * if mapping removed and the PTP is no longer
3277 * being used, free it! 3277 * being used, free it!
3278 */ 3278 */
3279 3279
3280 if (result && ptp && ptp->wire_count <= 1) 3280 if (result && ptp && ptp->wire_count <= 1)
3281 pmap_free_ptp(pmap, ptp, va, ptes, pdes); 3281 pmap_free_ptp(pmap, ptp, va, ptes, pdes);
3282 } 3282 }
3283 } else for (/* null */ ; va < eva ; va = blkendva) { 3283 } else for (/* null */ ; va < eva ; va = blkendva) {
3284 int lvl; 3284 int lvl;
3285 3285
3286 /* determine range of block */ 3286 /* determine range of block */
3287 blkendva = x86_round_pdr(va+1); 3287 blkendva = x86_round_pdr(va+1);
3288 if (blkendva > eva) 3288 if (blkendva > eva)
3289 blkendva = eva; 3289 blkendva = eva;
3290 3290
3291 /* 3291 /*
3292 * XXXCDC: our PTE mappings should never be removed 3292 * XXXCDC: our PTE mappings should never be removed
3293 * with pmap_remove! if we allow this (and why would 3293 * with pmap_remove! if we allow this (and why would
3294 * we?) then we end up freeing the pmap's page 3294 * we?) then we end up freeing the pmap's page
3295 * directory page (PDP) before we are finished using 3295 * directory page (PDP) before we are finished using
3296 * it when we hit it in the recursive mapping. this 3296 * it when we hit it in the recursive mapping. this
3297 * is BAD. 3297 * is BAD.
3298 * 3298 *
3299 * long term solution is to move the PTEs out of user 3299 * long term solution is to move the PTEs out of user
3300 * address space and into kernel address space (up 3300 * address space and into kernel address space (up
3301 * with APTE). then we can set VM_MAXUSER_ADDRESS to 3301 * with APTE). then we can set VM_MAXUSER_ADDRESS to
3302 * be VM_MAX_ADDRESS. 3302 * be VM_MAX_ADDRESS.
3303 */ 3303 */
3304 3304
3305 /* XXXCDC: ugly hack to avoid freeing PDP here */ 3305 /* XXXCDC: ugly hack to avoid freeing PDP here */
3306 for (i = 0; i < PDP_SIZE; i++) { 3306 for (i = 0; i < PDP_SIZE; i++) {
3307 if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE+i) 3307 if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE+i)
3308 continue; 3308 continue;
3309 } 3309 }
3310 3310
3311 lvl = pmap_pdes_invalid(va, pdes, &pde); 3311 lvl = pmap_pdes_invalid(va, pdes, &pde);
3312 if (lvl != 0) { 3312 if (lvl != 0) {
3313 /* 3313 /*
3314 * skip a range corresponding to an invalid pde. 3314 * skip a range corresponding to an invalid pde.
3315 */ 3315 */
3316 blkendva = (va & ptp_masks[lvl - 1]) + nbpd[lvl - 1];  3316 blkendva = (va & ptp_masks[lvl - 1]) + nbpd[lvl - 1];
3317 continue; 3317 continue;
3318 } 3318 }
3319 3319
3320 /* PA of the PTP */ 3320 /* PA of the PTP */
3321 ptppa = pmap_pte2pa(pde); 3321 ptppa = pmap_pte2pa(pde);
3322 3322
3323 /* Get PTP if non-kernel mapping. */ 3323 /* Get PTP if non-kernel mapping. */
3324 if (pmap != pmap_kernel()) { 3324 if (pmap != pmap_kernel()) {
3325 ptp = pmap_find_ptp(pmap, va, ptppa, 1); 3325 ptp = pmap_find_ptp(pmap, va, ptppa, 1);
3326 KASSERTMSG(ptp != NULL, 3326 KASSERTMSG(ptp != NULL,
3327 ("pmap_remove: unmanaged PTP detected") 3327 ("pmap_remove: unmanaged PTP detected")
3328 ); 3328 );
3329 } else { 3329 } else {
3330 /* Never free kernel PTPs. */ 3330 /* Never free kernel PTPs. */
3331 ptp = NULL; 3331 ptp = NULL;
3332 } 3332 }
3333 3333
3334 pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[pl1_i(va)], va, 3334 pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[pl1_i(va)], va,
3335 blkendva, &pv_tofree); 3335 blkendva, &pv_tofree);
3336 3336
3337 /* if PTP is no longer being used, free it! */ 3337 /* if PTP is no longer being used, free it! */
3338 if (ptp && ptp->wire_count <= 1) { 3338 if (ptp && ptp->wire_count <= 1) {
3339 pmap_free_ptp(pmap, ptp, va, ptes, pdes); 3339 pmap_free_ptp(pmap, ptp, va, ptes, pdes);
3340 } 3340 }
3341 } 3341 }
3342 pmap_unmap_ptes(pmap, pmap2); /* unlock pmap */ 3342 pmap_unmap_ptes(pmap, pmap2); /* unlock pmap */
3343 kpreempt_enable(); 3343 kpreempt_enable();
3344 3344
3345 /* Now we free unused PVs */ 3345 /* Now we free unused PVs */
3346 if (pv_tofree) 3346 if (pv_tofree)
3347 pmap_free_pvs(pv_tofree); 3347 pmap_free_pvs(pv_tofree);
3348} 3348}
3349 3349
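TLB invalidations queued by pmap_remove() are deferred; the MI caller is expected to finish a batch of removals with pmap_update(), as in this minimal sketch:

        pmap_remove(pmap, sva, eva);    /* tear down mappings in [sva, eva) */
        pmap_update(pmap);              /* push out the deferred shootdowns */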
3350/* 3350/*
3351 * pmap_sync_pv: clear pte bits and return the old value of the pte. 3351 * pmap_sync_pv: clear pte bits and return the old value of the pte.
3352 * 3352 *
3353 * => Caller should disable kernel preemption. 3353 * => Caller should disable kernel preemption.
3354 * => issues tlb shootdowns if necessary. 3354 * => issues tlb shootdowns if necessary.
3355 */ 3355 */
3356 3356
3357static int 3357static int
3358pmap_sync_pv(struct pv_pte *pvpte, pt_entry_t expect, int clearbits, 3358pmap_sync_pv(struct pv_pte *pvpte, pt_entry_t expect, int clearbits,
3359 pt_entry_t *optep) 3359 pt_entry_t *optep)
3360{ 3360{
3361 struct pmap *pmap; 3361 struct pmap *pmap;
3362 struct vm_page *ptp; 3362 struct vm_page *ptp;
3363 vaddr_t va; 3363 vaddr_t va;
3364 pt_entry_t *ptep; 3364 pt_entry_t *ptep;
3365 pt_entry_t opte; 3365 pt_entry_t opte;
3366 pt_entry_t npte; 3366 pt_entry_t npte;
3367 bool need_shootdown; 3367 bool need_shootdown;
3368 3368
3369 ptp = pvpte->pte_ptp; 3369 ptp = pvpte->pte_ptp;
3370 va = pvpte->pte_va; 3370 va = pvpte->pte_va;
3371 KASSERT(ptp == NULL || ptp->uobject != NULL); 3371 KASSERT(ptp == NULL || ptp->uobject != NULL);
3372 KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset); 3372 KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
3373 pmap = ptp_to_pmap(ptp); 3373 pmap = ptp_to_pmap(ptp);
3374 3374
3375 KASSERT((expect & ~(PG_FRAME | PG_V)) == 0); 3375 KASSERT((expect & ~(PG_FRAME | PG_V)) == 0);
3376 KASSERT((expect & PG_V) != 0); 3376 KASSERT((expect & PG_V) != 0);
3377 KASSERT(clearbits == ~0 || (clearbits & ~(PG_M | PG_U | PG_RW)) == 0); 3377 KASSERT(clearbits == ~0 || (clearbits & ~(PG_M | PG_U | PG_RW)) == 0);
3378 KASSERT(kpreempt_disabled()); 3378 KASSERT(kpreempt_disabled());
3379 3379
3380 ptep = pmap_map_pte(pmap, ptp, va); 3380 ptep = pmap_map_pte(pmap, ptp, va);
3381 do { 3381 do {
3382 opte = *ptep; 3382 opte = *ptep;
3383 KASSERT((opte & (PG_M | PG_U)) != PG_M); 3383 KASSERT((opte & (PG_M | PG_U)) != PG_M);
3384 KASSERT((opte & (PG_U | PG_V)) != PG_U); 3384 KASSERT((opte & (PG_U | PG_V)) != PG_U);
3385 KASSERT(opte == 0 || (opte & PG_V) != 0); 3385 KASSERT(opte == 0 || (opte & PG_V) != 0);
3386 if ((opte & (PG_FRAME | PG_V)) != expect) { 3386 if ((opte & (PG_FRAME | PG_V)) != expect) {
3387 3387
3388 /* 3388 /*
3389 * we lost a race with a V->P operation like 3389 * we lost a race with a V->P operation like
3390 * pmap_remove(). wait for the competitor 3390 * pmap_remove(). wait for the competitor
3391 * reflecting pte bits into mp_attrs. 3391 * reflecting pte bits into mp_attrs.
3392 * 3392 *
3393 * issue a redundant TLB shootdown so that 3393 * issue a redundant TLB shootdown so that
3394 * we can wait for its completion. 3394 * we can wait for its completion.
3395 */ 3395 */
3396 3396
3397 pmap_unmap_pte(); 3397 pmap_unmap_pte();
3398 if (clearbits != 0) { 3398 if (clearbits != 0) {
3399 pmap_tlb_shootdown(pmap, va, 3399 pmap_tlb_shootdown(pmap, va,
3400 (pmap == pmap_kernel() ? PG_G : 0), 3400 (pmap == pmap_kernel() ? PG_G : 0),
3401 TLBSHOOT_SYNC_PV1); 3401 TLBSHOOT_SYNC_PV1);
3402 } 3402 }
3403 return EAGAIN; 3403 return EAGAIN;
3404 } 3404 }
3405 3405
3406 /* 3406 /*
3407 * check if there's anything to do on this pte. 3407 * check if there's anything to do on this pte.
3408 */ 3408 */
3409 3409
3410 if ((opte & clearbits) == 0) { 3410 if ((opte & clearbits) == 0) {
3411 need_shootdown = false; 3411 need_shootdown = false;
3412 break; 3412 break;
3413 } 3413 }
3414 3414
3415 /* 3415 /*
3416 * we need a shootdown if the pte is cached. (PG_U) 3416 * we need a shootdown if the pte is cached. (PG_U)
3417 * 3417 *
3418 * ...unless we are clearing only the PG_RW bit and 3418 * ...unless we are clearing only the PG_RW bit and
3419 * it isn't cached as RW. (PG_M) 3419 * it isn't cached as RW. (PG_M)
3420 */ 3420 */
3421 3421
3422 need_shootdown = (opte & PG_U) != 0 && 3422 need_shootdown = (opte & PG_U) != 0 &&
3423 !(clearbits == PG_RW && (opte & PG_M) == 0); 3423 !(clearbits == PG_RW && (opte & PG_M) == 0);
3424 3424
3425 npte = opte & ~clearbits; 3425 npte = opte & ~clearbits;
3426 3426
3427 /* 3427 /*
3428 * if we need a shootdown anyway, clear PG_U and PG_M. 3428 * if we need a shootdown anyway, clear PG_U and PG_M.
3429 */ 3429 */
3430 3430
3431 if (need_shootdown) { 3431 if (need_shootdown) {
3432 npte &= ~(PG_U | PG_M); 3432 npte &= ~(PG_U | PG_M);
3433 } 3433 }
3434 KASSERT((npte & (PG_M | PG_U)) != PG_M); 3434 KASSERT((npte & (PG_M | PG_U)) != PG_M);
3435 KASSERT((npte & (PG_U | PG_V)) != PG_U); 3435 KASSERT((npte & (PG_U | PG_V)) != PG_U);
3436 KASSERT(npte == 0 || (opte & PG_V) != 0); 3436 KASSERT(npte == 0 || (opte & PG_V) != 0);
3437 } while (pmap_pte_cas(ptep, opte, npte) != opte); 3437 } while (pmap_pte_cas(ptep, opte, npte) != opte);
3438 3438
3439 if (need_shootdown) { 3439 if (need_shootdown) {
3440 pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_SYNC_PV2); 3440 pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_SYNC_PV2);
3441 } 3441 }
3442 pmap_unmap_pte(); 3442 pmap_unmap_pte();
3443 3443
3444 *optep = opte; 3444 *optep = opte;
3445 return 0; 3445 return 0;
3446} 3446}
3447 3447
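The update loop above is the usual lock-free compare-and-swap idiom: pmap_pte_cas() stores npte only if the PTE still equals opte and returns whatever value it found, so any concurrent change makes the loop re-read and retry. Stripped of the PV bookkeeping it has this shape (sketch only):

        do {
                opte = *ptep;                   /* snapshot the current PTE */
                npte = opte & ~clearbits;       /* compute the desired PTE */
        } while (pmap_pte_cas(ptep, opte, npte) != opte);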
3448/* 3448/*
3449 * pmap_page_remove: remove a managed vm_page from all pmaps that map it 3449 * pmap_page_remove: remove a managed vm_page from all pmaps that map it
3450 * 3450 *
3451 * => R/M bits are sync'd back to attrs 3451 * => R/M bits are sync'd back to attrs
3452 */ 3452 */
3453 3453
3454void 3454void
3455pmap_page_remove(struct vm_page *pg) 3455pmap_page_remove(struct vm_page *pg)
3456{ 3456{
3457 struct pmap_page *pp; 3457 struct pmap_page *pp;
3458 struct pv_pte *pvpte; 3458 struct pv_pte *pvpte;
3459 struct pv_entry *killlist = NULL; 3459 struct pv_entry *killlist = NULL;
3460 struct vm_page *ptp; 3460 struct vm_page *ptp;
3461 pt_entry_t expect; 3461 pt_entry_t expect;
3462 lwp_t *l; 3462 lwp_t *l;
3463 int count; 3463 int count;
3464 3464
3465 KASSERT(uvm_page_locked_p(pg)); 3465 KASSERT(uvm_page_locked_p(pg));
3466 3466
3467 l = curlwp; 3467 l = curlwp;
3468 pp = VM_PAGE_TO_PP(pg); 3468 pp = VM_PAGE_TO_PP(pg);
3469 expect = pmap_pa2pte(VM_PAGE_TO_PHYS(pg)) | PG_V; 3469 expect = pmap_pa2pte(VM_PAGE_TO_PHYS(pg)) | PG_V;
3470 count = SPINLOCK_BACKOFF_MIN; 3470 count = SPINLOCK_BACKOFF_MIN;
3471 kpreempt_disable(); 3471 kpreempt_disable();
3472startover: 3472startover:
3473 while ((pvpte = pv_pte_first(pp)) != NULL) { 3473 while ((pvpte = pv_pte_first(pp)) != NULL) {
3474 struct pmap *pmap; 3474 struct pmap *pmap;
3475 struct pv_entry *pve; 3475 struct pv_entry *pve;
3476 pt_entry_t opte; 3476 pt_entry_t opte;
3477 vaddr_t va; 3477 vaddr_t va;
3478 int error; 3478 int error;
3479 3479
3480 /* 3480 /*
3481 * add a reference to the pmap before clearing the pte. 3481 * add a reference to the pmap before clearing the pte.
3482 * otherwise the pmap can disappear behind us. 3482 * otherwise the pmap can disappear behind us.
3483 */ 3483 */
3484 3484
3485 ptp = pvpte->pte_ptp; 3485 ptp = pvpte->pte_ptp;
3486 pmap = ptp_to_pmap(ptp); 3486 pmap = ptp_to_pmap(ptp);
3487 if (ptp != NULL) { 3487 if (ptp != NULL) {
3488 pmap_reference(pmap); 3488 pmap_reference(pmap);
3489 } 3489 }
3490 3490
3491 error = pmap_sync_pv(pvpte, expect, ~0, &opte); 3491 error = pmap_sync_pv(pvpte, expect, ~0, &opte);
3492 if (error == EAGAIN) { 3492 if (error == EAGAIN) {
3493 int hold_count; 3493 int hold_count;
3494 KERNEL_UNLOCK_ALL(curlwp, &hold_count); 3494 KERNEL_UNLOCK_ALL(curlwp, &hold_count);
3495 if (ptp != NULL) { 3495 if (ptp != NULL) {
3496 pmap_destroy(pmap); 3496 pmap_destroy(pmap);
3497 } 3497 }
3498 SPINLOCK_BACKOFF(count); 3498 SPINLOCK_BACKOFF(count);
3499 KERNEL_LOCK(hold_count, curlwp); 3499 KERNEL_LOCK(hold_count, curlwp);
3500 goto startover; 3500 goto startover;
3501 } 3501 }
3502 3502
3503 pp->pp_attrs |= opte; 3503 pp->pp_attrs |= opte;
3504 va = pvpte->pte_va; 3504 va = pvpte->pte_va;
3505 pve = pmap_remove_pv(pp, ptp, va); 3505 pve = pmap_remove_pv(pp, ptp, va);
3506 3506
3507 /* update the PTP reference count. free if last reference. */ 3507 /* update the PTP reference count. free if last reference. */
3508 if (ptp != NULL) { 3508 if (ptp != NULL) {
3509 struct pmap *pmap2; 3509 struct pmap *pmap2;
3510 pt_entry_t *ptes; 3510 pt_entry_t *ptes;
3511 pd_entry_t * const *pdes; 3511 pd_entry_t * const *pdes;
3512 3512
3513 KASSERT(pmap != pmap_kernel()); 3513 KASSERT(pmap != pmap_kernel());
3514 3514
3515 pmap_tlb_shootnow(); 3515 pmap_tlb_shootnow();
3516 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); 3516 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
3517 pmap_stats_update_bypte(pmap, 0, opte); 3517 pmap_stats_update_bypte(pmap, 0, opte);
3518 ptp->wire_count--; 3518 ptp->wire_count--;
3519 if (ptp->wire_count <= 1) { 3519 if (ptp->wire_count <= 1) {
3520 pmap_free_ptp(pmap, ptp, va, ptes, pdes); 3520 pmap_free_ptp(pmap, ptp, va, ptes, pdes);
3521 } 3521 }
3522 pmap_unmap_ptes(pmap, pmap2); 3522 pmap_unmap_ptes(pmap, pmap2);
3523 pmap_destroy(pmap); 3523 pmap_destroy(pmap);
3524 } else { 3524 } else {
3525 KASSERT(pmap == pmap_kernel()); 3525 KASSERT(pmap == pmap_kernel());
3526 pmap_stats_update_bypte(pmap, 0, opte); 3526 pmap_stats_update_bypte(pmap, 0, opte);
3527 } 3527 }
3528 3528
3529 if (pve != NULL) { 3529 if (pve != NULL) {
3530 pve->pve_next = killlist; /* mark it for death */ 3530 pve->pve_next = killlist; /* mark it for death */
3531 killlist = pve; 3531 killlist = pve;
3532 } 3532 }
3533 } 3533 }
3534 pmap_tlb_shootnow(); 3534 pmap_tlb_shootnow();
3535 kpreempt_enable(); 3535 kpreempt_enable();
3536 3536
3537 /* Now free unused pvs. */ 3537 /* Now free unused pvs. */
3538 pmap_free_pvs(killlist); 3538 pmap_free_pvs(killlist);
3539} 3539}
3540 3540
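pmap_page_remove() is normally reached through the MI pmap_page_protect() wrapper when UVM revokes every mapping of a page, e.g. before freeing it; in sketch form:

        /* Drop all mappings of 'pg' in every pmap that maps it. */
        pmap_page_protect(pg, VM_PROT_NONE);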
3541/* 3541/*
3542 * p m a p a t t r i b u t e f u n c t i o n s 3542 * p m a p a t t r i b u t e f u n c t i o n s
3543 * functions that test/change managed page's attributes 3543 * functions that test/change managed page's attributes
3544 * since a page can be mapped multiple times we must check each PTE that 3544 * since a page can be mapped multiple times we must check each PTE that
3545 * maps it by going down the pv lists. 3545 * maps it by going down the pv lists.
3546 */ 3546 */
3547 3547
3548/* 3548/*
3549 * pmap_test_attrs: test a page's attributes 3549 * pmap_test_attrs: test a page's attributes
3550 */ 3550 */
3551 3551
3552bool 3552bool
3553pmap_test_attrs(struct vm_page *pg, unsigned testbits) 3553pmap_test_attrs(struct vm_page *pg, unsigned testbits)
3554{ 3554{
3555 struct pmap_page *pp; 3555 struct pmap_page *pp;
3556 struct pv_pte *pvpte; 3556 struct pv_pte *pvpte;
3557 pt_entry_t expect; 3557 pt_entry_t expect;
3558 u_int result; 3558 u_int result;
3559 3559
3560 KASSERT(uvm_page_locked_p(pg)); 3560 KASSERT(uvm_page_locked_p(pg));
3561 3561
3562 pp = VM_PAGE_TO_PP(pg); 3562 pp = VM_PAGE_TO_PP(pg);
3563 if ((pp->pp_attrs & testbits) != 0) { 3563 if ((pp->pp_attrs & testbits) != 0) {
3564 return true; 3564 return true;
3565 } 3565 }
3566 expect = pmap_pa2pte(VM_PAGE_TO_PHYS(pg)) | PG_V; 3566 expect = pmap_pa2pte(VM_PAGE_TO_PHYS(pg)) | PG_V;
3567 kpreempt_disable(); 3567 kpreempt_disable();
3568 for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) { 3568 for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) {
3569 pt_entry_t opte; 3569 pt_entry_t opte;
3570 int error; 3570 int error;
3571 3571
3572 if ((pp->pp_attrs & testbits) != 0) { 3572 if ((pp->pp_attrs & testbits) != 0) {
3573 break; 3573 break;
3574 } 3574 }
3575 error = pmap_sync_pv(pvpte, expect, 0, &opte); 3575 error = pmap_sync_pv(pvpte, expect, 0, &opte);
3576 if (error == 0) { 3576 if (error == 0) {
3577 pp->pp_attrs |= opte; 3577 pp->pp_attrs |= opte;
3578 } 3578 }
3579 } 3579 }
3580 result = pp->pp_attrs & testbits; 3580 result = pp->pp_attrs & testbits;
3581 kpreempt_enable(); 3581 kpreempt_enable();
3582 3582
3583 /* 3583 /*
3584 * note that we exit the loop early once pp_attrs contains the 3584 * note that we exit the loop early once pp_attrs contains the
3585 * bits we are testing for. 3585 * bits we are testing for.
3586 */ 3586 */
3587 3587
3588 return result != 0; 3588 return result != 0;
3589} 3589}
3590 3590
3591/* 3591/*
3592 * pmap_clear_attrs: clear the specified attribute for a page. 3592 * pmap_clear_attrs: clear the specified attribute for a page.
3593 * 3593 *
3594 * => we return true if we cleared one of the bits we were asked to 3594 * => we return true if we cleared one of the bits we were asked to
3595 */ 3595 */
3596 3596
3597bool 3597bool
3598pmap_clear_attrs(struct vm_page *pg, unsigned clearbits) 3598pmap_clear_attrs(struct vm_page *pg, unsigned clearbits)
3599{ 3599{
3600 struct pmap_page *pp; 3600 struct pmap_page *pp;
3601 struct pv_pte *pvpte; 3601 struct pv_pte *pvpte;
3602 u_int result; 3602 u_int result;
3603 pt_entry_t expect; 3603 pt_entry_t expect;
3604 int count; 3604 int count;
3605 3605
3606 KASSERT(uvm_page_locked_p(pg)); 3606 KASSERT(uvm_page_locked_p(pg));
3607 3607
3608 pp = VM_PAGE_TO_PP(pg); 3608 pp = VM_PAGE_TO_PP(pg);
3609 expect = pmap_pa2pte(VM_PAGE_TO_PHYS(pg)) | PG_V; 3609 expect = pmap_pa2pte(VM_PAGE_TO_PHYS(pg)) | PG_V;
3610 count = SPINLOCK_BACKOFF_MIN; 3610 count = SPINLOCK_BACKOFF_MIN;
3611 kpreempt_disable(); 3611 kpreempt_disable();
3612startover: 3612startover:
3613 for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) { 3613 for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) {
3614 pt_entry_t opte; 3614 pt_entry_t opte;
3615 int error; 3615 int error;
3616 3616
3617 error = pmap_sync_pv(pvpte, expect, clearbits, &opte); 3617 error = pmap_sync_pv(pvpte, expect, clearbits, &opte);
3618 if (error == EAGAIN) { 3618 if (error == EAGAIN) {
3619 int hold_count; 3619 int hold_count;
3620 KERNEL_UNLOCK_ALL(curlwp, &hold_count); 3620 KERNEL_UNLOCK_ALL(curlwp, &hold_count);
3621 SPINLOCK_BACKOFF(count); 3621 SPINLOCK_BACKOFF(count);
3622 KERNEL_LOCK(hold_count, curlwp); 3622 KERNEL_LOCK(hold_count, curlwp);
3623 goto startover; 3623 goto startover;
3624 } 3624 }
3625 pp->pp_attrs |= opte; 3625 pp->pp_attrs |= opte;
3626 } 3626 }
3627 result = pp->pp_attrs & clearbits; 3627 result = pp->pp_attrs & clearbits;
3628 pp->pp_attrs &= ~clearbits; 3628 pp->pp_attrs &= ~clearbits;
3629 kpreempt_enable(); 3629 kpreempt_enable();
3630 3630
3631 return result != 0; 3631 return result != 0;
3632} 3632}
3633 3633
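The MI referenced/modified interface is layered on these two functions; in this pmap the pmap.h wrappers are expected to reduce to PG_U/PG_M attribute operations, roughly as follows (a sketch, not a verbatim quote of pmap.h):

#define pmap_is_referenced(pg)          pmap_test_attrs(pg, PG_U)
#define pmap_is_modified(pg)            pmap_test_attrs(pg, PG_M)
#define pmap_clear_reference(pg)        pmap_clear_attrs(pg, PG_U)
#define pmap_clear_modify(pg)           pmap_clear_attrs(pg, PG_M)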
3634 3634
3635/* 3635/*
3636 * p m a p p r o t e c t i o n f u n c t i o n s 3636 * p m a p p r o t e c t i o n f u n c t i o n s
3637 */ 3637 */
3638 3638
3639/* 3639/*
3640 * pmap_page_protect: change the protection of all recorded mappings 3640 * pmap_page_protect: change the protection of all recorded mappings
3641 * of a managed page 3641 * of a managed page
3642 * 3642 *
3643 * => NOTE: this is an inline function in pmap.h 3643 * => NOTE: this is an inline function in pmap.h
3644 */ 3644 */
3645 3645
3646/* see pmap.h */ 3646/* see pmap.h */
3647 3647
3648/* 3648/*
3649 * pmap_protect: set the protection of the pages in a pmap 3649 * pmap_protect: set the protection of the pages in a pmap
3650 * 3650 *
3651 * => NOTE: this is an inline function in pmap.h 3651 * => NOTE: this is an inline function in pmap.h
3652 */ 3652 */
3653 3653
3654/* see pmap.h */ 3654/* see pmap.h */
3655 3655
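Both pmap.h inlines funnel into the functions in this file: removing write permission becomes an attribute clear or a write-protect pass, and removing all permissions becomes a full removal. An illustrative sketch of their shape (not a verbatim quote of pmap.h):

static __inline void
pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
{
        if ((prot & VM_PROT_WRITE) == 0) {
                if (prot & (VM_PROT_READ | VM_PROT_EXECUTE))
                        (void)pmap_clear_attrs(pg, PG_RW);
                else
                        pmap_page_remove(pg);
        }
}

static __inline void
pmap_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
{
        if ((prot & VM_PROT_WRITE) == 0) {
                if (prot & (VM_PROT_READ | VM_PROT_EXECUTE))
                        pmap_write_protect(pmap, sva, eva, prot);
                else
                        pmap_remove(pmap, sva, eva);
        }
}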
3656/* 3656/*
3657 * pmap_write_protect: write-protect pages in a pmap. 3657 * pmap_write_protect: write-protect pages in a pmap.
3658 */ 3658 */
3659void 3659void
3660pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) 3660pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
3661{ 3661{
3662 pt_entry_t *ptes; 3662 pt_entry_t *ptes;
3663 pt_entry_t * const *pdes; 3663 pt_entry_t * const *pdes;
3664 struct pmap *pmap2; 3664 struct pmap *pmap2;
3665 vaddr_t blockend, va; 3665 vaddr_t blockend, va;
3666 3666
3667 KASSERT(curlwp->l_md.md_gc_pmap != pmap); 3667 KASSERT(curlwp->l_md.md_gc_pmap != pmap);
3668 3668
3669 sva &= PG_FRAME; 3669 sva &= PG_FRAME;
3670 eva &= PG_FRAME; 3670 eva &= PG_FRAME;
3671 3671
3672 /* Acquire pmap. */ 3672 /* Acquire pmap. */
3673 kpreempt_disable(); 3673 kpreempt_disable();
3674 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); 3674 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
3675 3675
3676 for (va = sva ; va < eva ; va = blockend) { 3676 for (va = sva ; va < eva ; va = blockend) {
3677 pt_entry_t *spte, *epte; 3677 pt_entry_t *spte, *epte;
3678 int i; 3678 int i;
3679 3679
3680 blockend = (va & L2_FRAME) + NBPD_L2; 3680 blockend = (va & L2_FRAME) + NBPD_L2;
3681 if (blockend > eva) 3681 if (blockend > eva)
3682 blockend = eva; 3682 blockend = eva;
3683 3683
3684 /* 3684 /*
3685 * XXXCDC: our PTE mappings should never be write-protected! 3685 * XXXCDC: our PTE mappings should never be write-protected!
3686 * 3686 *
3687 * long term solution is to move the PTEs out of user 3687 * long term solution is to move the PTEs out of user
3688 * address space. and into kernel address space (up 3688 * address space. and into kernel address space (up
3689 * with APTE). then we can set VM_MAXUSER_ADDRESS to 3689 * with APTE). then we can set VM_MAXUSER_ADDRESS to
3690 * be VM_MAX_ADDRESS. 3690 * be VM_MAX_ADDRESS.
3691 */ 3691 */
3692 3692
3693 /* XXXCDC: ugly hack to avoid freeing PDP here */ 3693 /* XXXCDC: ugly hack to avoid freeing PDP here */
3694 for (i = 0; i < PDP_SIZE; i++) { 3694 for (i = 0; i < PDP_SIZE; i++) {
3695 if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE+i) 3695 if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE+i)
3696 continue; 3696 continue;
3697 } 3697 }
3698 3698
3699 /* Is it a valid block? */ 3699 /* Is it a valid block? */
3700 if (!pmap_pdes_valid(va, pdes, NULL)) { 3700 if (!pmap_pdes_valid(va, pdes, NULL)) {
3701 continue; 3701 continue;
3702 } 3702 }
3703 KASSERT(va < VM_MAXUSER_ADDRESS || va >= VM_MAX_ADDRESS); 3703 KASSERT(va < VM_MAXUSER_ADDRESS || va >= VM_MAX_ADDRESS);
3704 3704
3705 spte = &ptes[pl1_i(va)]; 3705 spte = &ptes[pl1_i(va)];
3706 epte = &ptes[pl1_i(blockend)]; 3706 epte = &ptes[pl1_i(blockend)];
3707 3707
3708 for (/*null */; spte < epte ; spte++) { 3708 for (/*null */; spte < epte ; spte++) {
3709 pt_entry_t opte, npte; 3709 pt_entry_t opte, npte;
3710 3710
3711 do { 3711 do {
3712 opte = *spte; 3712 opte = *spte;
3713 if ((~opte & (PG_RW | PG_V)) != 0) { 3713 if ((~opte & (PG_RW | PG_V)) != 0) {
3714 goto next; 3714 goto next;
3715 } 3715 }
3716 npte = opte & ~PG_RW; 3716 npte = opte & ~PG_RW;
3717 } while (pmap_pte_cas(spte, opte, npte) != opte); 3717 } while (pmap_pte_cas(spte, opte, npte) != opte);
3718 3718
3719 if ((opte & PG_M) != 0) { 3719 if ((opte & PG_M) != 0) {
3720 vaddr_t tva = x86_ptob(spte - ptes); 3720 vaddr_t tva = x86_ptob(spte - ptes);
3721 pmap_tlb_shootdown(pmap, tva, opte, 3721 pmap_tlb_shootdown(pmap, tva, opte,
3722 TLBSHOOT_WRITE_PROTECT); 3722 TLBSHOOT_WRITE_PROTECT);
3723 } 3723 }
3724next:; 3724next:;
3725 } 3725 }
3726 } 3726 }
3727 3727
3728 /* Release pmap. */ 3728 /* Release pmap. */
3729 pmap_unmap_ptes(pmap, pmap2); 3729 pmap_unmap_ptes(pmap, pmap2);
3730 kpreempt_enable(); 3730 kpreempt_enable();
3731} 3731}
3732 3732
3733/* 3733/*
3734 * pmap_unwire: clear the wired bit in the PTE. 3734 * pmap_unwire: clear the wired bit in the PTE.
3735 * 3735 *
3736 * => Mapping should already be present. 3736 * => Mapping should already be present.
3737 */ 3737 */
3738void 3738void
3739pmap_unwire(struct pmap *pmap, vaddr_t va) 3739pmap_unwire(struct pmap *pmap, vaddr_t va)
3740{ 3740{
3741 pt_entry_t *ptes, *ptep, opte; 3741 pt_entry_t *ptes, *ptep, opte;
3742 pd_entry_t * const *pdes; 3742 pd_entry_t * const *pdes;
3743 struct pmap *pmap2; 3743 struct pmap *pmap2;
3744 3744
3745 /* Acquire pmap. */ 3745 /* Acquire pmap. */
3746 kpreempt_disable(); 3746 kpreempt_disable();
3747 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); 3747 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
3748 3748
3749 if (!pmap_pdes_valid(va, pdes, NULL)) { 3749 if (!pmap_pdes_valid(va, pdes, NULL)) {
3750 panic("pmap_unwire: invalid PDE"); 3750 panic("pmap_unwire: invalid PDE");
3751 } 3751 }
3752 3752
3753 ptep = &ptes[pl1_i(va)]; 3753 ptep = &ptes[pl1_i(va)];
3754 opte = *ptep; 3754 opte = *ptep;
3755 KASSERT(pmap_valid_entry(opte)); 3755 KASSERT(pmap_valid_entry(opte));
3756 3756
3757 if (opte & PG_W) { 3757 if (opte & PG_W) {
3758 pt_entry_t npte = opte & ~PG_W; 3758 pt_entry_t npte = opte & ~PG_W;
3759 3759
3760 opte = pmap_pte_testset(ptep, npte); 3760 opte = pmap_pte_testset(ptep, npte);
3761 pmap_stats_update_bypte(pmap, npte, opte); 3761 pmap_stats_update_bypte(pmap, npte, opte);
3762 } else { 3762 } else {
3763 printf("pmap_unwire: wiring for pmap %p va 0x%lx " 3763 printf("pmap_unwire: wiring for pmap %p va 0x%lx "
3764 "did not change!\n", pmap, va); 3764 "did not change!\n", pmap, va);
3765 } 3765 }
3766 3766
3767 /* Release pmap. */ 3767 /* Release pmap. */
3768 pmap_unmap_ptes(pmap, pmap2); 3768 pmap_unmap_ptes(pmap, pmap2);
3769 kpreempt_enable(); 3769 kpreempt_enable();
3770} 3770}
3771 3771
3772/* 3772/*
3773 * pmap_copy: copy mappings from one pmap to another 3773 * pmap_copy: copy mappings from one pmap to another
3774 * 3774 *
3775 * => optional function 3775 * => optional function
3776 * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) 3776 * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
3777 */ 3777 */
3778 3778
3779/* 3779/*
3780 * defined as macro in pmap.h 3780 * defined as macro in pmap.h
3781 */ 3781 */
3782 3782
3783__weak_alias(pmap_enter, pmap_enter_default); 3783__strict_weak_alias(pmap_enter, pmap_enter_default);
3784 3784
3785int 3785int
3786pmap_enter_default(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, 3786pmap_enter_default(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot,
3787 u_int flags) 3787 u_int flags)
3788{ 3788{
3789 return pmap_enter_ma(pmap, va, pa, pa, prot, flags, 0); 3789 return pmap_enter_ma(pmap, va, pa, pa, prot, flags, 0);
3790} 3790}
3791 3791
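The switch from __weak_alias() to __strict_weak_alias() above is the actual change in this revision: the strict form keeps the weak-symbol behaviour but also binds the alias to the target's C type, so a prototype mismatch between pmap_enter() and pmap_enter_default() is caught at compile time instead of becoming a silent ABI bug. An illustrative approximation of a type-checked weak alias (not the actual <sys/cdefs.h> definition):

/* Declare 'alias' with exactly the type of 'sym' and alias it weakly. */
#define my_strict_weak_alias(alias, sym)                                \
        extern __typeof__(sym) alias                                    \
            __attribute__((__weak__, __alias__(#sym)))

my_strict_weak_alias(pmap_enter, pmap_enter_default);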
3792/* 3792/*
3793 * pmap_enter: enter a mapping into a pmap 3793 * pmap_enter: enter a mapping into a pmap
3794 * 3794 *
3795 * => must be done "now" ... no lazy-evaluation 3795 * => must be done "now" ... no lazy-evaluation
3796 * => we set pmap => pv_head locking 3796 * => we set pmap => pv_head locking
3797 */ 3797 */
3798int 3798int
3799pmap_enter_ma(struct pmap *pmap, vaddr_t va, paddr_t ma, paddr_t pa, 3799pmap_enter_ma(struct pmap *pmap, vaddr_t va, paddr_t ma, paddr_t pa,
3800 vm_prot_t prot, u_int flags, int domid) 3800 vm_prot_t prot, u_int flags, int domid)
3801{ 3801{
3802 pt_entry_t *ptes, opte, npte; 3802 pt_entry_t *ptes, opte, npte;
3803 pt_entry_t *ptep; 3803 pt_entry_t *ptep;
3804 pd_entry_t * const *pdes; 3804 pd_entry_t * const *pdes;
3805 struct vm_page *ptp, *pg; 3805 struct vm_page *ptp, *pg;
3806 struct pmap_page *new_pp; 3806 struct pmap_page *new_pp;
3807 struct pmap_page *old_pp; 3807 struct pmap_page *old_pp;
3808 struct pv_entry *old_pve = NULL; 3808 struct pv_entry *old_pve = NULL;
3809 struct pv_entry *new_pve; 3809 struct pv_entry *new_pve;
3810 struct pv_entry *new_pve2; 3810 struct pv_entry *new_pve2;
3811 int error; 3811 int error;
3812 bool wired = (flags & PMAP_WIRED) != 0; 3812 bool wired = (flags & PMAP_WIRED) != 0;
3813 struct pmap *pmap2; 3813 struct pmap *pmap2;
3814 3814
3815 KASSERT(pmap_initialized); 3815 KASSERT(pmap_initialized);
3816 KASSERT(curlwp->l_md.md_gc_pmap != pmap); 3816 KASSERT(curlwp->l_md.md_gc_pmap != pmap);
3817 KASSERT(va < VM_MAX_KERNEL_ADDRESS); 3817 KASSERT(va < VM_MAX_KERNEL_ADDRESS);
3818 KASSERTMSG(va != (vaddr_t)PDP_BASE && va != (vaddr_t)APDP_BASE, 3818 KASSERTMSG(va != (vaddr_t)PDP_BASE && va != (vaddr_t)APDP_BASE,
3819 ("pmap_enter: trying to map over PDP/APDP!")); 3819 ("pmap_enter: trying to map over PDP/APDP!"));
3820 KASSERTMSG(va < VM_MIN_KERNEL_ADDRESS || 3820 KASSERTMSG(va < VM_MIN_KERNEL_ADDRESS ||
3821 pmap_valid_entry(pmap->pm_pdir[pl_i(va, PTP_LEVELS)]), 3821 pmap_valid_entry(pmap->pm_pdir[pl_i(va, PTP_LEVELS)]),
3822 ("pmap_enter: missing kernel PTP for VA %lx!", va)); 3822 ("pmap_enter: missing kernel PTP for VA %lx!", va));
3823 3823
3824#ifdef XEN 3824#ifdef XEN
3825 KASSERT(domid == DOMID_SELF || pa == 0); 3825 KASSERT(domid == DOMID_SELF || pa == 0);
3826#endif /* XEN */ 3826#endif /* XEN */
3827 3827
3828 npte = ma | protection_codes[prot] | PG_V; 3828 npte = ma | protection_codes[prot] | PG_V;
3829 npte |= pmap_pat_flags(flags); 3829 npte |= pmap_pat_flags(flags);
3830 if (wired) 3830 if (wired)
3831 npte |= PG_W; 3831 npte |= PG_W;
3832 if (va < VM_MAXUSER_ADDRESS) 3832 if (va < VM_MAXUSER_ADDRESS)
3833 npte |= PG_u; 3833 npte |= PG_u;
3834 else if (va < VM_MAX_ADDRESS) 3834 else if (va < VM_MAX_ADDRESS)
3835 npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */ 3835 npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */
3836 else 3836 else
3837 npte |= PG_k; 3837 npte |= PG_k;
3838 if (pmap == pmap_kernel()) 3838 if (pmap == pmap_kernel())
3839 npte |= pmap_pg_g; 3839 npte |= pmap_pg_g;
3840 if (flags & VM_PROT_ALL) { 3840 if (flags & VM_PROT_ALL) {
3841 npte |= PG_U; 3841 npte |= PG_U;
3842 if (flags & VM_PROT_WRITE) { 3842 if (flags & VM_PROT_WRITE) {
3843 KASSERT((npte & PG_RW) != 0); 3843 KASSERT((npte & PG_RW) != 0);
3844 npte |= PG_M; 3844 npte |= PG_M;
3845 } 3845 }
3846 } 3846 }
3847 3847
3848#ifdef XEN 3848#ifdef XEN
3849 if (domid != DOMID_SELF) 3849 if (domid != DOMID_SELF)
3850 pg = NULL; 3850 pg = NULL;
3851 else 3851 else
3852#endif 3852#endif
3853 pg = PHYS_TO_VM_PAGE(pa); 3853 pg = PHYS_TO_VM_PAGE(pa);
3854 if (pg != NULL) { 3854 if (pg != NULL) {
3855 /* This is a managed page */ 3855 /* This is a managed page */
3856 npte |= PG_PVLIST; 3856 npte |= PG_PVLIST;
3857 new_pp = VM_PAGE_TO_PP(pg); 3857 new_pp = VM_PAGE_TO_PP(pg);
3858 } else { 3858 } else {
3859 new_pp = NULL; 3859 new_pp = NULL;
3860 } 3860 }
3861 3861
3862 /* get pves. */ 3862 /* get pves. */
3863 new_pve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); 3863 new_pve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT);
3864 new_pve2 = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); 3864 new_pve2 = pool_cache_get(&pmap_pv_cache, PR_NOWAIT);
3865 if (new_pve == NULL || new_pve2 == NULL) { 3865 if (new_pve == NULL || new_pve2 == NULL) {
3866 if (flags & PMAP_CANFAIL) { 3866 if (flags & PMAP_CANFAIL) {
3867 error = ENOMEM; 3867 error = ENOMEM;
3868 goto out2; 3868 goto out2;
3869 } 3869 }
3870 panic("pmap_enter: pve allocation failed"); 3870 panic("pmap_enter: pve allocation failed");
3871 } 3871 }
3872 3872
3873 kpreempt_disable(); 3873 kpreempt_disable();
3874 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ 3874 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */
3875 if (pmap == pmap_kernel()) { 3875 if (pmap == pmap_kernel()) {
3876 ptp = NULL; 3876 ptp = NULL;
3877 } else { 3877 } else {
3878 ptp = pmap_get_ptp(pmap, va, pdes); 3878 ptp = pmap_get_ptp(pmap, va, pdes);
3879 if (ptp == NULL) { 3879 if (ptp == NULL) {
3880 pmap_unmap_ptes(pmap, pmap2); 3880 pmap_unmap_ptes(pmap, pmap2);
3881 if (flags & PMAP_CANFAIL) { 3881 if (flags & PMAP_CANFAIL) {
3882 error = ENOMEM; 3882 error = ENOMEM;
3883 goto out; 3883 goto out;
3884 } 3884 }
3885 panic("pmap_enter: get ptp failed"); 3885 panic("pmap_enter: get ptp failed");
3886 } 3886 }
3887 } 3887 }
3888 3888
3889 /* 3889 /*
3890 * update the pte. 3890 * update the pte.
3891 */ 3891 */
3892 3892
3893 ptep = &ptes[pl1_i(va)]; 3893 ptep = &ptes[pl1_i(va)];
3894 do { 3894 do {
3895 opte = *ptep; 3895 opte = *ptep;
3896 3896
3897 /* 3897 /*
3898 * if the same page, inherit PG_U and PG_M. 3898 * if the same page, inherit PG_U and PG_M.
3899 */ 3899 */
3900 if (((opte ^ npte) & (PG_FRAME | PG_V)) == 0) { 3900 if (((opte ^ npte) & (PG_FRAME | PG_V)) == 0) {
3901 npte |= opte & (PG_U | PG_M); 3901 npte |= opte & (PG_U | PG_M);
3902 } 3902 }
3903#if defined(XEN) 3903#if defined(XEN)
3904 if (domid != DOMID_SELF) { 3904 if (domid != DOMID_SELF) {
3905 /* pmap_pte_cas with error handling */ 3905 /* pmap_pte_cas with error handling */
3906 int s = splvm(); 3906 int s = splvm();
3907 if (opte != *ptep) { 3907 if (opte != *ptep) {
3908 splx(s); 3908 splx(s);
3909 continue; 3909 continue;
3910 } 3910 }
3911 error = xpq_update_foreign( 3911 error = xpq_update_foreign(
3912 vtomach((vaddr_t)ptep), npte, domid); 3912 vtomach((vaddr_t)ptep), npte, domid);
3913 splx(s); 3913 splx(s);
3914 if (error) { 3914 if (error) {
3915 if (ptp != NULL && ptp->wire_count <= 1) { 3915 if (ptp != NULL && ptp->wire_count <= 1) {
3916 pmap_free_ptp(pmap, ptp, va, ptes, pdes); 3916 pmap_free_ptp(pmap, ptp, va, ptes, pdes);
3917 } 3917 }
3918 pmap_unmap_ptes(pmap, pmap2); 3918 pmap_unmap_ptes(pmap, pmap2);
3919 goto out; 3919 goto out;
3920 } 3920 }
3921 break; 3921 break;
3922 } 3922 }
3923#endif /* defined(XEN) */ 3923#endif /* defined(XEN) */
3924 } while (pmap_pte_cas(ptep, opte, npte) != opte); 3924 } while (pmap_pte_cas(ptep, opte, npte) != opte);
3925 3925
3926 /* 3926 /*
3927 * update statistics and PTP's reference count. 3927 * update statistics and PTP's reference count.
3928 */ 3928 */
3929 3929
3930 pmap_stats_update_bypte(pmap, npte, opte); 3930 pmap_stats_update_bypte(pmap, npte, opte);
3931 if (ptp != NULL && !pmap_valid_entry(opte)) { 3931 if (ptp != NULL && !pmap_valid_entry(opte)) {
3932 ptp->wire_count++; 3932 ptp->wire_count++;
3933 } 3933 }
3934 KASSERT(ptp == NULL || ptp->wire_count > 1); 3934 KASSERT(ptp == NULL || ptp->wire_count > 1);
3935 3935
3936 /* 3936 /*
3937 * if the same page, we can skip pv_entry handling. 3937 * if the same page, we can skip pv_entry handling.
3938 */ 3938 */
3939 3939
3940 if (((opte ^ npte) & (PG_FRAME | PG_V)) == 0) { 3940 if (((opte ^ npte) & (PG_FRAME | PG_V)) == 0) {
3941 KASSERT(((opte ^ npte) & PG_PVLIST) == 0); 3941 KASSERT(((opte ^ npte) & PG_PVLIST) == 0);
3942 goto same_pa; 3942 goto same_pa;
3943 } 3943 }
3944 3944
3945 /* 3945 /*
3946 * if old page is managed, remove pv_entry from its list. 3946 * if old page is managed, remove pv_entry from its list.
3947 */ 3947 */
3948 3948
3949 if ((~opte & (PG_V | PG_PVLIST)) == 0) { 3949 if ((~opte & (PG_V | PG_PVLIST)) == 0) {
3950 pg = PHYS_TO_VM_PAGE(pmap_pte2pa(opte)); 3950 pg = PHYS_TO_VM_PAGE(pmap_pte2pa(opte));
3951 3951
3952 KASSERTMSG(pg != NULL, ("pmap_enter: PG_PVLIST mapping with " 3952 KASSERTMSG(pg != NULL, ("pmap_enter: PG_PVLIST mapping with "
3953 "unmanaged page pa = 0x%" PRIx64 " (0x%" PRIx64 ")", 3953 "unmanaged page pa = 0x%" PRIx64 " (0x%" PRIx64 ")",
3954 (int64_t)pa, (int64_t)atop(pa))); 3954 (int64_t)pa, (int64_t)atop(pa)));
3955 3955
3956 KASSERT(uvm_page_locked_p(pg)); 3956 KASSERT(uvm_page_locked_p(pg));
3957 3957
3958 old_pp = VM_PAGE_TO_PP(pg); 3958 old_pp = VM_PAGE_TO_PP(pg);
3959 old_pve = pmap_remove_pv(old_pp, ptp, va); 3959 old_pve = pmap_remove_pv(old_pp, ptp, va);
3960 old_pp->pp_attrs |= opte; 3960 old_pp->pp_attrs |= opte;
3961 } 3961 }
3962 3962
3963 /* 3963 /*
3964 * if new page is managed, insert pv_entry into its list. 3964 * if new page is managed, insert pv_entry into its list.
3965 */ 3965 */
3966 3966
3967 if (new_pp) { 3967 if (new_pp) {
3968 new_pve = pmap_enter_pv(new_pp, new_pve, &new_pve2, ptp, va); 3968 new_pve = pmap_enter_pv(new_pp, new_pve, &new_pve2, ptp, va);
3969 } 3969 }
3970 3970
3971same_pa: 3971same_pa:
3972 pmap_unmap_ptes(pmap, pmap2); 3972 pmap_unmap_ptes(pmap, pmap2);
3973 3973
3974 /* 3974 /*
3975 * shootdown tlb if necessary. 3975 * shootdown tlb if necessary.
3976 */ 3976 */
3977 3977
3978 if ((~opte & (PG_V | PG_U)) == 0 && 3978 if ((~opte & (PG_V | PG_U)) == 0 &&
3979 ((opte ^ npte) & (PG_FRAME | PG_RW)) != 0) { 3979 ((opte ^ npte) & (PG_FRAME | PG_RW)) != 0) {
3980 pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_ENTER); 3980 pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_ENTER);
3981 } 3981 }
3982 3982
3983 error = 0; 3983 error = 0;
3984out: 3984out:
3985 kpreempt_enable(); 3985 kpreempt_enable();
3986out2: 3986out2:
3987 if (old_pve != NULL) { 3987 if (old_pve != NULL) {
3988 pool_cache_put(&pmap_pv_cache, old_pve); 3988 pool_cache_put(&pmap_pv_cache, old_pve);
3989 } 3989 }
3990 if (new_pve != NULL) { 3990 if (new_pve != NULL) {
3991 pool_cache_put(&pmap_pv_cache, new_pve); 3991 pool_cache_put(&pmap_pv_cache, new_pve);
3992 } 3992 }
3993 if (new_pve2 != NULL) { 3993 if (new_pve2 != NULL) {
3994 pool_cache_put(&pmap_pv_cache, new_pve2); 3994 pool_cache_put(&pmap_pv_cache, new_pve2);
3995 } 3995 }
3996 3996
3997 return error; 3997 return error;
3998} 3998}
3999 3999
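Seen from the MI side, a caller that passes PMAP_CANFAIL must be ready for ENOMEM (the pve or PTP allocations above can fail) and, as with removal, must eventually call pmap_update(). A minimal hypothetical sketch, with pg, prot and wired as caller-supplied values:

        error = pmap_enter(pmap, va, VM_PAGE_TO_PHYS(pg), prot,
            prot | PMAP_CANFAIL | (wired ? PMAP_WIRED : 0));
        if (error != 0) {
                /* e.g. wait for memory (uvm_wait()) and retry, or fail the fault */
        }
        pmap_update(pmap);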
4000static bool 4000static bool
4001pmap_get_physpage(vaddr_t va, int level, paddr_t *paddrp) 4001pmap_get_physpage(vaddr_t va, int level, paddr_t *paddrp)
4002{ 4002{
4003 struct vm_page *ptp; 4003 struct vm_page *ptp;
4004 struct pmap *kpm = pmap_kernel(); 4004 struct pmap *kpm = pmap_kernel();
4005 4005
4006 if (uvm.page_init_done == false) { 4006 if (uvm.page_init_done == false) {
4007 /* 4007 /*
4008 * we're growing the kernel pmap early (from 4008 * we're growing the kernel pmap early (from
4009 * uvm_pageboot_alloc()). this case must be 4009 * uvm_pageboot_alloc()). this case must be
4010 * handled a little differently. 4010 * handled a little differently.
4011 */ 4011 */
4012 4012
4013 if (uvm_page_physget(paddrp) == false) 4013 if (uvm_page_physget(paddrp) == false)
4014 panic("pmap_get_physpage: out of memory"); 4014 panic("pmap_get_physpage: out of memory");
4015 kpreempt_disable(); 4015 kpreempt_disable();
4016 pmap_pte_set(early_zero_pte, 4016 pmap_pte_set(early_zero_pte,
4017 pmap_pa2pte(*paddrp) | PG_V | PG_RW | PG_k); 4017 pmap_pa2pte(*paddrp) | PG_V | PG_RW | PG_k);
4018 pmap_pte_flush(); 4018 pmap_pte_flush();
4019 pmap_update_pg((vaddr_t)early_zerop); 4019 pmap_update_pg((vaddr_t)early_zerop);
4020 memset(early_zerop, 0, PAGE_SIZE); 4020 memset(early_zerop, 0, PAGE_SIZE);
4021#if defined(DIAGNOSTIC) || defined(XEN) 4021#if defined(DIAGNOSTIC) || defined(XEN)
4022 pmap_pte_set(early_zero_pte, 0); 4022 pmap_pte_set(early_zero_pte, 0);
4023 pmap_pte_flush(); 4023 pmap_pte_flush();
4024#endif /* defined(DIAGNOSTIC) || defined(XEN) */ 4024#endif /* defined(DIAGNOSTIC) || defined(XEN) */
4025 kpreempt_enable(); 4025 kpreempt_enable();
4026 } else { 4026 } else {
4027 /* XXX */ 4027 /* XXX */
4028 ptp = uvm_pagealloc(NULL, 0, NULL, 4028 ptp = uvm_pagealloc(NULL, 0, NULL,
4029 UVM_PGA_USERESERVE|UVM_PGA_ZERO); 4029 UVM_PGA_USERESERVE|UVM_PGA_ZERO);
4030 if (ptp == NULL) 4030 if (ptp == NULL)
4031 panic("pmap_get_physpage: out of memory"); 4031 panic("pmap_get_physpage: out of memory");
4032 ptp->flags &= ~PG_BUSY; 4032 ptp->flags &= ~PG_BUSY;
4033 ptp->wire_count = 1; 4033 ptp->wire_count = 1;
4034 *paddrp = VM_PAGE_TO_PHYS(ptp); 4034 *paddrp = VM_PAGE_TO_PHYS(ptp);
4035 } 4035 }
4036 pmap_stats_update(kpm, 1, 0); 4036 pmap_stats_update(kpm, 1, 0);
4037 return true; 4037 return true;
4038} 4038}
4039 4039
4040/* 4040/*
4041 * Allocate the amount of specified ptps for a ptp level, and populate 4041 * Allocate the amount of specified ptps for a ptp level, and populate
4042 * all levels below accordingly, mapping virtual addresses starting at 4042 * all levels below accordingly, mapping virtual addresses starting at
4043 * kva. 4043 * kva.
4044 * 4044 *
4045 * Used by pmap_growkernel. 4045 * Used by pmap_growkernel.
4046 */ 4046 */
4047static void 4047static void
4048pmap_alloc_level(pd_entry_t * const *pdes, vaddr_t kva, int lvl, 4048pmap_alloc_level(pd_entry_t * const *pdes, vaddr_t kva, int lvl,
4049 long *needed_ptps) 4049 long *needed_ptps)
4050{ 4050{
4051 unsigned long i; 4051 unsigned long i;
4052 vaddr_t va; 4052 vaddr_t va;
4053 paddr_t pa; 4053 paddr_t pa;
4054 unsigned long index, endindex; 4054 unsigned long index, endindex;
4055 int level; 4055 int level;
4056 pd_entry_t *pdep; 4056 pd_entry_t *pdep;
4057#ifdef XEN 4057#ifdef XEN
4058 int s = splvm(); /* protect xpq_* */ 4058 int s = splvm(); /* protect xpq_* */
4059#endif 4059#endif
4060 4060
4061 for (level = lvl; level > 1; level--) { 4061 for (level = lvl; level > 1; level--) {
4062 if (level == PTP_LEVELS) 4062 if (level == PTP_LEVELS)
4063 pdep = pmap_kernel()->pm_pdir; 4063 pdep = pmap_kernel()->pm_pdir;
4064 else 4064 else
4065 pdep = pdes[level - 2]; 4065 pdep = pdes[level - 2];
4066 va = kva; 4066 va = kva;
4067 index = pl_i_roundup(kva, level); 4067 index = pl_i_roundup(kva, level);
4068 endindex = index + needed_ptps[level - 1] - 1; 4068 endindex = index + needed_ptps[level - 1] - 1;
4069 4069
4070 4070
4071 for (i = index; i <= endindex; i++) { 4071 for (i = index; i <= endindex; i++) {
4072 KASSERT(!pmap_valid_entry(pdep[i])); 4072 KASSERT(!pmap_valid_entry(pdep[i]));
4073 pmap_get_physpage(va, level - 1, &pa); 4073 pmap_get_physpage(va, level - 1, &pa);
4074#ifdef XEN 4074#ifdef XEN
4075 xpq_queue_pte_update((level == PTP_LEVELS) ? 4075 xpq_queue_pte_update((level == PTP_LEVELS) ?
4076 xpmap_ptom(pmap_pdirpa(pmap_kernel(), i)) : 4076 xpmap_ptom(pmap_pdirpa(pmap_kernel(), i)) :
4077 xpmap_ptetomach(&pdep[i]), 4077 xpmap_ptetomach(&pdep[i]),
4078 pmap_pa2pte(pa) | PG_k | PG_V | PG_RW); 4078 pmap_pa2pte(pa) | PG_k | PG_V | PG_RW);
4079#ifdef PAE 4079#ifdef PAE
4080 if (level == PTP_LEVELS && i > L2_SLOT_KERN) { 4080 if (level == PTP_LEVELS && i > L2_SLOT_KERN) {
4081 /* update real kernel PD too */ 4081 /* update real kernel PD too */
4082 xpq_queue_pte_update( 4082 xpq_queue_pte_update(
4083 xpmap_ptetomach(&pmap_kl2pd[l2tol2(i)]), 4083 xpmap_ptetomach(&pmap_kl2pd[l2tol2(i)]),
4084 pmap_pa2pte(pa) | PG_k | PG_V | PG_RW); 4084 pmap_pa2pte(pa) | PG_k | PG_V | PG_RW);
4085 } 4085 }
4086#endif 4086#endif
4087#else /* XEN */ 4087#else /* XEN */
4088 pdep[i] = pmap_pa2pte(pa) | PG_k | PG_V | PG_RW; 4088 pdep[i] = pmap_pa2pte(pa) | PG_k | PG_V | PG_RW;
4089#endif /* XEN */ 4089#endif /* XEN */
4090 KASSERT(level != PTP_LEVELS || nkptp[level - 1] + 4090 KASSERT(level != PTP_LEVELS || nkptp[level - 1] +
4091 pl_i(VM_MIN_KERNEL_ADDRESS, level) == i); 4091 pl_i(VM_MIN_KERNEL_ADDRESS, level) == i);
4092 nkptp[level - 1]++; 4092 nkptp[level - 1]++;
4093 va += nbpd[level - 1]; 4093 va += nbpd[level - 1];
4094 } 4094 }
4095 pmap_pte_flush(); 4095 pmap_pte_flush();
4096 } 4096 }
4097#ifdef XEN 4097#ifdef XEN
4098 splx(s); 4098 splx(s);
4099#endif 4099#endif
4100} 4100}

/*
 * pmap_growkernel: increase usage of KVM space
 *
 * => we allocate new PTPs for the kernel and install them in all
 *    the pmaps on the system.
 */

vaddr_t
pmap_growkernel(vaddr_t maxkvaddr)
{
        struct pmap *kpm = pmap_kernel();
#if !defined(XEN) || !defined(__x86_64__)
        struct pmap *pm;
#endif
        int s, i;
        long needed_kptp[PTP_LEVELS], target_nptp, old;
        bool invalidate = false;

        s = splvm();    /* to be safe */
        mutex_enter(kpm->pm_lock);

        if (maxkvaddr <= pmap_maxkvaddr) {
                mutex_exit(kpm->pm_lock);
                splx(s);
                return pmap_maxkvaddr;
        }

        maxkvaddr = x86_round_pdr(maxkvaddr);
        old = nkptp[PTP_LEVELS - 1];
        /*
         * This loop could be optimized more, but pmap_growkernel()
         * is called infrequently.
         */
        for (i = PTP_LEVELS - 1; i >= 1; i--) {
                target_nptp = pl_i_roundup(maxkvaddr, i + 1) -
                    pl_i_roundup(VM_MIN_KERNEL_ADDRESS, i + 1);
                /*
                 * XXX only need to check toplevel.
                 */
                if (target_nptp > nkptpmax[i])
                        panic("out of KVA space");
                KASSERT(target_nptp >= nkptp[i]);
                needed_kptp[i] = target_nptp - nkptp[i];
        }

        pmap_alloc_level(normal_pdes, pmap_maxkvaddr, PTP_LEVELS, needed_kptp);

        /*
         * If the number of top level entries changed, update all
         * pmaps.
         */
        if (needed_kptp[PTP_LEVELS - 1] != 0) {
#ifdef XEN
#ifdef __x86_64__
                /* nothing, kernel entries are never entered in user pmap */
#else /* __x86_64__ */
                mutex_enter(&pmaps_lock);
                LIST_FOREACH(pm, &pmaps, pm_list) {
                        int pdkidx;
                        for (pdkidx = PDIR_SLOT_KERN + old;
                            pdkidx < PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1];
                            pdkidx++) {
                                xpq_queue_pte_update(
                                    xpmap_ptom(pmap_pdirpa(pm, pdkidx)),
                                    kpm->pm_pdir[pdkidx]);
                        }
                        xpq_flush_queue();
                }
                mutex_exit(&pmaps_lock);
#endif /* __x86_64__ */
#else /* XEN */
                unsigned newpdes;
                newpdes = nkptp[PTP_LEVELS - 1] - old;
                mutex_enter(&pmaps_lock);
                LIST_FOREACH(pm, &pmaps, pm_list) {
                        memcpy(&pm->pm_pdir[PDIR_SLOT_KERN + old],
                            &kpm->pm_pdir[PDIR_SLOT_KERN + old],
                            newpdes * sizeof (pd_entry_t));
                }
                mutex_exit(&pmaps_lock);
#endif
                invalidate = true;
        }
        pmap_maxkvaddr = maxkvaddr;
        mutex_exit(kpm->pm_lock);
        splx(s);

        if (invalidate) {
                /* Invalidate the PDP cache. */
                pool_cache_invalidate(&pmap_pdp_cache);
        }

        return maxkvaddr;
}
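
pmap_growkernel() is normally driven from the VM system when a kernel map runs past the region that is already mapped. A minimal sketch of that call pattern follows; "kva_end" and "grow_kva_to" are hypothetical names, not the real uvm bookkeeping.

/*
 * Illustrative sketch only: how a kernel-VA consumer might drive
 * pmap_growkernel().  "kva_end" and "grow_kva_to" are hypothetical.
 */
static vaddr_t kva_end;         /* end of currently mapped kernel VA */

static void
grow_kva_to(vaddr_t new_end)
{

        if (new_end > kva_end) {
                /* The pmap rounds up and allocates the needed PTPs. */
                kva_end = pmap_growkernel(new_end);
        }
}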

#ifdef DEBUG
void pmap_dump(struct pmap *, vaddr_t, vaddr_t);

/*
 * pmap_dump: dump all the mappings from a pmap
 *
 * => caller should not be holding any pmap locks
 */

void
pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
{
        pt_entry_t *ptes, *pte;
        pd_entry_t * const *pdes;
        struct pmap *pmap2;
        vaddr_t blkendva;

        /*
         * if end is out of range, truncate.
         * if (end == start), update to max.
         */

        if (eva > VM_MAXUSER_ADDRESS || eva <= sva)
                eva = VM_MAXUSER_ADDRESS;

        /*
         * we lock in the pmap => pv_head direction
         */

        kpreempt_disable();
        pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);      /* locks pmap */

        /*
         * dumping a range of pages: we dump in PTP-sized blocks
         * (4MB on i386, 2MB with PAE or on amd64)
         */

        for (/* null */ ; sva < eva ; sva = blkendva) {

                /* determine range of block */
                blkendva = x86_round_pdr(sva + 1);
                if (blkendva > eva)
                        blkendva = eva;

                /* valid block? */
                if (!pmap_pdes_valid(sva, pdes, NULL))
                        continue;

                pte = &ptes[pl1_i(sva)];
                for (/* null */; sva < blkendva ; sva += PAGE_SIZE, pte++) {
                        if (!pmap_valid_entry(*pte))
                                continue;
                        printf("va %#" PRIxVADDR " -> pa %#" PRIxPADDR
                            " (pte=%#" PRIxPADDR ")\n",
                            sva, (paddr_t)pmap_pte2pa(*pte), (paddr_t)*pte);
                }
        }
        pmap_unmap_ptes(pmap, pmap2);
        kpreempt_enable();
}
#endif
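
Since pmap_dump() is only compiled under DEBUG and must be called without pmap locks held, here is a hedged example of wiring it up; the proc handling and the helper name are illustrative, not taken from the tree.

#ifdef DEBUG
/*
 * Sketch: print the mappings covering the first 16 MB of a user pmap.
 * Assumes the caller holds a reference on "p" and no pmap locks.
 * "pmap_dump_low" is a hypothetical helper.
 */
static void
pmap_dump_low(struct proc *p)
{
        struct pmap *pm = vm_map_pmap(&p->p_vmspace->vm_map);

        pmap_dump(pm, 0, 16 * 1024 * 1024);
}
#endif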

/*
 * pmap_update: process deferred invalidations and frees.
 */

void
pmap_update(struct pmap *pmap)
{
        struct vm_page *empty_ptps;
        lwp_t *l = curlwp;

        /*
         * If we have torn down this pmap, invalidate non-global TLB
         * entries on any processors using it.
         */
        KPREEMPT_DISABLE(l);
        if (__predict_false(l->l_md.md_gc_pmap == pmap)) {
                l->l_md.md_gc_pmap = NULL;
                pmap_tlb_shootdown(pmap, (vaddr_t)-1LL, 0, TLBSHOOT_UPDATE);
        }
        /*
         * Initiate any pending TLB shootdowns.  Wait for them to
         * complete before returning control to the caller.
         */
        pmap_tlb_shootnow();
        KPREEMPT_ENABLE(l);

        /*
         * Now that shootdowns are complete, process deferred frees,
         * but not from interrupt context.
         */
        if (l->l_md.md_gc_ptp != NULL) {
                KASSERT((l->l_pflag & LP_INTR) == 0);
                if (cpu_intr_p()) {
                        return;
                }
                empty_ptps = l->l_md.md_gc_ptp;
                l->l_md.md_gc_ptp = NULL;
                pmap_free_ptps(empty_ptps);
        }
}
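
pmap_update() is the second half of the usual batching contract: callers make a run of mapping changes and then call it once so the deferred shootdowns and PTP frees are processed together. A minimal sketch of that pattern, with illustrative names:

/*
 * Sketch of the batching pattern: enter a run of mappings, then let
 * pmap_update() push out the deferred TLB shootdowns and frees in one
 * go.  "map_pages" and its arguments are illustrative; error handling
 * for pmap_enter() is omitted.
 */
static void
map_pages(struct pmap *pm, vaddr_t va, paddr_t pa, size_t npgs)
{
        size_t i;

        for (i = 0; i < npgs; i++) {
                pmap_enter(pm, va + i * PAGE_SIZE, pa + i * PAGE_SIZE,
                    VM_PROT_READ | VM_PROT_WRITE, 0);
        }
        pmap_update(pm);
}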

#if PTP_LEVELS > 4
#error "Unsupported number of page table mappings"
#endif

paddr_t
pmap_init_tmp_pgtbl(paddr_t pg)
{
        static bool maps_loaded;
        static const paddr_t x86_tmp_pml_paddr[] = {
                4 * PAGE_SIZE,
                5 * PAGE_SIZE,
                6 * PAGE_SIZE,
                7 * PAGE_SIZE
        };
        static vaddr_t x86_tmp_pml_vaddr[] = { 0, 0, 0, 0 };

        pd_entry_t *tmp_pml, *kernel_pml;
        int level;

        if (!maps_loaded) {
                for (level = 0; level < PTP_LEVELS; ++level) {
                        x86_tmp_pml_vaddr[level] =
                            uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
                            UVM_KMF_VAONLY);

                        if (x86_tmp_pml_vaddr[level] == 0)
                                panic("mapping of real mode PML failed\n");
                        pmap_kenter_pa(x86_tmp_pml_vaddr[level],
                            x86_tmp_pml_paddr[level],
                            VM_PROT_READ | VM_PROT_WRITE, 0);
                        pmap_update(pmap_kernel());
                }
                maps_loaded = true;
        }

        /* Zero levels 1-3 */
        for (level = 0; level < PTP_LEVELS - 1; ++level) {
                tmp_pml = (void *)x86_tmp_pml_vaddr[level];
                memset(tmp_pml, 0, PAGE_SIZE);
        }

        /* Copy PML4 */
        kernel_pml = pmap_kernel()->pm_pdir;
        tmp_pml = (void *)x86_tmp_pml_vaddr[PTP_LEVELS - 1];
        memcpy(tmp_pml, kernel_pml, PAGE_SIZE);

#ifdef PAE
        /*
         * Use the last 4 entries of the L2 page as L3 PD entries.  These
         * last entries are unlikely to be used for temporary mappings.
         *  508: maps 0->1GB (userland)
         *  509: unused
         *  510: unused
         *  511: maps 3->4GB (kernel)
         */
        tmp_pml[508] = x86_tmp_pml_paddr[PTP_LEVELS - 1] | PG_V;
        tmp_pml[509] = 0;
        tmp_pml[510] = 0;
        tmp_pml[511] = pmap_pdirpa(pmap_kernel(), PDIR_SLOT_KERN) | PG_V;
#endif

        for (level = PTP_LEVELS - 1; level > 0; --level) {
                tmp_pml = (void *)x86_tmp_pml_vaddr[level];

                tmp_pml[pl_i(pg, level + 1)] =
                    (x86_tmp_pml_paddr[level - 1] & PG_FRAME) | PG_RW | PG_V;
        }

        tmp_pml = (void *)x86_tmp_pml_vaddr[0];
        tmp_pml[pl_i(pg, 1)] = (pg & PG_FRAME) | PG_RW | PG_V;

#ifdef PAE
        /* Return the PA of the L3 page (entry 508 of the L2 page) */
        return x86_tmp_pml_paddr[PTP_LEVELS - 1] + 508 * sizeof(pd_entry_t);
#endif

        return x86_tmp_pml_paddr[PTP_LEVELS - 1];
}
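
For the PAE return value above, the arithmetic assumes the usual 8-byte PAE pd_entry_t: entry 508 sits 508 * 8 = 4064 bytes into the L2 page, so the physical address handed back points at the four-entry block standing in for the L3, not at the start of the page.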

u_int
x86_mmap_flags(paddr_t mdpgno)
{
        u_int nflag = (mdpgno >> X86_MMAP_FLAG_SHIFT) & X86_MMAP_FLAG_MASK;
        u_int pflag = 0;

        if (nflag & X86_MMAP_FLAG_PREFETCH)
                pflag |= PMAP_WRITE_COMBINE;

        return pflag;
}
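
The producer side of this encoding is a driver's d_mmap routine, which ORs the hint bits in above X86_MMAP_FLAG_SHIFT before returning the page number. A hedged sketch, with a hypothetical frame buffer described by fb_base/fb_size:

/*
 * Sketch of the producer side: return a machine-dependent page number
 * with X86_MMAP_FLAG_PREFETCH encoded above X86_MMAP_FLAG_SHIFT, so
 * that x86_mmap_flags() later turns it into PMAP_WRITE_COMBINE.
 * "mydev_mmap", "fb_base" and "fb_size" are hypothetical.
 */
static paddr_t
mydev_mmap(dev_t dev, off_t off, int prot)
{

        if (off < 0 || off >= fb_size)
                return (paddr_t)-1;

        return x86_btop(fb_base + off) |
            ((paddr_t)X86_MMAP_FLAG_PREFETCH << X86_MMAP_FLAG_SHIFT);
}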