Sun Jan 29 11:37:08 2012 UTC
Remove APTE-related shootdowns.


Committed by: cherry

cvs diff -r1.157 -r1.158 src/sys/arch/x86/x86/pmap.c

--- src/sys/arch/x86/x86/pmap.c 2012/01/28 08:57:09 1.157
+++ src/sys/arch/x86/x86/pmap.c 2012/01/29 11:37:08 1.158
@@ -1,2912 +1,2898 @@
1/* $NetBSD: pmap.c,v 1.157 2012/01/28 08:57:09 cherry Exp $ */ 1/* $NetBSD: pmap.c,v 1.158 2012/01/29 11:37:08 cherry Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc. 4 * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran. 8 * by Andrew Doran.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * Copyright (c) 2007 Manuel Bouyer. 33 * Copyright (c) 2007 Manuel Bouyer.
34 * 34 *
35 * Redistribution and use in source and binary forms, with or without 35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions 36 * modification, are permitted provided that the following conditions
37 * are met: 37 * are met:
38 * 1. Redistributions of source code must retain the above copyright 38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer. 39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright 40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the 41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution. 42 * documentation and/or other materials provided with the distribution.
43 * 43 *
44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
47 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 47 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
49 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 49 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
50 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 50 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
51 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 51 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
52 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 52 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
53 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 53 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
54 * 54 *
55 */ 55 */
56 56
57/* 57/*
58 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr> 58 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
59 * 59 *
60 * Permission to use, copy, modify, and distribute this software for any 60 * Permission to use, copy, modify, and distribute this software for any
61 * purpose with or without fee is hereby granted, provided that the above 61 * purpose with or without fee is hereby granted, provided that the above
62 * copyright notice and this permission notice appear in all copies. 62 * copyright notice and this permission notice appear in all copies.
63 * 63 *
64 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 64 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
65 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 65 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
66 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 66 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
67 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 67 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
68 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 68 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
69 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 69 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
70 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 70 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
71 */ 71 */
72 72
73/* 73/*
74 * Copyright (c) 1997 Charles D. Cranor and Washington University. 74 * Copyright (c) 1997 Charles D. Cranor and Washington University.
75 * All rights reserved. 75 * All rights reserved.
76 * 76 *
77 * Redistribution and use in source and binary forms, with or without 77 * Redistribution and use in source and binary forms, with or without
78 * modification, are permitted provided that the following conditions 78 * modification, are permitted provided that the following conditions
79 * are met: 79 * are met:
80 * 1. Redistributions of source code must retain the above copyright 80 * 1. Redistributions of source code must retain the above copyright
81 * notice, this list of conditions and the following disclaimer. 81 * notice, this list of conditions and the following disclaimer.
82 * 2. Redistributions in binary form must reproduce the above copyright 82 * 2. Redistributions in binary form must reproduce the above copyright
83 * notice, this list of conditions and the following disclaimer in the 83 * notice, this list of conditions and the following disclaimer in the
84 * documentation and/or other materials provided with the distribution. 84 * documentation and/or other materials provided with the distribution.
85 * 85 *
86 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 86 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
87 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 87 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
88 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 88 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
89 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 89 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
90 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 90 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
91 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 91 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
92 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 92 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
93 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 93 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
94 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 94 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
95 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 95 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
96 */ 96 */
97 97
98/* 98/*
99 * Copyright 2001 (c) Wasabi Systems, Inc. 99 * Copyright 2001 (c) Wasabi Systems, Inc.
100 * All rights reserved. 100 * All rights reserved.
101 * 101 *
102 * Written by Frank van der Linden for Wasabi Systems, Inc. 102 * Written by Frank van der Linden for Wasabi Systems, Inc.
103 * 103 *
104 * Redistribution and use in source and binary forms, with or without 104 * Redistribution and use in source and binary forms, with or without
105 * modification, are permitted provided that the following conditions 105 * modification, are permitted provided that the following conditions
106 * are met: 106 * are met:
107 * 1. Redistributions of source code must retain the above copyright 107 * 1. Redistributions of source code must retain the above copyright
108 * notice, this list of conditions and the following disclaimer. 108 * notice, this list of conditions and the following disclaimer.
109 * 2. Redistributions in binary form must reproduce the above copyright 109 * 2. Redistributions in binary form must reproduce the above copyright
110 * notice, this list of conditions and the following disclaimer in the 110 * notice, this list of conditions and the following disclaimer in the
111 * documentation and/or other materials provided with the distribution. 111 * documentation and/or other materials provided with the distribution.
112 * 3. All advertising materials mentioning features or use of this software 112 * 3. All advertising materials mentioning features or use of this software
113 * must display the following acknowledgement: 113 * must display the following acknowledgement:
114 * This product includes software developed for the NetBSD Project by 114 * This product includes software developed for the NetBSD Project by
115 * Wasabi Systems, Inc. 115 * Wasabi Systems, Inc.
116 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 116 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
117 * or promote products derived from this software without specific prior 117 * or promote products derived from this software without specific prior
118 * written permission. 118 * written permission.
119 * 119 *
120 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 120 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
121 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 121 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
122 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 122 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
123 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 123 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
124 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 124 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
125 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 125 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
126 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 126 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
127 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 127 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
128 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 128 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
129 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 129 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
130 * POSSIBILITY OF SUCH DAMAGE. 130 * POSSIBILITY OF SUCH DAMAGE.
131 */ 131 */
132 132
133/* 133/*
134 * This is the i386 pmap modified and generalized to support x86-64 134 * This is the i386 pmap modified and generalized to support x86-64
135 * as well. The idea is to hide the upper N levels of the page tables 135 * as well. The idea is to hide the upper N levels of the page tables
136 * inside pmap_get_ptp, pmap_free_ptp and pmap_growkernel. The rest 136 * inside pmap_get_ptp, pmap_free_ptp and pmap_growkernel. The rest
137 * is mostly untouched, except that it uses some more generalized 137 * is mostly untouched, except that it uses some more generalized
138 * macros and interfaces. 138 * macros and interfaces.
139 * 139 *
140 * This pmap has been tested on the i386 as well, and it can be easily 140 * This pmap has been tested on the i386 as well, and it can be easily
141 * adapted to PAE. 141 * adapted to PAE.
142 * 142 *
143 * fvdl@wasabisystems.com 18-Jun-2001 143 * fvdl@wasabisystems.com 18-Jun-2001
144 */ 144 */
145 145
146/* 146/*
147 * pmap.c: i386 pmap module rewrite 147 * pmap.c: i386 pmap module rewrite
148 * Chuck Cranor <chuck@netbsd> 148 * Chuck Cranor <chuck@netbsd>
149 * 11-Aug-97 149 * 11-Aug-97
150 * 150 *
151 * history of this pmap module: in addition to my own input, i used 151 * history of this pmap module: in addition to my own input, i used
152 * the following references for this rewrite of the i386 pmap: 152 * the following references for this rewrite of the i386 pmap:
153 * 153 *
154 * [1] the NetBSD i386 pmap. this pmap appears to be based on the 154 * [1] the NetBSD i386 pmap. this pmap appears to be based on the
155 * BSD hp300 pmap done by Mike Hibler at University of Utah. 155 * BSD hp300 pmap done by Mike Hibler at University of Utah.
156 * it was then ported to the i386 by William Jolitz of UUNET 156 * it was then ported to the i386 by William Jolitz of UUNET
157 * Technologies, Inc. Then Charles M. Hannum of the NetBSD 157 * Technologies, Inc. Then Charles M. Hannum of the NetBSD
158 * project fixed some bugs and provided some speed ups. 158 * project fixed some bugs and provided some speed ups.
159 * 159 *
160 * [2] the FreeBSD i386 pmap. this pmap seems to be the 160 * [2] the FreeBSD i386 pmap. this pmap seems to be the
161 * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson 161 * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson
162 * and David Greenman. 162 * and David Greenman.
163 * 163 *
164 * [3] the Mach pmap. this pmap, from CMU, seems to have migrated 164 * [3] the Mach pmap. this pmap, from CMU, seems to have migrated
165 * between several processors. the VAX version was done by 165 * between several processors. the VAX version was done by
166 * Avadis Tevanian, Jr., and Michael Wayne Young. the i386 166 * Avadis Tevanian, Jr., and Michael Wayne Young. the i386
167 * version was done by Lance Berc, Mike Kupfer, Bob Baron, 167 * version was done by Lance Berc, Mike Kupfer, Bob Baron,
168 * David Golub, and Richard Draves. the alpha version was 168 * David Golub, and Richard Draves. the alpha version was
169 * done by Alessandro Forin (CMU/Mach) and Chris Demetriou 169 * done by Alessandro Forin (CMU/Mach) and Chris Demetriou
170 * (NetBSD/alpha). 170 * (NetBSD/alpha).
171 */ 171 */
172 172
173#include <sys/cdefs.h> 173#include <sys/cdefs.h>
174__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.157 2012/01/28 08:57:09 cherry Exp $"); 174__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.158 2012/01/29 11:37:08 cherry Exp $");
175 175
176#include "opt_user_ldt.h" 176#include "opt_user_ldt.h"
177#include "opt_lockdebug.h" 177#include "opt_lockdebug.h"
178#include "opt_multiprocessor.h" 178#include "opt_multiprocessor.h"
179#include "opt_xen.h" 179#include "opt_xen.h"
180#if !defined(__x86_64__) 180#if !defined(__x86_64__)
181#include "opt_kstack_dr0.h" 181#include "opt_kstack_dr0.h"
182#endif /* !defined(__x86_64__) */ 182#endif /* !defined(__x86_64__) */
183 183
184#include <sys/param.h> 184#include <sys/param.h>
185#include <sys/systm.h> 185#include <sys/systm.h>
186#include <sys/proc.h> 186#include <sys/proc.h>
187#include <sys/pool.h> 187#include <sys/pool.h>
188#include <sys/kernel.h> 188#include <sys/kernel.h>
189#include <sys/atomic.h> 189#include <sys/atomic.h>
190#include <sys/cpu.h> 190#include <sys/cpu.h>
191#include <sys/intr.h> 191#include <sys/intr.h>
192#include <sys/xcall.h> 192#include <sys/xcall.h>
193#include <sys/kcore.h> 193#include <sys/kcore.h>
194 194
195#include <uvm/uvm.h> 195#include <uvm/uvm.h>
196 196
197#include <dev/isa/isareg.h> 197#include <dev/isa/isareg.h>
198 198
199#include <machine/specialreg.h> 199#include <machine/specialreg.h>
200#include <machine/gdt.h> 200#include <machine/gdt.h>
201#include <machine/isa_machdep.h> 201#include <machine/isa_machdep.h>
202#include <machine/cpuvar.h> 202#include <machine/cpuvar.h>
203#include <machine/cputypes.h> 203#include <machine/cputypes.h>
204 204
205#include <x86/pmap.h> 205#include <x86/pmap.h>
206#include <x86/pmap_pv.h> 206#include <x86/pmap_pv.h>
207 207
208#include <x86/i82489reg.h> 208#include <x86/i82489reg.h>
209#include <x86/i82489var.h> 209#include <x86/i82489var.h>
210 210
211#ifdef XEN 211#ifdef XEN
212#include <xen/xen-public/xen.h> 212#include <xen/xen-public/xen.h>
213#include <xen/hypervisor.h> 213#include <xen/hypervisor.h>
214#endif 214#endif
215 215
216/* 216/*
217 * general info: 217 * general info:
218 * 218 *
219 * - for an explanation of how the i386 MMU hardware works see 219 * - for an explanation of how the i386 MMU hardware works see
220 * the comments in <machine/pte.h>. 220 * the comments in <machine/pte.h>.
221 * 221 *
222 * - for an explanation of the general memory structure used by 222 * - for an explanation of the general memory structure used by
223 * this pmap (including the recursive mapping), see the comments 223 * this pmap (including the recursive mapping), see the comments
224 * in <machine/pmap.h>. 224 * in <machine/pmap.h>.
225 * 225 *
226 * this file contains the code for the "pmap module." the module's 226 * this file contains the code for the "pmap module." the module's
227 * job is to manage the hardware's virtual to physical address mappings. 227 * job is to manage the hardware's virtual to physical address mappings.
228 * note that there are two levels of mapping in the VM system: 228 * note that there are two levels of mapping in the VM system:
229 * 229 *
230 * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's 230 * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's
231 * to map ranges of virtual address space to objects/files. for 231 * to map ranges of virtual address space to objects/files. for
232 * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only 232 * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only
233 * to the file /bin/ls starting at offset zero." note that 233 * to the file /bin/ls starting at offset zero." note that
234 * the upper layer mapping is not concerned with how individual 234 * the upper layer mapping is not concerned with how individual
235 * vm_pages are mapped. 235 * vm_pages are mapped.
236 * 236 *
237 * [2] the lower layer of the VM system (the pmap) maintains the mappings 237 * [2] the lower layer of the VM system (the pmap) maintains the mappings
238 * from virtual addresses. it is concerned with which vm_page is 238 * from virtual addresses. it is concerned with which vm_page is
239 * mapped where. for example, when you run /bin/ls and start 239 * mapped where. for example, when you run /bin/ls and start
240 * at page 0x1000 the fault routine may lookup the correct page 240 * at page 0x1000 the fault routine may lookup the correct page
241 * of the /bin/ls file and then ask the pmap layer to establish 241 * of the /bin/ls file and then ask the pmap layer to establish
242 * a mapping for it. 242 * a mapping for it.
243 * 243 *
244 * note that information in the lower layer of the VM system can be 244 * note that information in the lower layer of the VM system can be
245 * thrown away since it can easily be reconstructed from the info 245 * thrown away since it can easily be reconstructed from the info
246 * in the upper layer. 246 * in the upper layer.
247 * 247 *
248 * data structures we use include: 248 * data structures we use include:
249 * 249 *
250 * - struct pmap: describes the address space of one thread 250 * - struct pmap: describes the address space of one thread
251 * - struct pv_entry: describes one <PMAP,VA> mapping of a PA 251 * - struct pv_entry: describes one <PMAP,VA> mapping of a PA
252 * - struct pv_head: there is one pv_head per managed page of 252 * - struct pv_head: there is one pv_head per managed page of
253 * physical memory. the pv_head points to a list of pv_entry 253 * physical memory. the pv_head points to a list of pv_entry
254 * structures which describe all the <PMAP,VA> pairs that this 254 * structures which describe all the <PMAP,VA> pairs that this
255 * page is mapped in. this is critical for page based operations 255 * page is mapped in. this is critical for page based operations
256 * such as pmap_page_protect() [change protection on _all_ mappings 256 * such as pmap_page_protect() [change protection on _all_ mappings
257 * of a page] 257 * of a page]
258 */ 258 */
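
[Note] The data structures described in the comment above (one pv_head per managed physical page, each chaining pv_entry records for every <PMAP,VA> pair that maps it) can be pictured with the standalone sketch below. The ex_* names are hypothetical and deliberately simpler than the pmap_pv.h definitions this file really uses.

#include <sys/queue.h>
#include <stdint.h>

struct ex_pmap;                          /* stand-in for struct pmap */

struct ex_pv_entry {                     /* one <PMAP,VA> mapping of a PA */
        LIST_ENTRY(ex_pv_entry) pve_list;
        struct ex_pmap *pve_pmap;        /* pmap that owns this mapping */
        uintptr_t pve_va;                /* VA at which the page is mapped */
};

struct ex_pv_head {                      /* one per managed physical page */
        LIST_HEAD(, ex_pv_entry) pvh_list;
};

/* a page-based operation: visit every <PMAP,VA> pair mapping this page */
static void
ex_foreach_mapping(struct ex_pv_head *pvh,
    void (*fn)(struct ex_pmap *, uintptr_t))
{
        struct ex_pv_entry *pve;

        LIST_FOREACH(pve, &pvh->pvh_list, pve_list)
                fn(pve->pve_pmap, pve->pve_va);
}
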
259 259
260/* 260/*
261 * memory allocation 261 * memory allocation
262 * 262 *
263 * - there are three data structures that we must dynamically allocate: 263 * - there are three data structures that we must dynamically allocate:
264 * 264 *
265 * [A] new process' page directory page (PDP) 265 * [A] new process' page directory page (PDP)
266 * - plan 1: done at pmap_create() we use 266 * - plan 1: done at pmap_create() we use
267 * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this 267 * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this
268 * allocation. 268 * allocation.
269 * 269 *
270 * if we are low in free physical memory then we sleep in 270 * if we are low in free physical memory then we sleep in
271 * uvm_km_alloc -- in this case this is ok since we are creating 271 * uvm_km_alloc -- in this case this is ok since we are creating
272 * a new pmap and should not be holding any locks. 272 * a new pmap and should not be holding any locks.
273 * 273 *
274 * if the kernel is totally out of virtual space 274 * if the kernel is totally out of virtual space
275 * (i.e. uvm_km_alloc returns NULL), then we panic. 275 * (i.e. uvm_km_alloc returns NULL), then we panic.
276 * 276 *
277 * [B] new page tables pages (PTP) 277 * [B] new page tables pages (PTP)
278 * - call uvm_pagealloc() 278 * - call uvm_pagealloc()
279 * => success: zero page, add to pm_pdir 279 * => success: zero page, add to pm_pdir
280 * => failure: we are out of free vm_pages, let pmap_enter() 280 * => failure: we are out of free vm_pages, let pmap_enter()
281 * tell UVM about it. 281 * tell UVM about it.
282 * 282 *
283 * note: for kernel PTPs, we start with NKPTP of them. as we map 283 * note: for kernel PTPs, we start with NKPTP of them. as we map
284 * kernel memory (at uvm_map time) we check to see if we've grown 284 * kernel memory (at uvm_map time) we check to see if we've grown
285 * the kernel pmap. if so, we call the optional function 285 * the kernel pmap. if so, we call the optional function
286 * pmap_growkernel() to grow the kernel PTPs in advance. 286 * pmap_growkernel() to grow the kernel PTPs in advance.
287 * 287 *
288 * [C] pv_entry structures 288 * [C] pv_entry structures
289 */ 289 */
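
[Note] For plan [B] the policy boils down to: try to allocate a zeroed page for the PTP, hook it into the page directory on success, and on failure report the shortage back to pmap_enter() instead of sleeping. A standalone sketch of that policy follows; ex_pagealloc() is a hypothetical stand-in for uvm_pagealloc(), not the allocation path this file actually takes.

#include <stdlib.h>
#include <stdbool.h>

#define EX_PAGE_SIZE 4096

/* hypothetical page allocator: may fail, never sleeps */
static void *
ex_pagealloc(void)
{
        return calloc(1, EX_PAGE_SIZE);  /* zeroed, in the spirit of UVM_PGA_ZERO */
}

/* provide a page-table page; false means "caller decides how to recover" */
static bool
ex_get_ptp(void **pdir_slot)
{
        void *ptp = ex_pagealloc();

        if (ptp == NULL)
                return false;            /* out of pages: report, do not block */
        *pdir_slot = ptp;                /* "add to pm_pdir" */
        return true;
}
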
290 290
291/* 291/*
292 * locking 292 * locking
293 * 293 *
294 * we have the following locks that we must contend with: 294 * we have the following locks that we must contend with:
295 * 295 *
296 * mutexes: 296 * mutexes:
297 * 297 *
298 * - pmap lock (per pmap, part of uvm_object) 298 * - pmap lock (per pmap, part of uvm_object)
299 * this lock protects the fields in the pmap structure including 299 * this lock protects the fields in the pmap structure including
300 * the non-kernel PDEs in the PDP, and the PTEs. it also locks 300 * the non-kernel PDEs in the PDP, and the PTEs. it also locks
301 * in the alternate PTE space (since that is determined by the 301 * in the alternate PTE space (since that is determined by the
302 * entry in the PDP). 302 * entry in the PDP).
303 * 303 *
304 * - pvh_lock (per pv_head) 304 * - pvh_lock (per pv_head)
305 * this lock protects the pv_entry list which is chained off the 305 * this lock protects the pv_entry list which is chained off the
306 * pv_head structure for a specific managed PA. it is locked 306 * pv_head structure for a specific managed PA. it is locked
307 * when traversing the list (e.g. adding/removing mappings, 307 * when traversing the list (e.g. adding/removing mappings,
308 * syncing R/M bits, etc.) 308 * syncing R/M bits, etc.)
309 * 309 *
310 * - pmaps_lock 310 * - pmaps_lock
311 * this lock protects the list of active pmaps (headed by "pmaps"). 311 * this lock protects the list of active pmaps (headed by "pmaps").
312 * we lock it when adding or removing pmaps from this list. 312 * we lock it when adding or removing pmaps from this list.
313 */ 313 */
314 314
315const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER; 315const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER;
316const int ptp_shifts[] = PTP_SHIFT_INITIALIZER; 316const int ptp_shifts[] = PTP_SHIFT_INITIALIZER;
317const long nkptpmax[] = NKPTPMAX_INITIALIZER; 317const long nkptpmax[] = NKPTPMAX_INITIALIZER;
318const long nbpd[] = NBPD_INITIALIZER; 318const long nbpd[] = NBPD_INITIALIZER;
319pd_entry_t * const normal_pdes[] = PDES_INITIALIZER; 319pd_entry_t * const normal_pdes[] = PDES_INITIALIZER;
320 320
321long nkptp[] = NKPTP_INITIALIZER; 321long nkptp[] = NKPTP_INITIALIZER;
322 322
323struct pmap_head pmaps; 323struct pmap_head pmaps;
324kmutex_t pmaps_lock; 324kmutex_t pmaps_lock;
325 325
326static vaddr_t pmap_maxkvaddr; 326static vaddr_t pmap_maxkvaddr;
327 327
328/* 328/*
329 * XXX kludge: dummy locking to make KASSERTs in uvm_page.c comfortable. 329 * XXX kludge: dummy locking to make KASSERTs in uvm_page.c comfortable.
330 * actual locking is done by pm_lock. 330 * actual locking is done by pm_lock.
331 */ 331 */
332#if defined(DIAGNOSTIC) 332#if defined(DIAGNOSTIC)
333#define PMAP_SUBOBJ_LOCK(pm, idx) \ 333#define PMAP_SUBOBJ_LOCK(pm, idx) \
334 KASSERT(mutex_owned((pm)->pm_lock)); \ 334 KASSERT(mutex_owned((pm)->pm_lock)); \
335 if ((idx) != 0) \ 335 if ((idx) != 0) \
336 mutex_enter((pm)->pm_obj[(idx)].vmobjlock) 336 mutex_enter((pm)->pm_obj[(idx)].vmobjlock)
337#define PMAP_SUBOBJ_UNLOCK(pm, idx) \ 337#define PMAP_SUBOBJ_UNLOCK(pm, idx) \
338 KASSERT(mutex_owned((pm)->pm_lock)); \ 338 KASSERT(mutex_owned((pm)->pm_lock)); \
339 if ((idx) != 0) \ 339 if ((idx) != 0) \
340 mutex_exit((pm)->pm_obj[(idx)].vmobjlock) 340 mutex_exit((pm)->pm_obj[(idx)].vmobjlock)
341#else /* defined(DIAGNOSTIC) */ 341#else /* defined(DIAGNOSTIC) */
342#define PMAP_SUBOBJ_LOCK(pm, idx) /* nothing */ 342#define PMAP_SUBOBJ_LOCK(pm, idx) /* nothing */
343#define PMAP_SUBOBJ_UNLOCK(pm, idx) /* nothing */ 343#define PMAP_SUBOBJ_UNLOCK(pm, idx) /* nothing */
344#endif /* defined(DIAGNOSTIC) */ 344#endif /* defined(DIAGNOSTIC) */
345 345
346/* 346/*
347 * Misc. event counters. 347 * Misc. event counters.
348 */ 348 */
349struct evcnt pmap_iobmp_evcnt; 349struct evcnt pmap_iobmp_evcnt;
350struct evcnt pmap_ldt_evcnt; 350struct evcnt pmap_ldt_evcnt;
351 351
352/* 352/*
353 * PAT 353 * PAT
354 */ 354 */
355#define PATENTRY(n, type) (type << ((n) * 8)) 355#define PATENTRY(n, type) (type << ((n) * 8))
356#define PAT_UC 0x0ULL 356#define PAT_UC 0x0ULL
357#define PAT_WC 0x1ULL 357#define PAT_WC 0x1ULL
358#define PAT_WT 0x4ULL 358#define PAT_WT 0x4ULL
359#define PAT_WP 0x5ULL 359#define PAT_WP 0x5ULL
360#define PAT_WB 0x6ULL 360#define PAT_WB 0x6ULL
361#define PAT_UCMINUS 0x7ULL 361#define PAT_UCMINUS 0x7ULL
362 362
363static bool cpu_pat_enabled __read_mostly = false; 363static bool cpu_pat_enabled __read_mostly = false;
364 364
365/* 365/*
366 * global data structures 366 * global data structures
367 */ 367 */
368 368
369static struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ 369static struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */
370struct pmap *const kernel_pmap_ptr = &kernel_pmap_store; 370struct pmap *const kernel_pmap_ptr = &kernel_pmap_store;
371 371
372/* 372/*
373 * pmap_pg_g: if our processor supports PG_G in the PTE then we 373 * pmap_pg_g: if our processor supports PG_G in the PTE then we
374 * set pmap_pg_g to PG_G (otherwise it is zero). 374 * set pmap_pg_g to PG_G (otherwise it is zero).
375 */ 375 */
376 376
377int pmap_pg_g __read_mostly = 0; 377int pmap_pg_g __read_mostly = 0;
378 378
379/* 379/*
380 * pmap_largepages: if our processor supports PG_PS and we are 380 * pmap_largepages: if our processor supports PG_PS and we are
381 * using it, this is set to true. 381 * using it, this is set to true.
382 */ 382 */
383 383
384int pmap_largepages __read_mostly; 384int pmap_largepages __read_mostly;
385 385
386/* 386/*
387 * i386 physical memory comes in a big contig chunk with a small 387 * i386 physical memory comes in a big contig chunk with a small
388 * hole toward the front of it... the following two paddr_t's 388 * hole toward the front of it... the following two paddr_t's
389 * (shared with machdep.c) describe the physical address space 389 * (shared with machdep.c) describe the physical address space
390 * of this machine. 390 * of this machine.
391 */ 391 */
392paddr_t avail_start __read_mostly; /* PA of first available physical page */ 392paddr_t avail_start __read_mostly; /* PA of first available physical page */
393paddr_t avail_end __read_mostly; /* PA of last available physical page */ 393paddr_t avail_end __read_mostly; /* PA of last available physical page */
394 394
395#ifdef XEN 395#ifdef XEN
396#ifdef __x86_64__ 396#ifdef __x86_64__
397/* Dummy PGD for user cr3, used between pmap_deactivate() and pmap_activate() */ 397/* Dummy PGD for user cr3, used between pmap_deactivate() and pmap_activate() */
398static paddr_t xen_dummy_user_pgd; 398static paddr_t xen_dummy_user_pgd;
399#endif /* __x86_64__ */ 399#endif /* __x86_64__ */
400paddr_t pmap_pa_start; /* PA of first physical page for this domain */ 400paddr_t pmap_pa_start; /* PA of first physical page for this domain */
401paddr_t pmap_pa_end; /* PA of last physical page for this domain */ 401paddr_t pmap_pa_end; /* PA of last physical page for this domain */
402#endif /* XEN */ 402#endif /* XEN */
403 403
404#define VM_PAGE_TO_PP(pg) (&(pg)->mdpage.mp_pp) 404#define VM_PAGE_TO_PP(pg) (&(pg)->mdpage.mp_pp)
405 405
406#define PV_HASH_SIZE 32768 406#define PV_HASH_SIZE 32768
407#define PV_HASH_LOCK_CNT 32 407#define PV_HASH_LOCK_CNT 32
408 408
409struct pv_hash_lock { 409struct pv_hash_lock {
410 kmutex_t lock; 410 kmutex_t lock;
411} __aligned(CACHE_LINE_SIZE) pv_hash_locks[PV_HASH_LOCK_CNT] 411} __aligned(CACHE_LINE_SIZE) pv_hash_locks[PV_HASH_LOCK_CNT]
412 __aligned(CACHE_LINE_SIZE); 412 __aligned(CACHE_LINE_SIZE);
413 413
414struct pv_hash_head { 414struct pv_hash_head {
415 SLIST_HEAD(, pv_entry) hh_list; 415 SLIST_HEAD(, pv_entry) hh_list;
416} pv_hash_heads[PV_HASH_SIZE]; 416} pv_hash_heads[PV_HASH_SIZE];
417 417
418static u_int 418static u_int
419pvhash_hash(struct vm_page *ptp, vaddr_t va) 419pvhash_hash(struct vm_page *ptp, vaddr_t va)
420{ 420{
421 421
422 return (uintptr_t)ptp / sizeof(*ptp) + (va >> PAGE_SHIFT); 422 return (uintptr_t)ptp / sizeof(*ptp) + (va >> PAGE_SHIFT);
423} 423}
424 424
425static struct pv_hash_head * 425static struct pv_hash_head *
426pvhash_head(u_int hash) 426pvhash_head(u_int hash)
427{ 427{
428 428
429 return &pv_hash_heads[hash % PV_HASH_SIZE]; 429 return &pv_hash_heads[hash % PV_HASH_SIZE];
430} 430}
431 431
432static kmutex_t * 432static kmutex_t *
433pvhash_lock(u_int hash) 433pvhash_lock(u_int hash)
434{ 434{
435 435
436 return &pv_hash_locks[hash % PV_HASH_LOCK_CNT].lock; 436 return &pv_hash_locks[hash % PV_HASH_LOCK_CNT].lock;
437} 437}
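
[Note] pvhash_hash() folds the PTP pointer and the page number of the VA into a single index; the bucket (PV_HASH_SIZE) and the lock stripe (PV_HASH_LOCK_CNT) are then selected with independent moduli, so 32 locks cover 32768 buckets. Below is a standalone sketch of that bucket/stripe selection, with hypothetical names and a fixed divisor in place of sizeof(struct vm_page).

#include <stdio.h>
#include <stdint.h>

#define EX_HASH_SIZE     32768          /* hash buckets */
#define EX_HASH_LOCK_CNT 32             /* lock stripes shared by the buckets */
#define EX_PAGE_SHIFT    12

static unsigned int
ex_pvhash(const void *ptp, uintptr_t va)
{
        /* same idea as pvhash_hash(): scaled pointer plus page number */
        return (unsigned int)((uintptr_t)ptp / 128 + (va >> EX_PAGE_SHIFT));
}

int
main(void)
{
        int dummy_ptp;                  /* stands in for a struct vm_page */
        uintptr_t va = 0x7f123000UL;
        unsigned int h = ex_pvhash(&dummy_ptp, va);

        printf("bucket %u, lock stripe %u\n",
            h % EX_HASH_SIZE, h % EX_HASH_LOCK_CNT);
        return 0;
}
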
438 438
439static struct pv_entry * 439static struct pv_entry *
440pvhash_remove(struct pv_hash_head *hh, struct vm_page *ptp, vaddr_t va) 440pvhash_remove(struct pv_hash_head *hh, struct vm_page *ptp, vaddr_t va)
441{ 441{
442 struct pv_entry *pve; 442 struct pv_entry *pve;
443 struct pv_entry *prev; 443 struct pv_entry *prev;
444 444
445 prev = NULL; 445 prev = NULL;
446 SLIST_FOREACH(pve, &hh->hh_list, pve_hash) { 446 SLIST_FOREACH(pve, &hh->hh_list, pve_hash) {
447 if (pve->pve_pte.pte_ptp == ptp && 447 if (pve->pve_pte.pte_ptp == ptp &&
448 pve->pve_pte.pte_va == va) { 448 pve->pve_pte.pte_va == va) {
449 if (prev != NULL) { 449 if (prev != NULL) {
450 SLIST_REMOVE_AFTER(prev, pve_hash); 450 SLIST_REMOVE_AFTER(prev, pve_hash);
451 } else { 451 } else {
452 SLIST_REMOVE_HEAD(&hh->hh_list, pve_hash); 452 SLIST_REMOVE_HEAD(&hh->hh_list, pve_hash);
453 } 453 }
454 break; 454 break;
455 } 455 }
456 prev = pve; 456 prev = pve;
457 } 457 }
458 return pve; 458 return pve;
459} 459}
460 460
461/* 461/*
462 * other data structures 462 * other data structures
463 */ 463 */
464 464
465static pt_entry_t protection_codes[8] __read_mostly; /* maps MI prot to i386 465static pt_entry_t protection_codes[8] __read_mostly; /* maps MI prot to i386
466 prot code */ 466 prot code */
467static bool pmap_initialized __read_mostly = false; /* pmap_init done yet? */ 467static bool pmap_initialized __read_mostly = false; /* pmap_init done yet? */
468 468
469/* 469/*
470 * the following two vaddr_t's are used during system startup 470 * the following two vaddr_t's are used during system startup
471 * to keep track of how much of the kernel's VM space we have used. 471 * to keep track of how much of the kernel's VM space we have used.
472 * once the system is started, the management of the remaining kernel 472 * once the system is started, the management of the remaining kernel
473 * VM space is turned over to the kernel_map vm_map. 473 * VM space is turned over to the kernel_map vm_map.
474 */ 474 */
475 475
476static vaddr_t virtual_avail __read_mostly; /* VA of first free KVA */ 476static vaddr_t virtual_avail __read_mostly; /* VA of first free KVA */
477static vaddr_t virtual_end __read_mostly; /* VA of last free KVA */ 477static vaddr_t virtual_end __read_mostly; /* VA of last free KVA */
478 478
479/* 479/*
480 * pool that pmap structures are allocated from 480 * pool that pmap structures are allocated from
481 */ 481 */
482 482
483static struct pool_cache pmap_cache; 483static struct pool_cache pmap_cache;
484 484
485/* 485/*
486 * pv_entry cache 486 * pv_entry cache
487 */ 487 */
488 488
489static struct pool_cache pmap_pv_cache; 489static struct pool_cache pmap_pv_cache;
490 490
491#ifdef __HAVE_DIRECT_MAP 491#ifdef __HAVE_DIRECT_MAP
492 492
493extern phys_ram_seg_t mem_clusters[]; 493extern phys_ram_seg_t mem_clusters[];
494extern int mem_cluster_cnt; 494extern int mem_cluster_cnt;
495 495
496#else 496#else
497 497
498/* 498/*
499 * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a 499 * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a
500 * maxcpus*NPTECL array of PTE's, to avoid cache line thrashing 500 * maxcpus*NPTECL array of PTE's, to avoid cache line thrashing
501 * due to false sharing. 501 * due to false sharing.
502 */ 502 */
503 503
504#ifdef MULTIPROCESSOR 504#ifdef MULTIPROCESSOR
505#define PTESLEW(pte, id) ((pte)+(id)*NPTECL) 505#define PTESLEW(pte, id) ((pte)+(id)*NPTECL)
506#define VASLEW(va,id) ((va)+(id)*NPTECL*PAGE_SIZE) 506#define VASLEW(va,id) ((va)+(id)*NPTECL*PAGE_SIZE)
507#else 507#else
508#define PTESLEW(pte, id) (pte) 508#define PTESLEW(pte, id) (pte)
509#define VASLEW(va,id) (va) 509#define VASLEW(va,id) (va)
510#endif 510#endif
511 511
512/* 512/*
513 * special VAs and the PTEs that map them 513 * special VAs and the PTEs that map them
514 */ 514 */
515static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte, *early_zero_pte; 515static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte, *early_zero_pte;
516static char *csrcp, *cdstp, *zerop, *ptpp, *early_zerop; 516static char *csrcp, *cdstp, *zerop, *ptpp, *early_zerop;
517 517
518#endif 518#endif
519 519
520int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, u_int); 520int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, u_int);
521 521
522/* PDP pool_cache(9) and its callbacks */ 522/* PDP pool_cache(9) and its callbacks */
523struct pool_cache pmap_pdp_cache; 523struct pool_cache pmap_pdp_cache;
524static int pmap_pdp_ctor(void *, void *, int); 524static int pmap_pdp_ctor(void *, void *, int);
525static void pmap_pdp_dtor(void *, void *); 525static void pmap_pdp_dtor(void *, void *);
526#ifdef PAE 526#ifdef PAE
527/* need to allocate items of 4 pages */ 527/* need to allocate items of 4 pages */
528static void *pmap_pdp_alloc(struct pool *, int); 528static void *pmap_pdp_alloc(struct pool *, int);
529static void pmap_pdp_free(struct pool *, void *); 529static void pmap_pdp_free(struct pool *, void *);
530static struct pool_allocator pmap_pdp_allocator = { 530static struct pool_allocator pmap_pdp_allocator = {
531 .pa_alloc = pmap_pdp_alloc, 531 .pa_alloc = pmap_pdp_alloc,
532 .pa_free = pmap_pdp_free, 532 .pa_free = pmap_pdp_free,
533 .pa_pagesz = PAGE_SIZE * PDP_SIZE, 533 .pa_pagesz = PAGE_SIZE * PDP_SIZE,
534}; 534};
535#endif /* PAE */ 535#endif /* PAE */
536 536
537extern vaddr_t idt_vaddr; /* we allocate IDT early */ 537extern vaddr_t idt_vaddr; /* we allocate IDT early */
538extern paddr_t idt_paddr; 538extern paddr_t idt_paddr;
539 539
540#ifdef _LP64 540#ifdef _LP64
541extern vaddr_t lo32_vaddr; 541extern vaddr_t lo32_vaddr;
542extern vaddr_t lo32_paddr; 542extern vaddr_t lo32_paddr;
543#endif 543#endif
544 544
545extern int end; 545extern int end;
546 546
547#ifdef i386 547#ifdef i386
548/* stuff to fix the pentium f00f bug */ 548/* stuff to fix the pentium f00f bug */
549extern vaddr_t pentium_idt_vaddr; 549extern vaddr_t pentium_idt_vaddr;
550#endif 550#endif
551 551
552 552
553/* 553/*
554 * local prototypes 554 * local prototypes
555 */ 555 */
556 556
557static struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t, 557static struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t,
558 pd_entry_t * const *); 558 pd_entry_t * const *);
559static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int); 559static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int);
560static void pmap_freepage(struct pmap *, struct vm_page *, int); 560static void pmap_freepage(struct pmap *, struct vm_page *, int);
561static void pmap_free_ptp(struct pmap *, struct vm_page *, 561static void pmap_free_ptp(struct pmap *, struct vm_page *,
562 vaddr_t, pt_entry_t *, 562 vaddr_t, pt_entry_t *,
563 pd_entry_t * const *); 563 pd_entry_t * const *);
564static bool pmap_is_active(struct pmap *, struct cpu_info *, bool); 564static bool pmap_is_active(struct pmap *, struct cpu_info *, bool);
565static bool pmap_remove_pte(struct pmap *, struct vm_page *, 565static bool pmap_remove_pte(struct pmap *, struct vm_page *,
566 pt_entry_t *, vaddr_t, 566 pt_entry_t *, vaddr_t,
567 struct pv_entry **); 567 struct pv_entry **);
568static void pmap_remove_ptes(struct pmap *, struct vm_page *, 568static void pmap_remove_ptes(struct pmap *, struct vm_page *,
569 vaddr_t, vaddr_t, vaddr_t, 569 vaddr_t, vaddr_t, vaddr_t,
570 struct pv_entry **); 570 struct pv_entry **);
571 571
572static bool pmap_get_physpage(vaddr_t, int, paddr_t *); 572static bool pmap_get_physpage(vaddr_t, int, paddr_t *);
573static void pmap_alloc_level(pd_entry_t * const *, vaddr_t, int, 573static void pmap_alloc_level(pd_entry_t * const *, vaddr_t, int,
574 long *); 574 long *);
575 575
576static bool pmap_reactivate(struct pmap *); 576static bool pmap_reactivate(struct pmap *);
577 577
578/* 578/*
579 * p m a p h e l p e r f u n c t i o n s 579 * p m a p h e l p e r f u n c t i o n s
580 */ 580 */
581 581
582static inline void 582static inline void
583pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff) 583pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff)
584{ 584{
585 585
586 if (pmap == pmap_kernel()) { 586 if (pmap == pmap_kernel()) {
587 atomic_add_long(&pmap->pm_stats.resident_count, resid_diff); 587 atomic_add_long(&pmap->pm_stats.resident_count, resid_diff);
588 atomic_add_long(&pmap->pm_stats.wired_count, wired_diff); 588 atomic_add_long(&pmap->pm_stats.wired_count, wired_diff);
589 } else { 589 } else {
590 KASSERT(mutex_owned(pmap->pm_lock)); 590 KASSERT(mutex_owned(pmap->pm_lock));
591 pmap->pm_stats.resident_count += resid_diff; 591 pmap->pm_stats.resident_count += resid_diff;
592 pmap->pm_stats.wired_count += wired_diff; 592 pmap->pm_stats.wired_count += wired_diff;
593 } 593 }
594} 594}
595 595
596static inline void 596static inline void
597pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte) 597pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte)
598{ 598{
599 int resid_diff = ((npte & PG_V) ? 1 : 0) - ((opte & PG_V) ? 1 : 0); 599 int resid_diff = ((npte & PG_V) ? 1 : 0) - ((opte & PG_V) ? 1 : 0);
600 int wired_diff = ((npte & PG_W) ? 1 : 0) - ((opte & PG_W) ? 1 : 0); 600 int wired_diff = ((npte & PG_W) ? 1 : 0) - ((opte & PG_W) ? 1 : 0);
601 601
602 KASSERT((npte & (PG_V | PG_W)) != PG_W); 602 KASSERT((npte & (PG_V | PG_W)) != PG_W);
603 KASSERT((opte & (PG_V | PG_W)) != PG_W); 603 KASSERT((opte & (PG_V | PG_W)) != PG_W);
604 604
605 pmap_stats_update(pmap, resid_diff, wired_diff); 605 pmap_stats_update(pmap, resid_diff, wired_diff);
606} 606}
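
[Note] pmap_stats_update_bypte() derives both adjustments purely from the valid and wired bits of the old and new PTEs, so replacing a valid, wired mapping with an empty PTE yields -1 for each counter. A standalone arithmetic sketch; the EX_PG_* bit positions are hypothetical, not the real PG_V/PG_W values.

#include <stdio.h>
#include <stdint.h>

#define EX_PG_V 0x001u   /* hypothetical "valid" bit */
#define EX_PG_W 0x200u   /* hypothetical "wired" bit */

static void
ex_stats_diff(uint32_t npte, uint32_t opte, int *resid_diff, int *wired_diff)
{
        *resid_diff = ((npte & EX_PG_V) ? 1 : 0) - ((opte & EX_PG_V) ? 1 : 0);
        *wired_diff = ((npte & EX_PG_W) ? 1 : 0) - ((opte & EX_PG_W) ? 1 : 0);
}

int
main(void)
{
        int r, w;

        /* valid, wired mapping replaced by an invalid PTE: both counts drop */
        ex_stats_diff(0, EX_PG_V | EX_PG_W, &r, &w);
        printf("resident %+d, wired %+d\n", r, w);
        return 0;
}
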
607 607
608/* 608/*
609 * ptp_to_pmap: lookup pmap by ptp 609 * ptp_to_pmap: lookup pmap by ptp
610 */ 610 */
611 611
612static struct pmap * 612static struct pmap *
613ptp_to_pmap(struct vm_page *ptp) 613ptp_to_pmap(struct vm_page *ptp)
614{ 614{
615 struct pmap *pmap; 615 struct pmap *pmap;
616 616
617 if (ptp == NULL) { 617 if (ptp == NULL) {
618 return pmap_kernel(); 618 return pmap_kernel();
619 } 619 }
620 pmap = (struct pmap *)ptp->uobject; 620 pmap = (struct pmap *)ptp->uobject;
621 KASSERT(pmap != NULL); 621 KASSERT(pmap != NULL);
622 KASSERT(&pmap->pm_obj[0] == ptp->uobject); 622 KASSERT(&pmap->pm_obj[0] == ptp->uobject);
623 return pmap; 623 return pmap;
624} 624}
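
[Note] ptp_to_pmap() can cast ptp->uobject straight to struct pmap * only because pm_obj[0] is the first member of struct pmap, which the KASSERT above double-checks. The same first-member-cast idiom in standalone form, with hypothetical ex_* types:

#include <stdio.h>
#include <stddef.h>

struct ex_object { int refs; };

struct ex_pmap {
        struct ex_object obj;   /* must stay the first member for the cast below */
        int id;
};

static struct ex_pmap *
ex_obj_to_pmap(struct ex_object *o)
{
        /* valid because offsetof(struct ex_pmap, obj) == 0 */
        return (struct ex_pmap *)o;
}

int
main(void)
{
        struct ex_pmap pm = { { 1 }, 42 };

        printf("pmap id %d\n", ex_obj_to_pmap(&pm.obj)->id);
        return 0;
}
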
625 625
626static inline struct pv_pte * 626static inline struct pv_pte *
627pve_to_pvpte(struct pv_entry *pve) 627pve_to_pvpte(struct pv_entry *pve)
628{ 628{
629 629
630 KASSERT((void *)&pve->pve_pte == (void *)pve); 630 KASSERT((void *)&pve->pve_pte == (void *)pve);
631 return &pve->pve_pte; 631 return &pve->pve_pte;
632} 632}
633 633
634static inline struct pv_entry * 634static inline struct pv_entry *
635pvpte_to_pve(struct pv_pte *pvpte) 635pvpte_to_pve(struct pv_pte *pvpte)
636{ 636{
637 struct pv_entry *pve = (void *)pvpte; 637 struct pv_entry *pve = (void *)pvpte;
638 638
639 KASSERT(pve_to_pvpte(pve) == pvpte); 639 KASSERT(pve_to_pvpte(pve) == pvpte);
640 return pve; 640 return pve;
641} 641}
642 642
643/* 643/*
644 * pv_pte_first, pv_pte_next: PV list iterator. 644 * pv_pte_first, pv_pte_next: PV list iterator.
645 */ 645 */
646 646
647static struct pv_pte * 647static struct pv_pte *
648pv_pte_first(struct pmap_page *pp) 648pv_pte_first(struct pmap_page *pp)
649{ 649{
650 650
651 if ((pp->pp_flags & PP_EMBEDDED) != 0) { 651 if ((pp->pp_flags & PP_EMBEDDED) != 0) {
652 return &pp->pp_pte; 652 return &pp->pp_pte;
653 } 653 }
654 return pve_to_pvpte(LIST_FIRST(&pp->pp_head.pvh_list)); 654 return pve_to_pvpte(LIST_FIRST(&pp->pp_head.pvh_list));
655} 655}
656 656
657static struct pv_pte * 657static struct pv_pte *
658pv_pte_next(struct pmap_page *pp, struct pv_pte *pvpte) 658pv_pte_next(struct pmap_page *pp, struct pv_pte *pvpte)
659{ 659{
660 660
661 KASSERT(pvpte != NULL); 661 KASSERT(pvpte != NULL);
662 if (pvpte == &pp->pp_pte) { 662 if (pvpte == &pp->pp_pte) {
663 KASSERT((pp->pp_flags & PP_EMBEDDED) != 0); 663 KASSERT((pp->pp_flags & PP_EMBEDDED) != 0);
664 return NULL; 664 return NULL;
665 } 665 }
666 KASSERT((pp->pp_flags & PP_EMBEDDED) == 0); 666 KASSERT((pp->pp_flags & PP_EMBEDDED) == 0);
667 return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list)); 667 return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list));
668} 668}
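
[Note] pv_pte_first() and pv_pte_next() encode a small space optimization: a page with a single mapping keeps that one entry embedded in its pmap_page (PP_EMBEDDED), and the pv list is used only otherwise, so the iterator either yields the lone embedded entry or walks the list. A standalone sketch of the same shape with hypothetical types:

#include <stdio.h>
#include <stddef.h>

struct ex_mapping {
        struct ex_mapping *next;
        int id;
};

struct ex_page {
        int embedded_valid;             /* analogue of PP_EMBEDDED */
        struct ex_mapping embedded;     /* the single-mapping fast path */
        struct ex_mapping *list;        /* used only when not embedded */
};

static struct ex_mapping *
ex_first(struct ex_page *pg)
{
        return pg->embedded_valid ? &pg->embedded : pg->list;
}

static struct ex_mapping *
ex_next(struct ex_page *pg, struct ex_mapping *m)
{
        /* the embedded entry is the only one; list entries chain normally */
        return (m == &pg->embedded) ? NULL : m->next;
}

int
main(void)
{
        struct ex_mapping b = { NULL, 2 }, a = { &b, 1 };
        struct ex_page multi = { 0, { NULL, 0 }, &a };
        struct ex_mapping *m;

        for (m = ex_first(&multi); m != NULL; m = ex_next(&multi, m))
                printf("mapping %d\n", m->id);
        return 0;
}
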
669 669
670/* 670/*
671 * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]? 671 * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]?
672 * of course the kernel is always loaded 672 * of course the kernel is always loaded
673 */ 673 */
674 674
675bool 675bool
676pmap_is_curpmap(struct pmap *pmap) 676pmap_is_curpmap(struct pmap *pmap)
677{ 677{
678#if defined(XEN) && defined(__x86_64__) 678#if defined(XEN) && defined(__x86_64__)
679 /* 679 /*
680 * Only kernel pmap is physically loaded. 680 * Only kernel pmap is physically loaded.
681 * User PGD may be active, but TLB will be flushed 681 * User PGD may be active, but TLB will be flushed
682 * with HYPERVISOR_iret anyway, so let's say no 682 * with HYPERVISOR_iret anyway, so let's say no
683 */ 683 */
684 return(pmap == pmap_kernel()); 684 return(pmap == pmap_kernel());
685#else /* XEN && __x86_64__*/ 685#else /* XEN && __x86_64__*/
686 return((pmap == pmap_kernel()) || 686 return((pmap == pmap_kernel()) ||
687 (pmap == curcpu()->ci_pmap)); 687 (pmap == curcpu()->ci_pmap));
688#endif 688#endif
689} 689}
690 690
691/* 691/*
692 * pmap_is_active: is this pmap loaded into the specified processor's %cr3? 692 * pmap_is_active: is this pmap loaded into the specified processor's %cr3?
693 */ 693 */
694 694
695inline static bool 695inline static bool
696pmap_is_active(struct pmap *pmap, struct cpu_info *ci, bool kernel) 696pmap_is_active(struct pmap *pmap, struct cpu_info *ci, bool kernel)
697{ 697{
698 698
699 return (pmap == pmap_kernel() || 699 return (pmap == pmap_kernel() ||
700 (pmap->pm_cpus & ci->ci_cpumask) != 0 || 700 (pmap->pm_cpus & ci->ci_cpumask) != 0 ||
701 (kernel && (pmap->pm_kernel_cpus & ci->ci_cpumask) != 0)); 701 (kernel && (pmap->pm_kernel_cpus & ci->ci_cpumask) != 0));
702} 702}
703 703
704/* 704/*
705 * Add a reference to the specified pmap. 705 * Add a reference to the specified pmap.
706 */ 706 */
707 707
708void 708void
709pmap_reference(struct pmap *pmap) 709pmap_reference(struct pmap *pmap)
710{ 710{
711 711
712 atomic_inc_uint(&pmap->pm_obj[0].uo_refs); 712 atomic_inc_uint(&pmap->pm_obj[0].uo_refs);
713} 713}
714 714
715/* 715/*
716 * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in 716 * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
717 * 717 *
718 * there are several pmaps involved. some or all of them might be same. 718 * there are several pmaps involved. some or all of them might be same.
719 * 719 *
720 * - the pmap given by the first argument 720 * - the pmap given by the first argument
721 * our caller wants to access this pmap's PTEs. 721 * our caller wants to access this pmap's PTEs.
722 * 722 *
723 * - pmap_kernel() 723 * - pmap_kernel()
724 * the kernel pmap. note that it only contains the kernel part 724 * the kernel pmap. note that it only contains the kernel part
725 * of the address space which is shared by any pmap. ie. any 725 * of the address space which is shared by any pmap. ie. any
726 * pmap can be used instead of pmap_kernel() for our purpose. 726 * pmap can be used instead of pmap_kernel() for our purpose.
727 * 727 *
728 * - ci->ci_pmap 728 * - ci->ci_pmap
729 * pmap currently loaded on the cpu. 729 * pmap currently loaded on the cpu.
730 * 730 *
731 * - vm_map_pmap(&curproc->p_vmspace->vm_map) 731 * - vm_map_pmap(&curproc->p_vmspace->vm_map)
732 * current process' pmap. 732 * current process' pmap.
733 * 733 *
734 * => we lock enough pmaps to keep things locked in 734 * => we lock enough pmaps to keep things locked in
735 * => must be undone with pmap_unmap_ptes before returning 735 * => must be undone with pmap_unmap_ptes before returning
736 */ 736 */
737 737
738void 738void
739pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2, 739pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2,
740 pd_entry_t **ptepp, pd_entry_t * const **pdeppp) 740 pd_entry_t **ptepp, pd_entry_t * const **pdeppp)
741{ 741{
742 struct pmap *curpmap; 742 struct pmap *curpmap;
743 struct cpu_info *ci; 743 struct cpu_info *ci;
744 uint32_t cpumask; 744 uint32_t cpumask;
745 lwp_t *l; 745 lwp_t *l;
746 746
747 /* The kernel's pmap is always accessible. */ 747 /* The kernel's pmap is always accessible. */
748 if (pmap == pmap_kernel()) { 748 if (pmap == pmap_kernel()) {
749 *pmap2 = NULL; 749 *pmap2 = NULL;
750 *ptepp = PTE_BASE; 750 *ptepp = PTE_BASE;
751 *pdeppp = normal_pdes; 751 *pdeppp = normal_pdes;
752 return; 752 return;
753 } 753 }
754 KASSERT(kpreempt_disabled()); 754 KASSERT(kpreempt_disabled());
755 755
756 l = curlwp; 756 l = curlwp;
757 retry: 757 retry:
758 mutex_enter(pmap->pm_lock); 758 mutex_enter(pmap->pm_lock);
759 ci = curcpu(); 759 ci = curcpu();
760 curpmap = ci->ci_pmap; 760 curpmap = ci->ci_pmap;
761 if (vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) { 761 if (vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) {
762 /* Our own pmap so just load it: easy. */ 762 /* Our own pmap so just load it: easy. */
763 if (__predict_false(ci->ci_want_pmapload)) { 763 if (__predict_false(ci->ci_want_pmapload)) {
764 mutex_exit(pmap->pm_lock); 764 mutex_exit(pmap->pm_lock);
765 pmap_load(); 765 pmap_load();
766 goto retry; 766 goto retry;
767 } 767 }
768 KASSERT(pmap == curpmap); 768 KASSERT(pmap == curpmap);
769 } else if (pmap == curpmap) { 769 } else if (pmap == curpmap) {
770 /* 770 /*
771 * Already on the CPU: make it valid. This is very 771 * Already on the CPU: make it valid. This is very
772 * often the case during exit(), when we have switched 772 * often the case during exit(), when we have switched
773 * to the kernel pmap in order to destroy a user pmap. 773 * to the kernel pmap in order to destroy a user pmap.
774 */ 774 */
775 if (!pmap_reactivate(pmap)) { 775 if (!pmap_reactivate(pmap)) {
776 u_int gen = uvm_emap_gen_return(); 776 u_int gen = uvm_emap_gen_return();
777 tlbflush(); 777 tlbflush();
778 uvm_emap_update(gen); 778 uvm_emap_update(gen);
779 } 779 }
780 } else { 780 } else {
781 /* 781 /*
782 * Toss current pmap from CPU, but keep a reference to it. 782 * Toss current pmap from CPU, but keep a reference to it.
783 * The reference will be dropped by pmap_unmap_ptes(). 783 * The reference will be dropped by pmap_unmap_ptes().
784 * Can happen if we block during exit(). 784 * Can happen if we block during exit().
785 */ 785 */
786 cpumask = ci->ci_cpumask; 786 cpumask = ci->ci_cpumask;
787 atomic_and_32(&curpmap->pm_cpus, ~cpumask); 787 atomic_and_32(&curpmap->pm_cpus, ~cpumask);
788 atomic_and_32(&curpmap->pm_kernel_cpus, ~cpumask); 788 atomic_and_32(&curpmap->pm_kernel_cpus, ~cpumask);
789 ci->ci_pmap = pmap; 789 ci->ci_pmap = pmap;
790 ci->ci_tlbstate = TLBSTATE_VALID; 790 ci->ci_tlbstate = TLBSTATE_VALID;
791 atomic_or_32(&pmap->pm_cpus, cpumask); 791 atomic_or_32(&pmap->pm_cpus, cpumask);
792 atomic_or_32(&pmap->pm_kernel_cpus, cpumask); 792 atomic_or_32(&pmap->pm_kernel_cpus, cpumask);
793 cpu_load_pmap(pmap); 793 cpu_load_pmap(pmap);
794 } 794 }
795 pmap->pm_ncsw = l->l_ncsw; 795 pmap->pm_ncsw = l->l_ncsw;
796 *pmap2 = curpmap; 796 *pmap2 = curpmap;
797 *ptepp = PTE_BASE; 797 *ptepp = PTE_BASE;
798#if defined(XEN) && defined(__x86_64__) 798#if defined(XEN) && defined(__x86_64__)
799 KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] == L4_BASE); 799 KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] == L4_BASE);
800 ci->ci_normal_pdes[PTP_LEVELS - 2] = pmap->pm_pdir; 800 ci->ci_normal_pdes[PTP_LEVELS - 2] = pmap->pm_pdir;
801 *pdeppp = ci->ci_normal_pdes; 801 *pdeppp = ci->ci_normal_pdes;
802#else /* XEN && __x86_64__ */ 802#else /* XEN && __x86_64__ */
803 *pdeppp = normal_pdes; 803 *pdeppp = normal_pdes;
804#endif /* XEN && __x86_64__ */ 804#endif /* XEN && __x86_64__ */
805} 805}
806 806
807/* 807/*
808 * pmap_unmap_ptes: unlock the PTE mapping of "pmap" 808 * pmap_unmap_ptes: unlock the PTE mapping of "pmap"
809 */ 809 */
810 810
811void 811void
812pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2) 812pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2)
813{ 813{
814 struct cpu_info *ci; 814 struct cpu_info *ci;
815 struct pmap *mypmap; 815 struct pmap *mypmap;
816 816
817 KASSERT(kpreempt_disabled()); 817 KASSERT(kpreempt_disabled());
818 818
819 /* The kernel's pmap is always accessible. */ 819 /* The kernel's pmap is always accessible. */
820 if (pmap == pmap_kernel()) { 820 if (pmap == pmap_kernel()) {
821 return; 821 return;
822 } 822 }
823 823
824 ci = curcpu(); 824 ci = curcpu();
825#if defined(XEN) && defined(__x86_64__) 825#if defined(XEN) && defined(__x86_64__)
826 /* Reset per-cpu normal_pdes */ 826 /* Reset per-cpu normal_pdes */
827 KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] != L4_BASE); 827 KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] != L4_BASE);
828 ci->ci_normal_pdes[PTP_LEVELS - 2] = L4_BASE; 828 ci->ci_normal_pdes[PTP_LEVELS - 2] = L4_BASE;
829#endif /* XEN && __x86_64__ */ 829#endif /* XEN && __x86_64__ */
830 /* 830 /*
831 * We cannot tolerate context switches while mapped in. 831 * We cannot tolerate context switches while mapped in.
832 * If it is our own pmap all we have to do is unlock. 832 * If it is our own pmap all we have to do is unlock.
833 */ 833 */
834 KASSERT(pmap->pm_ncsw == curlwp->l_ncsw); 834 KASSERT(pmap->pm_ncsw == curlwp->l_ncsw);
835 mypmap = vm_map_pmap(&curproc->p_vmspace->vm_map); 835 mypmap = vm_map_pmap(&curproc->p_vmspace->vm_map);
836 if (pmap == mypmap) { 836 if (pmap == mypmap) {
837 mutex_exit(pmap->pm_lock); 837 mutex_exit(pmap->pm_lock);
838 return; 838 return;
839 } 839 }
840 840
841 /* 841 /*
842 * Mark whatever's on the CPU now as lazy and unlock. 842 * Mark whatever's on the CPU now as lazy and unlock.
843 * If the pmap was already installed, we are done. 843 * If the pmap was already installed, we are done.
844 */ 844 */
845 ci->ci_tlbstate = TLBSTATE_LAZY; 845 ci->ci_tlbstate = TLBSTATE_LAZY;
846 ci->ci_want_pmapload = (mypmap != pmap_kernel()); 846 ci->ci_want_pmapload = (mypmap != pmap_kernel());
847 mutex_exit(pmap->pm_lock); 847 mutex_exit(pmap->pm_lock);
848 if (pmap == pmap2) { 848 if (pmap == pmap2) {
849 return; 849 return;
850 } 850 }
851 851
852 /* 852 /*
853 * We installed another pmap on the CPU. Grab a reference to 853 * We installed another pmap on the CPU. Grab a reference to
854 * it and leave in place. Toss the evicted pmap (can block). 854 * it and leave in place. Toss the evicted pmap (can block).
855 */ 855 */
856 pmap_reference(pmap); 856 pmap_reference(pmap);
857 pmap_destroy(pmap2); 857 pmap_destroy(pmap2);
858} 858}
859 859
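A minimal caller-side sketch of how pmap_map_ptes()/pmap_unmap_ptes() pair up, assuming the pmap_map_ptes() signature implied by its tail above; "pmap" and "va" are placeholders, not taken from any one caller in this diff:

	struct pmap *pmap2;
	pt_entry_t *ptes;
	pd_entry_t * const *pdes;

	kpreempt_disable();
	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);	/* locks pmap */
	if (pmap_valid_entry(ptes[pl1_i(va)])) {
		/* ... work on the mapping of va via the linear PTEs ... */
	}
	pmap_unmap_ptes(pmap, pmap2);			/* unlocks pmap */
	kpreempt_enable();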
860 860
861inline static void 861inline static void
862pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte) 862pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte)
863{ 863{
864 864
865#if !defined(__x86_64__) 865#if !defined(__x86_64__)
866 if (curproc == NULL || curproc->p_vmspace == NULL || 866 if (curproc == NULL || curproc->p_vmspace == NULL ||
867 pm != vm_map_pmap(&curproc->p_vmspace->vm_map)) 867 pm != vm_map_pmap(&curproc->p_vmspace->vm_map))
868 return; 868 return;
869 869
870 if ((opte ^ npte) & PG_X) 870 if ((opte ^ npte) & PG_X)
871 pmap_update_pg(va); 871 pmap_update_pg(va);
872 872
873 /* 873 /*
874 * Executability was removed on the last executable change. 874 * Executability was removed on the last executable change.
875 * Reset the code segment to something conservative and 875 * Reset the code segment to something conservative and
876 * let the trap handler deal with setting the right limit. 876 * let the trap handler deal with setting the right limit.
 877 * We can't do the fixup here because of locking constraints on the vm map. 877 * We can't do the fixup here because of locking constraints on the vm map.
878 */ 878 */
879 879
880 if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) { 880 if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) {
881 struct trapframe *tf = curlwp->l_md.md_regs; 881 struct trapframe *tf = curlwp->l_md.md_regs;
882 882
883 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 883 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
884 pm->pm_hiexec = I386_MAX_EXE_ADDR; 884 pm->pm_hiexec = I386_MAX_EXE_ADDR;
885 } 885 }
886#endif /* !defined(__x86_64__) */ 886#endif /* !defined(__x86_64__) */
887} 887}
888 888
889#if !defined(__x86_64__) 889#if !defined(__x86_64__)
890/* 890/*
891 * Fixup the code segment to cover all potential executable mappings. 891 * Fixup the code segment to cover all potential executable mappings.
892 * returns 0 if no changes to the code segment were made. 892 * returns 0 if no changes to the code segment were made.
893 */ 893 */
894 894
895int 895int
896pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb) 896pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb)
897{ 897{
898 struct vm_map_entry *ent; 898 struct vm_map_entry *ent;
899 struct pmap *pm = vm_map_pmap(map); 899 struct pmap *pm = vm_map_pmap(map);
900 vaddr_t va = 0; 900 vaddr_t va = 0;
901 901
902 vm_map_lock_read(map); 902 vm_map_lock_read(map);
903 for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) { 903 for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) {
904 904
905 /* 905 /*
906 * This entry has greater va than the entries before. 906 * This entry has greater va than the entries before.
907 * We need to make it point to the last page, not past it. 907 * We need to make it point to the last page, not past it.
908 */ 908 */
909 909
910 if (ent->protection & VM_PROT_EXECUTE) 910 if (ent->protection & VM_PROT_EXECUTE)
911 va = trunc_page(ent->end) - PAGE_SIZE; 911 va = trunc_page(ent->end) - PAGE_SIZE;
912 } 912 }
913 vm_map_unlock_read(map); 913 vm_map_unlock_read(map);
914 if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL)) 914 if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL))
915 return (0); 915 return (0);
916 916
917 pm->pm_hiexec = va; 917 pm->pm_hiexec = va;
918 if (pm->pm_hiexec > I386_MAX_EXE_ADDR) { 918 if (pm->pm_hiexec > I386_MAX_EXE_ADDR) {
919 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); 919 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
920 } else { 920 } else {
921 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 921 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
922 return (0); 922 return (0);
923 } 923 }
924 return (1); 924 return (1);
925} 925}
926#endif /* !defined(__x86_64__) */ 926#endif /* !defined(__x86_64__) */
927 927
928void 928void
929pat_init(struct cpu_info *ci) 929pat_init(struct cpu_info *ci)
930{ 930{
931 uint64_t pat; 931 uint64_t pat;
932 932
933 if (!(ci->ci_feat_val[0] & CPUID_PAT)) 933 if (!(ci->ci_feat_val[0] & CPUID_PAT))
934 return; 934 return;
935 935
 936 /* We change WT to WC. Leave all other entries at their default values. */ 936 /* We change WT to WC. Leave all other entries at their default values. */
937 pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) | 937 pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) |
938 PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) | 938 PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) |
939 PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) | 939 PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) |
940 PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC); 940 PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC);
941 941
942 wrmsr(MSR_CR_PAT, pat); 942 wrmsr(MSR_CR_PAT, pat);
943 cpu_pat_enabled = true; 943 cpu_pat_enabled = true;
944 aprint_debug_dev(ci->ci_dev, "PAT enabled\n"); 944 aprint_debug_dev(ci->ci_dev, "PAT enabled\n");
945} 945}
946 946
947static pt_entry_t 947static pt_entry_t
948pmap_pat_flags(u_int flags) 948pmap_pat_flags(u_int flags)
949{ 949{
950 u_int cacheflags = (flags & PMAP_CACHE_MASK); 950 u_int cacheflags = (flags & PMAP_CACHE_MASK);
951 951
952 if (!cpu_pat_enabled) { 952 if (!cpu_pat_enabled) {
953 switch (cacheflags) { 953 switch (cacheflags) {
954 case PMAP_NOCACHE: 954 case PMAP_NOCACHE:
955 case PMAP_NOCACHE_OVR: 955 case PMAP_NOCACHE_OVR:
 956 /* results in PGC_UCMINUS on CPUs that have 956 /* results in PGC_UCMINUS on CPUs that have
 957 * the CPUID PAT bit but PAT "disabled" 957 * the CPUID PAT bit but PAT "disabled"
958 */ 958 */
959 return PG_N; 959 return PG_N;
960 default: 960 default:
961 return 0; 961 return 0;
962 } 962 }
963 } 963 }
964 964
965 switch (cacheflags) { 965 switch (cacheflags) {
966 case PMAP_NOCACHE: 966 case PMAP_NOCACHE:
967 return PGC_UC; 967 return PGC_UC;
968 case PMAP_WRITE_COMBINE: 968 case PMAP_WRITE_COMBINE:
969 return PGC_WC; 969 return PGC_WC;
970 case PMAP_WRITE_BACK: 970 case PMAP_WRITE_BACK:
971 return PGC_WB; 971 return PGC_WB;
972 case PMAP_NOCACHE_OVR: 972 case PMAP_NOCACHE_OVR:
973 return PGC_UCMINUS; 973 return PGC_UCMINUS;
974 } 974 }
975 975
976 return 0; 976 return 0;
977} 977}
978 978
979/* 979/*
980 * p m a p k e n t e r f u n c t i o n s 980 * p m a p k e n t e r f u n c t i o n s
981 * 981 *
982 * functions to quickly enter/remove pages from the kernel address 982 * functions to quickly enter/remove pages from the kernel address
983 * space. pmap_kremove is exported to MI kernel. we make use of 983 * space. pmap_kremove is exported to MI kernel. we make use of
984 * the recursive PTE mappings. 984 * the recursive PTE mappings.
985 */ 985 */
986 986
987/* 987/*
988 * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking 988 * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking
989 * 989 *
990 * => no need to lock anything, assume va is already allocated 990 * => no need to lock anything, assume va is already allocated
991 * => should be faster than normal pmap enter function 991 * => should be faster than normal pmap enter function
992 */ 992 */
993 993
994void 994void
995pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 995pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
996{ 996{
997 pt_entry_t *pte, opte, npte; 997 pt_entry_t *pte, opte, npte;
998 998
999 KASSERT(!(prot & ~VM_PROT_ALL)); 999 KASSERT(!(prot & ~VM_PROT_ALL));
1000 1000
1001 if (va < VM_MIN_KERNEL_ADDRESS) 1001 if (va < VM_MIN_KERNEL_ADDRESS)
1002 pte = vtopte(va); 1002 pte = vtopte(va);
1003 else 1003 else
1004 pte = kvtopte(va); 1004 pte = kvtopte(va);
1005#ifdef DOM0OPS 1005#ifdef DOM0OPS
1006 if (pa < pmap_pa_start || pa >= pmap_pa_end) { 1006 if (pa < pmap_pa_start || pa >= pmap_pa_end) {
1007#ifdef DEBUG 1007#ifdef DEBUG
1008 printf_nolog("%s: pa 0x%" PRIx64 " for va 0x%" PRIx64 1008 printf_nolog("%s: pa 0x%" PRIx64 " for va 0x%" PRIx64
1009 " outside range\n", __func__, (int64_t)pa, (int64_t)va); 1009 " outside range\n", __func__, (int64_t)pa, (int64_t)va);
1010#endif /* DEBUG */ 1010#endif /* DEBUG */
1011 npte = pa; 1011 npte = pa;
1012 } else 1012 } else
1013#endif /* DOM0OPS */ 1013#endif /* DOM0OPS */
1014 npte = pmap_pa2pte(pa); 1014 npte = pmap_pa2pte(pa);
1015 npte |= protection_codes[prot] | PG_k | PG_V | pmap_pg_g; 1015 npte |= protection_codes[prot] | PG_k | PG_V | pmap_pg_g;
1016 npte |= pmap_pat_flags(flags); 1016 npte |= pmap_pat_flags(flags);
1017 opte = pmap_pte_testset(pte, npte); /* zap! */ 1017 opte = pmap_pte_testset(pte, npte); /* zap! */
1018#if defined(DIAGNOSTIC) 1018#if defined(DIAGNOSTIC)
1019 /* XXX For now... */ 1019 /* XXX For now... */
1020 if (opte & PG_PS) 1020 if (opte & PG_PS)
1021 panic("%s: PG_PS", __func__); 1021 panic("%s: PG_PS", __func__);
1022#endif 1022#endif
1023 if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) { 1023 if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) {
1024#if defined(DIAGNOSTIC) 1024#if defined(DIAGNOSTIC)
1025 printf_nolog("%s: mapping already present\n", __func__); 1025 printf_nolog("%s: mapping already present\n", __func__);
1026#endif 1026#endif
1027 /* This should not happen. */ 1027 /* This should not happen. */
1028 kpreempt_disable(); 1028 kpreempt_disable();
1029 pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER); 1029 pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER);
1030 kpreempt_enable(); 1030 kpreempt_enable();
1031 } 1031 }
1032} 1032}
1033 1033
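A hedged usage sketch of the cache-attribute plumbing above: a caller that wants a write-combining kernel mapping passes one of the PMAP_* cache flags in the flags argument, which pmap_pat_flags() turns into PGC_WC when PAT is enabled and silently leaves normally cached when it is not ("va" and "pa" are placeholders):

	/* map one page, write-combined if the CPU's PAT allows it */
	pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE,
	    PMAP_WRITE_COMBINE);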
1034void 1034void
1035pmap_emap_enter(vaddr_t va, paddr_t pa, vm_prot_t prot) 1035pmap_emap_enter(vaddr_t va, paddr_t pa, vm_prot_t prot)
1036{ 1036{
1037 pt_entry_t *pte, opte, npte; 1037 pt_entry_t *pte, opte, npte;
1038 1038
1039 KASSERT((prot & ~VM_PROT_ALL) == 0); 1039 KASSERT((prot & ~VM_PROT_ALL) == 0);
1040 pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va); 1040 pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va);
1041 1041
1042#ifdef DOM0OPS 1042#ifdef DOM0OPS
1043 if (pa < pmap_pa_start || pa >= pmap_pa_end) { 1043 if (pa < pmap_pa_start || pa >= pmap_pa_end) {
1044 npte = pa; 1044 npte = pa;
1045 } else 1045 } else
1046#endif 1046#endif
1047 npte = pmap_pa2pte(pa); 1047 npte = pmap_pa2pte(pa);
1048 1048
1049 npte = pmap_pa2pte(pa); 1049 npte = pmap_pa2pte(pa);
1050 npte |= protection_codes[prot] | PG_k | PG_V; 1050 npte |= protection_codes[prot] | PG_k | PG_V;
1051 opte = pmap_pte_testset(pte, npte); 1051 opte = pmap_pte_testset(pte, npte);
1052} 1052}
1053 1053
1054/* 1054/*
1055 * pmap_emap_sync: perform TLB flush or pmap load, if it was deferred. 1055 * pmap_emap_sync: perform TLB flush or pmap load, if it was deferred.
1056 */ 1056 */
1057void 1057void
1058pmap_emap_sync(bool canload) 1058pmap_emap_sync(bool canload)
1059{ 1059{
1060 struct cpu_info *ci = curcpu(); 1060 struct cpu_info *ci = curcpu();
1061 struct pmap *pmap; 1061 struct pmap *pmap;
1062 1062
1063 KASSERT(kpreempt_disabled()); 1063 KASSERT(kpreempt_disabled());
1064 if (__predict_true(ci->ci_want_pmapload && canload)) { 1064 if (__predict_true(ci->ci_want_pmapload && canload)) {
1065 /* 1065 /*
1066 * XXX: Hint for pmap_reactivate(), which might suggest to 1066 * XXX: Hint for pmap_reactivate(), which might suggest to
1067 * not perform TLB flush, if state has not changed. 1067 * not perform TLB flush, if state has not changed.
1068 */ 1068 */
1069 pmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map); 1069 pmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map);
1070 if (__predict_false(pmap == ci->ci_pmap)) { 1070 if (__predict_false(pmap == ci->ci_pmap)) {
1071 const uint32_t cpumask = ci->ci_cpumask; 1071 const uint32_t cpumask = ci->ci_cpumask;
1072 atomic_and_32(&pmap->pm_cpus, ~cpumask); 1072 atomic_and_32(&pmap->pm_cpus, ~cpumask);
1073 } 1073 }
1074 pmap_load(); 1074 pmap_load();
1075 KASSERT(ci->ci_want_pmapload == 0); 1075 KASSERT(ci->ci_want_pmapload == 0);
1076 } else { 1076 } else {
1077 tlbflush(); 1077 tlbflush();
1078 } 1078 }
1079 1079
1080} 1080}
1081 1081
1082void 1082void
1083pmap_emap_remove(vaddr_t sva, vsize_t len) 1083pmap_emap_remove(vaddr_t sva, vsize_t len)
1084{ 1084{
1085 pt_entry_t *pte, xpte; 1085 pt_entry_t *pte, xpte;
1086 vaddr_t va, eva = sva + len; 1086 vaddr_t va, eva = sva + len;
1087 1087
1088 for (va = sva; va < eva; va += PAGE_SIZE) { 1088 for (va = sva; va < eva; va += PAGE_SIZE) {
1089 pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va); 1089 pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va);
1090 xpte |= pmap_pte_testset(pte, 0); 1090 xpte |= pmap_pte_testset(pte, 0);
1091 } 1091 }
1092} 1092}
1093 1093
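A rough sketch of the ephemeral-mapping sequence the three pmap_emap_* functions above support; the actual caller is not shown in this diff, so treat the ordering as an assumption, with "va"/"pa" as placeholders:

	pmap_emap_enter(va, pa, VM_PROT_READ | VM_PROT_WRITE);
	kpreempt_disable();
	pmap_emap_sync(true);	/* flush the TLB, or do the deferred pmap_load */
	kpreempt_enable();
	/* ... access the mapping at va ... */
	pmap_emap_remove(va, PAGE_SIZE);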
1094__strict_weak_alias(pmap_kenter_ma, pmap_kenter_pa); 1094__strict_weak_alias(pmap_kenter_ma, pmap_kenter_pa);
1095 1095
1096#if defined(__x86_64__) 1096#if defined(__x86_64__)
1097/* 1097/*
1098 * Change protection for a virtual address. Local for a CPU only, don't 1098 * Change protection for a virtual address. Local for a CPU only, don't
1099 * care about TLB shootdowns. 1099 * care about TLB shootdowns.
1100 * 1100 *
1101 * => must be called with preemption disabled 1101 * => must be called with preemption disabled
1102 */ 1102 */
1103void 1103void
1104pmap_changeprot_local(vaddr_t va, vm_prot_t prot) 1104pmap_changeprot_local(vaddr_t va, vm_prot_t prot)
1105{ 1105{
1106 pt_entry_t *pte, opte, npte; 1106 pt_entry_t *pte, opte, npte;
1107 1107
1108 KASSERT(kpreempt_disabled()); 1108 KASSERT(kpreempt_disabled());
1109 1109
1110 if (va < VM_MIN_KERNEL_ADDRESS) 1110 if (va < VM_MIN_KERNEL_ADDRESS)
1111 pte = vtopte(va); 1111 pte = vtopte(va);
1112 else 1112 else
1113 pte = kvtopte(va); 1113 pte = kvtopte(va);
1114 1114
1115 npte = opte = *pte; 1115 npte = opte = *pte;
1116 1116
1117 if ((prot & VM_PROT_WRITE) != 0) 1117 if ((prot & VM_PROT_WRITE) != 0)
1118 npte |= PG_RW; 1118 npte |= PG_RW;
1119 else 1119 else
1120 npte &= ~PG_RW; 1120 npte &= ~PG_RW;
1121 1121
1122 if (opte != npte) { 1122 if (opte != npte) {
1123 pmap_pte_set(pte, npte); 1123 pmap_pte_set(pte, npte);
1124 pmap_pte_flush(); 1124 pmap_pte_flush();
1125 invlpg(va); 1125 invlpg(va);
1126 } 1126 }
1127} 1127}
1128#endif /* defined(__x86_64__) */ 1128#endif /* defined(__x86_64__) */
1129 1129
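A minimal hypothetical use of pmap_changeprot_local(), consistent with the comment above (it only flips PG_RW on the local CPU, so the caller brackets it with kpreempt_disable(); "va" is a placeholder):

	kpreempt_disable();
	pmap_changeprot_local(va, VM_PROT_READ);	/* drop write on this CPU */
	/* ... do the CPU-local work that needs the page read-only ... */
	pmap_changeprot_local(va, VM_PROT_READ | VM_PROT_WRITE);
	kpreempt_enable();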
1130/* 1130/*
1131 * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking 1131 * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking
1132 * 1132 *
1133 * => no need to lock anything 1133 * => no need to lock anything
1134 * => caller must dispose of any vm_page mapped in the va range 1134 * => caller must dispose of any vm_page mapped in the va range
1135 * => note: not an inline function 1135 * => note: not an inline function
1136 * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE 1136 * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE
1137 * => we assume kernel only unmaps valid addresses and thus don't bother 1137 * => we assume kernel only unmaps valid addresses and thus don't bother
1138 * checking the valid bit before doing TLB flushing 1138 * checking the valid bit before doing TLB flushing
1139 * => must be followed by call to pmap_update() before reuse of page 1139 * => must be followed by call to pmap_update() before reuse of page
1140 */ 1140 */
1141 1141
1142void 1142void
1143pmap_kremove(vaddr_t sva, vsize_t len) 1143pmap_kremove(vaddr_t sva, vsize_t len)
1144{ 1144{
1145 pt_entry_t *pte, opte; 1145 pt_entry_t *pte, opte;
1146 vaddr_t va, eva; 1146 vaddr_t va, eva;
1147 1147
1148 eva = sva + len; 1148 eva = sva + len;
1149 1149
1150 kpreempt_disable(); 1150 kpreempt_disable();
1151 for (va = sva; va < eva; va += PAGE_SIZE) { 1151 for (va = sva; va < eva; va += PAGE_SIZE) {
1152 if (va < VM_MIN_KERNEL_ADDRESS) 1152 if (va < VM_MIN_KERNEL_ADDRESS)
1153 pte = vtopte(va); 1153 pte = vtopte(va);
1154 else 1154 else
1155 pte = kvtopte(va); 1155 pte = kvtopte(va);
1156 opte = pmap_pte_testset(pte, 0); /* zap! */ 1156 opte = pmap_pte_testset(pte, 0); /* zap! */
1157 if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) { 1157 if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) {
1158 pmap_tlb_shootdown(pmap_kernel(), va, opte, 1158 pmap_tlb_shootdown(pmap_kernel(), va, opte,
1159 TLBSHOOT_KREMOVE); 1159 TLBSHOOT_KREMOVE);
1160 } 1160 }
1161 KASSERT((opte & PG_PS) == 0); 1161 KASSERT((opte & PG_PS) == 0);
1162 KASSERT((opte & PG_PVLIST) == 0); 1162 KASSERT((opte & PG_PVLIST) == 0);
1163 } 1163 }
1164 kpreempt_enable(); 1164 kpreempt_enable();
1165} 1165}
1166 1166
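Per the contract spelled out above, an unmapping done with pmap_kremove() is only complete after pmap_update(); a caller-side sketch, with "va" and "npages" as placeholders:

	pmap_kremove(va, npages * PAGE_SIZE);
	pmap_update(pmap_kernel());	/* wait out the TLB shootdowns */
	/* only now may the backing pages be freed or reused */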
1167/* 1167/*
1168 * p m a p i n i t f u n c t i o n s 1168 * p m a p i n i t f u n c t i o n s
1169 * 1169 *
1170 * pmap_bootstrap and pmap_init are called during system startup 1170 * pmap_bootstrap and pmap_init are called during system startup
1171 * to init the pmap module. pmap_bootstrap() does a low level 1171 * to init the pmap module. pmap_bootstrap() does a low level
1172 * init just to get things rolling. pmap_init() finishes the job. 1172 * init just to get things rolling. pmap_init() finishes the job.
1173 */ 1173 */
1174 1174
1175/* 1175/*
1176 * pmap_bootstrap: get the system in a state where it can run with VM 1176 * pmap_bootstrap: get the system in a state where it can run with VM
1177 * properly enabled (called before main()). the VM system is 1177 * properly enabled (called before main()). the VM system is
1178 * fully init'd later... 1178 * fully init'd later...
1179 * 1179 *
1180 * => on i386, locore.s has already enabled the MMU by allocating 1180 * => on i386, locore.s has already enabled the MMU by allocating
1181 * a PDP for the kernel, and nkpde PTP's for the kernel. 1181 * a PDP for the kernel, and nkpde PTP's for the kernel.
1182 * => kva_start is the first free virtual address in kernel space 1182 * => kva_start is the first free virtual address in kernel space
1183 */ 1183 */
1184 1184
1185void 1185void
1186pmap_bootstrap(vaddr_t kva_start) 1186pmap_bootstrap(vaddr_t kva_start)
1187{ 1187{
1188 struct pmap *kpm; 1188 struct pmap *kpm;
1189 pt_entry_t *pte; 1189 pt_entry_t *pte;
1190 int i; 1190 int i;
1191 vaddr_t kva; 1191 vaddr_t kva;
1192#ifndef XEN 1192#ifndef XEN
1193 pd_entry_t *pde; 1193 pd_entry_t *pde;
1194 unsigned long p1i; 1194 unsigned long p1i;
1195 vaddr_t kva_end; 1195 vaddr_t kva_end;
1196#endif 1196#endif
1197#ifdef __HAVE_DIRECT_MAP 1197#ifdef __HAVE_DIRECT_MAP
1198 phys_ram_seg_t *mc; 1198 phys_ram_seg_t *mc;
1199 long ndmpdp; 1199 long ndmpdp;
1200 paddr_t dmpd, dmpdp, pdp; 1200 paddr_t dmpd, dmpdp, pdp;
1201 vaddr_t tmpva; 1201 vaddr_t tmpva;
1202#endif 1202#endif
1203 1203
1204 pt_entry_t pg_nx = (cpu_feature[2] & CPUID_NOX ? PG_NX : 0); 1204 pt_entry_t pg_nx = (cpu_feature[2] & CPUID_NOX ? PG_NX : 0);
1205 1205
1206 /* 1206 /*
1207 * set up our local static global vars that keep track of the 1207 * set up our local static global vars that keep track of the
1208 * usage of KVM before kernel_map is set up 1208 * usage of KVM before kernel_map is set up
1209 */ 1209 */
1210 1210
1211 virtual_avail = kva_start; /* first free KVA */ 1211 virtual_avail = kva_start; /* first free KVA */
1212 virtual_end = VM_MAX_KERNEL_ADDRESS; /* last KVA */ 1212 virtual_end = VM_MAX_KERNEL_ADDRESS; /* last KVA */
1213 1213
1214 /* 1214 /*
1215 * set up protection_codes: we need to be able to convert from 1215 * set up protection_codes: we need to be able to convert from
1216 * a MI protection code (some combo of VM_PROT...) to something 1216 * a MI protection code (some combo of VM_PROT...) to something
1217 * we can jam into a i386 PTE. 1217 * we can jam into a i386 PTE.
1218 */ 1218 */
1219 1219
1220 protection_codes[VM_PROT_NONE] = pg_nx; /* --- */ 1220 protection_codes[VM_PROT_NONE] = pg_nx; /* --- */
1221 protection_codes[VM_PROT_EXECUTE] = PG_RO | PG_X; /* --x */ 1221 protection_codes[VM_PROT_EXECUTE] = PG_RO | PG_X; /* --x */
1222 protection_codes[VM_PROT_READ] = PG_RO | pg_nx; /* -r- */ 1222 protection_codes[VM_PROT_READ] = PG_RO | pg_nx; /* -r- */
1223 protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO | PG_X;/* -rx */ 1223 protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO | PG_X;/* -rx */
1224 protection_codes[VM_PROT_WRITE] = PG_RW | pg_nx; /* w-- */ 1224 protection_codes[VM_PROT_WRITE] = PG_RW | pg_nx; /* w-- */
1225 protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW | PG_X;/* w-x */ 1225 protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW | PG_X;/* w-x */
1226 protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW | pg_nx; 1226 protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW | pg_nx;
1227 /* wr- */ 1227 /* wr- */
1228 protection_codes[VM_PROT_ALL] = PG_RW | PG_X; /* wrx */ 1228 protection_codes[VM_PROT_ALL] = PG_RW | PG_X; /* wrx */
1229 1229
1230 /* 1230 /*
1231 * now we init the kernel's pmap 1231 * now we init the kernel's pmap
1232 * 1232 *
1233 * the kernel pmap's pm_obj is not used for much. however, in 1233 * the kernel pmap's pm_obj is not used for much. however, in
1234 * user pmaps the pm_obj contains the list of active PTPs. 1234 * user pmaps the pm_obj contains the list of active PTPs.
1235 * the pm_obj currently does not have a pager. it might be possible 1235 * the pm_obj currently does not have a pager. it might be possible
1236 * to add a pager that would allow a process to read-only mmap its 1236 * to add a pager that would allow a process to read-only mmap its
1237 * own page tables (fast user level vtophys?). this may or may not 1237 * own page tables (fast user level vtophys?). this may or may not
1238 * be useful. 1238 * be useful.
1239 */ 1239 */
1240 1240
1241 kpm = pmap_kernel(); 1241 kpm = pmap_kernel();
1242 for (i = 0; i < PTP_LEVELS - 1; i++) { 1242 for (i = 0; i < PTP_LEVELS - 1; i++) {
1243 mutex_init(&kpm->pm_obj_lock[i], MUTEX_DEFAULT, IPL_NONE); 1243 mutex_init(&kpm->pm_obj_lock[i], MUTEX_DEFAULT, IPL_NONE);
1244 uvm_obj_init(&kpm->pm_obj[i], NULL, false, 1); 1244 uvm_obj_init(&kpm->pm_obj[i], NULL, false, 1);
1245 uvm_obj_setlock(&kpm->pm_obj[i], &kpm->pm_obj_lock[i]); 1245 uvm_obj_setlock(&kpm->pm_obj[i], &kpm->pm_obj_lock[i]);
1246 kpm->pm_ptphint[i] = NULL; 1246 kpm->pm_ptphint[i] = NULL;
1247 } 1247 }
1248 memset(&kpm->pm_list, 0, sizeof(kpm->pm_list)); /* pm_list not used */ 1248 memset(&kpm->pm_list, 0, sizeof(kpm->pm_list)); /* pm_list not used */
1249 1249
1250 kpm->pm_pdir = (pd_entry_t *)(PDPpaddr + KERNBASE); 1250 kpm->pm_pdir = (pd_entry_t *)(PDPpaddr + KERNBASE);
1251 for (i = 0; i < PDP_SIZE; i++) 1251 for (i = 0; i < PDP_SIZE; i++)
1252 kpm->pm_pdirpa[i] = PDPpaddr + PAGE_SIZE * i; 1252 kpm->pm_pdirpa[i] = PDPpaddr + PAGE_SIZE * i;
1253 1253
1254 kpm->pm_stats.wired_count = kpm->pm_stats.resident_count = 1254 kpm->pm_stats.wired_count = kpm->pm_stats.resident_count =
1255 x86_btop(kva_start - VM_MIN_KERNEL_ADDRESS); 1255 x86_btop(kva_start - VM_MIN_KERNEL_ADDRESS);
1256 1256
1257 /* 1257 /*
1258 * the above is just a rough estimate and not critical to the proper 1258 * the above is just a rough estimate and not critical to the proper
1259 * operation of the system. 1259 * operation of the system.
1260 */ 1260 */
1261 1261
1262#ifndef XEN 1262#ifndef XEN
1263 /* 1263 /*
1264 * Begin to enable global TLB entries if they are supported. 1264 * Begin to enable global TLB entries if they are supported.
1265 * The G bit has no effect until the CR4_PGE bit is set in CR4, 1265 * The G bit has no effect until the CR4_PGE bit is set in CR4,
1266 * which happens in cpu_init(), which is run on each cpu 1266 * which happens in cpu_init(), which is run on each cpu
1267 * (and happens later) 1267 * (and happens later)
1268 */ 1268 */
1269 1269
1270 if (cpu_feature[0] & CPUID_PGE) { 1270 if (cpu_feature[0] & CPUID_PGE) {
1271 pmap_pg_g = PG_G; /* enable software */ 1271 pmap_pg_g = PG_G; /* enable software */
1272 1272
1273 /* add PG_G attribute to already mapped kernel pages */ 1273 /* add PG_G attribute to already mapped kernel pages */
1274 if (KERNBASE == VM_MIN_KERNEL_ADDRESS) { 1274 if (KERNBASE == VM_MIN_KERNEL_ADDRESS) {
1275 kva_end = virtual_avail; 1275 kva_end = virtual_avail;
1276 } else { 1276 } else {
1277 extern vaddr_t eblob, esym; 1277 extern vaddr_t eblob, esym;
1278 kva_end = (vaddr_t)&end; 1278 kva_end = (vaddr_t)&end;
1279 if (esym > kva_end) 1279 if (esym > kva_end)
1280 kva_end = esym; 1280 kva_end = esym;
1281 if (eblob > kva_end) 1281 if (eblob > kva_end)
1282 kva_end = eblob; 1282 kva_end = eblob;
1283 kva_end = roundup(kva_end, PAGE_SIZE); 1283 kva_end = roundup(kva_end, PAGE_SIZE);
1284 } 1284 }
1285 for (kva = KERNBASE; kva < kva_end; kva += PAGE_SIZE) { 1285 for (kva = KERNBASE; kva < kva_end; kva += PAGE_SIZE) {
1286 p1i = pl1_i(kva); 1286 p1i = pl1_i(kva);
1287 if (pmap_valid_entry(PTE_BASE[p1i])) 1287 if (pmap_valid_entry(PTE_BASE[p1i]))
1288 PTE_BASE[p1i] |= PG_G; 1288 PTE_BASE[p1i] |= PG_G;
1289 } 1289 }
1290 } 1290 }
1291 1291
1292 /* 1292 /*
1293 * enable large pages if they are supported. 1293 * enable large pages if they are supported.
1294 */ 1294 */
1295 1295
1296 if (cpu_feature[0] & CPUID_PSE) { 1296 if (cpu_feature[0] & CPUID_PSE) {
1297 paddr_t pa; 1297 paddr_t pa;
1298 extern char __data_start; 1298 extern char __data_start;
1299 1299
1300 lcr4(rcr4() | CR4_PSE); /* enable hardware (via %cr4) */ 1300 lcr4(rcr4() | CR4_PSE); /* enable hardware (via %cr4) */
1301 pmap_largepages = 1; /* enable software */ 1301 pmap_largepages = 1; /* enable software */
1302 1302
1303 /* 1303 /*
1304 * the TLB must be flushed after enabling large pages 1304 * the TLB must be flushed after enabling large pages
1305 * on Pentium CPUs, according to section 3.6.2.2 of 1305 * on Pentium CPUs, according to section 3.6.2.2 of
1306 * "Intel Architecture Software Developer's Manual, 1306 * "Intel Architecture Software Developer's Manual,
1307 * Volume 3: System Programming". 1307 * Volume 3: System Programming".
1308 */ 1308 */
1309 tlbflushg(); 1309 tlbflushg();
1310 1310
1311 /* 1311 /*
1312 * now, remap the kernel text using large pages. we 1312 * now, remap the kernel text using large pages. we
1313 * assume that the linker has properly aligned the 1313 * assume that the linker has properly aligned the
1314 * .data segment to a NBPD_L2 boundary. 1314 * .data segment to a NBPD_L2 boundary.
1315 */ 1315 */
1316 kva_end = rounddown((vaddr_t)&__data_start, NBPD_L1); 1316 kva_end = rounddown((vaddr_t)&__data_start, NBPD_L1);
1317 for (pa = 0, kva = KERNBASE; kva + NBPD_L2 <= kva_end; 1317 for (pa = 0, kva = KERNBASE; kva + NBPD_L2 <= kva_end;
1318 kva += NBPD_L2, pa += NBPD_L2) { 1318 kva += NBPD_L2, pa += NBPD_L2) {
1319 pde = &L2_BASE[pl2_i(kva)]; 1319 pde = &L2_BASE[pl2_i(kva)];
1320 *pde = pa | pmap_pg_g | PG_PS | 1320 *pde = pa | pmap_pg_g | PG_PS |
1321 PG_KR | PG_V; /* zap! */ 1321 PG_KR | PG_V; /* zap! */
1322 tlbflushg(); 1322 tlbflushg();
1323 } 1323 }
1324#if defined(DEBUG) 1324#if defined(DEBUG)
1325 aprint_normal("kernel text is mapped with %" PRIuPSIZE " large " 1325 aprint_normal("kernel text is mapped with %" PRIuPSIZE " large "
1326 "pages and %" PRIuPSIZE " normal pages\n", 1326 "pages and %" PRIuPSIZE " normal pages\n",
1327 howmany(kva - KERNBASE, NBPD_L2), 1327 howmany(kva - KERNBASE, NBPD_L2),
1328 howmany((vaddr_t)&__data_start - kva, NBPD_L1)); 1328 howmany((vaddr_t)&__data_start - kva, NBPD_L1));
1329#endif /* defined(DEBUG) */ 1329#endif /* defined(DEBUG) */
1330 } 1330 }
1331#endif /* !XEN */ 1331#endif /* !XEN */
1332 1332
1333#ifdef __HAVE_DIRECT_MAP 1333#ifdef __HAVE_DIRECT_MAP
1334 1334
1335 tmpva = (KERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2); 1335 tmpva = (KERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);
1336 pte = PTE_BASE + pl1_i(tmpva); 1336 pte = PTE_BASE + pl1_i(tmpva);
1337 1337
1338 /* 1338 /*
1339 * Map the direct map. Use 1GB pages if they are available, 1339 * Map the direct map. Use 1GB pages if they are available,
1340 * otherwise use 2MB pages. 1340 * otherwise use 2MB pages.
1341 */ 1341 */
1342 1342
1343 mc = &mem_clusters[mem_cluster_cnt - 1]; 1343 mc = &mem_clusters[mem_cluster_cnt - 1];
1344 ndmpdp = (mc->start + mc->size + NBPD_L3 - 1) >> L3_SHIFT; 1344 ndmpdp = (mc->start + mc->size + NBPD_L3 - 1) >> L3_SHIFT;
1345 dmpdp = avail_start; avail_start += PAGE_SIZE; 1345 dmpdp = avail_start; avail_start += PAGE_SIZE;
1346 1346
1347 if (cpu_feature[2] & CPUID_P1GB) { 1347 if (cpu_feature[2] & CPUID_P1GB) {
1348 for (i = 0; i < ndmpdp; i++) { 1348 for (i = 0; i < ndmpdp; i++) {
1349 pdp = (paddr_t)&(((pd_entry_t *)dmpdp)[i]); 1349 pdp = (paddr_t)&(((pd_entry_t *)dmpdp)[i]);
1350 *pte = (pdp & PG_FRAME) | PG_V | PG_RW; 1350 *pte = (pdp & PG_FRAME) | PG_V | PG_RW;
1351 pmap_update_pg(tmpva); 1351 pmap_update_pg(tmpva);
1352 1352
1353 pde = (pd_entry_t *)(tmpva + (pdp & ~PG_FRAME)); 1353 pde = (pd_entry_t *)(tmpva + (pdp & ~PG_FRAME));
1354 *pde = ((paddr_t)i << L3_SHIFT) | 1354 *pde = ((paddr_t)i << L3_SHIFT) |
1355 PG_RW | PG_V | PG_U | PG_PS | PG_G; 1355 PG_RW | PG_V | PG_U | PG_PS | PG_G;
1356 } 1356 }
1357 } else { 1357 } else {
1358 dmpd = avail_start; avail_start += ndmpdp * PAGE_SIZE; 1358 dmpd = avail_start; avail_start += ndmpdp * PAGE_SIZE;
1359 1359
1360 for (i = 0; i < NPDPG * ndmpdp; i++) { 1360 for (i = 0; i < NPDPG * ndmpdp; i++) {
1361 pdp = (paddr_t)&(((pd_entry_t *)dmpd)[i]); 1361 pdp = (paddr_t)&(((pd_entry_t *)dmpd)[i]);
1362 *pte = (pdp & PG_FRAME) | PG_V | PG_RW; 1362 *pte = (pdp & PG_FRAME) | PG_V | PG_RW;
1363 pmap_update_pg(tmpva); 1363 pmap_update_pg(tmpva);
1364 1364
1365 pde = (pd_entry_t *)(tmpva + (pdp & ~PG_FRAME)); 1365 pde = (pd_entry_t *)(tmpva + (pdp & ~PG_FRAME));
1366 *pde = ((paddr_t)i << L2_SHIFT) | 1366 *pde = ((paddr_t)i << L2_SHIFT) |
1367 PG_RW | PG_V | PG_U | PG_PS | PG_G; 1367 PG_RW | PG_V | PG_U | PG_PS | PG_G;
1368 } 1368 }
1369 for (i = 0; i < ndmpdp; i++) { 1369 for (i = 0; i < ndmpdp; i++) {
1370 pdp = (paddr_t)&(((pd_entry_t *)dmpdp)[i]); 1370 pdp = (paddr_t)&(((pd_entry_t *)dmpdp)[i]);
1371 *pte = (pdp & PG_FRAME) | PG_V | PG_RW; 1371 *pte = (pdp & PG_FRAME) | PG_V | PG_RW;
1372 pmap_update_pg((vaddr_t)tmpva); 1372 pmap_update_pg((vaddr_t)tmpva);
1373 1373
1374 pde = (pd_entry_t *)(tmpva + (pdp & ~PG_FRAME)); 1374 pde = (pd_entry_t *)(tmpva + (pdp & ~PG_FRAME));
1375 *pde = (dmpd + (i << PAGE_SHIFT)) | 1375 *pde = (dmpd + (i << PAGE_SHIFT)) |
1376 PG_RW | PG_V | PG_U; 1376 PG_RW | PG_V | PG_U;
1377 } 1377 }
1378 } 1378 }
1379 1379
1380 kpm->pm_pdir[PDIR_SLOT_DIRECT] = dmpdp | PG_KW | PG_V | PG_U; 1380 kpm->pm_pdir[PDIR_SLOT_DIRECT] = dmpdp | PG_KW | PG_V | PG_U;
1381 1381
1382 tlbflush(); 1382 tlbflush();
1383 1383
1384#else 1384#else
1385 if (VM_MIN_KERNEL_ADDRESS != KERNBASE) { 1385 if (VM_MIN_KERNEL_ADDRESS != KERNBASE) {
1386 /* 1386 /*
1387 * zero_pte is stuck at the end of mapped space for the kernel 1387 * zero_pte is stuck at the end of mapped space for the kernel
1388 * image (disjunct from kva space). This is done so that it 1388 * image (disjunct from kva space). This is done so that it
1389 * can safely be used in pmap_growkernel (pmap_get_physpage), 1389 * can safely be used in pmap_growkernel (pmap_get_physpage),
1390 * when it's called for the first time. 1390 * when it's called for the first time.
1391 * XXXfvdl fix this for MULTIPROCESSOR later. 1391 * XXXfvdl fix this for MULTIPROCESSOR later.
1392 */ 1392 */
1393 1393
1394 early_zerop = (void *)(KERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2); 1394 early_zerop = (void *)(KERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);
1395 early_zero_pte = PTE_BASE + pl1_i((vaddr_t)early_zerop); 1395 early_zero_pte = PTE_BASE + pl1_i((vaddr_t)early_zerop);
1396 } 1396 }
1397 1397
1398 /* 1398 /*
1399 * now we allocate the "special" VAs which are used for tmp mappings 1399 * now we allocate the "special" VAs which are used for tmp mappings
1400 * by the pmap (and other modules). we allocate the VAs by advancing 1400 * by the pmap (and other modules). we allocate the VAs by advancing
1401 * virtual_avail (note that there are no pages mapped at these VAs). 1401 * virtual_avail (note that there are no pages mapped at these VAs).
1402 * we find the PTE that maps the allocated VA via the linear PTE 1402 * we find the PTE that maps the allocated VA via the linear PTE
1403 * mapping. 1403 * mapping.
1404 */ 1404 */
1405 1405
1406 pte = PTE_BASE + pl1_i(virtual_avail); 1406 pte = PTE_BASE + pl1_i(virtual_avail);
1407 1407
1408#ifdef MULTIPROCESSOR 1408#ifdef MULTIPROCESSOR
1409 /* 1409 /*
1410 * Waste some VA space to avoid false sharing of cache lines 1410 * Waste some VA space to avoid false sharing of cache lines
1411 * for page table pages: Give each possible CPU a cache line 1411 * for page table pages: Give each possible CPU a cache line
1412 * of PTE's (8) to play with, though we only need 4. We could 1412 * of PTE's (8) to play with, though we only need 4. We could
1413 * recycle some of this waste by putting the idle stacks here 1413 * recycle some of this waste by putting the idle stacks here
1414 * as well; we could waste less space if we knew the largest 1414 * as well; we could waste less space if we knew the largest
1415 * CPU ID beforehand. 1415 * CPU ID beforehand.
1416 */ 1416 */
1417 csrcp = (char *) virtual_avail; csrc_pte = pte; 1417 csrcp = (char *) virtual_avail; csrc_pte = pte;
1418 1418
1419 cdstp = (char *) virtual_avail+PAGE_SIZE; cdst_pte = pte+1; 1419 cdstp = (char *) virtual_avail+PAGE_SIZE; cdst_pte = pte+1;
1420 1420
1421 zerop = (char *) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2; 1421 zerop = (char *) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2;
1422 1422
1423 ptpp = (char *) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3; 1423 ptpp = (char *) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3;
1424 1424
1425 virtual_avail += PAGE_SIZE * maxcpus * NPTECL; 1425 virtual_avail += PAGE_SIZE * maxcpus * NPTECL;
1426 pte += maxcpus * NPTECL; 1426 pte += maxcpus * NPTECL;
1427#else 1427#else
1428 csrcp = (void *) virtual_avail; csrc_pte = pte; /* allocate */ 1428 csrcp = (void *) virtual_avail; csrc_pte = pte; /* allocate */
1429 virtual_avail += PAGE_SIZE; pte++; /* advance */ 1429 virtual_avail += PAGE_SIZE; pte++; /* advance */
1430 1430
1431 cdstp = (void *) virtual_avail; cdst_pte = pte; 1431 cdstp = (void *) virtual_avail; cdst_pte = pte;
1432 virtual_avail += PAGE_SIZE; pte++; 1432 virtual_avail += PAGE_SIZE; pte++;
1433 1433
1434 zerop = (void *) virtual_avail; zero_pte = pte; 1434 zerop = (void *) virtual_avail; zero_pte = pte;
1435 virtual_avail += PAGE_SIZE; pte++; 1435 virtual_avail += PAGE_SIZE; pte++;
1436 1436
1437 ptpp = (void *) virtual_avail; ptp_pte = pte; 1437 ptpp = (void *) virtual_avail; ptp_pte = pte;
1438 virtual_avail += PAGE_SIZE; pte++; 1438 virtual_avail += PAGE_SIZE; pte++;
1439#endif 1439#endif
1440 1440
1441 if (VM_MIN_KERNEL_ADDRESS == KERNBASE) { 1441 if (VM_MIN_KERNEL_ADDRESS == KERNBASE) {
1442 early_zerop = zerop; 1442 early_zerop = zerop;
1443 early_zero_pte = zero_pte; 1443 early_zero_pte = zero_pte;
1444 } 1444 }
1445#endif 1445#endif
1446 1446
1447 /* 1447 /*
1448 * Nothing after this point actually needs pte. 1448 * Nothing after this point actually needs pte.
1449 */ 1449 */
1450 pte = (void *)0xdeadbeef; 1450 pte = (void *)0xdeadbeef;
1451 1451
1452#ifdef XEN 1452#ifdef XEN
1453#ifdef __x86_64__ 1453#ifdef __x86_64__
1454 /* 1454 /*
1455 * We want a dummy page directory for Xen: 1455 * We want a dummy page directory for Xen:
1456 * when we deactivate a pmap, Xen will still consider it active. 1456 * when we deactivate a pmap, Xen will still consider it active.
1457 * So we set the user PGD to this one to lift all protection on 1457 * So we set the user PGD to this one to lift all protection on
1458 * the now-inactive set of page tables. 1458 * the now-inactive set of page tables.
1459 */ 1459 */
1460 xen_dummy_user_pgd = avail_start; 1460 xen_dummy_user_pgd = avail_start;
1461 avail_start += PAGE_SIZE; 1461 avail_start += PAGE_SIZE;
1462  1462
1463 /* Zero fill it, the less checks in Xen it requires the better */ 1463 /* Zero fill it, the less checks in Xen it requires the better */
1464 memset((void *) (xen_dummy_user_pgd + KERNBASE), 0, PAGE_SIZE); 1464 memset((void *) (xen_dummy_user_pgd + KERNBASE), 0, PAGE_SIZE);
1465 /* Mark read-only */ 1465 /* Mark read-only */
1466 HYPERVISOR_update_va_mapping(xen_dummy_user_pgd + KERNBASE, 1466 HYPERVISOR_update_va_mapping(xen_dummy_user_pgd + KERNBASE,
1467 pmap_pa2pte(xen_dummy_user_pgd) | PG_u | PG_V, UVMF_INVLPG); 1467 pmap_pa2pte(xen_dummy_user_pgd) | PG_u | PG_V, UVMF_INVLPG);
1468 /* Pin as L4 */ 1468 /* Pin as L4 */
1469 xpq_queue_pin_l4_table(xpmap_ptom_masked(xen_dummy_user_pgd)); 1469 xpq_queue_pin_l4_table(xpmap_ptom_masked(xen_dummy_user_pgd));
1470#endif /* __x86_64__ */ 1470#endif /* __x86_64__ */
1471 idt_vaddr = virtual_avail; /* don't need pte */ 1471 idt_vaddr = virtual_avail; /* don't need pte */
1472 idt_paddr = avail_start; /* steal a page */ 1472 idt_paddr = avail_start; /* steal a page */
1473 /* 1473 /*
1474 * Xen require one more page as we can't store 1474 * Xen require one more page as we can't store
1475 * GDT and LDT on the same page 1475 * GDT and LDT on the same page
1476 */ 1476 */
1477 virtual_avail += 3 * PAGE_SIZE; 1477 virtual_avail += 3 * PAGE_SIZE;
1478 avail_start += 3 * PAGE_SIZE; 1478 avail_start += 3 * PAGE_SIZE;
1479#else /* XEN */ 1479#else /* XEN */
1480 idt_vaddr = virtual_avail; /* don't need pte */ 1480 idt_vaddr = virtual_avail; /* don't need pte */
1481 idt_paddr = avail_start; /* steal a page */ 1481 idt_paddr = avail_start; /* steal a page */
1482#if defined(__x86_64__) 1482#if defined(__x86_64__)
1483 virtual_avail += 2 * PAGE_SIZE; 1483 virtual_avail += 2 * PAGE_SIZE;
1484 avail_start += 2 * PAGE_SIZE; 1484 avail_start += 2 * PAGE_SIZE;
1485#else /* defined(__x86_64__) */ 1485#else /* defined(__x86_64__) */
1486 virtual_avail += PAGE_SIZE; 1486 virtual_avail += PAGE_SIZE;
1487 avail_start += PAGE_SIZE; 1487 avail_start += PAGE_SIZE;
1488 /* pentium f00f bug stuff */ 1488 /* pentium f00f bug stuff */
1489 pentium_idt_vaddr = virtual_avail; /* don't need pte */ 1489 pentium_idt_vaddr = virtual_avail; /* don't need pte */
1490 virtual_avail += PAGE_SIZE; 1490 virtual_avail += PAGE_SIZE;
1491#endif /* defined(__x86_64__) */ 1491#endif /* defined(__x86_64__) */
1492#endif /* XEN */ 1492#endif /* XEN */
1493 1493
1494#ifdef _LP64 1494#ifdef _LP64
1495 /* 1495 /*
1496 * Grab a page below 4G for things that need it (i.e. 1496 * Grab a page below 4G for things that need it (i.e.
1497 * having an initial %cr3 for the MP trampoline). 1497 * having an initial %cr3 for the MP trampoline).
1498 */ 1498 */
1499 lo32_vaddr = virtual_avail; 1499 lo32_vaddr = virtual_avail;
1500 virtual_avail += PAGE_SIZE; 1500 virtual_avail += PAGE_SIZE;
1501 lo32_paddr = avail_start; 1501 lo32_paddr = avail_start;
1502 avail_start += PAGE_SIZE; 1502 avail_start += PAGE_SIZE;
1503#endif 1503#endif
1504 1504
1505 /* 1505 /*
1506 * now we reserve some VM for mapping pages when doing a crash dump 1506 * now we reserve some VM for mapping pages when doing a crash dump
1507 */ 1507 */
1508 1508
1509 virtual_avail = reserve_dumppages(virtual_avail); 1509 virtual_avail = reserve_dumppages(virtual_avail);
1510 1510
1511 /* 1511 /*
1512 * init the static-global locks and global lists. 1512 * init the static-global locks and global lists.
1513 * 1513 *
1514 * => pventry::pvh_lock (initialized elsewhere) must also be 1514 * => pventry::pvh_lock (initialized elsewhere) must also be
1515 * a spin lock, again at IPL_VM to prevent deadlock, and 1515 * a spin lock, again at IPL_VM to prevent deadlock, and
1516 * again is never taken from interrupt context. 1516 * again is never taken from interrupt context.
1517 */ 1517 */
1518 1518
1519 mutex_init(&pmaps_lock, MUTEX_DEFAULT, IPL_NONE); 1519 mutex_init(&pmaps_lock, MUTEX_DEFAULT, IPL_NONE);
1520 LIST_INIT(&pmaps); 1520 LIST_INIT(&pmaps);
1521 1521
1522 /* 1522 /*
1523 * ensure the TLB is sync'd with reality by flushing it... 1523 * ensure the TLB is sync'd with reality by flushing it...
1524 */ 1524 */
1525 1525
1526 tlbflushg(); 1526 tlbflushg();
1527 1527
1528 /* 1528 /*
1529 * calculate pmap_maxkvaddr from nkptp[]. 1529 * calculate pmap_maxkvaddr from nkptp[].
1530 */ 1530 */
1531 1531
1532 kva = VM_MIN_KERNEL_ADDRESS; 1532 kva = VM_MIN_KERNEL_ADDRESS;
1533 for (i = PTP_LEVELS - 1; i >= 1; i--) { 1533 for (i = PTP_LEVELS - 1; i >= 1; i--) {
1534 kva += nkptp[i] * nbpd[i]; 1534 kva += nkptp[i] * nbpd[i];
1535 } 1535 }
1536 pmap_maxkvaddr = kva; 1536 pmap_maxkvaddr = kva;
1537} 1537}
1538 1538
1539#if defined(__x86_64__) 1539#if defined(__x86_64__)
1540/* 1540/*
1541 * Pre-allocate PTPs for low memory, so that 1:1 mappings for various 1541 * Pre-allocate PTPs for low memory, so that 1:1 mappings for various
1542 * trampoline code can be entered. 1542 * trampoline code can be entered.
1543 */ 1543 */
1544void 1544void
1545pmap_prealloc_lowmem_ptps(void) 1545pmap_prealloc_lowmem_ptps(void)
1546{ 1546{
1547 int level; 1547 int level;
1548 paddr_t newp; 1548 paddr_t newp;
1549#ifdef XEN 1549#ifdef XEN
1550 paddr_t pdes_pa; 1550 paddr_t pdes_pa;
1551 1551
1552 pdes_pa = pmap_pdirpa(pmap_kernel(), 0); 1552 pdes_pa = pmap_pdirpa(pmap_kernel(), 0);
1553 level = PTP_LEVELS; 1553 level = PTP_LEVELS;
1554 for (;;) { 1554 for (;;) {
1555 newp = avail_start; 1555 newp = avail_start;
1556 avail_start += PAGE_SIZE; 1556 avail_start += PAGE_SIZE;
1557 HYPERVISOR_update_va_mapping ((vaddr_t)early_zerop, 1557 HYPERVISOR_update_va_mapping ((vaddr_t)early_zerop,
1558 xpmap_ptom_masked(newp) | PG_u | PG_V | PG_RW, UVMF_INVLPG); 1558 xpmap_ptom_masked(newp) | PG_u | PG_V | PG_RW, UVMF_INVLPG);
1559 memset(early_zerop, 0, PAGE_SIZE); 1559 memset(early_zerop, 0, PAGE_SIZE);
1560 /* Mark R/O before installing */ 1560 /* Mark R/O before installing */
1561 HYPERVISOR_update_va_mapping ((vaddr_t)early_zerop, 1561 HYPERVISOR_update_va_mapping ((vaddr_t)early_zerop,
1562 xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG); 1562 xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG);
1563 if (newp < (NKL2_KIMG_ENTRIES * NBPD_L2)) 1563 if (newp < (NKL2_KIMG_ENTRIES * NBPD_L2))
1564 HYPERVISOR_update_va_mapping (newp + KERNBASE, 1564 HYPERVISOR_update_va_mapping (newp + KERNBASE,
1565 xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG); 1565 xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG);
1566 /* Update the pmap_kernel() L4 shadow */ 1566 /* Update the pmap_kernel() L4 shadow */
1567 xpq_queue_pte_update ( 1567 xpq_queue_pte_update (
1568 xpmap_ptom_masked(pdes_pa) 1568 xpmap_ptom_masked(pdes_pa)
1569 + (pl_i(0, level) * sizeof (pd_entry_t)), 1569 + (pl_i(0, level) * sizeof (pd_entry_t)),
1570 xpmap_ptom_masked(newp) | PG_RW | PG_u | PG_V); 1570 xpmap_ptom_masked(newp) | PG_RW | PG_u | PG_V);
1571 /* sync to per-cpu PD */ 1571 /* sync to per-cpu PD */
1572 xpq_queue_pte_update( 1572 xpq_queue_pte_update(
1573 xpmap_ptom_masked(cpu_info_primary.ci_kpm_pdirpa + 1573 xpmap_ptom_masked(cpu_info_primary.ci_kpm_pdirpa +
1574 pl_i(0, PTP_LEVELS) * 1574 pl_i(0, PTP_LEVELS) *
1575 sizeof(pd_entry_t)), 1575 sizeof(pd_entry_t)),
1576 pmap_kernel()->pm_pdir[pl_i(0, PTP_LEVELS)]); 1576 pmap_kernel()->pm_pdir[pl_i(0, PTP_LEVELS)]);
1577 pmap_pte_flush(); 1577 pmap_pte_flush();
1578 level--; 1578 level--;
1579 if (level <= 1) 1579 if (level <= 1)
1580 break; 1580 break;
1581 pdes_pa = newp; 1581 pdes_pa = newp;
1582 } 1582 }
1583#else /* XEN */ 1583#else /* XEN */
1584 pd_entry_t *pdes; 1584 pd_entry_t *pdes;
1585 1585
1586 pdes = pmap_kernel()->pm_pdir; 1586 pdes = pmap_kernel()->pm_pdir;
1587 level = PTP_LEVELS; 1587 level = PTP_LEVELS;
1588 for (;;) { 1588 for (;;) {
1589 newp = avail_start; 1589 newp = avail_start;
1590 avail_start += PAGE_SIZE; 1590 avail_start += PAGE_SIZE;
1591#ifdef __HAVE_DIRECT_MAP 1591#ifdef __HAVE_DIRECT_MAP
1592 memset((void *)PMAP_DIRECT_MAP(newp), 0, PAGE_SIZE); 1592 memset((void *)PMAP_DIRECT_MAP(newp), 0, PAGE_SIZE);
1593#else 1593#else
1594 pmap_pte_set(early_zero_pte, (newp & PG_FRAME) | PG_V | PG_RW); 1594 pmap_pte_set(early_zero_pte, (newp & PG_FRAME) | PG_V | PG_RW);
1595 pmap_pte_flush(); 1595 pmap_pte_flush();
1596 pmap_update_pg((vaddr_t)early_zerop); 1596 pmap_update_pg((vaddr_t)early_zerop);
1597 memset(early_zerop, 0, PAGE_SIZE); 1597 memset(early_zerop, 0, PAGE_SIZE);
1598#endif 1598#endif
1599 pdes[pl_i(0, level)] = (newp & PG_FRAME) | PG_V | PG_RW; 1599 pdes[pl_i(0, level)] = (newp & PG_FRAME) | PG_V | PG_RW;
1600 level--; 1600 level--;
1601 if (level <= 1) 1601 if (level <= 1)
1602 break; 1602 break;
1603 pdes = normal_pdes[level - 2]; 1603 pdes = normal_pdes[level - 2];
1604 } 1604 }
1605#endif /* XEN */ 1605#endif /* XEN */
1606} 1606}
1607#endif /* defined(__x86_64__) */ 1607#endif /* defined(__x86_64__) */
1608 1608
1609/* 1609/*
1610 * pmap_init: called from uvm_init, our job is to get the pmap 1610 * pmap_init: called from uvm_init, our job is to get the pmap
1611 * system ready to manage mappings... 1611 * system ready to manage mappings...
1612 */ 1612 */
1613 1613
1614void 1614void
1615pmap_init(void) 1615pmap_init(void)
1616{ 1616{
1617 int i; 1617 int i;
1618 1618
1619 for (i = 0; i < PV_HASH_SIZE; i++) { 1619 for (i = 0; i < PV_HASH_SIZE; i++) {
1620 SLIST_INIT(&pv_hash_heads[i].hh_list); 1620 SLIST_INIT(&pv_hash_heads[i].hh_list);
1621 } 1621 }
1622 for (i = 0; i < PV_HASH_LOCK_CNT; i++) { 1622 for (i = 0; i < PV_HASH_LOCK_CNT; i++) {
1623 mutex_init(&pv_hash_locks[i].lock, MUTEX_NODEBUG, IPL_VM); 1623 mutex_init(&pv_hash_locks[i].lock, MUTEX_NODEBUG, IPL_VM);
1624 } 1624 }
1625 1625
1626 /* 1626 /*
1627 * initialize caches. 1627 * initialize caches.
1628 */ 1628 */
1629 1629
1630 pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap), 0, 0, 0, 1630 pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap), 0, 0, 0,
1631 "pmappl", NULL, IPL_NONE, NULL, NULL, NULL); 1631 "pmappl", NULL, IPL_NONE, NULL, NULL, NULL);
1632#ifdef PAE 1632#ifdef PAE
1633 pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE * PDP_SIZE, 0, 0, 0, 1633 pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE * PDP_SIZE, 0, 0, 0,
1634 "pdppl", &pmap_pdp_allocator, IPL_NONE, 1634 "pdppl", &pmap_pdp_allocator, IPL_NONE,
1635 pmap_pdp_ctor, pmap_pdp_dtor, NULL); 1635 pmap_pdp_ctor, pmap_pdp_dtor, NULL);
1636#else /* PAE */ 1636#else /* PAE */
1637 pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE, 0, 0, 0, 1637 pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE, 0, 0, 0,
1638 "pdppl", NULL, IPL_NONE, pmap_pdp_ctor, pmap_pdp_dtor, NULL); 1638 "pdppl", NULL, IPL_NONE, pmap_pdp_ctor, pmap_pdp_dtor, NULL);
1639#endif /* PAE */ 1639#endif /* PAE */
1640 pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0, 1640 pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0,
1641 PR_LARGECACHE, "pvpl", &pool_allocator_kmem, IPL_NONE, NULL, 1641 PR_LARGECACHE, "pvpl", &pool_allocator_kmem, IPL_NONE, NULL,
1642 NULL, NULL); 1642 NULL, NULL);
1643 1643
1644 pmap_tlb_init(); 1644 pmap_tlb_init();
1645 1645
1646 evcnt_attach_dynamic(&pmap_iobmp_evcnt, EVCNT_TYPE_MISC, 1646 evcnt_attach_dynamic(&pmap_iobmp_evcnt, EVCNT_TYPE_MISC,
1647 NULL, "x86", "io bitmap copy"); 1647 NULL, "x86", "io bitmap copy");
1648 evcnt_attach_dynamic(&pmap_ldt_evcnt, EVCNT_TYPE_MISC, 1648 evcnt_attach_dynamic(&pmap_ldt_evcnt, EVCNT_TYPE_MISC,
1649 NULL, "x86", "ldt sync"); 1649 NULL, "x86", "ldt sync");
1650 1650
1651 /* 1651 /*
1652 * done: pmap module is up (and ready for business) 1652 * done: pmap module is up (and ready for business)
1653 */ 1653 */
1654 1654
1655 pmap_initialized = true; 1655 pmap_initialized = true;
1656} 1656}
1657 1657
1658/* 1658/*
1659 * pmap_cpu_init_late: perform late per-CPU initialization. 1659 * pmap_cpu_init_late: perform late per-CPU initialization.
1660 */ 1660 */
1661 1661
1662#ifndef XEN 1662#ifndef XEN
1663void 1663void
1664pmap_cpu_init_late(struct cpu_info *ci) 1664pmap_cpu_init_late(struct cpu_info *ci)
1665{ 1665{
1666 /* 1666 /*
1667 * The BP already has its own PD page allocated during early 1667 * The BP already has its own PD page allocated during early
1668 * MD startup. 1668 * MD startup.
1669 */ 1669 */
1670 if (ci == &cpu_info_primary) 1670 if (ci == &cpu_info_primary)
1671 return; 1671 return;
1672 1672
1673#ifdef PAE 1673#ifdef PAE
1674 cpu_alloc_l3_page(ci); 1674 cpu_alloc_l3_page(ci);
1675#endif 1675#endif
1676} 1676}
1677#endif 1677#endif
1678 1678
1679/* 1679/*
1680 * p v _ e n t r y f u n c t i o n s 1680 * p v _ e n t r y f u n c t i o n s
1681 */ 1681 */
1682 1682
1683/* 1683/*
1684 * pmap_free_pvs: free a list of pv_entrys 1684 * pmap_free_pvs: free a list of pv_entrys
1685 */ 1685 */
1686 1686
1687static void 1687static void
1688pmap_free_pvs(struct pv_entry *pve) 1688pmap_free_pvs(struct pv_entry *pve)
1689{ 1689{
1690 struct pv_entry *next; 1690 struct pv_entry *next;
1691 1691
1692 for ( /* null */ ; pve != NULL ; pve = next) { 1692 for ( /* null */ ; pve != NULL ; pve = next) {
1693 next = pve->pve_next; 1693 next = pve->pve_next;
1694 pool_cache_put(&pmap_pv_cache, pve); 1694 pool_cache_put(&pmap_pv_cache, pve);
1695 } 1695 }
1696} 1696}
1697 1697
1698/* 1698/*
1699 * main pv_entry manipulation functions: 1699 * main pv_entry manipulation functions:
1700 * pmap_enter_pv: enter a mapping onto a pv_head list 1700 * pmap_enter_pv: enter a mapping onto a pv_head list
1701 * pmap_remove_pv: remove a mapping from a pv_head list 1701 * pmap_remove_pv: remove a mapping from a pv_head list
1702 * 1702 *
1703 * NOTE: Both pmap_enter_pv and pmap_remove_pv expect the caller to lock  1703 * NOTE: Both pmap_enter_pv and pmap_remove_pv expect the caller to lock
1704 * the pvh before calling 1704 * the pvh before calling
1705 */ 1705 */
1706 1706
1707/* 1707/*
1708 * insert_pv: a helper of pmap_enter_pv 1708 * insert_pv: a helper of pmap_enter_pv
1709 */ 1709 */
1710 1710
1711static void 1711static void
1712insert_pv(struct pmap_page *pp, struct pv_entry *pve) 1712insert_pv(struct pmap_page *pp, struct pv_entry *pve)
1713{ 1713{
1714 struct pv_hash_head *hh; 1714 struct pv_hash_head *hh;
1715 kmutex_t *lock; 1715 kmutex_t *lock;
1716 u_int hash; 1716 u_int hash;
1717 1717
1718 hash = pvhash_hash(pve->pve_pte.pte_ptp, pve->pve_pte.pte_va); 1718 hash = pvhash_hash(pve->pve_pte.pte_ptp, pve->pve_pte.pte_va);
1719 lock = pvhash_lock(hash); 1719 lock = pvhash_lock(hash);
1720 hh = pvhash_head(hash); 1720 hh = pvhash_head(hash);
1721 mutex_spin_enter(lock); 1721 mutex_spin_enter(lock);
1722 SLIST_INSERT_HEAD(&hh->hh_list, pve, pve_hash); 1722 SLIST_INSERT_HEAD(&hh->hh_list, pve, pve_hash);
1723 mutex_spin_exit(lock); 1723 mutex_spin_exit(lock);
1724 1724
1725 LIST_INSERT_HEAD(&pp->pp_head.pvh_list, pve, pve_list); 1725 LIST_INSERT_HEAD(&pp->pp_head.pvh_list, pve, pve_list);
1726} 1726}
1727 1727
1728/* 1728/*
1729 * pmap_enter_pv: enter a mapping onto a pv_head list 1729 * pmap_enter_pv: enter a mapping onto a pv_head list
1730 * 1730 *
1731 * => caller should adjust ptp's wire_count before calling 1731 * => caller should adjust ptp's wire_count before calling
1732 */ 1732 */
1733 1733
1734static struct pv_entry * 1734static struct pv_entry *
1735pmap_enter_pv(struct pmap_page *pp, 1735pmap_enter_pv(struct pmap_page *pp,
1736 struct pv_entry *pve, /* preallocated pve for us to use */ 1736 struct pv_entry *pve, /* preallocated pve for us to use */
1737 struct pv_entry **sparepve, 1737 struct pv_entry **sparepve,
1738 struct vm_page *ptp, 1738 struct vm_page *ptp,
1739 vaddr_t va) 1739 vaddr_t va)
1740{ 1740{
1741 1741
1742 KASSERT(ptp == NULL || ptp->wire_count >= 2); 1742 KASSERT(ptp == NULL || ptp->wire_count >= 2);
1743 KASSERT(ptp == NULL || ptp->uobject != NULL); 1743 KASSERT(ptp == NULL || ptp->uobject != NULL);
1744 KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset); 1744 KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
1745 1745
1746 if ((pp->pp_flags & PP_EMBEDDED) == 0) { 1746 if ((pp->pp_flags & PP_EMBEDDED) == 0) {
1747 if (LIST_EMPTY(&pp->pp_head.pvh_list)) { 1747 if (LIST_EMPTY(&pp->pp_head.pvh_list)) {
1748 pp->pp_flags |= PP_EMBEDDED; 1748 pp->pp_flags |= PP_EMBEDDED;
1749 pp->pp_pte.pte_ptp = ptp; 1749 pp->pp_pte.pte_ptp = ptp;
1750 pp->pp_pte.pte_va = va; 1750 pp->pp_pte.pte_va = va;
1751 1751
1752 return pve; 1752 return pve;
1753 } 1753 }
1754 } else { 1754 } else {
1755 struct pv_entry *pve2; 1755 struct pv_entry *pve2;
1756 1756
1757 pve2 = *sparepve; 1757 pve2 = *sparepve;
1758 *sparepve = NULL; 1758 *sparepve = NULL;
1759 1759
1760 pve2->pve_pte = pp->pp_pte; 1760 pve2->pve_pte = pp->pp_pte;
1761 pp->pp_flags &= ~PP_EMBEDDED; 1761 pp->pp_flags &= ~PP_EMBEDDED;
1762 LIST_INIT(&pp->pp_head.pvh_list); 1762 LIST_INIT(&pp->pp_head.pvh_list);
1763 insert_pv(pp, pve2); 1763 insert_pv(pp, pve2);
1764 } 1764 }
1765 1765
1766 pve->pve_pte.pte_ptp = ptp; 1766 pve->pve_pte.pte_ptp = ptp;
1767 pve->pve_pte.pte_va = va; 1767 pve->pve_pte.pte_va = va;
1768 insert_pv(pp, pve); 1768 insert_pv(pp, pve);
1769 1769
1770 return NULL; 1770 return NULL;
1771} 1771}
1772 1772
1773/* 1773/*
1774 * pmap_remove_pv: try to remove a mapping from a pv_list 1774 * pmap_remove_pv: try to remove a mapping from a pv_list
1775 * 1775 *
1776 * => caller should adjust ptp's wire_count and free PTP if needed 1776 * => caller should adjust ptp's wire_count and free PTP if needed
1777 * => we return the removed pve 1777 * => we return the removed pve
1778 */ 1778 */
1779 1779
1780static struct pv_entry * 1780static struct pv_entry *
1781pmap_remove_pv(struct pmap_page *pp, struct vm_page *ptp, vaddr_t va) 1781pmap_remove_pv(struct pmap_page *pp, struct vm_page *ptp, vaddr_t va)
1782{ 1782{
1783 struct pv_hash_head *hh; 1783 struct pv_hash_head *hh;
1784 struct pv_entry *pve; 1784 struct pv_entry *pve;
1785 kmutex_t *lock; 1785 kmutex_t *lock;
1786 u_int hash; 1786 u_int hash;
1787 1787
1788 KASSERT(ptp == NULL || ptp->uobject != NULL); 1788 KASSERT(ptp == NULL || ptp->uobject != NULL);
1789 KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset); 1789 KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
1790 1790
1791 if ((pp->pp_flags & PP_EMBEDDED) != 0) { 1791 if ((pp->pp_flags & PP_EMBEDDED) != 0) {
1792 KASSERT(pp->pp_pte.pte_ptp == ptp); 1792 KASSERT(pp->pp_pte.pte_ptp == ptp);
1793 KASSERT(pp->pp_pte.pte_va == va); 1793 KASSERT(pp->pp_pte.pte_va == va);
1794 1794
1795 pp->pp_flags &= ~PP_EMBEDDED; 1795 pp->pp_flags &= ~PP_EMBEDDED;
1796 LIST_INIT(&pp->pp_head.pvh_list); 1796 LIST_INIT(&pp->pp_head.pvh_list);
1797 1797
1798 return NULL; 1798 return NULL;
1799 } 1799 }
1800 1800
1801 hash = pvhash_hash(ptp, va); 1801 hash = pvhash_hash(ptp, va);
1802 lock = pvhash_lock(hash); 1802 lock = pvhash_lock(hash);
1803 hh = pvhash_head(hash); 1803 hh = pvhash_head(hash);
1804 mutex_spin_enter(lock); 1804 mutex_spin_enter(lock);
1805 pve = pvhash_remove(hh, ptp, va); 1805 pve = pvhash_remove(hh, ptp, va);
1806 mutex_spin_exit(lock); 1806 mutex_spin_exit(lock);
1807 1807
1808 LIST_REMOVE(pve, pve_list); 1808 LIST_REMOVE(pve, pve_list);
1809 1809
1810 return pve; 1810 return pve;
1811} 1811}
1812 1812
1813/* 1813/*
1814 * p t p f u n c t i o n s 1814 * p t p f u n c t i o n s
1815 */ 1815 */
1816 1816
1817static inline struct vm_page * 1817static inline struct vm_page *
1818pmap_find_ptp(struct pmap *pmap, vaddr_t va, paddr_t pa, int level) 1818pmap_find_ptp(struct pmap *pmap, vaddr_t va, paddr_t pa, int level)
1819{ 1819{
1820 int lidx = level - 1; 1820 int lidx = level - 1;
1821 struct vm_page *pg; 1821 struct vm_page *pg;
1822 1822
1823 KASSERT(mutex_owned(pmap->pm_lock)); 1823 KASSERT(mutex_owned(pmap->pm_lock));
1824 1824
1825 if (pa != (paddr_t)-1 && pmap->pm_ptphint[lidx] && 1825 if (pa != (paddr_t)-1 && pmap->pm_ptphint[lidx] &&
1826 pa == VM_PAGE_TO_PHYS(pmap->pm_ptphint[lidx])) { 1826 pa == VM_PAGE_TO_PHYS(pmap->pm_ptphint[lidx])) {
1827 return (pmap->pm_ptphint[lidx]); 1827 return (pmap->pm_ptphint[lidx]);
1828 } 1828 }
1829 PMAP_SUBOBJ_LOCK(pmap, lidx); 1829 PMAP_SUBOBJ_LOCK(pmap, lidx);
1830 pg = uvm_pagelookup(&pmap->pm_obj[lidx], ptp_va2o(va, level)); 1830 pg = uvm_pagelookup(&pmap->pm_obj[lidx], ptp_va2o(va, level));
1831 PMAP_SUBOBJ_UNLOCK(pmap, lidx); 1831 PMAP_SUBOBJ_UNLOCK(pmap, lidx);
1832 1832
1833 KASSERT(pg == NULL || pg->wire_count >= 1); 1833 KASSERT(pg == NULL || pg->wire_count >= 1);
1834 return pg; 1834 return pg;
1835} 1835}
1836 1836
1837static inline void 1837static inline void
1838pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level) 1838pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level)
1839{ 1839{
1840 lwp_t *l; 1840 lwp_t *l;
1841 int lidx; 1841 int lidx;
1842 struct uvm_object *obj; 1842 struct uvm_object *obj;
1843 1843
1844 KASSERT(ptp->wire_count == 1); 1844 KASSERT(ptp->wire_count == 1);
1845 1845
1846 lidx = level - 1; 1846 lidx = level - 1;
1847 1847
1848 obj = &pmap->pm_obj[lidx]; 1848 obj = &pmap->pm_obj[lidx];
1849 pmap_stats_update(pmap, -1, 0); 1849 pmap_stats_update(pmap, -1, 0);
1850 if (lidx != 0) 1850 if (lidx != 0)
1851 mutex_enter(obj->vmobjlock); 1851 mutex_enter(obj->vmobjlock);
1852 if (pmap->pm_ptphint[lidx] == ptp) 1852 if (pmap->pm_ptphint[lidx] == ptp)
1853 pmap->pm_ptphint[lidx] = TAILQ_FIRST(&obj->memq); 1853 pmap->pm_ptphint[lidx] = TAILQ_FIRST(&obj->memq);
1854 ptp->wire_count = 0; 1854 ptp->wire_count = 0;
1855 uvm_pagerealloc(ptp, NULL, 0); 1855 uvm_pagerealloc(ptp, NULL, 0);
1856 l = curlwp; 1856 l = curlwp;
1857 KASSERT((l->l_pflag & LP_INTR) == 0); 1857 KASSERT((l->l_pflag & LP_INTR) == 0);
1858 VM_PAGE_TO_PP(ptp)->pp_link = l->l_md.md_gc_ptp; 1858 VM_PAGE_TO_PP(ptp)->pp_link = l->l_md.md_gc_ptp;
1859 l->l_md.md_gc_ptp = ptp; 1859 l->l_md.md_gc_ptp = ptp;
1860 if (lidx != 0) 1860 if (lidx != 0)
1861 mutex_exit(obj->vmobjlock); 1861 mutex_exit(obj->vmobjlock);
1862} 1862}
1863 1863
1864static void 1864static void
1865pmap_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va, 1865pmap_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va,
1866 pt_entry_t *ptes, pd_entry_t * const *pdes) 1866 pt_entry_t *ptes, pd_entry_t * const *pdes)
1867{ 1867{
1868 unsigned long index; 1868 unsigned long index;
1869 int level; 1869 int level;
1870 vaddr_t invaladdr; 1870 vaddr_t invaladdr;
1871 pd_entry_t opde; 1871 pd_entry_t opde;
1872#ifdef XEN 
1873 struct pmap *curpmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map); 
1874#ifdef MULTIPROCESSOR 
1875 vaddr_t invaladdr2; 
1876#endif 
1877#endif 
1878 1872
1879 KASSERT(pmap != pmap_kernel()); 1873 KASSERT(pmap != pmap_kernel());
1880 KASSERT(mutex_owned(pmap->pm_lock)); 1874 KASSERT(mutex_owned(pmap->pm_lock));
1881 KASSERT(kpreempt_disabled()); 1875 KASSERT(kpreempt_disabled());
1882 1876
1883 level = 1; 1877 level = 1;
1884 do { 1878 do {
1885 index = pl_i(va, level + 1); 1879 index = pl_i(va, level + 1);
1886 opde = pmap_pte_testset(&pdes[level - 1][index], 0); 1880 opde = pmap_pte_testset(&pdes[level - 1][index], 0);
1887#if defined(XEN) 1881#if defined(XEN)
1888# if defined(__x86_64__) 1882# if defined(__x86_64__)
1889 /* 1883 /*
1890 * If ptp is a L3 currently mapped in kernel space, 1884 * If ptp is a L3 currently mapped in kernel space,
1891 * on any cpu, clear it before freeing 1885 * on any cpu, clear it before freeing
1892 */ 1886 */
1893 if (level == PTP_LEVELS - 1) { 1887 if (level == PTP_LEVELS - 1) {
1894 /* 1888 /*
1895 * Update the per-cpu PD on all cpus the current 1889 * Update the per-cpu PD on all cpus the current
1896 * pmap is active on  1890 * pmap is active on
1897 */  1891 */
1898 xen_kpm_sync(pmap, index); 1892 xen_kpm_sync(pmap, index);
1899 1893
1900 } 1894 }
1901# endif /*__x86_64__ */ 1895# endif /*__x86_64__ */
1902 invaladdr = level == 1 ? (vaddr_t)ptes : 1896 invaladdr = level == 1 ? (vaddr_t)ptes :
1903 (vaddr_t)pdes[level - 2]; 1897 (vaddr_t)pdes[level - 2];
1904 pmap_tlb_shootdown(curpmap, invaladdr + index * PAGE_SIZE, 1898 pmap_tlb_shootdown(pmap, invaladdr + index * PAGE_SIZE,
1905 opde, TLBSHOOT_FREE_PTP1); 1899 opde, TLBSHOOT_FREE_PTP1);
1906# if defined(MULTIPROCESSOR) 
1907 invaladdr2 = level == 1 ? (vaddr_t)PTE_BASE : 
1908 (vaddr_t)normal_pdes[level - 2]; 
1909 if (pmap != curpmap || invaladdr != invaladdr2) { 
1910 pmap_tlb_shootdown(pmap, invaladdr2 + index * PAGE_SIZE, 
1911 opde, TLBSHOOT_FREE_PTP2); 
1912 } 
1913# endif /* MULTIPROCESSOR */ 
1914#else /* XEN */ 1900#else /* XEN */
1915 invaladdr = level == 1 ? (vaddr_t)ptes : 1901 invaladdr = level == 1 ? (vaddr_t)ptes :
1916 (vaddr_t)pdes[level - 2]; 1902 (vaddr_t)pdes[level - 2];
1917 pmap_tlb_shootdown(pmap, invaladdr + index * PAGE_SIZE, 1903 pmap_tlb_shootdown(pmap, invaladdr + index * PAGE_SIZE,
1918 opde, TLBSHOOT_FREE_PTP1); 1904 opde, TLBSHOOT_FREE_PTP1);
1919#endif /* XEN */ 1905#endif /* XEN */
1920 pmap_freepage(pmap, ptp, level); 1906 pmap_freepage(pmap, ptp, level);
1921 if (level < PTP_LEVELS - 1) { 1907 if (level < PTP_LEVELS - 1) {
1922 ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1); 1908 ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1);
1923 ptp->wire_count--; 1909 ptp->wire_count--;
1924 if (ptp->wire_count > 1) 1910 if (ptp->wire_count > 1)
1925 break; 1911 break;
1926 } 1912 }
1927 } while (++level < PTP_LEVELS); 1913 } while (++level < PTP_LEVELS);
1928 pmap_pte_flush(); 1914 pmap_pte_flush();
1929} 1915}
1930 1916
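The loop in pmap_free_ptp() above runs bottom-up: the now-empty PTP at the current level is unmapped (with a TLB shootdown of its recursive mapping) and handed to the deferred-free list, then the reference it held on its parent is dropped; the parent is processed in turn only when that was its last child. A condensed sketch of just the wire-count walk, with invented names and the unmap/shootdown step reduced to a stub:

#define TOY_LEVELS	4	/* assume a 4-level tree, as on x86-64 */

struct toy_ptp {
	struct toy_ptp *parent;		/* PTP one level up, NULL at the top */
	unsigned int wire_count;	/* 1 for being mapped, plus one per child */
};

/* stand-in for pmap_freepage() plus the TLB shootdown of the old mapping */
static void
toy_release(struct toy_ptp *ptp, int level)
{
	(void)ptp;
	(void)level;
}

void
toy_free_ptp(struct toy_ptp *ptp)
{
	int level = 1;

	do {
		toy_release(ptp, level);
		if (level < TOY_LEVELS - 1) {
			ptp = ptp->parent;
			ptp->wire_count--;
			if (ptp->wire_count > 1)
				break;	/* parent still has other children */
		}
	} while (++level < TOY_LEVELS);
}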
1931/* 1917/*
1932 * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one) 1918 * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one)
1933 * 1919 *
1934 * => pmap should NOT be pmap_kernel() 1920 * => pmap should NOT be pmap_kernel()
1935 * => pmap should be locked 1921 * => pmap should be locked
1936 * => preemption should be disabled 1922 * => preemption should be disabled
1937 */ 1923 */
1938 1924
1939static struct vm_page * 1925static struct vm_page *
1940pmap_get_ptp(struct pmap *pmap, vaddr_t va, pd_entry_t * const *pdes) 1926pmap_get_ptp(struct pmap *pmap, vaddr_t va, pd_entry_t * const *pdes)
1941{ 1927{
1942 struct vm_page *ptp, *pptp; 1928 struct vm_page *ptp, *pptp;
1943 int i; 1929 int i;
1944 unsigned long index; 1930 unsigned long index;
1945 pd_entry_t *pva; 1931 pd_entry_t *pva;
1946 paddr_t ppa, pa; 1932 paddr_t ppa, pa;
1947 struct uvm_object *obj; 1933 struct uvm_object *obj;
1948 1934
1949 KASSERT(pmap != pmap_kernel()); 1935 KASSERT(pmap != pmap_kernel());
1950 KASSERT(mutex_owned(pmap->pm_lock)); 1936 KASSERT(mutex_owned(pmap->pm_lock));
1951 KASSERT(kpreempt_disabled()); 1937 KASSERT(kpreempt_disabled());
1952 1938
1953 ptp = NULL; 1939 ptp = NULL;
1954 pa = (paddr_t)-1; 1940 pa = (paddr_t)-1;
1955 1941
1956 /* 1942 /*
1957 * Loop through all page table levels seeing if we need to 1943 * Loop through all page table levels seeing if we need to
1958 * add a new page to that level. 1944 * add a new page to that level.
1959 */ 1945 */
1960 for (i = PTP_LEVELS; i > 1; i--) { 1946 for (i = PTP_LEVELS; i > 1; i--) {
1961 /* 1947 /*
1962 * Save values from previous round. 1948 * Save values from previous round.
1963 */ 1949 */
1964 pptp = ptp; 1950 pptp = ptp;
1965 ppa = pa; 1951 ppa = pa;
1966 1952
1967 index = pl_i(va, i); 1953 index = pl_i(va, i);
1968 pva = pdes[i - 2]; 1954 pva = pdes[i - 2];
1969 1955
1970 if (pmap_valid_entry(pva[index])) { 1956 if (pmap_valid_entry(pva[index])) {
1971 ppa = pmap_pte2pa(pva[index]); 1957 ppa = pmap_pte2pa(pva[index]);
1972 ptp = NULL; 1958 ptp = NULL;
1973 continue; 1959 continue;
1974 } 1960 }
1975 1961
1976 obj = &pmap->pm_obj[i-2]; 1962 obj = &pmap->pm_obj[i-2];
1977 PMAP_SUBOBJ_LOCK(pmap, i - 2); 1963 PMAP_SUBOBJ_LOCK(pmap, i - 2);
1978 ptp = uvm_pagealloc(obj, ptp_va2o(va, i - 1), NULL, 1964 ptp = uvm_pagealloc(obj, ptp_va2o(va, i - 1), NULL,
1979 UVM_PGA_USERESERVE|UVM_PGA_ZERO); 1965 UVM_PGA_USERESERVE|UVM_PGA_ZERO);
1980 PMAP_SUBOBJ_UNLOCK(pmap, i - 2); 1966 PMAP_SUBOBJ_UNLOCK(pmap, i - 2);
1981 1967
1982 if (ptp == NULL) 1968 if (ptp == NULL)
1983 return NULL; 1969 return NULL;
1984 1970
1985 ptp->flags &= ~PG_BUSY; /* never busy */ 1971 ptp->flags &= ~PG_BUSY; /* never busy */
1986 ptp->wire_count = 1; 1972 ptp->wire_count = 1;
1987 pmap->pm_ptphint[i - 2] = ptp; 1973 pmap->pm_ptphint[i - 2] = ptp;
1988 pa = VM_PAGE_TO_PHYS(ptp); 1974 pa = VM_PAGE_TO_PHYS(ptp);
1989 pmap_pte_set(&pva[index], (pd_entry_t) 1975 pmap_pte_set(&pva[index], (pd_entry_t)
1990 (pmap_pa2pte(pa) | PG_u | PG_RW | PG_V)); 1976 (pmap_pa2pte(pa) | PG_u | PG_RW | PG_V));
1991#if defined(XEN) && defined(__x86_64__) 1977#if defined(XEN) && defined(__x86_64__)
1992 /* 1978 /*
1993 * In Xen we must enter the mapping in kernel map too 1979 * In Xen we must enter the mapping in kernel map too
1994 * if pmap is curmap and modifying top level (PGD) 1980 * if pmap is curmap and modifying top level (PGD)
1995 */ 1981 */
1996 if(i == PTP_LEVELS && pmap != pmap_kernel()) { 1982 if(i == PTP_LEVELS && pmap != pmap_kernel()) {
1997 /* 1983 /*
1998 * Update the per-cpu PD on all cpus the current 1984 * Update the per-cpu PD on all cpus the current
1999 * pmap is active on  1985 * pmap is active on
2000 */  1986 */
2001 xen_kpm_sync(pmap, index); 1987 xen_kpm_sync(pmap, index);
2002 } 1988 }
2003#endif /* XEN && __x86_64__ */ 1989#endif /* XEN && __x86_64__ */
2004 pmap_pte_flush(); 1990 pmap_pte_flush();
2005 pmap_stats_update(pmap, 1, 0); 1991 pmap_stats_update(pmap, 1, 0);
2006 /* 1992 /*
2007 * If we're not in the top level, increase the 1993 * If we're not in the top level, increase the
2008 * wire count of the parent page. 1994 * wire count of the parent page.
2009 */ 1995 */
2010 if (i < PTP_LEVELS) { 1996 if (i < PTP_LEVELS) {
2011 if (pptp == NULL) 1997 if (pptp == NULL)
2012 pptp = pmap_find_ptp(pmap, va, ppa, i); 1998 pptp = pmap_find_ptp(pmap, va, ppa, i);
2013#ifdef DIAGNOSTIC 1999#ifdef DIAGNOSTIC
2014 if (pptp == NULL) 2000 if (pptp == NULL)
2015 panic("pde page disappeared"); 2001 panic("pde page disappeared");
2016#endif 2002#endif
2017 pptp->wire_count++; 2003 pptp->wire_count++;
2018 } 2004 }
2019 } 2005 }
2020 2006
2021 /* 2007 /*
2022 * ptp is not NULL if we just allocated a new ptp. If it's 2008 * ptp is not NULL if we just allocated a new ptp. If it's
2023 * still NULL, we must look up the existing one. 2009 * still NULL, we must look up the existing one.
2024 */ 2010 */
2025 if (ptp == NULL) { 2011 if (ptp == NULL) {
2026 ptp = pmap_find_ptp(pmap, va, ppa, 1); 2012 ptp = pmap_find_ptp(pmap, va, ppa, 1);
2027#ifdef DIAGNOSTIC 2013#ifdef DIAGNOSTIC
2028 if (ptp == NULL) { 2014 if (ptp == NULL) {
2029 printf("va %" PRIxVADDR " ppa %" PRIxPADDR "\n", 2015 printf("va %" PRIxVADDR " ppa %" PRIxPADDR "\n",
2030 va, ppa); 2016 va, ppa);
2031 panic("pmap_get_ptp: unmanaged user PTP"); 2017 panic("pmap_get_ptp: unmanaged user PTP");
2032 } 2018 }
2033#endif 2019#endif
2034 } 2020 }
2035 2021
2036 pmap->pm_ptphint[0] = ptp; 2022 pmap->pm_ptphint[0] = ptp;
2037 return(ptp); 2023 return(ptp);
2038} 2024}
2039 2025
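pmap_get_ptp() above walks from the top level down: a level whose PDE is already valid is simply descended through, while a missing level gets a zeroed page allocated, entered into the directory above it, and one reference charged to the parent's wire_count. The same get-or-allocate shape, shrunk to two levels in user space (calloc standing in for uvm_pagealloc, all names invented), looks like this:

#include <stdlib.h>

#define TOY_ENTRIES	512

struct toy_leaf {
	unsigned long pte[TOY_ENTRIES];		/* leaf "page table" */
};

struct toy_top {
	struct toy_leaf *leaf[TOY_ENTRIES];	/* the "page directory" */
	unsigned int wire_count;		/* 1 + number of leaves below */
};

/* Return the leaf table covering slot i, allocating it if it is missing. */
struct toy_leaf *
toy_get_leaf(struct toy_top *top, unsigned int i)
{
	if (top->leaf[i] == NULL) {
		top->leaf[i] = calloc(1, sizeof(struct toy_leaf));
		if (top->leaf[i] == NULL)
			return NULL;		/* as when pmap_get_ptp() fails */
		top->wire_count++;		/* new child references its parent */
	}
	return top->leaf[i];
}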
2040/* 2026/*
2041 * p m a p l i f e c y c l e f u n c t i o n s 2027 * p m a p l i f e c y c l e f u n c t i o n s
2042 */ 2028 */
2043 2029
2044/* 2030/*
2045 * pmap_pdp_ctor: constructor for the PDP cache. 2031 * pmap_pdp_ctor: constructor for the PDP cache.
2046 */ 2032 */
2047static int 2033static int
2048pmap_pdp_ctor(void *arg, void *v, int flags) 2034pmap_pdp_ctor(void *arg, void *v, int flags)
2049{ 2035{
2050 pd_entry_t *pdir = v; 2036 pd_entry_t *pdir = v;
2051 paddr_t pdirpa = 0; /* XXX: GCC */ 2037 paddr_t pdirpa = 0; /* XXX: GCC */
2052 vaddr_t object; 2038 vaddr_t object;
2053 int i; 2039 int i;
2054 2040
2055#if !defined(XEN) || !defined(__x86_64__) 2041#if !defined(XEN) || !defined(__x86_64__)
2056 int npde; 2042 int npde;
2057#endif 2043#endif
2058#ifdef XEN 2044#ifdef XEN
2059 int s; 2045 int s;
2060#endif 2046#endif
2061 2047
2062 /* 2048 /*
2063 * NOTE: The `pmaps_lock' is held when the PDP is allocated. 2049 * NOTE: The `pmaps_lock' is held when the PDP is allocated.
2064 */ 2050 */
2065 2051
2066#if defined(XEN) && defined(__x86_64__) 2052#if defined(XEN) && defined(__x86_64__)
2067 /* fetch the physical address of the page directory. */ 2053 /* fetch the physical address of the page directory. */
2068 (void) pmap_extract(pmap_kernel(), (vaddr_t) pdir, &pdirpa); 2054 (void) pmap_extract(pmap_kernel(), (vaddr_t) pdir, &pdirpa);
2069 2055
2070 /* zero init area */ 2056 /* zero init area */
2071 memset (pdir, 0, PAGE_SIZE); /* Xen wants a clean page */ 2057 memset (pdir, 0, PAGE_SIZE); /* Xen wants a clean page */
2072 /* 2058 /*
2073 * this pdir will NEVER be active in kernel mode 2059 * this pdir will NEVER be active in kernel mode
2074 * so mark recursive entry invalid 2060 * so mark recursive entry invalid
2075 */ 2061 */
2076 pdir[PDIR_SLOT_PTE] = pmap_pa2pte(pdirpa) | PG_u; 2062 pdir[PDIR_SLOT_PTE] = pmap_pa2pte(pdirpa) | PG_u;
2077 /* 2063 /*
2078 * PDP constructed this way won't be for kernel, 2064 * PDP constructed this way won't be for kernel,
2079 * hence we don't put kernel mappings on Xen. 2065 * hence we don't put kernel mappings on Xen.
2080 * But we need to make pmap_create() happy, so put a dummy (without 2066 * But we need to make pmap_create() happy, so put a dummy (without
2081 * PG_V) value at the right place. 2067 * PG_V) value at the right place.
2082 */ 2068 */
2083 pdir[PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1] - 1] = 2069 pdir[PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1] - 1] =
2084 (pd_entry_t)-1 & PG_FRAME; 2070 (pd_entry_t)-1 & PG_FRAME;
2085#else /* XEN && __x86_64__*/ 2071#else /* XEN && __x86_64__*/
2086 /* zero init area */ 2072 /* zero init area */
2087 memset(pdir, 0, PDIR_SLOT_PTE * sizeof(pd_entry_t)); 2073 memset(pdir, 0, PDIR_SLOT_PTE * sizeof(pd_entry_t));
2088 2074
2089 object = (vaddr_t)v; 2075 object = (vaddr_t)v;
2090 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) { 2076 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) {
2091 /* fetch the physical address of the page directory. */ 2077 /* fetch the physical address of the page directory. */
2092 (void) pmap_extract(pmap_kernel(), object, &pdirpa); 2078 (void) pmap_extract(pmap_kernel(), object, &pdirpa);
2093 /* put in recursive PDE to map the PTEs */ 2079 /* put in recursive PDE to map the PTEs */
2094 pdir[PDIR_SLOT_PTE + i] = pmap_pa2pte(pdirpa) | PG_V; 2080 pdir[PDIR_SLOT_PTE + i] = pmap_pa2pte(pdirpa) | PG_V;
2095#ifndef XEN 2081#ifndef XEN
2096 pdir[PDIR_SLOT_PTE + i] |= PG_KW; 2082 pdir[PDIR_SLOT_PTE + i] |= PG_KW;
2097#endif 2083#endif
2098 } 2084 }
2099 2085
2100 /* copy kernel's PDE */ 2086 /* copy kernel's PDE */
2101 npde = nkptp[PTP_LEVELS - 1]; 2087 npde = nkptp[PTP_LEVELS - 1];
2102 2088
2103 memcpy(&pdir[PDIR_SLOT_KERN], &PDP_BASE[PDIR_SLOT_KERN], 2089 memcpy(&pdir[PDIR_SLOT_KERN], &PDP_BASE[PDIR_SLOT_KERN],
2104 npde * sizeof(pd_entry_t)); 2090 npde * sizeof(pd_entry_t));
2105 2091
2106 /* zero the rest */ 2092 /* zero the rest */
2107 memset(&pdir[PDIR_SLOT_KERN + npde], 0, 2093 memset(&pdir[PDIR_SLOT_KERN + npde], 0,
2108 (NTOPLEVEL_PDES - (PDIR_SLOT_KERN + npde)) * sizeof(pd_entry_t)); 2094 (NTOPLEVEL_PDES - (PDIR_SLOT_KERN + npde)) * sizeof(pd_entry_t));
2109 2095
2110 if (VM_MIN_KERNEL_ADDRESS != KERNBASE) { 2096 if (VM_MIN_KERNEL_ADDRESS != KERNBASE) {
2111 int idx = pl_i(KERNBASE, PTP_LEVELS); 2097 int idx = pl_i(KERNBASE, PTP_LEVELS);
2112 2098
2113 pdir[idx] = PDP_BASE[idx]; 2099 pdir[idx] = PDP_BASE[idx];
2114 } 2100 }
2115 2101
2116#ifdef __HAVE_DIRECT_MAP 2102#ifdef __HAVE_DIRECT_MAP
2117 pdir[PDIR_SLOT_DIRECT] = PDP_BASE[PDIR_SLOT_DIRECT]; 2103 pdir[PDIR_SLOT_DIRECT] = PDP_BASE[PDIR_SLOT_DIRECT];
2118#endif 2104#endif
2119 2105
2120#endif /* XEN && __x86_64__*/ 2106#endif /* XEN && __x86_64__*/
2121#ifdef XEN 2107#ifdef XEN
2122 s = splvm(); 2108 s = splvm();
2123 object = (vaddr_t)v; 2109 object = (vaddr_t)v;
2124 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) { 2110 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) {
2125 (void) pmap_extract(pmap_kernel(), object, &pdirpa); 2111 (void) pmap_extract(pmap_kernel(), object, &pdirpa);
2126 /* FIXME: This should use pmap_protect() .. */ 2112 /* FIXME: This should use pmap_protect() .. */
2127 pmap_kenter_pa(object, pdirpa, VM_PROT_READ, 0); 2113 pmap_kenter_pa(object, pdirpa, VM_PROT_READ, 0);
2128 pmap_update(pmap_kernel()); 2114 pmap_update(pmap_kernel());
2129 /* 2115 /*
2130 * pin as L2/L4 page, we have to do the page with the 2116 * pin as L2/L4 page, we have to do the page with the
2131 * PDIR_SLOT_PTE entries last 2117 * PDIR_SLOT_PTE entries last
2132 */ 2118 */
2133#ifdef PAE 2119#ifdef PAE
2134 if (i == l2tol3(PDIR_SLOT_PTE)) 2120 if (i == l2tol3(PDIR_SLOT_PTE))
2135 continue; 2121 continue;
2136#endif 2122#endif
2137 2123
2138#ifdef __x86_64__ 2124#ifdef __x86_64__
2139 xpq_queue_pin_l4_table(xpmap_ptom_masked(pdirpa)); 2125 xpq_queue_pin_l4_table(xpmap_ptom_masked(pdirpa));
2140#else 2126#else
2141 xpq_queue_pin_l2_table(xpmap_ptom_masked(pdirpa)); 2127 xpq_queue_pin_l2_table(xpmap_ptom_masked(pdirpa));
2142#endif 2128#endif
2143 } 2129 }
2144#ifdef PAE 2130#ifdef PAE
2145 object = ((vaddr_t)pdir) + PAGE_SIZE * l2tol3(PDIR_SLOT_PTE); 2131 object = ((vaddr_t)pdir) + PAGE_SIZE * l2tol3(PDIR_SLOT_PTE);
2146 (void)pmap_extract(pmap_kernel(), object, &pdirpa); 2132 (void)pmap_extract(pmap_kernel(), object, &pdirpa);
2147 xpq_queue_pin_l2_table(xpmap_ptom_masked(pdirpa)); 2133 xpq_queue_pin_l2_table(xpmap_ptom_masked(pdirpa));
2148#endif 2134#endif
2149 splx(s); 2135 splx(s);
2150#endif /* XEN */ 2136#endif /* XEN */
2151 2137
2152 return (0); 2138 return (0);
2153} 2139}
2154 2140
2155/* 2141/*
2156 * pmap_pdp_dtor: destructor for the PDP cache. 2142 * pmap_pdp_dtor: destructor for the PDP cache.
2157 */ 2143 */
2158 2144
2159static void 2145static void
2160pmap_pdp_dtor(void *arg, void *v) 2146pmap_pdp_dtor(void *arg, void *v)
2161{ 2147{
2162#ifdef XEN 2148#ifdef XEN
2163 paddr_t pdirpa = 0; /* XXX: GCC */ 2149 paddr_t pdirpa = 0; /* XXX: GCC */
2164 vaddr_t object = (vaddr_t)v; 2150 vaddr_t object = (vaddr_t)v;
2165 int i; 2151 int i;
2166 int s = splvm(); 2152 int s = splvm();
2167 pt_entry_t *pte; 2153 pt_entry_t *pte;
2168 2154
2169 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) { 2155 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) {
2170 /* fetch the physical address of the page directory. */ 2156 /* fetch the physical address of the page directory. */
2171 (void) pmap_extract(pmap_kernel(), object, &pdirpa); 2157 (void) pmap_extract(pmap_kernel(), object, &pdirpa);
2172 /* unpin page table */ 2158 /* unpin page table */
2173 xpq_queue_unpin_table(xpmap_ptom_masked(pdirpa)); 2159 xpq_queue_unpin_table(xpmap_ptom_masked(pdirpa));
2174 } 2160 }
2175 object = (vaddr_t)v; 2161 object = (vaddr_t)v;
2176 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) { 2162 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) {
2177 /* Set page RW again */ 2163 /* Set page RW again */
2178 pte = kvtopte(object); 2164 pte = kvtopte(object);
2179 xpq_queue_pte_update(xpmap_ptetomach(pte), *pte | PG_RW); 2165 xpq_queue_pte_update(xpmap_ptetomach(pte), *pte | PG_RW);
2180 xpq_queue_invlpg((vaddr_t)object); 2166 xpq_queue_invlpg((vaddr_t)object);
2181 } 2167 }
2182 splx(s); 2168 splx(s);
2183#endif /* XEN */ 2169#endif /* XEN */
2184} 2170}
2185 2171
2186#ifdef PAE 2172#ifdef PAE
2187 2173
2188/* pmap_pdp_alloc: Allocate a page for the pdp memory pool. */ 2174/* pmap_pdp_alloc: Allocate a page for the pdp memory pool. */
2189 2175
2190static void * 2176static void *
2191pmap_pdp_alloc(struct pool *pp, int flags) 2177pmap_pdp_alloc(struct pool *pp, int flags)
2192{ 2178{
2193 return (void *)uvm_km_alloc(kernel_map, 2179 return (void *)uvm_km_alloc(kernel_map,
2194 PAGE_SIZE * PDP_SIZE, PAGE_SIZE * PDP_SIZE, 2180 PAGE_SIZE * PDP_SIZE, PAGE_SIZE * PDP_SIZE,
2195 ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK) 2181 ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK)
2196 | UVM_KMF_WIRED); 2182 | UVM_KMF_WIRED);
2197} 2183}
2198 2184
2199/* 2185/*
2200 * pmap_pdp_free: free a PDP 2186 * pmap_pdp_free: free a PDP
2201 */ 2187 */
2202 2188
2203static void 2189static void
2204pmap_pdp_free(struct pool *pp, void *v) 2190pmap_pdp_free(struct pool *pp, void *v)
2205{ 2191{
2206 uvm_km_free(kernel_map, (vaddr_t)v, PAGE_SIZE * PDP_SIZE, 2192 uvm_km_free(kernel_map, (vaddr_t)v, PAGE_SIZE * PDP_SIZE,
2207 UVM_KMF_WIRED); 2193 UVM_KMF_WIRED);
2208} 2194}
2209#endif /* PAE */ 2195#endif /* PAE */
2210 2196
2211/* 2197/*
2212 * pmap_create: create a pmap 2198 * pmap_create: create a pmap
2213 * 2199 *
2214 * => note: the old pmap interface took a "size" arg which allowed for 2200 * => note: the old pmap interface took a "size" arg which allowed for
2215 * the creation of "software only" pmaps (not in bsd). 2201 * the creation of "software only" pmaps (not in bsd).
2216 */ 2202 */
2217 2203
2218struct pmap * 2204struct pmap *
2219pmap_create(void) 2205pmap_create(void)
2220{ 2206{
2221 struct pmap *pmap; 2207 struct pmap *pmap;
2222 int i; 2208 int i;
2223 2209
2224 pmap = pool_cache_get(&pmap_cache, PR_WAITOK); 2210 pmap = pool_cache_get(&pmap_cache, PR_WAITOK);
2225 2211
2226 /* init uvm_object */ 2212 /* init uvm_object */
2227 for (i = 0; i < PTP_LEVELS - 1; i++) { 2213 for (i = 0; i < PTP_LEVELS - 1; i++) {
2228 mutex_init(&pmap->pm_obj_lock[i], MUTEX_DEFAULT, IPL_NONE); 2214 mutex_init(&pmap->pm_obj_lock[i], MUTEX_DEFAULT, IPL_NONE);
2229 uvm_obj_init(&pmap->pm_obj[i], NULL, false, 1); 2215 uvm_obj_init(&pmap->pm_obj[i], NULL, false, 1);
2230 uvm_obj_setlock(&pmap->pm_obj[i], &pmap->pm_obj_lock[i]); 2216 uvm_obj_setlock(&pmap->pm_obj[i], &pmap->pm_obj_lock[i]);
2231 pmap->pm_ptphint[i] = NULL; 2217 pmap->pm_ptphint[i] = NULL;
2232 } 2218 }
2233 pmap->pm_stats.wired_count = 0; 2219 pmap->pm_stats.wired_count = 0;
2234 /* count the PDP allocd below */ 2220 /* count the PDP allocd below */
2235 pmap->pm_stats.resident_count = PDP_SIZE; 2221 pmap->pm_stats.resident_count = PDP_SIZE;
2236#if !defined(__x86_64__) 2222#if !defined(__x86_64__)
2237 pmap->pm_hiexec = 0; 2223 pmap->pm_hiexec = 0;
2238#endif /* !defined(__x86_64__) */ 2224#endif /* !defined(__x86_64__) */
2239 pmap->pm_flags = 0; 2225 pmap->pm_flags = 0;
2240 pmap->pm_cpus = 0; 2226 pmap->pm_cpus = 0;
2241 pmap->pm_kernel_cpus = 0; 2227 pmap->pm_kernel_cpus = 0;
2242 pmap->pm_gc_ptp = NULL; 2228 pmap->pm_gc_ptp = NULL;
2243 2229
2244 /* init the LDT */ 2230 /* init the LDT */
2245 pmap->pm_ldt = NULL; 2231 pmap->pm_ldt = NULL;
2246 pmap->pm_ldt_len = 0; 2232 pmap->pm_ldt_len = 0;
2247 pmap->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL); 2233 pmap->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL);
2248 2234
2249 /* allocate PDP */ 2235 /* allocate PDP */
2250 try_again: 2236 try_again:
2251 pmap->pm_pdir = pool_cache_get(&pmap_pdp_cache, PR_WAITOK); 2237 pmap->pm_pdir = pool_cache_get(&pmap_pdp_cache, PR_WAITOK);
2252 2238
2253 mutex_enter(&pmaps_lock); 2239 mutex_enter(&pmaps_lock);
2254 2240
2255 if (pmap->pm_pdir[PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1] - 1] == 0) { 2241 if (pmap->pm_pdir[PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1] - 1] == 0) {
2256 mutex_exit(&pmaps_lock); 2242 mutex_exit(&pmaps_lock);
2257 pool_cache_destruct_object(&pmap_pdp_cache, pmap->pm_pdir); 2243 pool_cache_destruct_object(&pmap_pdp_cache, pmap->pm_pdir);
2258 goto try_again; 2244 goto try_again;
2259 } 2245 }
2260 2246
2261 for (i = 0; i < PDP_SIZE; i++) 2247 for (i = 0; i < PDP_SIZE; i++)
2262 pmap->pm_pdirpa[i] = 2248 pmap->pm_pdirpa[i] =
2263 pmap_pte2pa(pmap->pm_pdir[PDIR_SLOT_PTE + i]); 2249 pmap_pte2pa(pmap->pm_pdir[PDIR_SLOT_PTE + i]);
2264 2250
2265 LIST_INSERT_HEAD(&pmaps, pmap, pm_list); 2251 LIST_INSERT_HEAD(&pmaps, pmap, pm_list);
2266 2252
2267 mutex_exit(&pmaps_lock); 2253 mutex_exit(&pmaps_lock);
2268 2254
2269 return (pmap); 2255 return (pmap);
2270} 2256}
2271 2257
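pmap_create() above gets the PDP from the pool cache without holding pmaps_lock, then takes the lock and checks the last kernel PDE slot for the current nkptp; a zero there means the PDP was constructed before the kernel area grew, so it is destructed and the allocation retried. Below is a loose user-space restatement of that construct/validate/retry pattern; the pthread mutex and generation counter are stand-ins chosen for the sketch, not the kernel's API.

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t toy_pmaps_lock = PTHREAD_MUTEX_INITIALIZER;
static int toy_kernel_gen;	/* bumped whenever the "kernel PD" grows */

struct toy_pdp {
	int built_against_gen;	/* what the constructor saw */
};

static struct toy_pdp *
toy_pdp_construct(void)
{
	struct toy_pdp *pdp = malloc(sizeof(*pdp));

	if (pdp == NULL)
		abort();	/* keep the sketch simple */
	pthread_mutex_lock(&toy_pmaps_lock);
	pdp->built_against_gen = toy_kernel_gen;
	pthread_mutex_unlock(&toy_pmaps_lock);
	return pdp;
}

/* Returns a PDP known to be current; toy_pmaps_lock is held on return. */
struct toy_pdp *
toy_pdp_get_current(void)
{
	struct toy_pdp *pdp;

	for (;;) {
		pdp = toy_pdp_construct();	/* may block; done unlocked */
		pthread_mutex_lock(&toy_pmaps_lock);
		if (pdp->built_against_gen == toy_kernel_gen)
			return pdp;		/* still current, keep the lock */
		pthread_mutex_unlock(&toy_pmaps_lock);
		free(pdp);			/* stale: discard and retry */
	}
}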
2272/* 2258/*
2273 * pmap_free_ptps: put a list of ptps back to the freelist. 2259 * pmap_free_ptps: put a list of ptps back to the freelist.
2274 */ 2260 */
2275 2261
2276static void 2262static void
2277pmap_free_ptps(struct vm_page *empty_ptps) 2263pmap_free_ptps(struct vm_page *empty_ptps)
2278{ 2264{
2279 struct vm_page *ptp; 2265 struct vm_page *ptp;
2280 struct pmap_page *pp; 2266 struct pmap_page *pp;
2281 2267
2282 while ((ptp = empty_ptps) != NULL) { 2268 while ((ptp = empty_ptps) != NULL) {
2283 pp = VM_PAGE_TO_PP(ptp); 2269 pp = VM_PAGE_TO_PP(ptp);
2284 empty_ptps = pp->pp_link; 2270 empty_ptps = pp->pp_link;
2285 LIST_INIT(&pp->pp_head.pvh_list); 2271 LIST_INIT(&pp->pp_head.pvh_list);
2286 uvm_pagefree(ptp); 2272 uvm_pagefree(ptp);
2287 } 2273 }
2288} 2274}
2289 2275
2290/* 2276/*
2291 * pmap_destroy: drop reference count on pmap. free pmap if 2277 * pmap_destroy: drop reference count on pmap. free pmap if
2292 * reference count goes to zero. 2278 * reference count goes to zero.
2293 */ 2279 */
2294 2280
2295void 2281void
2296pmap_destroy(struct pmap *pmap) 2282pmap_destroy(struct pmap *pmap)
2297{ 2283{
2298 int i; 2284 int i;
2299#ifdef DIAGNOSTIC 2285#ifdef DIAGNOSTIC
2300 struct cpu_info *ci; 2286 struct cpu_info *ci;
2301 CPU_INFO_ITERATOR cii; 2287 CPU_INFO_ITERATOR cii;
2302#endif /* DIAGNOSTIC */ 2288#endif /* DIAGNOSTIC */
2303 lwp_t *l; 2289 lwp_t *l;
2304 2290
2305 /* 2291 /*
2306 * If we have torn down this pmap, process deferred frees and 2292 * If we have torn down this pmap, process deferred frees and
2307 * invalidations. Free now if the system is low on memory. 2293 * invalidations. Free now if the system is low on memory.
2308 * Otherwise, free when the pmap is destroyed thus avoiding a 2294 * Otherwise, free when the pmap is destroyed thus avoiding a
2309 * TLB shootdown. 2295 * TLB shootdown.
2310 */ 2296 */
2311 l = curlwp; 2297 l = curlwp;
2312 if (__predict_false(l->l_md.md_gc_pmap == pmap)) { 2298 if (__predict_false(l->l_md.md_gc_pmap == pmap)) {
2313 if (uvmexp.free < uvmexp.freetarg) { 2299 if (uvmexp.free < uvmexp.freetarg) {
2314 pmap_update(pmap); 2300 pmap_update(pmap);
2315 } else { 2301 } else {
2316 KASSERT(pmap->pm_gc_ptp == NULL); 2302 KASSERT(pmap->pm_gc_ptp == NULL);
2317 pmap->pm_gc_ptp = l->l_md.md_gc_ptp; 2303 pmap->pm_gc_ptp = l->l_md.md_gc_ptp;
2318 l->l_md.md_gc_ptp = NULL; 2304 l->l_md.md_gc_ptp = NULL;
2319 l->l_md.md_gc_pmap = NULL; 2305 l->l_md.md_gc_pmap = NULL;
2320 } 2306 }
2321 } 2307 }
2322 2308
2323 /* 2309 /*
2324 * drop reference count 2310 * drop reference count
2325 */ 2311 */
2326 2312
2327 if (atomic_dec_uint_nv(&pmap->pm_obj[0].uo_refs) > 0) { 2313 if (atomic_dec_uint_nv(&pmap->pm_obj[0].uo_refs) > 0) {
2328 return; 2314 return;
2329 } 2315 }
2330 2316
2331#ifdef DIAGNOSTIC 2317#ifdef DIAGNOSTIC
2332 for (CPU_INFO_FOREACH(cii, ci)) 2318 for (CPU_INFO_FOREACH(cii, ci))
2333 if (ci->ci_pmap == pmap) 2319 if (ci->ci_pmap == pmap)
2334 panic("destroying pmap being used"); 2320 panic("destroying pmap being used");
2335#endif /* DIAGNOSTIC */ 2321#endif /* DIAGNOSTIC */
2336 2322
2337 /* 2323 /*
2338 * reference count is zero, free pmap resources and then free pmap. 2324 * reference count is zero, free pmap resources and then free pmap.
2339 */ 2325 */
2340 2326
2341 /* 2327 /*
2342 * remove it from global list of pmaps 2328 * remove it from global list of pmaps
2343 */ 2329 */
2344 2330
2345 mutex_enter(&pmaps_lock); 2331 mutex_enter(&pmaps_lock);
2346 LIST_REMOVE(pmap, pm_list); 2332 LIST_REMOVE(pmap, pm_list);
2347 mutex_exit(&pmaps_lock); 2333 mutex_exit(&pmaps_lock);
2348 2334
2349 /* 2335 /*
2350 * Process deferred PTP frees. No TLB shootdown required, as the 2336 * Process deferred PTP frees. No TLB shootdown required, as the
2351 * PTP pages are no longer visible to any CPU. 2337 * PTP pages are no longer visible to any CPU.
2352 */ 2338 */
2353 2339
2354 pmap_free_ptps(pmap->pm_gc_ptp); 2340 pmap_free_ptps(pmap->pm_gc_ptp);
2355 2341
2356 /* 2342 /*
2357 * destroyed pmap shouldn't have remaining PTPs 2343 * destroyed pmap shouldn't have remaining PTPs
2358 */ 2344 */
2359 2345
2360 for (i = 0; i < PTP_LEVELS - 1; i++) { 2346 for (i = 0; i < PTP_LEVELS - 1; i++) {
2361 KASSERT(pmap->pm_obj[i].uo_npages == 0); 2347 KASSERT(pmap->pm_obj[i].uo_npages == 0);
2362 KASSERT(TAILQ_EMPTY(&pmap->pm_obj[i].memq)); 2348 KASSERT(TAILQ_EMPTY(&pmap->pm_obj[i].memq));
2363 } 2349 }
2364 2350
2365 /* 2351 /*
2366 * MULTIPROCESSOR -- no need to flush out of other processors' 2352 * MULTIPROCESSOR -- no need to flush out of other processors'
2367 * APTE space because we do that in pmap_unmap_ptes(). 2353 * APTE space because we do that in pmap_unmap_ptes().
2368 */ 2354 */
2369 pool_cache_put(&pmap_pdp_cache, pmap->pm_pdir); 2355 pool_cache_put(&pmap_pdp_cache, pmap->pm_pdir);
2370 2356
2371#ifdef USER_LDT 2357#ifdef USER_LDT
2372 if (pmap->pm_ldt != NULL) { 2358 if (pmap->pm_ldt != NULL) {
2373 /* 2359 /*
2374 * no need to switch the LDT; this address space is gone, 2360 * no need to switch the LDT; this address space is gone,
2375 * nothing is using it. 2361 * nothing is using it.
2376 * 2362 *
2377 * No need to lock the pmap for ldt_free (or anything else), 2363 * No need to lock the pmap for ldt_free (or anything else),
2378 * we're the last one to use it. 2364 * we're the last one to use it.
2379 */ 2365 */
2380 mutex_enter(&cpu_lock); 2366 mutex_enter(&cpu_lock);
2381 ldt_free(pmap->pm_ldt_sel); 2367 ldt_free(pmap->pm_ldt_sel);
2382 mutex_exit(&cpu_lock); 2368 mutex_exit(&cpu_lock);
2383 uvm_km_free(kernel_map, (vaddr_t)pmap->pm_ldt, 2369 uvm_km_free(kernel_map, (vaddr_t)pmap->pm_ldt,
2384 pmap->pm_ldt_len, UVM_KMF_WIRED); 2370 pmap->pm_ldt_len, UVM_KMF_WIRED);
2385 } 2371 }
2386#endif 2372#endif
2387 2373
2388 for (i = 0; i < PTP_LEVELS - 1; i++) { 2374 for (i = 0; i < PTP_LEVELS - 1; i++) {
2389 uvm_obj_destroy(&pmap->pm_obj[i], false); 2375 uvm_obj_destroy(&pmap->pm_obj[i], false);
2390 mutex_destroy(&pmap->pm_obj_lock[i]); 2376 mutex_destroy(&pmap->pm_obj_lock[i]);
2391 } 2377 }
2392 pool_cache_put(&pmap_cache, pmap); 2378 pool_cache_put(&pmap_cache, pmap);
2393} 2379}
2394 2380
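The teardown in pmap_destroy() above only runs for the final reference: atomic_dec_uint_nv() returns the new count, and any caller that sees a nonzero result returns immediately. A minimal sketch of the same release pattern with C11 atomics (note the kernel primitive returns the new value, while atomic_fetch_sub returns the old one):

#include <stdatomic.h>
#include <stdlib.h>

struct toy_obj {
	atomic_uint refs;	/* starts at 1 when the object is created */
	/* ... resources owned by the object ... */
};

void
toy_obj_release(struct toy_obj *obj)
{
	/* atomic_fetch_sub returns the old value: 1 means we were the last */
	if (atomic_fetch_sub(&obj->refs, 1) > 1)
		return;		/* other references remain; nothing to do */

	/* last reference gone: tear down resources, then the object itself */
	free(obj);
}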
2395/* 2381/*
2396 * pmap_remove_all: pmap is being torn down by the current thread. 2382 * pmap_remove_all: pmap is being torn down by the current thread.
2397 * avoid unnecessary invalidations. 2383 * avoid unnecessary invalidations.
2398 */ 2384 */
2399 2385
2400void 2386void
2401pmap_remove_all(struct pmap *pmap) 2387pmap_remove_all(struct pmap *pmap)
2402{ 2388{
2403 lwp_t *l = curlwp; 2389 lwp_t *l = curlwp;
2404 2390
2405 KASSERT(l->l_md.md_gc_pmap == NULL); 2391 KASSERT(l->l_md.md_gc_pmap == NULL);
2406 2392
2407 l->l_md.md_gc_pmap = pmap; 2393 l->l_md.md_gc_pmap = pmap;
2408} 2394}
2409 2395
2410#if defined(PMAP_FORK) 2396#if defined(PMAP_FORK)
2411/* 2397/*
2412 * pmap_fork: perform any necessary data structure manipulation when 2398 * pmap_fork: perform any necessary data structure manipulation when
2413 * a VM space is forked. 2399 * a VM space is forked.
2414 */ 2400 */
2415 2401
2416void 2402void
2417pmap_fork(struct pmap *pmap1, struct pmap *pmap2) 2403pmap_fork(struct pmap *pmap1, struct pmap *pmap2)
2418{ 2404{
2419#ifdef USER_LDT 2405#ifdef USER_LDT
2420 union descriptor *new_ldt; 2406 union descriptor *new_ldt;
2421 size_t len; 2407 size_t len;
2422 int sel; 2408 int sel;
2423 2409
2424 if (__predict_true(pmap1->pm_ldt == NULL)) { 2410 if (__predict_true(pmap1->pm_ldt == NULL)) {
2425 return; 2411 return;
2426 } 2412 }
2427 2413
2428 retry: 2414 retry:
2429 if (pmap1->pm_ldt != NULL) { 2415 if (pmap1->pm_ldt != NULL) {
2430 len = pmap1->pm_ldt_len; 2416 len = pmap1->pm_ldt_len;
2431 new_ldt = (union descriptor *)uvm_km_alloc(kernel_map, len, 0, 2417 new_ldt = (union descriptor *)uvm_km_alloc(kernel_map, len, 0,
2432 UVM_KMF_WIRED); 2418 UVM_KMF_WIRED);
2433 mutex_enter(&cpu_lock); 2419 mutex_enter(&cpu_lock);
2434 sel = ldt_alloc(new_ldt, len); 2420 sel = ldt_alloc(new_ldt, len);
2435 if (sel == -1) { 2421 if (sel == -1) {
2436 mutex_exit(&cpu_lock); 2422 mutex_exit(&cpu_lock);
2437 uvm_km_free(kernel_map, (vaddr_t)new_ldt, len, 2423 uvm_km_free(kernel_map, (vaddr_t)new_ldt, len,
2438 UVM_KMF_WIRED); 2424 UVM_KMF_WIRED);
2439 printf("WARNING: pmap_fork: unable to allocate LDT\n"); 2425 printf("WARNING: pmap_fork: unable to allocate LDT\n");
2440 return; 2426 return;
2441 } 2427 }
2442 } else { 2428 } else {
2443 len = -1; 2429 len = -1;
2444 new_ldt = NULL; 2430 new_ldt = NULL;
2445 sel = -1; 2431 sel = -1;
2446 mutex_enter(&cpu_lock); 2432 mutex_enter(&cpu_lock);
2447 } 2433 }
2448 2434
2449 /* Copy the LDT, if necessary. */ 2435 /* Copy the LDT, if necessary. */
2450 if (pmap1->pm_ldt != NULL) { 2436 if (pmap1->pm_ldt != NULL) {
2451 if (len != pmap1->pm_ldt_len) { 2437 if (len != pmap1->pm_ldt_len) {
2452 if (len != -1) { 2438 if (len != -1) {
2453 ldt_free(sel); 2439 ldt_free(sel);
2454 uvm_km_free(kernel_map, (vaddr_t)new_ldt, 2440 uvm_km_free(kernel_map, (vaddr_t)new_ldt,
2455 len, UVM_KMF_WIRED); 2441 len, UVM_KMF_WIRED);
2456 } 2442 }
2457 mutex_exit(&cpu_lock); 2443 mutex_exit(&cpu_lock);
2458 goto retry; 2444 goto retry;
2459 } 2445 }
2460  2446
2461 memcpy(new_ldt, pmap1->pm_ldt, len); 2447 memcpy(new_ldt, pmap1->pm_ldt, len);
2462 pmap2->pm_ldt = new_ldt; 2448 pmap2->pm_ldt = new_ldt;
2463 pmap2->pm_ldt_len = pmap1->pm_ldt_len; 2449 pmap2->pm_ldt_len = pmap1->pm_ldt_len;
2464 pmap2->pm_ldt_sel = sel; 2450 pmap2->pm_ldt_sel = sel;
2465 len = -1; 2451 len = -1;
2466 } 2452 }
2467 2453
2468 if (len != -1) { 2454 if (len != -1) {
2469 ldt_free(sel); 2455 ldt_free(sel);
2470 uvm_km_free(kernel_map, (vaddr_t)new_ldt, len, 2456 uvm_km_free(kernel_map, (vaddr_t)new_ldt, len,
2471 UVM_KMF_WIRED); 2457 UVM_KMF_WIRED);
2472 } 2458 }
2473 mutex_exit(&cpu_lock); 2459 mutex_exit(&cpu_lock);
2474#endif /* USER_LDT */ 2460#endif /* USER_LDT */
2475} 2461}
2476#endif /* PMAP_FORK */ 2462#endif /* PMAP_FORK */
2477 2463
2478#ifdef USER_LDT 2464#ifdef USER_LDT
2479 2465
2480/* 2466/*
2481 * pmap_ldt_xcall: cross call used by pmap_ldt_sync. if the named pmap 2467 * pmap_ldt_xcall: cross call used by pmap_ldt_sync. if the named pmap
2482 * is active, reload LDTR. 2468 * is active, reload LDTR.
2483 */ 2469 */
2484static void 2470static void
2485pmap_ldt_xcall(void *arg1, void *arg2) 2471pmap_ldt_xcall(void *arg1, void *arg2)
2486{ 2472{
2487 struct pmap *pm; 2473 struct pmap *pm;
2488 2474
2489 kpreempt_disable(); 2475 kpreempt_disable();
2490 pm = arg1; 2476 pm = arg1;
2491 if (curcpu()->ci_pmap == pm) { 2477 if (curcpu()->ci_pmap == pm) {
2492 lldt(pm->pm_ldt_sel); 2478 lldt(pm->pm_ldt_sel);
2493 } 2479 }
2494 kpreempt_enable(); 2480 kpreempt_enable();
2495} 2481}
2496 2482
2497/* 2483/*
2498 * pmap_ldt_sync: LDT selector for the named pmap is changing. swap 2484 * pmap_ldt_sync: LDT selector for the named pmap is changing. swap
2499 * in the new selector on all CPUs. 2485 * in the new selector on all CPUs.
2500 */ 2486 */
2501void 2487void
2502pmap_ldt_sync(struct pmap *pm) 2488pmap_ldt_sync(struct pmap *pm)
2503{ 2489{
2504 uint64_t where; 2490 uint64_t where;
2505 2491
2506 KASSERT(mutex_owned(&cpu_lock)); 2492 KASSERT(mutex_owned(&cpu_lock));
2507 2493
2508 pmap_ldt_evcnt.ev_count++; 2494 pmap_ldt_evcnt.ev_count++;
2509 where = xc_broadcast(0, pmap_ldt_xcall, pm, NULL); 2495 where = xc_broadcast(0, pmap_ldt_xcall, pm, NULL);
2510 xc_wait(where); 2496 xc_wait(where);
2511} 2497}
2512 2498
2513/* 2499/*
2514 * pmap_ldt_cleanup: if the pmap has a local LDT, deallocate it, and 2500 * pmap_ldt_cleanup: if the pmap has a local LDT, deallocate it, and
2515 * restore the default. 2501 * restore the default.
2516 */ 2502 */
2517 2503
2518void 2504void
2519pmap_ldt_cleanup(struct lwp *l) 2505pmap_ldt_cleanup(struct lwp *l)
2520{ 2506{
2521 pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap; 2507 pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap;
2522 union descriptor *dp = NULL; 2508 union descriptor *dp = NULL;
2523 size_t len = 0; 2509 size_t len = 0;
2524 int sel = -1; 2510 int sel = -1;
2525 2511
2526 if (__predict_true(pmap->pm_ldt == NULL)) { 2512 if (__predict_true(pmap->pm_ldt == NULL)) {
2527 return; 2513 return;
2528 } 2514 }
2529 2515
2530 mutex_enter(&cpu_lock); 2516 mutex_enter(&cpu_lock);
2531 if (pmap->pm_ldt != NULL) { 2517 if (pmap->pm_ldt != NULL) {
2532 sel = pmap->pm_ldt_sel; 2518 sel = pmap->pm_ldt_sel;
2533 dp = pmap->pm_ldt; 2519 dp = pmap->pm_ldt;
2534 len = pmap->pm_ldt_len; 2520 len = pmap->pm_ldt_len;
2535 pmap->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL); 2521 pmap->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL);
2536 pmap->pm_ldt = NULL; 2522 pmap->pm_ldt = NULL;
2537 pmap->pm_ldt_len = 0; 2523 pmap->pm_ldt_len = 0;
2538 pmap_ldt_sync(pmap); 2524 pmap_ldt_sync(pmap);
2539 ldt_free(sel); 2525 ldt_free(sel);
2540 uvm_km_free(kernel_map, (vaddr_t)dp, len, UVM_KMF_WIRED); 2526 uvm_km_free(kernel_map, (vaddr_t)dp, len, UVM_KMF_WIRED);
2541 } 2527 }
2542 mutex_exit(&cpu_lock); 2528 mutex_exit(&cpu_lock);
2543} 2529}
2544#endif /* USER_LDT */ 2530#endif /* USER_LDT */
2545 2531
2546/* 2532/*
2547 * pmap_activate: activate a process' pmap 2533 * pmap_activate: activate a process' pmap
2548 * 2534 *
2549 * => must be called with kernel preemption disabled 2535 * => must be called with kernel preemption disabled
2550 * => if lwp is the curlwp, then set ci_want_pmapload so that 2536 * => if lwp is the curlwp, then set ci_want_pmapload so that
2551 * actual MMU context switch will be done by pmap_load() later 2537 * actual MMU context switch will be done by pmap_load() later
2552 */ 2538 */
2553 2539
2554void 2540void
2555pmap_activate(struct lwp *l) 2541pmap_activate(struct lwp *l)
2556{ 2542{
2557 struct cpu_info *ci; 2543 struct cpu_info *ci;
2558 struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); 2544 struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
2559 2545
2560 KASSERT(kpreempt_disabled()); 2546 KASSERT(kpreempt_disabled());
2561 2547
2562 ci = curcpu(); 2548 ci = curcpu();
2563 2549
2564 if (l == ci->ci_curlwp) { 2550 if (l == ci->ci_curlwp) {
2565 KASSERT(ci->ci_want_pmapload == 0); 2551 KASSERT(ci->ci_want_pmapload == 0);
2566 KASSERT(ci->ci_tlbstate != TLBSTATE_VALID); 2552 KASSERT(ci->ci_tlbstate != TLBSTATE_VALID);
2567#ifdef KSTACK_CHECK_DR0 2553#ifdef KSTACK_CHECK_DR0
2568 /* 2554 /*
2569 * setup breakpoint on the top of stack 2555 * setup breakpoint on the top of stack
2570 */ 2556 */
2571 if (l == &lwp0) 2557 if (l == &lwp0)
2572 dr0(0, 0, 0, 0); 2558 dr0(0, 0, 0, 0);
2573 else 2559 else
2574 dr0(KSTACK_LOWEST_ADDR(l), 1, 3, 1); 2560 dr0(KSTACK_LOWEST_ADDR(l), 1, 3, 1);
2575#endif 2561#endif
2576 2562
2577 /* 2563 /*
2578 * no need to switch to kernel vmspace because 2564 * no need to switch to kernel vmspace because
2579 * it's a subset of any vmspace. 2565 * it's a subset of any vmspace.
2580 */ 2566 */
2581 2567
2582 if (pmap == pmap_kernel()) { 2568 if (pmap == pmap_kernel()) {
2583 ci->ci_want_pmapload = 0; 2569 ci->ci_want_pmapload = 0;
2584 return; 2570 return;
2585 } 2571 }
2586 2572
2587 ci->ci_want_pmapload = 1; 2573 ci->ci_want_pmapload = 1;
2588 } 2574 }
2589} 2575}
2590 2576
2591/* 2577/*
2592 * pmap_reactivate: try to regain reference to the pmap. 2578 * pmap_reactivate: try to regain reference to the pmap.
2593 * 2579 *
2594 * => must be called with kernel preemption disabled 2580 * => must be called with kernel preemption disabled
2595 */ 2581 */
2596 2582
2597static bool 2583static bool
2598pmap_reactivate(struct pmap *pmap) 2584pmap_reactivate(struct pmap *pmap)
2599{ 2585{
2600 struct cpu_info *ci; 2586 struct cpu_info *ci;
2601 uint32_t cpumask; 2587 uint32_t cpumask;
2602 bool result;  2588 bool result;
2603 uint32_t oldcpus; 2589 uint32_t oldcpus;
2604 2590
2605 ci = curcpu(); 2591 ci = curcpu();
2606 cpumask = ci->ci_cpumask; 2592 cpumask = ci->ci_cpumask;
2607 2593
2608 KASSERT(kpreempt_disabled()); 2594 KASSERT(kpreempt_disabled());
2609#if defined(XEN) && defined(__x86_64__) 2595#if defined(XEN) && defined(__x86_64__)
2610 KASSERT(pmap_pdirpa(pmap, 0) == ci->ci_xen_current_user_pgd); 2596 KASSERT(pmap_pdirpa(pmap, 0) == ci->ci_xen_current_user_pgd);
2611#elif defined(PAE) 2597#elif defined(PAE)
2612 KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(ci->ci_pae_l3_pdir[0])); 2598 KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(ci->ci_pae_l3_pdir[0]));
2613#elif !defined(XEN)  2599#elif !defined(XEN)
2614 KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(rcr3())); 2600 KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(rcr3()));
2615#endif 2601#endif
2616 2602
2617 /* 2603 /*
2618 * if we still have a lazy reference to this pmap, 2604 * if we still have a lazy reference to this pmap,
2619 * we can assume that there was no tlb shootdown 2605 * we can assume that there was no tlb shootdown
2620 * for this pmap in the meantime. 2606 * for this pmap in the meantime.
2621 * 2607 *
2622 * the order of events here is important as we must 2608 * the order of events here is important as we must
2623 * synchronize with TLB shootdown interrupts. declare 2609 * synchronize with TLB shootdown interrupts. declare
2624 * interest in invalidations (TLBSTATE_VALID) and then 2610 * interest in invalidations (TLBSTATE_VALID) and then
2625 * check the cpumask, which the IPIs can change only 2611 * check the cpumask, which the IPIs can change only
2626 * when the state is TLBSTATE_LAZY. 2612 * when the state is TLBSTATE_LAZY.
2627 */ 2613 */
2628 2614
2629 ci->ci_tlbstate = TLBSTATE_VALID; 2615 ci->ci_tlbstate = TLBSTATE_VALID;
2630 oldcpus = pmap->pm_cpus; 2616 oldcpus = pmap->pm_cpus;
2631 KASSERT((pmap->pm_kernel_cpus & cpumask) != 0); 2617 KASSERT((pmap->pm_kernel_cpus & cpumask) != 0);
2632 if (oldcpus & cpumask) { 2618 if (oldcpus & cpumask) {
2633 /* got it */ 2619 /* got it */
2634 result = true; 2620 result = true;
2635 } else { 2621 } else {
2636 /* must reload */ 2622 /* must reload */
2637 atomic_or_32(&pmap->pm_cpus, cpumask); 2623 atomic_or_32(&pmap->pm_cpus, cpumask);
2638 result = false; 2624 result = false;
2639 } 2625 }
2640 2626
2641 return result; 2627 return result;
2642} 2628}
2643 2629
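The comment inside pmap_reactivate() above is the heart of the lazy-TLB scheme: the CPU first publishes that it is interested in invalidations again (TLBSTATE_VALID) and only then inspects pm_cpus, which remote shootdown processing may clear only while the CPU is in the lazy state. A hedged sketch of that declare-then-check ordering, with invented names and seq_cst C11 atomics standing in for the kernel's own ordering guarantees:

#include <stdatomic.h>
#include <stdbool.h>

enum { TOY_TLB_LAZY, TOY_TLB_VALID };

static _Atomic int toy_tlbstate = TOY_TLB_LAZY;	/* this CPU's state */
static atomic_uint toy_pm_cpus;			/* bit set => TLB kept in sync */

/*
 * Returns true when no shootdown was skipped while we were lazy; returns
 * false when the caller must flush the TLB and rejoin the pmap's CPU set.
 */
bool
toy_reactivate(unsigned int my_bit)
{
	atomic_store(&toy_tlbstate, TOY_TLB_VALID);	/* from here on, IPIs reach us */
	if (atomic_load(&toy_pm_cpus) & my_bit)
		return true;				/* bit survived: TLB still good */
	atomic_fetch_or(&toy_pm_cpus, my_bit);		/* rejoin; caller flushes */
	return false;
}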
2644/* 2630/*
2645 * pmap_load: actually switch pmap. (fill in %cr3 and LDT info) 2631 * pmap_load: actually switch pmap. (fill in %cr3 and LDT info)
2646 * 2632 *
2647 * ensures that the current process' pmap is loaded on the current cpu's MMU 2633 * ensures that the current process' pmap is loaded on the current cpu's MMU
2648 * and there's no stale TLB entries. 2634 * and there's no stale TLB entries.
2649 * 2635 *
2650 * the caller should disable preemption or do check-and-retry to prevent 2636 * the caller should disable preemption or do check-and-retry to prevent
2651 * a preemption from undoing our efforts. 2637 * a preemption from undoing our efforts.
2652 * 2638 *
2653 * this function can block. 2639 * this function can block.
2654 */ 2640 */
2655 2641
2656void 2642void
2657pmap_load(void) 2643pmap_load(void)
2658{ 2644{
2659 struct cpu_info *ci; 2645 struct cpu_info *ci;
2660 uint32_t cpumask; 2646 uint32_t cpumask;
2661 struct pmap *pmap; 2647 struct pmap *pmap;
2662 struct pmap *oldpmap; 2648 struct pmap *oldpmap;
2663 struct lwp *l; 2649 struct lwp *l;
2664 struct pcb *pcb; 2650 struct pcb *pcb;
2665 uint64_t ncsw; 2651 uint64_t ncsw;
2666 2652
2667 kpreempt_disable(); 2653 kpreempt_disable();
2668 retry: 2654 retry:
2669 ci = curcpu(); 2655 ci = curcpu();
2670 if (!ci->ci_want_pmapload) { 2656 if (!ci->ci_want_pmapload) {
2671 kpreempt_enable(); 2657 kpreempt_enable();
2672 return; 2658 return;
2673 } 2659 }
2674 cpumask = ci->ci_cpumask; 2660 cpumask = ci->ci_cpumask;
2675 l = ci->ci_curlwp; 2661 l = ci->ci_curlwp;
2676 ncsw = l->l_ncsw; 2662 ncsw = l->l_ncsw;
2677 2663
2678 /* should be able to take ipis. */ 2664 /* should be able to take ipis. */
2679 KASSERT(ci->ci_ilevel < IPL_HIGH);  2665 KASSERT(ci->ci_ilevel < IPL_HIGH);
2680#ifdef XEN 2666#ifdef XEN
2681 /* Check to see if interrupts are enabled (i.e., no events are masked) */ 2667 /* Check to see if interrupts are enabled (i.e., no events are masked) */
2682 KASSERT(x86_read_psl() == 0); 2668 KASSERT(x86_read_psl() == 0);
2683#else 2669#else
2684 KASSERT((x86_read_psl() & PSL_I) != 0); 2670 KASSERT((x86_read_psl() & PSL_I) != 0);
2685#endif 2671#endif
2686 2672
2687 KASSERT(l != NULL); 2673 KASSERT(l != NULL);
2688 pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); 2674 pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
2689 KASSERT(pmap != pmap_kernel()); 2675 KASSERT(pmap != pmap_kernel());
2690 oldpmap = ci->ci_pmap; 2676 oldpmap = ci->ci_pmap;
2691 pcb = lwp_getpcb(l); 2677 pcb = lwp_getpcb(l);
2692 2678
2693 if (pmap == oldpmap) { 2679 if (pmap == oldpmap) {
2694 if (!pmap_reactivate(pmap)) { 2680 if (!pmap_reactivate(pmap)) {
2695 u_int gen = uvm_emap_gen_return(); 2681 u_int gen = uvm_emap_gen_return();
2696 2682
2697 /* 2683 /*
2698 * pmap has been changed while it was deactivated. 2684 * pmap has been changed while it was deactivated.
2699 * our tlb may be stale. 2685 * our tlb may be stale.
2700 */ 2686 */
2701 2687
2702 tlbflush(); 2688 tlbflush();
2703 uvm_emap_update(gen); 2689 uvm_emap_update(gen);
2704 } 2690 }
2705 2691
2706 ci->ci_want_pmapload = 0; 2692 ci->ci_want_pmapload = 0;
2707 kpreempt_enable(); 2693 kpreempt_enable();
2708 return; 2694 return;
2709 } 2695 }
2710 2696
2711 /* 2697 /*
2712 * grab a reference to the new pmap. 2698 * grab a reference to the new pmap.
2713 */ 2699 */
2714 2700
2715 pmap_reference(pmap); 2701 pmap_reference(pmap);
2716 2702
2717 /* 2703 /*
2718 * actually switch pmap. 2704 * actually switch pmap.
2719 */ 2705 */
2720 2706
2721 atomic_and_32(&oldpmap->pm_cpus, ~cpumask); 2707 atomic_and_32(&oldpmap->pm_cpus, ~cpumask);
2722 atomic_and_32(&oldpmap->pm_kernel_cpus, ~cpumask); 2708 atomic_and_32(&oldpmap->pm_kernel_cpus, ~cpumask);
2723 2709
2724#if defined(XEN) && defined(__x86_64__) 2710#if defined(XEN) && defined(__x86_64__)
2725 KASSERT(pmap_pdirpa(oldpmap, 0) == ci->ci_xen_current_user_pgd || 2711 KASSERT(pmap_pdirpa(oldpmap, 0) == ci->ci_xen_current_user_pgd ||
2726 oldpmap == pmap_kernel()); 2712 oldpmap == pmap_kernel());
2727#elif defined(PAE) 2713#elif defined(PAE)
2728 KASSERT(pmap_pdirpa(oldpmap, 0) == pmap_pte2pa(ci->ci_pae_l3_pdir[0])); 2714 KASSERT(pmap_pdirpa(oldpmap, 0) == pmap_pte2pa(ci->ci_pae_l3_pdir[0]));
2729#elif !defined(XEN) 2715#elif !defined(XEN)
2730 KASSERT(pmap_pdirpa(oldpmap, 0) == pmap_pte2pa(rcr3())); 2716 KASSERT(pmap_pdirpa(oldpmap, 0) == pmap_pte2pa(rcr3()));
2731#endif 2717#endif
2732 KASSERT((pmap->pm_cpus & cpumask) == 0); 2718 KASSERT((pmap->pm_cpus & cpumask) == 0);
2733 KASSERT((pmap->pm_kernel_cpus & cpumask) == 0); 2719 KASSERT((pmap->pm_kernel_cpus & cpumask) == 0);
2734 2720
2735 /* 2721 /*
2736 * mark the pmap in use by this processor. again we must 2722 * mark the pmap in use by this processor. again we must
2737 * synchronize with TLB shootdown interrupts, so set the 2723 * synchronize with TLB shootdown interrupts, so set the
2738 * state VALID first, then register us for shootdown events 2724 * state VALID first, then register us for shootdown events
2739 * on this pmap. 2725 * on this pmap.
2740 */ 2726 */
2741 2727
2742 ci->ci_tlbstate = TLBSTATE_VALID; 2728 ci->ci_tlbstate = TLBSTATE_VALID;
2743 atomic_or_32(&pmap->pm_cpus, cpumask); 2729 atomic_or_32(&pmap->pm_cpus, cpumask);
2744 atomic_or_32(&pmap->pm_kernel_cpus, cpumask); 2730 atomic_or_32(&pmap->pm_kernel_cpus, cpumask);
2745 ci->ci_pmap = pmap; 2731 ci->ci_pmap = pmap;
2746 2732
2747 /* 2733 /*
2748 * update tss. now that we have registered for invalidations 2734 * update tss. now that we have registered for invalidations
2749 * from other CPUs, we're good to load the page tables. 2735 * from other CPUs, we're good to load the page tables.
2750 */ 2736 */
2751#ifdef PAE 2737#ifdef PAE
2752 pcb->pcb_cr3 = ci->ci_pae_l3_pdirpa; 2738 pcb->pcb_cr3 = ci->ci_pae_l3_pdirpa;
2753#else 2739#else
2754 pcb->pcb_cr3 = pmap_pdirpa(pmap, 0); 2740 pcb->pcb_cr3 = pmap_pdirpa(pmap, 0);
2755#endif 2741#endif
2756 2742
2757#ifdef i386 2743#ifdef i386
2758#ifndef XEN 2744#ifndef XEN
2759 ci->ci_tss.tss_ldt = pmap->pm_ldt_sel; 2745 ci->ci_tss.tss_ldt = pmap->pm_ldt_sel;
2760 ci->ci_tss.tss_cr3 = pcb->pcb_cr3; 2746 ci->ci_tss.tss_cr3 = pcb->pcb_cr3;
2761#endif /* !XEN */ 2747#endif /* !XEN */
2762#endif /* i386 */ 2748#endif /* i386 */
2763 2749
2764 lldt(pmap->pm_ldt_sel); 2750 lldt(pmap->pm_ldt_sel);
2765 2751
2766 u_int gen = uvm_emap_gen_return(); 2752 u_int gen = uvm_emap_gen_return();
2767 cpu_load_pmap(pmap); 2753 cpu_load_pmap(pmap);
2768 uvm_emap_update(gen); 2754 uvm_emap_update(gen);
2769 2755
2770 ci->ci_want_pmapload = 0; 2756 ci->ci_want_pmapload = 0;
2771 2757
2772 /* 2758 /*
2773 * we're now running with the new pmap. drop the reference 2759 * we're now running with the new pmap. drop the reference
2774 * to the old pmap. if we block, we need to go around again. 2760 * to the old pmap. if we block, we need to go around again.
2775 */ 2761 */
2776 2762
2777 pmap_destroy(oldpmap); 2763 pmap_destroy(oldpmap);
2778 if (l->l_ncsw != ncsw) { 2764 if (l->l_ncsw != ncsw) {
2779 goto retry; 2765 goto retry;
2780 } 2766 }
2781 2767
2782 kpreempt_enable(); 2768 kpreempt_enable();
2783} 2769}
2784 2770
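pmap_load() above can block (its own comment says so), so it samples the LWP's context-switch count up front and, after dropping the reference to the old pmap, goes around again if that count changed, i.e. if the thread may have run elsewhere in the meantime. The retry skeleton, reduced to user space with an invented generation counter in place of l_ncsw and the actual context switch replaced by a stub:

#include <stdatomic.h>

static atomic_ulong toy_switch_count;	/* stand-in for l_ncsw: bumped on every
					   context switch of this thread */

/* stand-in for the real work: switch CR3, LDT, CPU masks; may block */
static void
toy_switch_mmu_context(void)
{
}

void
toy_load(void)
{
	unsigned long ncsw;

	do {
		ncsw = atomic_load(&toy_switch_count);
		toy_switch_mmu_context();
		/* if we were switched out while working, do it all again */
	} while (atomic_load(&toy_switch_count) != ncsw);
}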
2785/* 2771/*
2786 * pmap_deactivate: deactivate a process' pmap. 2772 * pmap_deactivate: deactivate a process' pmap.
2787 * 2773 *
2788 * => Must be called with kernel preemption disabled (high IPL is enough). 2774 * => Must be called with kernel preemption disabled (high IPL is enough).
2789 */ 2775 */
2790void 2776void
2791pmap_deactivate(struct lwp *l) 2777pmap_deactivate(struct lwp *l)
2792{ 2778{
2793 struct pmap *pmap; 2779 struct pmap *pmap;
2794 struct cpu_info *ci; 2780 struct cpu_info *ci;
2795 2781
2796 KASSERT(kpreempt_disabled()); 2782 KASSERT(kpreempt_disabled());
2797 2783
2798 if (l != curlwp) { 2784 if (l != curlwp) {
2799 return; 2785 return;
2800 } 2786 }
2801 2787
2802 /* 2788 /*
2803 * Wait for pending TLB shootdowns to complete. Necessary because 2789 * Wait for pending TLB shootdowns to complete. Necessary because
2804 * TLB shootdown state is per-CPU, and the LWP may be coming off 2790 * TLB shootdown state is per-CPU, and the LWP may be coming off
2805 * the CPU before it has a chance to call pmap_update(), e.g. due 2791 * the CPU before it has a chance to call pmap_update(), e.g. due
2806 * to kernel preemption or blocking routine in between. 2792 * to kernel preemption or blocking routine in between.
2807 */ 2793 */
2808 pmap_tlb_shootnow(); 2794 pmap_tlb_shootnow();
2809 2795
2810 ci = curcpu(); 2796 ci = curcpu();
2811 2797
2812 if (ci->ci_want_pmapload) { 2798 if (ci->ci_want_pmapload) {
2813 /* 2799 /*
2814 * ci_want_pmapload means that our pmap is not loaded on 2800 * ci_want_pmapload means that our pmap is not loaded on
2815 * the CPU or TLB might be stale. note that pmap_kernel() 2801 * the CPU or TLB might be stale. note that pmap_kernel()
2816 * is always considered loaded. 2802 * is always considered loaded.
2817 */ 2803 */
2818 KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map) 2804 KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map)
2819 != pmap_kernel()); 2805 != pmap_kernel());
2820 KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map) 2806 KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map)
2821 != ci->ci_pmap || ci->ci_tlbstate != TLBSTATE_VALID); 2807 != ci->ci_pmap || ci->ci_tlbstate != TLBSTATE_VALID);
2822 2808
2823 /* 2809 /*
2824 * userspace has not been touched. 2810 * userspace has not been touched.
2825 * nothing to do here. 2811 * nothing to do here.
2826 */ 2812 */
2827 2813
2828 ci->ci_want_pmapload = 0; 2814 ci->ci_want_pmapload = 0;
2829 return; 2815 return;
2830 } 2816 }
2831 2817
2832 pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); 2818 pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
2833 2819
2834 if (pmap == pmap_kernel()) { 2820 if (pmap == pmap_kernel()) {
2835 return; 2821 return;
2836 } 2822 }
2837 2823
2838#if defined(XEN) && defined(__x86_64__) 2824#if defined(XEN) && defined(__x86_64__)
2839 KASSERT(pmap_pdirpa(pmap, 0) == ci->ci_xen_current_user_pgd); 2825 KASSERT(pmap_pdirpa(pmap, 0) == ci->ci_xen_current_user_pgd);
2840#elif defined(PAE) 2826#elif defined(PAE)
2841 KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(ci->ci_pae_l3_pdir[0])); 2827 KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(ci->ci_pae_l3_pdir[0]));
2842#elif !defined(XEN)  2828#elif !defined(XEN)
2843 KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(rcr3())); 2829 KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(rcr3()));
2844#endif 2830#endif
2845 KASSERT(ci->ci_pmap == pmap); 2831 KASSERT(ci->ci_pmap == pmap);
2846 2832
2847 /* 2833 /*
2848 * we aren't interested in TLB invalidations for this pmap, 2834 * we aren't interested in TLB invalidations for this pmap,
2849 * at least for the time being. 2835 * at least for the time being.
2850 */ 2836 */
2851 2837
2852 KASSERT(ci->ci_tlbstate == TLBSTATE_VALID); 2838 KASSERT(ci->ci_tlbstate == TLBSTATE_VALID);
2853 ci->ci_tlbstate = TLBSTATE_LAZY; 2839 ci->ci_tlbstate = TLBSTATE_LAZY;
2854} 2840}
2855 2841
2856/* 2842/*
2857 * end of lifecycle functions 2843 * end of lifecycle functions
2858 */ 2844 */
2859 2845
2860/* 2846/*
2861 * some misc. functions 2847 * some misc. functions
2862 */ 2848 */
2863 2849
2864int 2850int
2865pmap_pdes_invalid(vaddr_t va, pd_entry_t * const *pdes, pd_entry_t *lastpde) 2851pmap_pdes_invalid(vaddr_t va, pd_entry_t * const *pdes, pd_entry_t *lastpde)
2866{ 2852{
2867 int i; 2853 int i;
2868 unsigned long index; 2854 unsigned long index;
2869 pd_entry_t pde; 2855 pd_entry_t pde;
2870 2856
2871 for (i = PTP_LEVELS; i > 1; i--) { 2857 for (i = PTP_LEVELS; i > 1; i--) {
2872 index = pl_i(va, i); 2858 index = pl_i(va, i);
2873 pde = pdes[i - 2][index]; 2859 pde = pdes[i - 2][index];
2874 if ((pde & PG_V) == 0) 2860 if ((pde & PG_V) == 0)
2875 return i; 2861 return i;
2876 } 2862 }
2877 if (lastpde != NULL) 2863 if (lastpde != NULL)
2878 *lastpde = pde; 2864 *lastpde = pde;
2879 return 0; 2865 return 0;
2880} 2866}
2881 2867
2882/* 2868/*
2883 * pmap_extract: extract a PA for the given VA 2869 * pmap_extract: extract a PA for the given VA
2884 */ 2870 */
2885 2871
2886bool 2872bool
2887pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pap) 2873pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pap)
2888{ 2874{
2889 pt_entry_t *ptes, pte; 2875 pt_entry_t *ptes, pte;
2890 pd_entry_t pde; 2876 pd_entry_t pde;
2891 pd_entry_t * const *pdes; 2877 pd_entry_t * const *pdes;
2892 struct pmap *pmap2; 2878 struct pmap *pmap2;
2893 struct cpu_info *ci; 2879 struct cpu_info *ci;
2894 paddr_t pa; 2880 paddr_t pa;
2895 lwp_t *l; 2881 lwp_t *l;
2896 bool hard, rv; 2882 bool hard, rv;
2897 2883
2898#ifdef __HAVE_DIRECT_MAP 2884#ifdef __HAVE_DIRECT_MAP
2899 if (va >= PMAP_DIRECT_BASE && va < PMAP_DIRECT_END) { 2885 if (va >= PMAP_DIRECT_BASE && va < PMAP_DIRECT_END) {
2900 if (pap != NULL) { 2886 if (pap != NULL) {
2901 *pap = va - PMAP_DIRECT_BASE; 2887 *pap = va - PMAP_DIRECT_BASE;
2902 } 2888 }
2903 return true; 2889 return true;
2904 } 2890 }
2905#endif 2891#endif
2906 2892
2907 rv = false; 2893 rv = false;
2908 pa = 0; 2894 pa = 0;
2909 l = curlwp; 2895 l = curlwp;
2910 2896
2911 KPREEMPT_DISABLE(l); 2897 KPREEMPT_DISABLE(l);
2912 ci = l->l_cpu; 2898 ci = l->l_cpu;