Fri Dec 9 17:32:51 2011 UTC
only use PG_G on leaf PTEs.
go back to tlbflush(); all the global entries
that we create in pmap_bootstrap() are permanent.


(chs)
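For context: PG_G marks a translation as "global" so that it survives an ordinary TLB flush (a plain %cr3 reload); the hardware only honours the bit in leaf PTEs, and in intermediate PDEs it is ignored, so there is no point in setting it there. The sketch below is a minimal illustration of that rule, not the committed change; the helper example_make_leaf_pte() is hypothetical, while pt_entry_t, paddr_t, PG_V, PG_G and pmap_pg_g are the types and flags that appear later in this file.

/*
 * Illustrative sketch only -- not part of this commit.
 * example_make_leaf_pte() is a hypothetical helper.
 */
static pt_entry_t
example_make_leaf_pte(paddr_t pa, pt_entry_t protbits, bool is_leaf)
{
        pt_entry_t pte = pa | protbits | PG_V;

        /*
         * pmap_pg_g is PG_G when the CPU supports global pages,
         * and 0 otherwise.  Only leaf PTEs get it; intermediate
         * PDEs do not.
         */
        if (is_leaf)
                pte |= pmap_pg_g;
        return pte;
}

The second half of the log message follows from this: tlbflush() on NetBSD/x86 reloads %cr3 and therefore leaves PG_G entries in the TLB, whereas tlbflushg() also toggles CR4.PGE to evict them. Since every global mapping created in pmap_bootstrap() is permanent, the cheaper tlbflush() is sufficient.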
diff -r1.146 -r1.147 src/sys/arch/x86/x86/pmap.c

cvs diff -r1.146 -r1.147 src/sys/arch/x86/x86/pmap.c

--- src/sys/arch/x86/x86/pmap.c 2011/12/08 22:36:42 1.146
+++ src/sys/arch/x86/x86/pmap.c 2011/12/09 17:32:51 1.147
@@ -1,2373 +1,2373 @@
-/* $NetBSD: pmap.c,v 1.146 2011/12/08 22:36:42 rmind Exp $ */
+/* $NetBSD: pmap.c,v 1.147 2011/12/09 17:32:51 chs Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc. 4 * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran. 8 * by Andrew Doran.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * Copyright (c) 2007 Manuel Bouyer. 33 * Copyright (c) 2007 Manuel Bouyer.
34 * 34 *
35 * Redistribution and use in source and binary forms, with or without 35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions 36 * modification, are permitted provided that the following conditions
37 * are met: 37 * are met:
38 * 1. Redistributions of source code must retain the above copyright 38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer. 39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright 40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the 41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution. 42 * documentation and/or other materials provided with the distribution.
43 * 43 *
44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
47 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 47 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
49 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 49 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
50 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 50 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
51 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 51 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
52 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 52 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
53 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 53 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
54 * 54 *
55 */ 55 */
56 56
57/* 57/*
58 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr> 58 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
59 * 59 *
60 * Permission to use, copy, modify, and distribute this software for any 60 * Permission to use, copy, modify, and distribute this software for any
61 * purpose with or without fee is hereby granted, provided that the above 61 * purpose with or without fee is hereby granted, provided that the above
62 * copyright notice and this permission notice appear in all copies. 62 * copyright notice and this permission notice appear in all copies.
63 * 63 *
64 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 64 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
65 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 65 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
66 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 66 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
67 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 67 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
68 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 68 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
69 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 69 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
70 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 70 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
71 */ 71 */
72 72
73/* 73/*
74 * Copyright (c) 1997 Charles D. Cranor and Washington University. 74 * Copyright (c) 1997 Charles D. Cranor and Washington University.
75 * All rights reserved. 75 * All rights reserved.
76 * 76 *
77 * Redistribution and use in source and binary forms, with or without 77 * Redistribution and use in source and binary forms, with or without
78 * modification, are permitted provided that the following conditions 78 * modification, are permitted provided that the following conditions
79 * are met: 79 * are met:
80 * 1. Redistributions of source code must retain the above copyright 80 * 1. Redistributions of source code must retain the above copyright
81 * notice, this list of conditions and the following disclaimer. 81 * notice, this list of conditions and the following disclaimer.
82 * 2. Redistributions in binary form must reproduce the above copyright 82 * 2. Redistributions in binary form must reproduce the above copyright
83 * notice, this list of conditions and the following disclaimer in the 83 * notice, this list of conditions and the following disclaimer in the
84 * documentation and/or other materials provided with the distribution. 84 * documentation and/or other materials provided with the distribution.
85 * 85 *
86 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 86 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
87 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 87 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
88 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 88 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
89 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 89 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
90 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 90 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
91 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 91 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
92 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 92 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
93 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 93 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
94 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 94 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
95 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 95 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
96 */ 96 */
97 97
98/* 98/*
99 * Copyright 2001 (c) Wasabi Systems, Inc. 99 * Copyright 2001 (c) Wasabi Systems, Inc.
100 * All rights reserved. 100 * All rights reserved.
101 * 101 *
102 * Written by Frank van der Linden for Wasabi Systems, Inc. 102 * Written by Frank van der Linden for Wasabi Systems, Inc.
103 * 103 *
104 * Redistribution and use in source and binary forms, with or without 104 * Redistribution and use in source and binary forms, with or without
105 * modification, are permitted provided that the following conditions 105 * modification, are permitted provided that the following conditions
106 * are met: 106 * are met:
107 * 1. Redistributions of source code must retain the above copyright 107 * 1. Redistributions of source code must retain the above copyright
108 * notice, this list of conditions and the following disclaimer. 108 * notice, this list of conditions and the following disclaimer.
109 * 2. Redistributions in binary form must reproduce the above copyright 109 * 2. Redistributions in binary form must reproduce the above copyright
110 * notice, this list of conditions and the following disclaimer in the 110 * notice, this list of conditions and the following disclaimer in the
111 * documentation and/or other materials provided with the distribution. 111 * documentation and/or other materials provided with the distribution.
112 * 3. All advertising materials mentioning features or use of this software 112 * 3. All advertising materials mentioning features or use of this software
113 * must display the following acknowledgement: 113 * must display the following acknowledgement:
114 * This product includes software developed for the NetBSD Project by 114 * This product includes software developed for the NetBSD Project by
115 * Wasabi Systems, Inc. 115 * Wasabi Systems, Inc.
116 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 116 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
117 * or promote products derived from this software without specific prior 117 * or promote products derived from this software without specific prior
118 * written permission. 118 * written permission.
119 * 119 *
120 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 120 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
121 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 121 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
122 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 122 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
123 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 123 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
124 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 124 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
125 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 125 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
126 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 126 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
127 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 127 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
128 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 128 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
129 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 129 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
130 * POSSIBILITY OF SUCH DAMAGE. 130 * POSSIBILITY OF SUCH DAMAGE.
131 */ 131 */
132 132
133/* 133/*
134 * This is the i386 pmap modified and generalized to support x86-64 134 * This is the i386 pmap modified and generalized to support x86-64
135 * as well. The idea is to hide the upper N levels of the page tables 135 * as well. The idea is to hide the upper N levels of the page tables
136 * inside pmap_get_ptp, pmap_free_ptp and pmap_growkernel. The rest 136 * inside pmap_get_ptp, pmap_free_ptp and pmap_growkernel. The rest
137 * is mostly untouched, except that it uses some more generalized 137 * is mostly untouched, except that it uses some more generalized
138 * macros and interfaces. 138 * macros and interfaces.
139 * 139 *
140 * This pmap has been tested on the i386 as well, and it can be easily 140 * This pmap has been tested on the i386 as well, and it can be easily
141 * adapted to PAE. 141 * adapted to PAE.
142 * 142 *
143 * fvdl@wasabisystems.com 18-Jun-2001 143 * fvdl@wasabisystems.com 18-Jun-2001
144 */ 144 */
145 145
146/* 146/*
147 * pmap.c: i386 pmap module rewrite 147 * pmap.c: i386 pmap module rewrite
148 * Chuck Cranor <chuck@netbsd> 148 * Chuck Cranor <chuck@netbsd>
149 * 11-Aug-97 149 * 11-Aug-97
150 * 150 *
151 * history of this pmap module: in addition to my own input, i used 151 * history of this pmap module: in addition to my own input, i used
152 * the following references for this rewrite of the i386 pmap: 152 * the following references for this rewrite of the i386 pmap:
153 * 153 *
154 * [1] the NetBSD i386 pmap. this pmap appears to be based on the 154 * [1] the NetBSD i386 pmap. this pmap appears to be based on the
155 * BSD hp300 pmap done by Mike Hibler at University of Utah. 155 * BSD hp300 pmap done by Mike Hibler at University of Utah.
156 * it was then ported to the i386 by William Jolitz of UUNET 156 * it was then ported to the i386 by William Jolitz of UUNET
157 * Technologies, Inc. Then Charles M. Hannum of the NetBSD 157 * Technologies, Inc. Then Charles M. Hannum of the NetBSD
158 * project fixed some bugs and provided some speed ups. 158 * project fixed some bugs and provided some speed ups.
159 * 159 *
160 * [2] the FreeBSD i386 pmap. this pmap seems to be the 160 * [2] the FreeBSD i386 pmap. this pmap seems to be the
161 * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson 161 * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson
162 * and David Greenman. 162 * and David Greenman.
163 * 163 *
164 * [3] the Mach pmap. this pmap, from CMU, seems to have migrated 164 * [3] the Mach pmap. this pmap, from CMU, seems to have migrated
165 * between several processors. the VAX version was done by 165 * between several processors. the VAX version was done by
166 * Avadis Tevanian, Jr., and Michael Wayne Young. the i386 166 * Avadis Tevanian, Jr., and Michael Wayne Young. the i386
167 * version was done by Lance Berc, Mike Kupfer, Bob Baron, 167 * version was done by Lance Berc, Mike Kupfer, Bob Baron,
168 * David Golub, and Richard Draves. the alpha version was 168 * David Golub, and Richard Draves. the alpha version was
169 * done by Alessandro Forin (CMU/Mach) and Chris Demetriou 169 * done by Alessandro Forin (CMU/Mach) and Chris Demetriou
170 * (NetBSD/alpha). 170 * (NetBSD/alpha).
171 */ 171 */
172 172
173#include <sys/cdefs.h> 173#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.146 2011/12/08 22:36:42 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.147 2011/12/09 17:32:51 chs Exp $");
175 175
176#include "opt_user_ldt.h" 176#include "opt_user_ldt.h"
177#include "opt_lockdebug.h" 177#include "opt_lockdebug.h"
178#include "opt_multiprocessor.h" 178#include "opt_multiprocessor.h"
179#include "opt_xen.h" 179#include "opt_xen.h"
180#if !defined(__x86_64__) 180#if !defined(__x86_64__)
181#include "opt_kstack_dr0.h" 181#include "opt_kstack_dr0.h"
182#endif /* !defined(__x86_64__) */ 182#endif /* !defined(__x86_64__) */
183 183
184#include <sys/param.h> 184#include <sys/param.h>
185#include <sys/systm.h> 185#include <sys/systm.h>
186#include <sys/proc.h> 186#include <sys/proc.h>
187#include <sys/pool.h> 187#include <sys/pool.h>
188#include <sys/kernel.h> 188#include <sys/kernel.h>
189#include <sys/atomic.h> 189#include <sys/atomic.h>
190#include <sys/cpu.h> 190#include <sys/cpu.h>
191#include <sys/intr.h> 191#include <sys/intr.h>
192#include <sys/xcall.h> 192#include <sys/xcall.h>
193#include <sys/kcore.h> 193#include <sys/kcore.h>
194 194
195#include <uvm/uvm.h> 195#include <uvm/uvm.h>
196 196
197#include <dev/isa/isareg.h> 197#include <dev/isa/isareg.h>
198 198
199#include <machine/specialreg.h> 199#include <machine/specialreg.h>
200#include <machine/gdt.h> 200#include <machine/gdt.h>
201#include <machine/isa_machdep.h> 201#include <machine/isa_machdep.h>
202#include <machine/cpuvar.h> 202#include <machine/cpuvar.h>
203#include <machine/cputypes.h> 203#include <machine/cputypes.h>
204 204
205#include <x86/pmap.h> 205#include <x86/pmap.h>
206#include <x86/pmap_pv.h> 206#include <x86/pmap_pv.h>
207 207
208#include <x86/i82489reg.h> 208#include <x86/i82489reg.h>
209#include <x86/i82489var.h> 209#include <x86/i82489var.h>
210 210
211#ifdef XEN 211#ifdef XEN
212#include <xen/xen-public/xen.h> 212#include <xen/xen-public/xen.h>
213#include <xen/hypervisor.h> 213#include <xen/hypervisor.h>
214#endif 214#endif
215 215
216/* 216/*
217 * general info: 217 * general info:
218 * 218 *
219 * - for an explanation of how the i386 MMU hardware works see 219 * - for an explanation of how the i386 MMU hardware works see
220 * the comments in <machine/pte.h>. 220 * the comments in <machine/pte.h>.
221 * 221 *
222 * - for an explanation of the general memory structure used by 222 * - for an explanation of the general memory structure used by
223 * this pmap (including the recursive mapping), see the comments 223 * this pmap (including the recursive mapping), see the comments
224 * in <machine/pmap.h>. 224 * in <machine/pmap.h>.
225 * 225 *
226 * this file contains the code for the "pmap module." the module's 226 * this file contains the code for the "pmap module." the module's
227 * job is to manage the hardware's virtual to physical address mappings. 227 * job is to manage the hardware's virtual to physical address mappings.
228 * note that there are two levels of mapping in the VM system: 228 * note that there are two levels of mapping in the VM system:
229 * 229 *
230 * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's 230 * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's
231 * to map ranges of virtual address space to objects/files. for 231 * to map ranges of virtual address space to objects/files. for
232 * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only 232 * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only
233 * to the file /bin/ls starting at offset zero." note that 233 * to the file /bin/ls starting at offset zero." note that
234 * the upper layer mapping is not concerned with how individual 234 * the upper layer mapping is not concerned with how individual
235 * vm_pages are mapped. 235 * vm_pages are mapped.
236 * 236 *
237 * [2] the lower layer of the VM system (the pmap) maintains the mappings 237 * [2] the lower layer of the VM system (the pmap) maintains the mappings
238 * from virtual addresses. it is concerned with which vm_page is 238 * from virtual addresses. it is concerned with which vm_page is
239 * mapped where. for example, when you run /bin/ls and start 239 * mapped where. for example, when you run /bin/ls and start
240 * at page 0x1000 the fault routine may lookup the correct page 240 * at page 0x1000 the fault routine may lookup the correct page
241 * of the /bin/ls file and then ask the pmap layer to establish 241 * of the /bin/ls file and then ask the pmap layer to establish
242 * a mapping for it. 242 * a mapping for it.
243 * 243 *
244 * note that information in the lower layer of the VM system can be 244 * note that information in the lower layer of the VM system can be
245 * thrown away since it can easily be reconstructed from the info 245 * thrown away since it can easily be reconstructed from the info
246 * in the upper layer. 246 * in the upper layer.
247 * 247 *
248 * data structures we use include: 248 * data structures we use include:
249 * 249 *
250 * - struct pmap: describes the address space of one thread 250 * - struct pmap: describes the address space of one thread
251 * - struct pv_entry: describes one <PMAP,VA> mapping of a PA 251 * - struct pv_entry: describes one <PMAP,VA> mapping of a PA
252 * - struct pv_head: there is one pv_head per managed page of 252 * - struct pv_head: there is one pv_head per managed page of
253 * physical memory. the pv_head points to a list of pv_entry 253 * physical memory. the pv_head points to a list of pv_entry
254 * structures which describe all the <PMAP,VA> pairs that this 254 * structures which describe all the <PMAP,VA> pairs that this
255 * page is mapped in. this is critical for page based operations 255 * page is mapped in. this is critical for page based operations
256 * such as pmap_page_protect() [change protection on _all_ mappings 256 * such as pmap_page_protect() [change protection on _all_ mappings
257 * of a page] 257 * of a page]
258 */ 258 */
259 259
260/* 260/*
261 * memory allocation 261 * memory allocation
262 * 262 *
263 * - there are three data structures that we must dynamically allocate: 263 * - there are three data structures that we must dynamically allocate:
264 * 264 *
265 * [A] new process' page directory page (PDP) 265 * [A] new process' page directory page (PDP)
266 * - plan 1: done at pmap_create() we use 266 * - plan 1: done at pmap_create() we use
267 * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this 267 * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this
268 * allocation. 268 * allocation.
269 * 269 *
270 * if we are low in free physical memory then we sleep in 270 * if we are low in free physical memory then we sleep in
271 * uvm_km_alloc -- in this case this is ok since we are creating 271 * uvm_km_alloc -- in this case this is ok since we are creating
272 * a new pmap and should not be holding any locks. 272 * a new pmap and should not be holding any locks.
273 * 273 *
274 * if the kernel is totally out of virtual space 274 * if the kernel is totally out of virtual space
275 * (i.e. uvm_km_alloc returns NULL), then we panic. 275 * (i.e. uvm_km_alloc returns NULL), then we panic.
276 * 276 *
277 * [B] new page tables pages (PTP) 277 * [B] new page tables pages (PTP)
278 * - call uvm_pagealloc() 278 * - call uvm_pagealloc()
279 * => success: zero page, add to pm_pdir 279 * => success: zero page, add to pm_pdir
280 * => failure: we are out of free vm_pages, let pmap_enter() 280 * => failure: we are out of free vm_pages, let pmap_enter()
281 * tell UVM about it. 281 * tell UVM about it.
282 * 282 *
283 * note: for kernel PTPs, we start with NKPTP of them. as we map 283 * note: for kernel PTPs, we start with NKPTP of them. as we map
284 * kernel memory (at uvm_map time) we check to see if we've grown 284 * kernel memory (at uvm_map time) we check to see if we've grown
285 * the kernel pmap. if so, we call the optional function 285 * the kernel pmap. if so, we call the optional function
286 * pmap_growkernel() to grow the kernel PTPs in advance. 286 * pmap_growkernel() to grow the kernel PTPs in advance.
287 * 287 *
288 * [C] pv_entry structures 288 * [C] pv_entry structures
289 */ 289 */
290 290
291/* 291/*
292 * locking 292 * locking
293 * 293 *
294 * we have the following locks that we must contend with: 294 * we have the following locks that we must contend with:
295 * 295 *
296 * mutexes: 296 * mutexes:
297 * 297 *
298 * - pmap lock (per pmap, part of uvm_object) 298 * - pmap lock (per pmap, part of uvm_object)
299 * this lock protects the fields in the pmap structure including 299 * this lock protects the fields in the pmap structure including
300 * the non-kernel PDEs in the PDP, and the PTEs. it also locks 300 * the non-kernel PDEs in the PDP, and the PTEs. it also locks
301 * in the alternate PTE space (since that is determined by the 301 * in the alternate PTE space (since that is determined by the
302 * entry in the PDP). 302 * entry in the PDP).
303 * 303 *
304 * - pvh_lock (per pv_head) 304 * - pvh_lock (per pv_head)
305 * this lock protects the pv_entry list which is chained off the 305 * this lock protects the pv_entry list which is chained off the
306 * pv_head structure for a specific managed PA. it is locked 306 * pv_head structure for a specific managed PA. it is locked
307 * when traversing the list (e.g. adding/removing mappings, 307 * when traversing the list (e.g. adding/removing mappings,
308 * syncing R/M bits, etc.) 308 * syncing R/M bits, etc.)
309 * 309 *
310 * - pmaps_lock 310 * - pmaps_lock
311 * this lock protects the list of active pmaps (headed by "pmaps"). 311 * this lock protects the list of active pmaps (headed by "pmaps").
312 * we lock it when adding or removing pmaps from this list. 312 * we lock it when adding or removing pmaps from this list.
313 */ 313 */
314 314
315const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER; 315const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER;
316const int ptp_shifts[] = PTP_SHIFT_INITIALIZER; 316const int ptp_shifts[] = PTP_SHIFT_INITIALIZER;
317const long nkptpmax[] = NKPTPMAX_INITIALIZER; 317const long nkptpmax[] = NKPTPMAX_INITIALIZER;
318const long nbpd[] = NBPD_INITIALIZER; 318const long nbpd[] = NBPD_INITIALIZER;
319pd_entry_t * const normal_pdes[] = PDES_INITIALIZER; 319pd_entry_t * const normal_pdes[] = PDES_INITIALIZER;
320 320
321long nkptp[] = NKPTP_INITIALIZER; 321long nkptp[] = NKPTP_INITIALIZER;
322 322
323struct pmap_head pmaps; 323struct pmap_head pmaps;
324kmutex_t pmaps_lock; 324kmutex_t pmaps_lock;
325 325
326static vaddr_t pmap_maxkvaddr; 326static vaddr_t pmap_maxkvaddr;
327 327
328/* 328/*
329 * XXX kludge: dummy locking to make KASSERTs in uvm_page.c comfortable. 329 * XXX kludge: dummy locking to make KASSERTs in uvm_page.c comfortable.
330 * actual locking is done by pm_lock. 330 * actual locking is done by pm_lock.
331 */ 331 */
332#if defined(DIAGNOSTIC) 332#if defined(DIAGNOSTIC)
333#define PMAP_SUBOBJ_LOCK(pm, idx) \ 333#define PMAP_SUBOBJ_LOCK(pm, idx) \
334 KASSERT(mutex_owned((pm)->pm_lock)); \ 334 KASSERT(mutex_owned((pm)->pm_lock)); \
335 if ((idx) != 0) \ 335 if ((idx) != 0) \
336 mutex_enter((pm)->pm_obj[(idx)].vmobjlock) 336 mutex_enter((pm)->pm_obj[(idx)].vmobjlock)
337#define PMAP_SUBOBJ_UNLOCK(pm, idx) \ 337#define PMAP_SUBOBJ_UNLOCK(pm, idx) \
338 KASSERT(mutex_owned((pm)->pm_lock)); \ 338 KASSERT(mutex_owned((pm)->pm_lock)); \
339 if ((idx) != 0) \ 339 if ((idx) != 0) \
340 mutex_exit((pm)->pm_obj[(idx)].vmobjlock) 340 mutex_exit((pm)->pm_obj[(idx)].vmobjlock)
341#else /* defined(DIAGNOSTIC) */ 341#else /* defined(DIAGNOSTIC) */
342#define PMAP_SUBOBJ_LOCK(pm, idx) /* nothing */ 342#define PMAP_SUBOBJ_LOCK(pm, idx) /* nothing */
343#define PMAP_SUBOBJ_UNLOCK(pm, idx) /* nothing */ 343#define PMAP_SUBOBJ_UNLOCK(pm, idx) /* nothing */
344#endif /* defined(DIAGNOSTIC) */ 344#endif /* defined(DIAGNOSTIC) */
345 345
346/* 346/*
347 * Misc. event counters. 347 * Misc. event counters.
348 */ 348 */
349struct evcnt pmap_iobmp_evcnt; 349struct evcnt pmap_iobmp_evcnt;
350struct evcnt pmap_ldt_evcnt; 350struct evcnt pmap_ldt_evcnt;
351 351
352/* 352/*
353 * PAT 353 * PAT
354 */ 354 */
355#define PATENTRY(n, type) (type << ((n) * 8)) 355#define PATENTRY(n, type) (type << ((n) * 8))
356#define PAT_UC 0x0ULL 356#define PAT_UC 0x0ULL
357#define PAT_WC 0x1ULL 357#define PAT_WC 0x1ULL
358#define PAT_WT 0x4ULL 358#define PAT_WT 0x4ULL
359#define PAT_WP 0x5ULL 359#define PAT_WP 0x5ULL
360#define PAT_WB 0x6ULL 360#define PAT_WB 0x6ULL
361#define PAT_UCMINUS 0x7ULL 361#define PAT_UCMINUS 0x7ULL
362 362
363static bool cpu_pat_enabled __read_mostly = false; 363static bool cpu_pat_enabled __read_mostly = false;
364 364
365/* 365/*
366 * global data structures 366 * global data structures
367 */ 367 */
368 368
369static struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ 369static struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */
370struct pmap *const kernel_pmap_ptr = &kernel_pmap_store; 370struct pmap *const kernel_pmap_ptr = &kernel_pmap_store;
371 371
372/* 372/*
373 * pmap_pg_g: if our processor supports PG_G in the PTE then we 373 * pmap_pg_g: if our processor supports PG_G in the PTE then we
374 * set pmap_pg_g to PG_G (otherwise it is zero). 374 * set pmap_pg_g to PG_G (otherwise it is zero).
375 */ 375 */
376 376
377int pmap_pg_g __read_mostly = 0; 377int pmap_pg_g __read_mostly = 0;
378 378
379/* 379/*
380 * pmap_largepages: if our processor supports PG_PS and we are 380 * pmap_largepages: if our processor supports PG_PS and we are
381 * using it, this is set to true. 381 * using it, this is set to true.
382 */ 382 */
383 383
384int pmap_largepages __read_mostly; 384int pmap_largepages __read_mostly;
385 385
386/* 386/*
387 * i386 physical memory comes in a big contig chunk with a small 387 * i386 physical memory comes in a big contig chunk with a small
388 * hole toward the front of it... the following two paddr_t's 388 * hole toward the front of it... the following two paddr_t's
389 * (shared with machdep.c) describe the physical address space 389 * (shared with machdep.c) describe the physical address space
390 * of this machine. 390 * of this machine.
391 */ 391 */
392paddr_t avail_start __read_mostly; /* PA of first available physical page */ 392paddr_t avail_start __read_mostly; /* PA of first available physical page */
393paddr_t avail_end __read_mostly; /* PA of last available physical page */ 393paddr_t avail_end __read_mostly; /* PA of last available physical page */
394 394
395#ifdef XEN 395#ifdef XEN
396#ifdef __x86_64__ 396#ifdef __x86_64__
397/* Dummy PGD for user cr3, used between pmap_deactivate() and pmap_activate() */ 397/* Dummy PGD for user cr3, used between pmap_deactivate() and pmap_activate() */
398static paddr_t xen_dummy_user_pgd; 398static paddr_t xen_dummy_user_pgd;
399#endif /* __x86_64__ */ 399#endif /* __x86_64__ */
400paddr_t pmap_pa_start; /* PA of first physical page for this domain */ 400paddr_t pmap_pa_start; /* PA of first physical page for this domain */
401paddr_t pmap_pa_end; /* PA of last physical page for this domain */ 401paddr_t pmap_pa_end; /* PA of last physical page for this domain */
402#endif /* XEN */ 402#endif /* XEN */
403 403
404#define VM_PAGE_TO_PP(pg) (&(pg)->mdpage.mp_pp) 404#define VM_PAGE_TO_PP(pg) (&(pg)->mdpage.mp_pp)
405 405
406#define PV_HASH_SIZE 32768 406#define PV_HASH_SIZE 32768
407#define PV_HASH_LOCK_CNT 32 407#define PV_HASH_LOCK_CNT 32
408 408
409struct pv_hash_lock { 409struct pv_hash_lock {
410 kmutex_t lock; 410 kmutex_t lock;
411} __aligned(CACHE_LINE_SIZE) pv_hash_locks[PV_HASH_LOCK_CNT] 411} __aligned(CACHE_LINE_SIZE) pv_hash_locks[PV_HASH_LOCK_CNT]
412 __aligned(CACHE_LINE_SIZE); 412 __aligned(CACHE_LINE_SIZE);
413 413
414struct pv_hash_head { 414struct pv_hash_head {
415 SLIST_HEAD(, pv_entry) hh_list; 415 SLIST_HEAD(, pv_entry) hh_list;
416} pv_hash_heads[PV_HASH_SIZE]; 416} pv_hash_heads[PV_HASH_SIZE];
417 417
418static u_int 418static u_int
419pvhash_hash(struct vm_page *ptp, vaddr_t va) 419pvhash_hash(struct vm_page *ptp, vaddr_t va)
420{ 420{
421 421
422 return (uintptr_t)ptp / sizeof(*ptp) + (va >> PAGE_SHIFT); 422 return (uintptr_t)ptp / sizeof(*ptp) + (va >> PAGE_SHIFT);
423} 423}
424 424
425static struct pv_hash_head * 425static struct pv_hash_head *
426pvhash_head(u_int hash) 426pvhash_head(u_int hash)
427{ 427{
428 428
429 return &pv_hash_heads[hash % PV_HASH_SIZE]; 429 return &pv_hash_heads[hash % PV_HASH_SIZE];
430} 430}
431 431
432static kmutex_t * 432static kmutex_t *
433pvhash_lock(u_int hash) 433pvhash_lock(u_int hash)
434{ 434{
435 435
436 return &pv_hash_locks[hash % PV_HASH_LOCK_CNT].lock; 436 return &pv_hash_locks[hash % PV_HASH_LOCK_CNT].lock;
437} 437}
438 438
439static struct pv_entry * 439static struct pv_entry *
440pvhash_remove(struct pv_hash_head *hh, struct vm_page *ptp, vaddr_t va) 440pvhash_remove(struct pv_hash_head *hh, struct vm_page *ptp, vaddr_t va)
441{ 441{
442 struct pv_entry *pve; 442 struct pv_entry *pve;
443 struct pv_entry *prev; 443 struct pv_entry *prev;
444 444
445 prev = NULL; 445 prev = NULL;
446 SLIST_FOREACH(pve, &hh->hh_list, pve_hash) { 446 SLIST_FOREACH(pve, &hh->hh_list, pve_hash) {
447 if (pve->pve_pte.pte_ptp == ptp && 447 if (pve->pve_pte.pte_ptp == ptp &&
448 pve->pve_pte.pte_va == va) { 448 pve->pve_pte.pte_va == va) {
449 if (prev != NULL) { 449 if (prev != NULL) {
450 SLIST_REMOVE_AFTER(prev, pve_hash); 450 SLIST_REMOVE_AFTER(prev, pve_hash);
451 } else { 451 } else {
452 SLIST_REMOVE_HEAD(&hh->hh_list, pve_hash); 452 SLIST_REMOVE_HEAD(&hh->hh_list, pve_hash);
453 } 453 }
454 break; 454 break;
455 } 455 }
456 prev = pve; 456 prev = pve;
457 } 457 }
458 return pve; 458 return pve;
459} 459}
460 460
461/* 461/*
462 * other data structures 462 * other data structures
463 */ 463 */
464 464
465static pt_entry_t protection_codes[8] __read_mostly; /* maps MI prot to i386 465static pt_entry_t protection_codes[8] __read_mostly; /* maps MI prot to i386
466 prot code */ 466 prot code */
467static bool pmap_initialized __read_mostly = false; /* pmap_init done yet? */ 467static bool pmap_initialized __read_mostly = false; /* pmap_init done yet? */
468 468
469/* 469/*
470 * the following two vaddr_t's are used during system startup 470 * the following two vaddr_t's are used during system startup
471 * to keep track of how much of the kernel's VM space we have used. 471 * to keep track of how much of the kernel's VM space we have used.
472 * once the system is started, the management of the remaining kernel 472 * once the system is started, the management of the remaining kernel
473 * VM space is turned over to the kernel_map vm_map. 473 * VM space is turned over to the kernel_map vm_map.
474 */ 474 */
475 475
476static vaddr_t virtual_avail __read_mostly; /* VA of first free KVA */ 476static vaddr_t virtual_avail __read_mostly; /* VA of first free KVA */
477static vaddr_t virtual_end __read_mostly; /* VA of last free KVA */ 477static vaddr_t virtual_end __read_mostly; /* VA of last free KVA */
478 478
479/* 479/*
480 * pool that pmap structures are allocated from 480 * pool that pmap structures are allocated from
481 */ 481 */
482 482
483static struct pool_cache pmap_cache; 483static struct pool_cache pmap_cache;
484 484
485/* 485/*
486 * pv_entry cache 486 * pv_entry cache
487 */ 487 */
488 488
489static struct pool_cache pmap_pv_cache; 489static struct pool_cache pmap_pv_cache;
490 490
491#ifdef __HAVE_DIRECT_MAP 491#ifdef __HAVE_DIRECT_MAP
492 492
493extern phys_ram_seg_t mem_clusters[]; 493extern phys_ram_seg_t mem_clusters[];
494extern int mem_cluster_cnt; 494extern int mem_cluster_cnt;
495 495
496#else 496#else
497 497
498/* 498/*
499 * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a 499 * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a
500 * maxcpus*NPTECL array of PTE's, to avoid cache line thrashing 500 * maxcpus*NPTECL array of PTE's, to avoid cache line thrashing
501 * due to false sharing. 501 * due to false sharing.
502 */ 502 */
503 503
504#ifdef MULTIPROCESSOR 504#ifdef MULTIPROCESSOR
505#define PTESLEW(pte, id) ((pte)+(id)*NPTECL) 505#define PTESLEW(pte, id) ((pte)+(id)*NPTECL)
506#define VASLEW(va,id) ((va)+(id)*NPTECL*PAGE_SIZE) 506#define VASLEW(va,id) ((va)+(id)*NPTECL*PAGE_SIZE)
507#else 507#else
508#define PTESLEW(pte, id) (pte) 508#define PTESLEW(pte, id) (pte)
509#define VASLEW(va,id) (va) 509#define VASLEW(va,id) (va)
510#endif 510#endif
511 511
512/* 512/*
513 * special VAs and the PTEs that map them 513 * special VAs and the PTEs that map them
514 */ 514 */
515static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte, *early_zero_pte; 515static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte, *early_zero_pte;
516static char *csrcp, *cdstp, *zerop, *ptpp, *early_zerop; 516static char *csrcp, *cdstp, *zerop, *ptpp, *early_zerop;
517 517
518#endif 518#endif
519 519
520int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, u_int); 520int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, u_int);
521 521
522/* PDP pool_cache(9) and its callbacks */ 522/* PDP pool_cache(9) and its callbacks */
523struct pool_cache pmap_pdp_cache; 523struct pool_cache pmap_pdp_cache;
524static int pmap_pdp_ctor(void *, void *, int); 524static int pmap_pdp_ctor(void *, void *, int);
525static void pmap_pdp_dtor(void *, void *); 525static void pmap_pdp_dtor(void *, void *);
526#ifdef PAE 526#ifdef PAE
527/* need to allocate items of 4 pages */ 527/* need to allocate items of 4 pages */
528static void *pmap_pdp_alloc(struct pool *, int); 528static void *pmap_pdp_alloc(struct pool *, int);
529static void pmap_pdp_free(struct pool *, void *); 529static void pmap_pdp_free(struct pool *, void *);
530static struct pool_allocator pmap_pdp_allocator = { 530static struct pool_allocator pmap_pdp_allocator = {
531 .pa_alloc = pmap_pdp_alloc, 531 .pa_alloc = pmap_pdp_alloc,
532 .pa_free = pmap_pdp_free, 532 .pa_free = pmap_pdp_free,
533 .pa_pagesz = PAGE_SIZE * PDP_SIZE, 533 .pa_pagesz = PAGE_SIZE * PDP_SIZE,
534}; 534};
535#endif /* PAE */ 535#endif /* PAE */
536 536
537extern vaddr_t idt_vaddr; /* we allocate IDT early */ 537extern vaddr_t idt_vaddr; /* we allocate IDT early */
538extern paddr_t idt_paddr; 538extern paddr_t idt_paddr;
539 539
540#ifdef _LP64 540#ifdef _LP64
541extern vaddr_t lo32_vaddr; 541extern vaddr_t lo32_vaddr;
542extern vaddr_t lo32_paddr; 542extern vaddr_t lo32_paddr;
543#endif 543#endif
544 544
545extern int end; 545extern int end;
546 546
547#ifdef i386 547#ifdef i386
548/* stuff to fix the pentium f00f bug */ 548/* stuff to fix the pentium f00f bug */
549extern vaddr_t pentium_idt_vaddr; 549extern vaddr_t pentium_idt_vaddr;
550#endif 550#endif
551 551
552 552
553/* 553/*
554 * local prototypes 554 * local prototypes
555 */ 555 */
556 556
557static struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t, 557static struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t,
558 pd_entry_t * const *); 558 pd_entry_t * const *);
559static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int); 559static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int);
560static void pmap_freepage(struct pmap *, struct vm_page *, int); 560static void pmap_freepage(struct pmap *, struct vm_page *, int);
561static void pmap_free_ptp(struct pmap *, struct vm_page *, 561static void pmap_free_ptp(struct pmap *, struct vm_page *,
562 vaddr_t, pt_entry_t *, 562 vaddr_t, pt_entry_t *,
563 pd_entry_t * const *); 563 pd_entry_t * const *);
564static bool pmap_is_active(struct pmap *, struct cpu_info *, bool); 564static bool pmap_is_active(struct pmap *, struct cpu_info *, bool);
565static bool pmap_remove_pte(struct pmap *, struct vm_page *, 565static bool pmap_remove_pte(struct pmap *, struct vm_page *,
566 pt_entry_t *, vaddr_t, 566 pt_entry_t *, vaddr_t,
567 struct pv_entry **); 567 struct pv_entry **);
568static void pmap_remove_ptes(struct pmap *, struct vm_page *, 568static void pmap_remove_ptes(struct pmap *, struct vm_page *,
569 vaddr_t, vaddr_t, vaddr_t, 569 vaddr_t, vaddr_t, vaddr_t,
570 struct pv_entry **); 570 struct pv_entry **);
571 571
572static bool pmap_get_physpage(vaddr_t, int, paddr_t *); 572static bool pmap_get_physpage(vaddr_t, int, paddr_t *);
573static void pmap_alloc_level(pd_entry_t * const *, vaddr_t, int, 573static void pmap_alloc_level(pd_entry_t * const *, vaddr_t, int,
574 long *); 574 long *);
575 575
576static bool pmap_reactivate(struct pmap *); 576static bool pmap_reactivate(struct pmap *);
577 577
578/* 578/*
579 * p m a p h e l p e r f u n c t i o n s 579 * p m a p h e l p e r f u n c t i o n s
580 */ 580 */
581 581
582static inline void 582static inline void
583pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff) 583pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff)
584{ 584{
585 585
586 if (pmap == pmap_kernel()) { 586 if (pmap == pmap_kernel()) {
587 atomic_add_long(&pmap->pm_stats.resident_count, resid_diff); 587 atomic_add_long(&pmap->pm_stats.resident_count, resid_diff);
588 atomic_add_long(&pmap->pm_stats.wired_count, wired_diff); 588 atomic_add_long(&pmap->pm_stats.wired_count, wired_diff);
589 } else { 589 } else {
590 KASSERT(mutex_owned(pmap->pm_lock)); 590 KASSERT(mutex_owned(pmap->pm_lock));
591 pmap->pm_stats.resident_count += resid_diff; 591 pmap->pm_stats.resident_count += resid_diff;
592 pmap->pm_stats.wired_count += wired_diff; 592 pmap->pm_stats.wired_count += wired_diff;
593 } 593 }
594} 594}
595 595
596static inline void 596static inline void
597pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte) 597pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte)
598{ 598{
599 int resid_diff = ((npte & PG_V) ? 1 : 0) - ((opte & PG_V) ? 1 : 0); 599 int resid_diff = ((npte & PG_V) ? 1 : 0) - ((opte & PG_V) ? 1 : 0);
600 int wired_diff = ((npte & PG_W) ? 1 : 0) - ((opte & PG_W) ? 1 : 0); 600 int wired_diff = ((npte & PG_W) ? 1 : 0) - ((opte & PG_W) ? 1 : 0);
601 601
602 KASSERT((npte & (PG_V | PG_W)) != PG_W); 602 KASSERT((npte & (PG_V | PG_W)) != PG_W);
603 KASSERT((opte & (PG_V | PG_W)) != PG_W); 603 KASSERT((opte & (PG_V | PG_W)) != PG_W);
604 604
605 pmap_stats_update(pmap, resid_diff, wired_diff); 605 pmap_stats_update(pmap, resid_diff, wired_diff);
606} 606}
607 607
608/* 608/*
609 * ptp_to_pmap: lookup pmap by ptp 609 * ptp_to_pmap: lookup pmap by ptp
610 */ 610 */
611 611
612static struct pmap * 612static struct pmap *
613ptp_to_pmap(struct vm_page *ptp) 613ptp_to_pmap(struct vm_page *ptp)
614{ 614{
615 struct pmap *pmap; 615 struct pmap *pmap;
616 616
617 if (ptp == NULL) { 617 if (ptp == NULL) {
618 return pmap_kernel(); 618 return pmap_kernel();
619 } 619 }
620 pmap = (struct pmap *)ptp->uobject; 620 pmap = (struct pmap *)ptp->uobject;
621 KASSERT(pmap != NULL); 621 KASSERT(pmap != NULL);
622 KASSERT(&pmap->pm_obj[0] == ptp->uobject); 622 KASSERT(&pmap->pm_obj[0] == ptp->uobject);
623 return pmap; 623 return pmap;
624} 624}
625 625
626static inline struct pv_pte * 626static inline struct pv_pte *
627pve_to_pvpte(struct pv_entry *pve) 627pve_to_pvpte(struct pv_entry *pve)
628{ 628{
629 629
630 KASSERT((void *)&pve->pve_pte == (void *)pve); 630 KASSERT((void *)&pve->pve_pte == (void *)pve);
631 return &pve->pve_pte; 631 return &pve->pve_pte;
632} 632}
633 633
634static inline struct pv_entry * 634static inline struct pv_entry *
635pvpte_to_pve(struct pv_pte *pvpte) 635pvpte_to_pve(struct pv_pte *pvpte)
636{ 636{
637 struct pv_entry *pve = (void *)pvpte; 637 struct pv_entry *pve = (void *)pvpte;
638 638
639 KASSERT(pve_to_pvpte(pve) == pvpte); 639 KASSERT(pve_to_pvpte(pve) == pvpte);
640 return pve; 640 return pve;
641} 641}
642 642
643/* 643/*
644 * pv_pte_first, pv_pte_next: PV list iterator. 644 * pv_pte_first, pv_pte_next: PV list iterator.
645 */ 645 */
646 646
647static struct pv_pte * 647static struct pv_pte *
648pv_pte_first(struct pmap_page *pp) 648pv_pte_first(struct pmap_page *pp)
649{ 649{
650 650
651 if ((pp->pp_flags & PP_EMBEDDED) != 0) { 651 if ((pp->pp_flags & PP_EMBEDDED) != 0) {
652 return &pp->pp_pte; 652 return &pp->pp_pte;
653 } 653 }
654 return pve_to_pvpte(LIST_FIRST(&pp->pp_head.pvh_list)); 654 return pve_to_pvpte(LIST_FIRST(&pp->pp_head.pvh_list));
655} 655}
656 656
657static struct pv_pte * 657static struct pv_pte *
658pv_pte_next(struct pmap_page *pp, struct pv_pte *pvpte) 658pv_pte_next(struct pmap_page *pp, struct pv_pte *pvpte)
659{ 659{
660 660
661 KASSERT(pvpte != NULL); 661 KASSERT(pvpte != NULL);
662 if (pvpte == &pp->pp_pte) { 662 if (pvpte == &pp->pp_pte) {
663 KASSERT((pp->pp_flags & PP_EMBEDDED) != 0); 663 KASSERT((pp->pp_flags & PP_EMBEDDED) != 0);
664 return NULL; 664 return NULL;
665 } 665 }
666 KASSERT((pp->pp_flags & PP_EMBEDDED) == 0); 666 KASSERT((pp->pp_flags & PP_EMBEDDED) == 0);
667 return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list)); 667 return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list));
668} 668}
669 669
670/* 670/*
671 * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]? 671 * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]?
672 * of course the kernel is always loaded 672 * of course the kernel is always loaded
673 */ 673 */
674 674
675bool 675bool
676pmap_is_curpmap(struct pmap *pmap) 676pmap_is_curpmap(struct pmap *pmap)
677{ 677{
678#if defined(XEN) && defined(__x86_64__) 678#if defined(XEN) && defined(__x86_64__)
679 /* 679 /*
680 * Only kernel pmap is physically loaded. 680 * Only kernel pmap is physically loaded.
681 * User PGD may be active, but TLB will be flushed 681 * User PGD may be active, but TLB will be flushed
682 * with HYPERVISOR_iret anyway, so let's say no 682 * with HYPERVISOR_iret anyway, so let's say no
683 */ 683 */
684 return(pmap == pmap_kernel()); 684 return(pmap == pmap_kernel());
685#else /* XEN && __x86_64__*/ 685#else /* XEN && __x86_64__*/
686 return((pmap == pmap_kernel()) || 686 return((pmap == pmap_kernel()) ||
687 (pmap == curcpu()->ci_pmap)); 687 (pmap == curcpu()->ci_pmap));
688#endif 688#endif
689} 689}
690 690
691/* 691/*
692 * pmap_is_active: is this pmap loaded into the specified processor's %cr3? 692 * pmap_is_active: is this pmap loaded into the specified processor's %cr3?
693 */ 693 */
694 694
695inline static bool 695inline static bool
696pmap_is_active(struct pmap *pmap, struct cpu_info *ci, bool kernel) 696pmap_is_active(struct pmap *pmap, struct cpu_info *ci, bool kernel)
697{ 697{
698 698
699 return (pmap == pmap_kernel() || 699 return (pmap == pmap_kernel() ||
700 (pmap->pm_cpus & ci->ci_cpumask) != 0 || 700 (pmap->pm_cpus & ci->ci_cpumask) != 0 ||
701 (kernel && (pmap->pm_kernel_cpus & ci->ci_cpumask) != 0)); 701 (kernel && (pmap->pm_kernel_cpus & ci->ci_cpumask) != 0));
702} 702}
703 703
704/* 704/*
705 * Add a reference to the specified pmap. 705 * Add a reference to the specified pmap.
706 */ 706 */
707 707
708void 708void
709pmap_reference(struct pmap *pmap) 709pmap_reference(struct pmap *pmap)
710{ 710{
711 711
712 atomic_inc_uint(&pmap->pm_obj[0].uo_refs); 712 atomic_inc_uint(&pmap->pm_obj[0].uo_refs);
713} 713}
714 714
715#ifndef XEN 715#ifndef XEN
716 716
717/* 717/*
718 * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in 718 * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
719 * 719 *
720 * there are several pmaps involved. some or all of them might be same. 720 * there are several pmaps involved. some or all of them might be same.
721 * 721 *
722 * - the pmap given by the first argument 722 * - the pmap given by the first argument
723 * our caller wants to access this pmap's PTEs. 723 * our caller wants to access this pmap's PTEs.
724 * 724 *
725 * - pmap_kernel() 725 * - pmap_kernel()
726 * the kernel pmap. note that it only contains the kernel part 726 * the kernel pmap. note that it only contains the kernel part
727 * of the address space which is shared by any pmap. ie. any 727 * of the address space which is shared by any pmap. ie. any
728 * pmap can be used instead of pmap_kernel() for our purpose. 728 * pmap can be used instead of pmap_kernel() for our purpose.
729 * 729 *
730 * - ci->ci_pmap 730 * - ci->ci_pmap
731 * pmap currently loaded on the cpu. 731 * pmap currently loaded on the cpu.
732 * 732 *
733 * - vm_map_pmap(&curproc->p_vmspace->vm_map) 733 * - vm_map_pmap(&curproc->p_vmspace->vm_map)
734 * current process' pmap. 734 * current process' pmap.
735 * 735 *
736 * => we lock enough pmaps to keep things locked in 736 * => we lock enough pmaps to keep things locked in
737 * => must be undone with pmap_unmap_ptes before returning 737 * => must be undone with pmap_unmap_ptes before returning
738 */ 738 */
739 739
740void 740void
741pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2, 741pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2,
742 pd_entry_t **ptepp, pd_entry_t * const **pdeppp) 742 pd_entry_t **ptepp, pd_entry_t * const **pdeppp)
743{ 743{
744 struct pmap *curpmap; 744 struct pmap *curpmap;
745 struct cpu_info *ci; 745 struct cpu_info *ci;
746 uint32_t cpumask; 746 uint32_t cpumask;
747 lwp_t *l; 747 lwp_t *l;
748 748
749 /* The kernel's pmap is always accessible. */ 749 /* The kernel's pmap is always accessible. */
750 if (pmap == pmap_kernel()) { 750 if (pmap == pmap_kernel()) {
751 *pmap2 = NULL; 751 *pmap2 = NULL;
752 *ptepp = PTE_BASE; 752 *ptepp = PTE_BASE;
753 *pdeppp = normal_pdes; 753 *pdeppp = normal_pdes;
754 return; 754 return;
755 } 755 }
756 KASSERT(kpreempt_disabled()); 756 KASSERT(kpreempt_disabled());
757 757
758 l = curlwp; 758 l = curlwp;
759 retry: 759 retry:
760 mutex_enter(pmap->pm_lock); 760 mutex_enter(pmap->pm_lock);
761 ci = curcpu(); 761 ci = curcpu();
762 curpmap = ci->ci_pmap; 762 curpmap = ci->ci_pmap;
763 if (vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) { 763 if (vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) {
764 /* Our own pmap so just load it: easy. */ 764 /* Our own pmap so just load it: easy. */
765 if (__predict_false(ci->ci_want_pmapload)) { 765 if (__predict_false(ci->ci_want_pmapload)) {
766 mutex_exit(pmap->pm_lock); 766 mutex_exit(pmap->pm_lock);
767 pmap_load(); 767 pmap_load();
768 goto retry; 768 goto retry;
769 } 769 }
770 KASSERT(pmap == curpmap); 770 KASSERT(pmap == curpmap);
771 } else if (pmap == curpmap) { 771 } else if (pmap == curpmap) {
772 /* 772 /*
773 * Already on the CPU: make it valid. This is very 773 * Already on the CPU: make it valid. This is very
774 * often the case during exit(), when we have switched 774 * often the case during exit(), when we have switched
775 * to the kernel pmap in order to destroy a user pmap. 775 * to the kernel pmap in order to destroy a user pmap.
776 */ 776 */
777 if (!pmap_reactivate(pmap)) { 777 if (!pmap_reactivate(pmap)) {
778 u_int gen = uvm_emap_gen_return(); 778 u_int gen = uvm_emap_gen_return();
779 tlbflush(); 779 tlbflush();
780 uvm_emap_update(gen); 780 uvm_emap_update(gen);
781 } 781 }
782 } else { 782 } else {
783 /* 783 /*
784 * Toss current pmap from CPU, but keep a reference to it. 784 * Toss current pmap from CPU, but keep a reference to it.
785 * The reference will be dropped by pmap_unmap_ptes(). 785 * The reference will be dropped by pmap_unmap_ptes().
786 * Can happen if we block during exit(). 786 * Can happen if we block during exit().
787 */ 787 */
788 cpumask = ci->ci_cpumask; 788 cpumask = ci->ci_cpumask;
789 atomic_and_32(&curpmap->pm_cpus, ~cpumask); 789 atomic_and_32(&curpmap->pm_cpus, ~cpumask);
790 atomic_and_32(&curpmap->pm_kernel_cpus, ~cpumask); 790 atomic_and_32(&curpmap->pm_kernel_cpus, ~cpumask);
791 ci->ci_pmap = pmap; 791 ci->ci_pmap = pmap;
792 ci->ci_tlbstate = TLBSTATE_VALID; 792 ci->ci_tlbstate = TLBSTATE_VALID;
793 atomic_or_32(&pmap->pm_cpus, cpumask); 793 atomic_or_32(&pmap->pm_cpus, cpumask);
794 atomic_or_32(&pmap->pm_kernel_cpus, cpumask); 794 atomic_or_32(&pmap->pm_kernel_cpus, cpumask);
795 cpu_load_pmap(pmap); 795 cpu_load_pmap(pmap);
796 } 796 }
797 pmap->pm_ncsw = l->l_ncsw; 797 pmap->pm_ncsw = l->l_ncsw;
798 *pmap2 = curpmap; 798 *pmap2 = curpmap;
799 *ptepp = PTE_BASE; 799 *ptepp = PTE_BASE;
800 *pdeppp = normal_pdes; 800 *pdeppp = normal_pdes;
801} 801}
802 802
803/* 803/*
804 * pmap_unmap_ptes: unlock the PTE mapping of "pmap" 804 * pmap_unmap_ptes: unlock the PTE mapping of "pmap"
805 */ 805 */
806 806
807void 807void
808pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2) 808pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2)
809{ 809{
810 struct cpu_info *ci; 810 struct cpu_info *ci;
811 struct pmap *mypmap; 811 struct pmap *mypmap;
812 812
813 KASSERT(kpreempt_disabled()); 813 KASSERT(kpreempt_disabled());
814 814
815 /* The kernel's pmap is always accessible. */ 815 /* The kernel's pmap is always accessible. */
816 if (pmap == pmap_kernel()) { 816 if (pmap == pmap_kernel()) {
817 return; 817 return;
818 } 818 }
819 819
820 /* 820 /*
821 * We cannot tolerate context switches while mapped in. 821 * We cannot tolerate context switches while mapped in.
822 * If it is our own pmap all we have to do is unlock. 822 * If it is our own pmap all we have to do is unlock.
823 */ 823 */
824 KASSERT(pmap->pm_ncsw == curlwp->l_ncsw); 824 KASSERT(pmap->pm_ncsw == curlwp->l_ncsw);
825 mypmap = vm_map_pmap(&curproc->p_vmspace->vm_map); 825 mypmap = vm_map_pmap(&curproc->p_vmspace->vm_map);
826 if (pmap == mypmap) { 826 if (pmap == mypmap) {
827 mutex_exit(pmap->pm_lock); 827 mutex_exit(pmap->pm_lock);
828 return; 828 return;
829 } 829 }
830 830
831 /* 831 /*
832 * Mark whatever's on the CPU now as lazy and unlock. 832 * Mark whatever's on the CPU now as lazy and unlock.
833 * If the pmap was already installed, we are done. 833 * If the pmap was already installed, we are done.
834 */ 834 */
835 ci = curcpu(); 835 ci = curcpu();
836 ci->ci_tlbstate = TLBSTATE_LAZY; 836 ci->ci_tlbstate = TLBSTATE_LAZY;
837 ci->ci_want_pmapload = (mypmap != pmap_kernel()); 837 ci->ci_want_pmapload = (mypmap != pmap_kernel());
838 mutex_exit(pmap->pm_lock); 838 mutex_exit(pmap->pm_lock);
839 if (pmap == pmap2) { 839 if (pmap == pmap2) {
840 return; 840 return;
841 } 841 }
842 842
843 /* 843 /*
844 * We installed another pmap on the CPU. Grab a reference to 844 * We installed another pmap on the CPU. Grab a reference to
845 * it and leave in place. Toss the evicted pmap (can block). 845 * it and leave in place. Toss the evicted pmap (can block).
846 */ 846 */
847 pmap_reference(pmap); 847 pmap_reference(pmap);
848 pmap_destroy(pmap2); 848 pmap_destroy(pmap2);
849} 849}
850 850
851#endif 851#endif
852 852
853inline static void 853inline static void
854pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte) 854pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte)
855{ 855{
856 856
857#if !defined(__x86_64__) 857#if !defined(__x86_64__)
858 if (curproc == NULL || curproc->p_vmspace == NULL || 858 if (curproc == NULL || curproc->p_vmspace == NULL ||
859 pm != vm_map_pmap(&curproc->p_vmspace->vm_map)) 859 pm != vm_map_pmap(&curproc->p_vmspace->vm_map))
860 return; 860 return;
861 861
862 if ((opte ^ npte) & PG_X) 862 if ((opte ^ npte) & PG_X)
863 pmap_update_pg(va); 863 pmap_update_pg(va);
864 864
865 /* 865 /*
866	 * If executability was removed from the highest executable mapping, 866	 * If executability was removed from the highest executable mapping,
867	 * reset the code segment to something conservative and let the trap 867	 * reset the code segment to something conservative and let the trap
868	 * handler set the right limit later; we can't compute it here because 868	 * handler set the right limit later; we can't compute it here because
869	 * of locking constraints on the vm map. 869	 * of locking constraints on the vm map.
870 */ 870 */
871 871
872 if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) { 872 if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) {
873 struct trapframe *tf = curlwp->l_md.md_regs; 873 struct trapframe *tf = curlwp->l_md.md_regs;
874 874
875 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 875 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
876 pm->pm_hiexec = I386_MAX_EXE_ADDR; 876 pm->pm_hiexec = I386_MAX_EXE_ADDR;
877 } 877 }
878#endif /* !defined(__x86_64__) */ 878#endif /* !defined(__x86_64__) */
879} 879}
880 880
881#if !defined(__x86_64__) 881#if !defined(__x86_64__)
882/* 882/*
883 * Fixup the code segment to cover all potential executable mappings. 883 * Fixup the code segment to cover all potential executable mappings.
884 * returns 0 if no changes to the code segment were made. 884 * returns 0 if no changes to the code segment were made.
885 */ 885 */
886 886
887int 887int
888pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb) 888pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb)
889{ 889{
890 struct vm_map_entry *ent; 890 struct vm_map_entry *ent;
891 struct pmap *pm = vm_map_pmap(map); 891 struct pmap *pm = vm_map_pmap(map);
892 vaddr_t va = 0; 892 vaddr_t va = 0;
893 893
894 vm_map_lock_read(map); 894 vm_map_lock_read(map);
895 for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) { 895 for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) {
896 896
897 /* 897 /*
898 * This entry has greater va than the entries before. 898 * This entry has greater va than the entries before.
899 * We need to make it point to the last page, not past it. 899 * We need to make it point to the last page, not past it.
900 */ 900 */
901 901
902 if (ent->protection & VM_PROT_EXECUTE) 902 if (ent->protection & VM_PROT_EXECUTE)
903 va = trunc_page(ent->end) - PAGE_SIZE; 903 va = trunc_page(ent->end) - PAGE_SIZE;
904 } 904 }
905 vm_map_unlock_read(map); 905 vm_map_unlock_read(map);
906 if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL)) 906 if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL))
907 return (0); 907 return (0);
908 908
909 pm->pm_hiexec = va; 909 pm->pm_hiexec = va;
910 if (pm->pm_hiexec > I386_MAX_EXE_ADDR) { 910 if (pm->pm_hiexec > I386_MAX_EXE_ADDR) {
911 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); 911 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
912 } else { 912 } else {
913 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 913 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
914 return (0); 914 return (0);
915 } 915 }
916 return (1); 916 return (1);
917} 917}
918#endif /* !defined(__x86_64__) */ 918#endif /* !defined(__x86_64__) */
919 919
920void 920void
921pat_init(struct cpu_info *ci) 921pat_init(struct cpu_info *ci)
922{ 922{
923 uint64_t pat; 923 uint64_t pat;
924 924
925 if (!(ci->ci_feat_val[0] & CPUID_PAT)) 925 if (!(ci->ci_feat_val[0] & CPUID_PAT))
926 return; 926 return;
927 927
928 /* We change WT to WC. Leave all other entries the default values. */ 928 /* We change WT to WC. Leave all other entries the default values. */
929 pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) | 929 pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) |
930 PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) | 930 PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) |
931 PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) | 931 PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) |
932 PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC); 932 PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC);
933 933
934 wrmsr(MSR_CR_PAT, pat); 934 wrmsr(MSR_CR_PAT, pat);
935 cpu_pat_enabled = true; 935 cpu_pat_enabled = true;
936 aprint_debug_dev(ci->ci_dev, "PAT enabled\n"); 936 aprint_debug_dev(ci->ci_dev, "PAT enabled\n");
937} 937}
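
The value written to the PAT MSR above replaces the power-on layout (WB, WT, UC-, UC, repeated) with WB, WC, UC-, UC, so that the PAT/PCD/PWT attribute bits of a PTE can select write-combining. Below is a minimal user-space sketch of the same composition; the macro values are written out as assumptions mirroring the architectural memory-type encodings rather than copied from the kernel headers.

#include <stdint.h>
#include <stdio.h>

/* Assumed architectural PAT memory-type encodings (each PAT entry is
 * 8 bits wide, low 3 bits significant); not the NetBSD header macros. */
#define PAT_UC		0x0ULL	/* uncacheable */
#define PAT_WC		0x1ULL	/* write combining */
#define PAT_WB		0x6ULL	/* write back */
#define PAT_UCMINUS	0x7ULL	/* UC- */
#define PATENTRY(n, type)	((type) << ((n) * 8))

int
main(void)
{
	/* Same layout as pat_init(): entries 1 and 5 (normally WT)
	 * are repurposed as WC. */
	uint64_t pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) |
	    PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) |
	    PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) |
	    PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC);

	printf("IA32_PAT = 0x%016llx\n", (unsigned long long)pat);
	return 0;
}
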
938 938
939static pt_entry_t 939static pt_entry_t
940pmap_pat_flags(u_int flags) 940pmap_pat_flags(u_int flags)
941{ 941{
942 u_int cacheflags = (flags & PMAP_CACHE_MASK); 942 u_int cacheflags = (flags & PMAP_CACHE_MASK);
943 943
944 if (!cpu_pat_enabled) { 944 if (!cpu_pat_enabled) {
945 switch (cacheflags) { 945 switch (cacheflags) {
946 case PMAP_NOCACHE: 946 case PMAP_NOCACHE:
947 case PMAP_NOCACHE_OVR: 947 case PMAP_NOCACHE_OVR:
948 /* results in PGC_UCMINUS on cpus which have 948 /* results in PGC_UCMINUS on cpus which have
949 * the cpuid PAT but PAT "disabled" 949 * the cpuid PAT but PAT "disabled"
950 */ 950 */
951 return PG_N; 951 return PG_N;
952 default: 952 default:
953 return 0; 953 return 0;
954 } 954 }
955 } 955 }
956 956
957 switch (cacheflags) { 957 switch (cacheflags) {
958 case PMAP_NOCACHE: 958 case PMAP_NOCACHE:
959 return PGC_UC; 959 return PGC_UC;
960 case PMAP_WRITE_COMBINE: 960 case PMAP_WRITE_COMBINE:
961 return PGC_WC; 961 return PGC_WC;
962 case PMAP_WRITE_BACK: 962 case PMAP_WRITE_BACK:
963 return PGC_WB; 963 return PGC_WB;
964 case PMAP_NOCACHE_OVR: 964 case PMAP_NOCACHE_OVR:
965 return PGC_UCMINUS; 965 return PGC_UCMINUS;
966 } 966 }
967 967
968 return 0; 968 return 0;
969} 969}
970 970
971/* 971/*
972 * p m a p k e n t e r f u n c t i o n s 972 * p m a p k e n t e r f u n c t i o n s
973 * 973 *
974 * functions to quickly enter/remove pages from the kernel address 974 * functions to quickly enter/remove pages from the kernel address
975 * space. pmap_kremove is exported to MI kernel. we make use of 975 * space. pmap_kremove is exported to MI kernel. we make use of
976 * the recursive PTE mappings. 976 * the recursive PTE mappings.
977 */ 977 */
978 978
979/* 979/*
980 * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking 980 * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking
981 * 981 *
982 * => no need to lock anything, assume va is already allocated 982 * => no need to lock anything, assume va is already allocated
983 * => should be faster than normal pmap enter function 983 * => should be faster than normal pmap enter function
984 */ 984 */
985 985
986void 986void
987pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 987pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
988{ 988{
989 pt_entry_t *pte, opte, npte; 989 pt_entry_t *pte, opte, npte;
990 990
991 KASSERT(!(prot & ~VM_PROT_ALL)); 991 KASSERT(!(prot & ~VM_PROT_ALL));
992 992
993 if (va < VM_MIN_KERNEL_ADDRESS) 993 if (va < VM_MIN_KERNEL_ADDRESS)
994 pte = vtopte(va); 994 pte = vtopte(va);
995 else 995 else
996 pte = kvtopte(va); 996 pte = kvtopte(va);
997#ifdef DOM0OPS 997#ifdef DOM0OPS
998 if (pa < pmap_pa_start || pa >= pmap_pa_end) { 998 if (pa < pmap_pa_start || pa >= pmap_pa_end) {
999#ifdef DEBUG 999#ifdef DEBUG
1000 printf_nolog("%s: pa 0x%" PRIx64 " for va 0x%" PRIx64 1000 printf_nolog("%s: pa 0x%" PRIx64 " for va 0x%" PRIx64
1001 " outside range\n", __func__, (int64_t)pa, (int64_t)va); 1001 " outside range\n", __func__, (int64_t)pa, (int64_t)va);
1002#endif /* DEBUG */ 1002#endif /* DEBUG */
1003 npte = pa; 1003 npte = pa;
1004 } else 1004 } else
1005#endif /* DOM0OPS */ 1005#endif /* DOM0OPS */
1006 npte = pmap_pa2pte(pa); 1006 npte = pmap_pa2pte(pa);
1007 npte |= protection_codes[prot] | PG_k | PG_V | pmap_pg_g; 1007 npte |= protection_codes[prot] | PG_k | PG_V | pmap_pg_g;
1008 npte |= pmap_pat_flags(flags); 1008 npte |= pmap_pat_flags(flags);
1009 opte = pmap_pte_testset(pte, npte); /* zap! */ 1009 opte = pmap_pte_testset(pte, npte); /* zap! */
1010#if defined(DIAGNOSTIC) 1010#if defined(DIAGNOSTIC)
1011 /* XXX For now... */ 1011 /* XXX For now... */
1012 if (opte & PG_PS) 1012 if (opte & PG_PS)
1013 panic("%s: PG_PS", __func__); 1013 panic("%s: PG_PS", __func__);
1014#endif 1014#endif
1015 if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) { 1015 if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) {
1016#if defined(DIAGNOSTIC) 1016#if defined(DIAGNOSTIC)
1017 printf_nolog("%s: mapping already present\n", __func__); 1017 printf_nolog("%s: mapping already present\n", __func__);
1018#endif 1018#endif
1019 /* This should not happen. */ 1019 /* This should not happen. */
1020 kpreempt_disable(); 1020 kpreempt_disable();
1021 pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER); 1021 pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER);
1022 kpreempt_enable(); 1022 kpreempt_enable();
1023 } 1023 }
1024} 1024}
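
As the comments above note, pmap_kenter_pa() is the unmanaged fast path: the caller already owns the KVA, no pv_entry is recorded, and a later pmap_kremove() must be followed by pmap_update() before the VA or page is reused. A usage sketch under those assumptions follows; the helper name and the origin of kva/pa are hypothetical, and the code assumes normal kernel context.

/* Illustrative helper (not from the tree): map one physical page at an
 * already-reserved kernel VA without pv tracking, use it, unmap it. */
static void
example_kenter_cycle(vaddr_t kva, paddr_t pa)
{
	pmap_kenter_pa(kva, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
	/* The mapping is usable immediately; no pmap_update() needed here. */

	memset((void *)kva, 0, PAGE_SIZE);

	pmap_kremove(kva, PAGE_SIZE);
	pmap_update(pmap_kernel());	/* required before kva/pa are reused */
}
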
1025 1025
1026void 1026void
1027pmap_emap_enter(vaddr_t va, paddr_t pa, vm_prot_t prot) 1027pmap_emap_enter(vaddr_t va, paddr_t pa, vm_prot_t prot)
1028{ 1028{
1029 pt_entry_t *pte, opte, npte; 1029 pt_entry_t *pte, opte, npte;
1030 1030
1031 KASSERT((prot & ~VM_PROT_ALL) == 0); 1031 KASSERT((prot & ~VM_PROT_ALL) == 0);
1032 pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va); 1032 pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va);
1033 1033
1034#ifdef DOM0OPS 1034#ifdef DOM0OPS
1035 if (pa < pmap_pa_start || pa >= pmap_pa_end) { 1035 if (pa < pmap_pa_start || pa >= pmap_pa_end) {
1036 npte = pa; 1036 npte = pa;
1037 } else 1037 } else
1038#endif 1038#endif
1039 npte = pmap_pa2pte(pa); 1039 npte = pmap_pa2pte(pa);
1040 1040
1041 npte = pmap_pa2pte(pa); 1041 npte = pmap_pa2pte(pa);
1042 npte |= protection_codes[prot] | PG_k | PG_V; 1042 npte |= protection_codes[prot] | PG_k | PG_V;
1043 opte = pmap_pte_testset(pte, npte); 1043 opte = pmap_pte_testset(pte, npte);
1044} 1044}
1045 1045
1046/* 1046/*
1047 * pmap_emap_sync: perform TLB flush or pmap load, if it was deferred. 1047 * pmap_emap_sync: perform TLB flush or pmap load, if it was deferred.
1048 */ 1048 */
1049void 1049void
1050pmap_emap_sync(bool canload) 1050pmap_emap_sync(bool canload)
1051{ 1051{
1052 struct cpu_info *ci = curcpu(); 1052 struct cpu_info *ci = curcpu();
1053 struct pmap *pmap; 1053 struct pmap *pmap;
1054 1054
1055 KASSERT(kpreempt_disabled()); 1055 KASSERT(kpreempt_disabled());
1056 if (__predict_true(ci->ci_want_pmapload && canload)) { 1056 if (__predict_true(ci->ci_want_pmapload && canload)) {
1057 /* 1057 /*
1058 * XXX: Hint for pmap_reactivate(), which might suggest to 1058 * XXX: Hint for pmap_reactivate(), which might suggest to
1059 * not perform TLB flush, if state has not changed. 1059 * not perform TLB flush, if state has not changed.
1060 */ 1060 */
1061 pmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map); 1061 pmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map);
1062 if (__predict_false(pmap == ci->ci_pmap)) { 1062 if (__predict_false(pmap == ci->ci_pmap)) {
1063 const uint32_t cpumask = ci->ci_cpumask; 1063 const uint32_t cpumask = ci->ci_cpumask;
1064 atomic_and_32(&pmap->pm_cpus, ~cpumask); 1064 atomic_and_32(&pmap->pm_cpus, ~cpumask);
1065 } 1065 }
1066 pmap_load(); 1066 pmap_load();
1067 KASSERT(ci->ci_want_pmapload == 0); 1067 KASSERT(ci->ci_want_pmapload == 0);
1068 } else { 1068 } else {
1069 tlbflush(); 1069 tlbflush();
1070 } 1070 }
1071 1071
1072} 1072}
1073 1073
1074void 1074void
1075pmap_emap_remove(vaddr_t sva, vsize_t len) 1075pmap_emap_remove(vaddr_t sva, vsize_t len)
1076{ 1076{
1077 pt_entry_t *pte, xpte; 1077 pt_entry_t *pte, xpte;
1078 vaddr_t va, eva = sva + len; 1078 vaddr_t va, eva = sva + len;
1079 1079
1080 for (va = sva; va < eva; va += PAGE_SIZE) { 1080 for (va = sva; va < eva; va += PAGE_SIZE) {
1081 pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va); 1081 pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va);
1082 xpte |= pmap_pte_testset(pte, 0); 1082 xpte |= pmap_pte_testset(pte, 0);
1083 } 1083 }
1084} 1084}
1085 1085
1086__strict_weak_alias(pmap_kenter_ma, pmap_kenter_pa); 1086__strict_weak_alias(pmap_kenter_ma, pmap_kenter_pa);
1087 1087
1088#if defined(__x86_64__) 1088#if defined(__x86_64__)
1089/* 1089/*
1090 * Change protection for a virtual address. Local for a CPU only, don't 1090 * Change protection for a virtual address. Local for a CPU only, don't
1091 * care about TLB shootdowns. 1091 * care about TLB shootdowns.
1092 * 1092 *
1093 * => must be called with preemption disabled 1093 * => must be called with preemption disabled
1094 */ 1094 */
1095void 1095void
1096pmap_changeprot_local(vaddr_t va, vm_prot_t prot) 1096pmap_changeprot_local(vaddr_t va, vm_prot_t prot)
1097{ 1097{
1098 pt_entry_t *pte, opte, npte; 1098 pt_entry_t *pte, opte, npte;
1099 1099
1100 KASSERT(kpreempt_disabled()); 1100 KASSERT(kpreempt_disabled());
1101 1101
1102 if (va < VM_MIN_KERNEL_ADDRESS) 1102 if (va < VM_MIN_KERNEL_ADDRESS)
1103 pte = vtopte(va); 1103 pte = vtopte(va);
1104 else 1104 else
1105 pte = kvtopte(va); 1105 pte = kvtopte(va);
1106 1106
1107 npte = opte = *pte; 1107 npte = opte = *pte;
1108 1108
1109 if ((prot & VM_PROT_WRITE) != 0) 1109 if ((prot & VM_PROT_WRITE) != 0)
1110 npte |= PG_RW; 1110 npte |= PG_RW;
1111 else 1111 else
1112 npte &= ~PG_RW; 1112 npte &= ~PG_RW;
1113 1113
1114 if (opte != npte) { 1114 if (opte != npte) {
1115 pmap_pte_set(pte, npte); 1115 pmap_pte_set(pte, npte);
1116 pmap_pte_flush(); 1116 pmap_pte_flush();
1117 invlpg(va); 1117 invlpg(va);
1118 } 1118 }
1119} 1119}
1120#endif /* defined(__x86_64__) */ 1120#endif /* defined(__x86_64__) */
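
pmap_changeprot_local() only rewrites the PTE and invalidates the TLB entry on the calling CPU, so it is suited to strictly CPU-local work done with preemption disabled. A hedged sketch of that pattern, for instance briefly making a read-only kernel page writable while modifying a byte on the local CPU; the helper is illustrative, not the kernel's actual code-patching routine.

static void
example_patch_byte(vaddr_t va, uint8_t newbyte)
{
	kpreempt_disable();
	pmap_changeprot_local(va, VM_PROT_READ | VM_PROT_WRITE);
	*(volatile uint8_t *)va = newbyte;	/* modify the target byte */
	pmap_changeprot_local(va, VM_PROT_READ);
	kpreempt_enable();
}
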
1121 1121
1122/* 1122/*
1123 * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking 1123 * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking
1124 * 1124 *
1125 * => no need to lock anything 1125 * => no need to lock anything
1126 * => caller must dispose of any vm_page mapped in the va range 1126 * => caller must dispose of any vm_page mapped in the va range
1127 * => note: not an inline function 1127 * => note: not an inline function
1128 * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE 1128 * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE
1129 * => we assume kernel only unmaps valid addresses and thus don't bother 1129 * => we assume kernel only unmaps valid addresses and thus don't bother
1130 * checking the valid bit before doing TLB flushing 1130 * checking the valid bit before doing TLB flushing
1131 * => must be followed by call to pmap_update() before reuse of page 1131 * => must be followed by call to pmap_update() before reuse of page
1132 */ 1132 */
1133 1133
1134void 1134void
1135pmap_kremove(vaddr_t sva, vsize_t len) 1135pmap_kremove(vaddr_t sva, vsize_t len)
1136{ 1136{
1137 pt_entry_t *pte, opte; 1137 pt_entry_t *pte, opte;
1138 vaddr_t va, eva; 1138 vaddr_t va, eva;
1139 1139
1140 eva = sva + len; 1140 eva = sva + len;
1141 1141
1142 kpreempt_disable(); 1142 kpreempt_disable();
1143 for (va = sva; va < eva; va += PAGE_SIZE) { 1143 for (va = sva; va < eva; va += PAGE_SIZE) {
1144 if (va < VM_MIN_KERNEL_ADDRESS) 1144 if (va < VM_MIN_KERNEL_ADDRESS)
1145 pte = vtopte(va); 1145 pte = vtopte(va);
1146 else 1146 else
1147 pte = kvtopte(va); 1147 pte = kvtopte(va);
1148 opte = pmap_pte_testset(pte, 0); /* zap! */ 1148 opte = pmap_pte_testset(pte, 0); /* zap! */
1149 if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) { 1149 if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) {
1150 pmap_tlb_shootdown(pmap_kernel(), va, opte, 1150 pmap_tlb_shootdown(pmap_kernel(), va, opte,
1151 TLBSHOOT_KREMOVE); 1151 TLBSHOOT_KREMOVE);
1152 } 1152 }
1153 KASSERT((opte & PG_PS) == 0); 1153 KASSERT((opte & PG_PS) == 0);
1154 KASSERT((opte & PG_PVLIST) == 0); 1154 KASSERT((opte & PG_PVLIST) == 0);
1155 } 1155 }
1156 kpreempt_enable(); 1156 kpreempt_enable();
1157} 1157}
1158 1158
1159/* 1159/*
1160 * p m a p i n i t f u n c t i o n s 1160 * p m a p i n i t f u n c t i o n s
1161 * 1161 *
1162 * pmap_bootstrap and pmap_init are called during system startup 1162 * pmap_bootstrap and pmap_init are called during system startup
1163 * to init the pmap module. pmap_bootstrap() does a low level 1163 * to init the pmap module. pmap_bootstrap() does a low level
1164 * init just to get things rolling. pmap_init() finishes the job. 1164 * init just to get things rolling. pmap_init() finishes the job.
1165 */ 1165 */
1166 1166
1167/* 1167/*
1168 * pmap_bootstrap: get the system in a state where it can run with VM 1168 * pmap_bootstrap: get the system in a state where it can run with VM
1169 * properly enabled (called before main()). the VM system is 1169 * properly enabled (called before main()). the VM system is
1170 * fully init'd later... 1170 * fully init'd later...
1171 * 1171 *
1172 * => on i386, locore.s has already enabled the MMU by allocating 1172 * => on i386, locore.s has already enabled the MMU by allocating
1173 * a PDP for the kernel, and nkpde PTP's for the kernel. 1173 * a PDP for the kernel, and nkpde PTP's for the kernel.
1174 * => kva_start is the first free virtual address in kernel space 1174 * => kva_start is the first free virtual address in kernel space
1175 */ 1175 */
1176 1176
1177void 1177void
1178pmap_bootstrap(vaddr_t kva_start) 1178pmap_bootstrap(vaddr_t kva_start)
1179{ 1179{
1180 struct pmap *kpm; 1180 struct pmap *kpm;
1181 pt_entry_t *pte; 1181 pt_entry_t *pte;
1182 int i; 1182 int i;
1183 vaddr_t kva; 1183 vaddr_t kva;
1184#ifndef XEN 1184#ifndef XEN
1185 pd_entry_t *pde; 1185 pd_entry_t *pde;
1186 unsigned long p1i; 1186 unsigned long p1i;
1187 vaddr_t kva_end; 1187 vaddr_t kva_end;
1188#endif 1188#endif
1189#ifdef __HAVE_DIRECT_MAP 1189#ifdef __HAVE_DIRECT_MAP
1190 phys_ram_seg_t *mc; 1190 phys_ram_seg_t *mc;
1191 long ndmpdp; 1191 long ndmpdp;
1192 paddr_t dmpd, dmpdp, pdp; 1192 paddr_t dmpd, dmpdp, pdp;
1193 vaddr_t tmpva; 1193 vaddr_t tmpva;
1194#endif 1194#endif
1195 1195
1196 pt_entry_t pg_nx = (cpu_feature[2] & CPUID_NOX ? PG_NX : 0); 1196 pt_entry_t pg_nx = (cpu_feature[2] & CPUID_NOX ? PG_NX : 0);
1197 1197
1198 /* 1198 /*
1199 * set up our local static global vars that keep track of the 1199 * set up our local static global vars that keep track of the
1200 * usage of KVM before kernel_map is set up 1200 * usage of KVM before kernel_map is set up
1201 */ 1201 */
1202 1202
1203 virtual_avail = kva_start; /* first free KVA */ 1203 virtual_avail = kva_start; /* first free KVA */
1204 virtual_end = VM_MAX_KERNEL_ADDRESS; /* last KVA */ 1204 virtual_end = VM_MAX_KERNEL_ADDRESS; /* last KVA */
1205 1205
1206 /* 1206 /*
1207 * set up protection_codes: we need to be able to convert from 1207 * set up protection_codes: we need to be able to convert from
1208 * an MI protection code (some combo of VM_PROT...) to something 1208 * an MI protection code (some combo of VM_PROT...) to something
1209 * we can jam into an i386 PTE. 1209 * we can jam into an i386 PTE.
1210 */ 1210 */
1211 1211
1212 protection_codes[VM_PROT_NONE] = pg_nx; /* --- */ 1212 protection_codes[VM_PROT_NONE] = pg_nx; /* --- */
1213 protection_codes[VM_PROT_EXECUTE] = PG_RO | PG_X; /* --x */ 1213 protection_codes[VM_PROT_EXECUTE] = PG_RO | PG_X; /* --x */
1214 protection_codes[VM_PROT_READ] = PG_RO | pg_nx; /* -r- */ 1214 protection_codes[VM_PROT_READ] = PG_RO | pg_nx; /* -r- */
1215 protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO | PG_X;/* -rx */ 1215 protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO | PG_X;/* -rx */
1216 protection_codes[VM_PROT_WRITE] = PG_RW | pg_nx; /* w-- */ 1216 protection_codes[VM_PROT_WRITE] = PG_RW | pg_nx; /* w-- */
1217 protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW | PG_X;/* w-x */ 1217 protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW | PG_X;/* w-x */
1218 protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW | pg_nx; 1218 protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW | pg_nx;
1219 /* wr- */ 1219 /* wr- */
1220 protection_codes[VM_PROT_ALL] = PG_RW | PG_X; /* wrx */ 1220 protection_codes[VM_PROT_ALL] = PG_RW | PG_X; /* wrx */
1221 1221
1222 /* 1222 /*
1223 * now we init the kernel's pmap 1223 * now we init the kernel's pmap
1224 * 1224 *
1225 * the kernel pmap's pm_obj is not used for much. however, in 1225 * the kernel pmap's pm_obj is not used for much. however, in
1226 * user pmaps the pm_obj contains the list of active PTPs. 1226 * user pmaps the pm_obj contains the list of active PTPs.
1227 * the pm_obj currently does not have a pager. it might be possible 1227 * the pm_obj currently does not have a pager. it might be possible
1228 * to add a pager that would allow a process to read-only mmap its 1228 * to add a pager that would allow a process to read-only mmap its
1229 * own page tables (fast user level vtophys?). this may or may not 1229 * own page tables (fast user level vtophys?). this may or may not
1230 * be useful. 1230 * be useful.
1231 */ 1231 */
1232 1232
1233 kpm = pmap_kernel(); 1233 kpm = pmap_kernel();
1234 for (i = 0; i < PTP_LEVELS - 1; i++) { 1234 for (i = 0; i < PTP_LEVELS - 1; i++) {
1235 mutex_init(&kpm->pm_obj_lock[i], MUTEX_DEFAULT, IPL_NONE); 1235 mutex_init(&kpm->pm_obj_lock[i], MUTEX_DEFAULT, IPL_NONE);
1236 uvm_obj_init(&kpm->pm_obj[i], NULL, false, 1); 1236 uvm_obj_init(&kpm->pm_obj[i], NULL, false, 1);
1237 uvm_obj_setlock(&kpm->pm_obj[i], &kpm->pm_obj_lock[i]); 1237 uvm_obj_setlock(&kpm->pm_obj[i], &kpm->pm_obj_lock[i]);
1238 kpm->pm_ptphint[i] = NULL; 1238 kpm->pm_ptphint[i] = NULL;
1239 } 1239 }
1240 memset(&kpm->pm_list, 0, sizeof(kpm->pm_list)); /* pm_list not used */ 1240 memset(&kpm->pm_list, 0, sizeof(kpm->pm_list)); /* pm_list not used */
1241 1241
1242 kpm->pm_pdir = (pd_entry_t *)(PDPpaddr + KERNBASE); 1242 kpm->pm_pdir = (pd_entry_t *)(PDPpaddr + KERNBASE);
1243 for (i = 0; i < PDP_SIZE; i++) 1243 for (i = 0; i < PDP_SIZE; i++)
1244 kpm->pm_pdirpa[i] = PDPpaddr + PAGE_SIZE * i; 1244 kpm->pm_pdirpa[i] = PDPpaddr + PAGE_SIZE * i;
1245 1245
1246 kpm->pm_stats.wired_count = kpm->pm_stats.resident_count = 1246 kpm->pm_stats.wired_count = kpm->pm_stats.resident_count =
1247 x86_btop(kva_start - VM_MIN_KERNEL_ADDRESS); 1247 x86_btop(kva_start - VM_MIN_KERNEL_ADDRESS);
1248 1248
1249 /* 1249 /*
1250 * the above is just a rough estimate and not critical to the proper 1250 * the above is just a rough estimate and not critical to the proper
1251 * operation of the system. 1251 * operation of the system.
1252 */ 1252 */
1253 1253
1254#ifndef XEN 1254#ifndef XEN
1255 /* 1255 /*
1256 * Begin to enable global TLB entries if they are supported. 1256 * Begin to enable global TLB entries if they are supported.
1257 * The G bit has no effect until the CR4_PGE bit is set in CR4, 1257 * The G bit has no effect until the CR4_PGE bit is set in CR4,
1258 * which happens in cpu_init(), which is run on each cpu 1258 * which happens in cpu_init(), which is run on each cpu
1259 * (and happens later) 1259 * (and happens later)
1260 */ 1260 */
1261 1261
1262 if (cpu_feature[0] & CPUID_PGE) { 1262 if (cpu_feature[0] & CPUID_PGE) {
1263 pmap_pg_g = PG_G; /* enable software */ 1263 pmap_pg_g = PG_G; /* enable software */
1264 1264
1265 /* add PG_G attribute to already mapped kernel pages */ 1265 /* add PG_G attribute to already mapped kernel pages */
1266 if (KERNBASE == VM_MIN_KERNEL_ADDRESS) { 1266 if (KERNBASE == VM_MIN_KERNEL_ADDRESS) {
1267 kva_end = virtual_avail; 1267 kva_end = virtual_avail;
1268 } else { 1268 } else {
1269 extern vaddr_t eblob, esym; 1269 extern vaddr_t eblob, esym;
1270 kva_end = (vaddr_t)&end; 1270 kva_end = (vaddr_t)&end;
1271 if (esym > kva_end) 1271 if (esym > kva_end)
1272 kva_end = esym; 1272 kva_end = esym;
1273 if (eblob > kva_end) 1273 if (eblob > kva_end)
1274 kva_end = eblob; 1274 kva_end = eblob;
1275 kva_end = roundup(kva_end, PAGE_SIZE); 1275 kva_end = roundup(kva_end, PAGE_SIZE);
1276 } 1276 }
1277 for (kva = KERNBASE; kva < kva_end; kva += PAGE_SIZE) { 1277 for (kva = KERNBASE; kva < kva_end; kva += PAGE_SIZE) {
1278 p1i = pl1_i(kva); 1278 p1i = pl1_i(kva);
1279 if (pmap_valid_entry(PTE_BASE[p1i])) 1279 if (pmap_valid_entry(PTE_BASE[p1i]))
1280 PTE_BASE[p1i] |= PG_G; 1280 PTE_BASE[p1i] |= PG_G;
1281 } 1281 }
1282 } 1282 }
1283 1283
1284 /* 1284 /*
1285 * enable large pages if they are supported. 1285 * enable large pages if they are supported.
1286 */ 1286 */
1287 1287
1288 if (cpu_feature[0] & CPUID_PSE) { 1288 if (cpu_feature[0] & CPUID_PSE) {
1289 paddr_t pa; 1289 paddr_t pa;
1290 extern char __data_start; 1290 extern char __data_start;
1291 1291
1292 lcr4(rcr4() | CR4_PSE); /* enable hardware (via %cr4) */ 1292 lcr4(rcr4() | CR4_PSE); /* enable hardware (via %cr4) */
1293 pmap_largepages = 1; /* enable software */ 1293 pmap_largepages = 1; /* enable software */
1294 1294
1295 /* 1295 /*
1296 * the TLB must be flushed after enabling large pages 1296 * the TLB must be flushed after enabling large pages
1297 * on Pentium CPUs, according to section 3.6.2.2 of 1297 * on Pentium CPUs, according to section 3.6.2.2 of
1298 * "Intel Architecture Software Developer's Manual, 1298 * "Intel Architecture Software Developer's Manual,
1299 * Volume 3: System Programming". 1299 * Volume 3: System Programming".
1300 */ 1300 */
1301 tlbflushg(); 1301 tlbflushg();
1302 1302
1303 /* 1303 /*
1304 * now, remap the kernel text using large pages. we 1304 * now, remap the kernel text using large pages. we
1305 * assume that the linker has properly aligned the 1305 * assume that the linker has properly aligned the
1306 * .data segment to a NBPD_L2 boundary. 1306 * .data segment to a NBPD_L2 boundary.
1307 */ 1307 */
1308 kva_end = rounddown((vaddr_t)&__data_start, NBPD_L1); 1308 kva_end = rounddown((vaddr_t)&__data_start, NBPD_L1);
1309 for (pa = 0, kva = KERNBASE; kva + NBPD_L2 <= kva_end; 1309 for (pa = 0, kva = KERNBASE; kva + NBPD_L2 <= kva_end;
1310 kva += NBPD_L2, pa += NBPD_L2) { 1310 kva += NBPD_L2, pa += NBPD_L2) {
1311 pde = &L2_BASE[pl2_i(kva)]; 1311 pde = &L2_BASE[pl2_i(kva)];
1312 *pde = pa | pmap_pg_g | PG_PS | 1312 *pde = pa | pmap_pg_g | PG_PS |
1313 PG_KR | PG_V; /* zap! */ 1313 PG_KR | PG_V; /* zap! */
1314 tlbflushg(); 1314 tlbflushg();
1315 } 1315 }
1316#if defined(DEBUG) 1316#if defined(DEBUG)
1317 aprint_normal("kernel text is mapped with %" PRIuPSIZE " large " 1317 aprint_normal("kernel text is mapped with %" PRIuPSIZE " large "
1318 "pages and %" PRIuPSIZE " normal pages\n", 1318 "pages and %" PRIuPSIZE " normal pages\n",
1319 howmany(kva - KERNBASE, NBPD_L2), 1319 howmany(kva - KERNBASE, NBPD_L2),
1320 howmany((vaddr_t)&__data_start - kva, NBPD_L1)); 1320 howmany((vaddr_t)&__data_start - kva, NBPD_L1));
1321#endif /* defined(DEBUG) */ 1321#endif /* defined(DEBUG) */
1322 } 1322 }
1323#endif /* !XEN */ 1323#endif /* !XEN */
1324 1324
1325#ifdef __HAVE_DIRECT_MAP 1325#ifdef __HAVE_DIRECT_MAP
1326 1326
1327 tmpva = (KERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2); 1327 tmpva = (KERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);
1328 pte = PTE_BASE + pl1_i(tmpva); 1328 pte = PTE_BASE + pl1_i(tmpva);
1329 1329
1330 /* 1330 /*
1331 * Map the direct map. Use 1GB pages if they are available, 1331 * Map the direct map. Use 1GB pages if they are available,
1332 * otherwise use 2MB pages. 1332 * otherwise use 2MB pages.
1333 */ 1333 */
1334 1334
1335 mc = &mem_clusters[mem_cluster_cnt - 1]; 1335 mc = &mem_clusters[mem_cluster_cnt - 1];
1336 ndmpdp = (mc->start + mc->size + NBPD_L3 - 1) >> L3_SHIFT; 1336 ndmpdp = (mc->start + mc->size + NBPD_L3 - 1) >> L3_SHIFT;
1337 dmpdp = avail_start; avail_start += PAGE_SIZE; 1337 dmpdp = avail_start; avail_start += PAGE_SIZE;
1338 1338
1339 if (cpu_feature[2] & CPUID_P1GB) { 1339 if (cpu_feature[2] & CPUID_P1GB) {
1340 for (i = 0; i < ndmpdp; i++) { 1340 for (i = 0; i < ndmpdp; i++) {
1341 pdp = (paddr_t)&(((pd_entry_t *)dmpdp)[i]); 1341 pdp = (paddr_t)&(((pd_entry_t *)dmpdp)[i]);
1342 *pte = (pdp & PG_FRAME) | PG_V | PG_RW; 1342 *pte = (pdp & PG_FRAME) | PG_V | PG_RW;
1343 pmap_update_pg(tmpva); 1343 pmap_update_pg(tmpva);
1344 1344
1345 pde = (pd_entry_t *)(tmpva + (pdp & ~PG_FRAME)); 1345 pde = (pd_entry_t *)(tmpva + (pdp & ~PG_FRAME));
1346 *pde = ((paddr_t)i << L3_SHIFT) | 1346 *pde = ((paddr_t)i << L3_SHIFT) |
1347 PG_RW | PG_V | PG_U | PG_PS | PG_G; 1347 PG_RW | PG_V | PG_U | PG_PS | PG_G;
1348 } 1348 }
1349 } else { 1349 } else {
1350 dmpd = avail_start; avail_start += ndmpdp * PAGE_SIZE; 1350 dmpd = avail_start; avail_start += ndmpdp * PAGE_SIZE;
1351 1351
1352 for (i = 0; i < NPDPG * ndmpdp; i++) { 1352 for (i = 0; i < NPDPG * ndmpdp; i++) {
1353 pdp = (paddr_t)&(((pd_entry_t *)dmpd)[i]); 1353 pdp = (paddr_t)&(((pd_entry_t *)dmpd)[i]);
1354 *pte = (pdp & PG_FRAME) | PG_V | PG_RW; 1354 *pte = (pdp & PG_FRAME) | PG_V | PG_RW;
1355 pmap_update_pg(tmpva); 1355 pmap_update_pg(tmpva);
1356 1356
1357 pde = (pd_entry_t *)(tmpva + (pdp & ~PG_FRAME)); 1357 pde = (pd_entry_t *)(tmpva + (pdp & ~PG_FRAME));
1358 *pde = ((paddr_t)i << L2_SHIFT) | 1358 *pde = ((paddr_t)i << L2_SHIFT) |
1359 PG_RW | PG_V | PG_U | PG_PS | PG_G; 1359 PG_RW | PG_V | PG_U | PG_PS | PG_G;
1360 } 1360 }
1361 for (i = 0; i < ndmpdp; i++) { 1361 for (i = 0; i < ndmpdp; i++) {
1362 pdp = (paddr_t)&(((pd_entry_t *)dmpdp)[i]); 1362 pdp = (paddr_t)&(((pd_entry_t *)dmpdp)[i]);
1363 *pte = (pdp & PG_FRAME) | PG_V | PG_RW; 1363 *pte = (pdp & PG_FRAME) | PG_V | PG_RW;
1364 pmap_update_pg((vaddr_t)tmpva); 1364 pmap_update_pg((vaddr_t)tmpva);
1365 1365
1366 pde = (pd_entry_t *)(tmpva + (pdp & ~PG_FRAME)); 1366 pde = (pd_entry_t *)(tmpva + (pdp & ~PG_FRAME));
1367 *pde = (dmpd + (i << PAGE_SHIFT)) | 1367 *pde = (dmpd + (i << PAGE_SHIFT)) |
1368 PG_RW | PG_V | PG_U | PG_G; 1368 PG_RW | PG_V | PG_U;
1369 } 1369 }
1370 } 1370 }
1371 1371
1372 kpm->pm_pdir[PDIR_SLOT_DIRECT] = dmpdp | PG_KW | PG_V | PG_U; 1372 kpm->pm_pdir[PDIR_SLOT_DIRECT] = dmpdp | PG_KW | PG_V | PG_U;
1373 1373
1374 tlbflushg(); 1374 tlbflush();
1375 1375
1376#else 1376#else
1377 if (VM_MIN_KERNEL_ADDRESS != KERNBASE) { 1377 if (VM_MIN_KERNEL_ADDRESS != KERNBASE) {
1378 /* 1378 /*
1379 * zero_pte is stuck at the end of mapped space for the kernel 1379 * zero_pte is stuck at the end of mapped space for the kernel
1380 * image (disjunct from kva space). This is done so that it 1380 * image (disjunct from kva space). This is done so that it
1381 * can safely be used in pmap_growkernel (pmap_get_physpage), 1381 * can safely be used in pmap_growkernel (pmap_get_physpage),
1382 * when it's called for the first time. 1382 * when it's called for the first time.
1383 * XXXfvdl fix this for MULTIPROCESSOR later. 1383 * XXXfvdl fix this for MULTIPROCESSOR later.
1384 */ 1384 */
1385 1385
1386 early_zerop = (void *)(KERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2); 1386 early_zerop = (void *)(KERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);
1387 early_zero_pte = PTE_BASE + pl1_i((vaddr_t)early_zerop); 1387 early_zero_pte = PTE_BASE + pl1_i((vaddr_t)early_zerop);
1388 } 1388 }
1389 1389
1390 /* 1390 /*
1391 * now we allocate the "special" VAs which are used for tmp mappings 1391 * now we allocate the "special" VAs which are used for tmp mappings
1392 * by the pmap (and other modules). we allocate the VAs by advancing 1392 * by the pmap (and other modules). we allocate the VAs by advancing
1393 * virtual_avail (note that there are no pages mapped at these VAs). 1393 * virtual_avail (note that there are no pages mapped at these VAs).
1394 * we find the PTE that maps the allocated VA via the linear PTE 1394 * we find the PTE that maps the allocated VA via the linear PTE
1395 * mapping. 1395 * mapping.
1396 */ 1396 */
1397 1397
1398 pte = PTE_BASE + pl1_i(virtual_avail); 1398 pte = PTE_BASE + pl1_i(virtual_avail);
1399 1399
1400#ifdef MULTIPROCESSOR 1400#ifdef MULTIPROCESSOR
1401 /* 1401 /*
1402 * Waste some VA space to avoid false sharing of cache lines 1402 * Waste some VA space to avoid false sharing of cache lines
1403 * for page table pages: Give each possible CPU a cache line 1403 * for page table pages: Give each possible CPU a cache line
1404 * of PTE's (8) to play with, though we only need 4. We could 1404 * of PTE's (8) to play with, though we only need 4. We could
1405 * recycle some of this waste by putting the idle stacks here 1405 * recycle some of this waste by putting the idle stacks here
1406 * as well; we could waste less space if we knew the largest 1406 * as well; we could waste less space if we knew the largest
1407 * CPU ID beforehand. 1407 * CPU ID beforehand.
1408 */ 1408 */
1409 csrcp = (char *) virtual_avail; csrc_pte = pte; 1409 csrcp = (char *) virtual_avail; csrc_pte = pte;
1410 1410
1411 cdstp = (char *) virtual_avail+PAGE_SIZE; cdst_pte = pte+1; 1411 cdstp = (char *) virtual_avail+PAGE_SIZE; cdst_pte = pte+1;
1412 1412
1413 zerop = (char *) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2; 1413 zerop = (char *) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2;
1414 1414
1415 ptpp = (char *) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3; 1415 ptpp = (char *) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3;
1416 1416
1417 virtual_avail += PAGE_SIZE * maxcpus * NPTECL; 1417 virtual_avail += PAGE_SIZE * maxcpus * NPTECL;
1418 pte += maxcpus * NPTECL; 1418 pte += maxcpus * NPTECL;
1419#else 1419#else
1420 csrcp = (void *) virtual_avail; csrc_pte = pte; /* allocate */ 1420 csrcp = (void *) virtual_avail; csrc_pte = pte; /* allocate */
1421 virtual_avail += PAGE_SIZE; pte++; /* advance */ 1421 virtual_avail += PAGE_SIZE; pte++; /* advance */
1422 1422
1423 cdstp = (void *) virtual_avail; cdst_pte = pte; 1423 cdstp = (void *) virtual_avail; cdst_pte = pte;
1424 virtual_avail += PAGE_SIZE; pte++; 1424 virtual_avail += PAGE_SIZE; pte++;
1425 1425
1426 zerop = (void *) virtual_avail; zero_pte = pte; 1426 zerop = (void *) virtual_avail; zero_pte = pte;
1427 virtual_avail += PAGE_SIZE; pte++; 1427 virtual_avail += PAGE_SIZE; pte++;
1428 1428
1429 ptpp = (void *) virtual_avail; ptp_pte = pte; 1429 ptpp = (void *) virtual_avail; ptp_pte = pte;
1430 virtual_avail += PAGE_SIZE; pte++; 1430 virtual_avail += PAGE_SIZE; pte++;
1431#endif 1431#endif
1432 1432
1433 if (VM_MIN_KERNEL_ADDRESS == KERNBASE) { 1433 if (VM_MIN_KERNEL_ADDRESS == KERNBASE) {
1434 early_zerop = zerop; 1434 early_zerop = zerop;
1435 early_zero_pte = zero_pte; 1435 early_zero_pte = zero_pte;
1436 } 1436 }
1437#endif 1437#endif
1438 1438
1439 /* 1439 /*
1440 * Nothing after this point actually needs pte. 1440 * Nothing after this point actually needs pte.
1441 */ 1441 */
1442 pte = (void *)0xdeadbeef; 1442 pte = (void *)0xdeadbeef;
1443 1443
1444#ifdef XEN 1444#ifdef XEN
1445#ifdef __x86_64__ 1445#ifdef __x86_64__
1446 /* 1446 /*
1447 * We want a dummy page directory for Xen: 1447 * We want a dummy page directory for Xen:
1448 * when a pmap is deactivated, Xen will still consider it active. 1448 * when a pmap is deactivated, Xen will still consider it active.
1449 * So we point the user PGD at this one to lift all protection on 1449 * So we point the user PGD at this one to lift all protection on
1450 * the now-inactive set of page tables. 1450 * the now-inactive set of page tables.
1451 */ 1451 */
1452 xen_dummy_user_pgd = avail_start; 1452 xen_dummy_user_pgd = avail_start;
1453 avail_start += PAGE_SIZE; 1453 avail_start += PAGE_SIZE;
1454  1454
1455 /* Zero fill it, the less checks in Xen it requires the better */ 1455 /* Zero fill it, the less checks in Xen it requires the better */
1456 memset((void *) (xen_dummy_user_pgd + KERNBASE), 0, PAGE_SIZE); 1456 memset((void *) (xen_dummy_user_pgd + KERNBASE), 0, PAGE_SIZE);
1457 /* Mark read-only */ 1457 /* Mark read-only */
1458 HYPERVISOR_update_va_mapping(xen_dummy_user_pgd + KERNBASE, 1458 HYPERVISOR_update_va_mapping(xen_dummy_user_pgd + KERNBASE,
1459 pmap_pa2pte(xen_dummy_user_pgd) | PG_u | PG_V, UVMF_INVLPG); 1459 pmap_pa2pte(xen_dummy_user_pgd) | PG_u | PG_V, UVMF_INVLPG);
1460 /* Pin as L4 */ 1460 /* Pin as L4 */
1461 xpq_queue_pin_l4_table(xpmap_ptom_masked(xen_dummy_user_pgd)); 1461 xpq_queue_pin_l4_table(xpmap_ptom_masked(xen_dummy_user_pgd));
1462#endif /* __x86_64__ */ 1462#endif /* __x86_64__ */
1463 idt_vaddr = virtual_avail; /* don't need pte */ 1463 idt_vaddr = virtual_avail; /* don't need pte */
1464 idt_paddr = avail_start; /* steal a page */ 1464 idt_paddr = avail_start; /* steal a page */
1465 /* 1465 /*
1466 * Xen requires one more page, as we can't store the 1466 * Xen requires one more page, as we can't store the
1467 * GDT and LDT on the same page. 1467 * GDT and LDT on the same page.
1468 */ 1468 */
1469 virtual_avail += 3 * PAGE_SIZE; 1469 virtual_avail += 3 * PAGE_SIZE;
1470 avail_start += 3 * PAGE_SIZE; 1470 avail_start += 3 * PAGE_SIZE;
1471#else /* XEN */ 1471#else /* XEN */
1472 idt_vaddr = virtual_avail; /* don't need pte */ 1472 idt_vaddr = virtual_avail; /* don't need pte */
1473 idt_paddr = avail_start; /* steal a page */ 1473 idt_paddr = avail_start; /* steal a page */
1474#if defined(__x86_64__) 1474#if defined(__x86_64__)
1475 virtual_avail += 2 * PAGE_SIZE; 1475 virtual_avail += 2 * PAGE_SIZE;
1476 avail_start += 2 * PAGE_SIZE; 1476 avail_start += 2 * PAGE_SIZE;
1477#else /* defined(__x86_64__) */ 1477#else /* defined(__x86_64__) */
1478 virtual_avail += PAGE_SIZE; 1478 virtual_avail += PAGE_SIZE;
1479 avail_start += PAGE_SIZE; 1479 avail_start += PAGE_SIZE;
1480 /* pentium f00f bug stuff */ 1480 /* pentium f00f bug stuff */
1481 pentium_idt_vaddr = virtual_avail; /* don't need pte */ 1481 pentium_idt_vaddr = virtual_avail; /* don't need pte */
1482 virtual_avail += PAGE_SIZE; 1482 virtual_avail += PAGE_SIZE;
1483#endif /* defined(__x86_64__) */ 1483#endif /* defined(__x86_64__) */
1484#endif /* XEN */ 1484#endif /* XEN */
1485 1485
1486#ifdef _LP64 1486#ifdef _LP64
1487 /* 1487 /*
1488 * Grab a page below 4G for things that need it (i.e. 1488 * Grab a page below 4G for things that need it (i.e.
1489 * having an initial %cr3 for the MP trampoline). 1489 * having an initial %cr3 for the MP trampoline).
1490 */ 1490 */
1491 lo32_vaddr = virtual_avail; 1491 lo32_vaddr = virtual_avail;
1492 virtual_avail += PAGE_SIZE; 1492 virtual_avail += PAGE_SIZE;
1493 lo32_paddr = avail_start; 1493 lo32_paddr = avail_start;
1494 avail_start += PAGE_SIZE; 1494 avail_start += PAGE_SIZE;
1495#endif 1495#endif
1496 1496
1497 /* 1497 /*
1498 * now we reserve some VM for mapping pages when doing a crash dump 1498 * now we reserve some VM for mapping pages when doing a crash dump
1499 */ 1499 */
1500 1500
1501 virtual_avail = reserve_dumppages(virtual_avail); 1501 virtual_avail = reserve_dumppages(virtual_avail);
1502 1502
1503 /* 1503 /*
1504 * init the static-global locks and global lists. 1504 * init the static-global locks and global lists.
1505 * 1505 *
1506 * => pventry::pvh_lock (initialized elsewhere) must also be 1506 * => pventry::pvh_lock (initialized elsewhere) must also be
1507 * a spin lock, again at IPL_VM to prevent deadlock, and 1507 * a spin lock, again at IPL_VM to prevent deadlock, and
1508 * again is never taken from interrupt context. 1508 * again is never taken from interrupt context.
1509 */ 1509 */
1510 1510
1511 mutex_init(&pmaps_lock, MUTEX_DEFAULT, IPL_NONE); 1511 mutex_init(&pmaps_lock, MUTEX_DEFAULT, IPL_NONE);
1512 LIST_INIT(&pmaps); 1512 LIST_INIT(&pmaps);
1513 1513
1514 /* 1514 /*
1515 * initialize caches. 1515 * initialize caches.
1516 */ 1516 */
1517 1517
1518 pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap), 0, 0, 0, 1518 pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap), 0, 0, 0,
1519 "pmappl", NULL, IPL_NONE, NULL, NULL, NULL); 1519 "pmappl", NULL, IPL_NONE, NULL, NULL, NULL);
1520#ifdef PAE 1520#ifdef PAE
1521 pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE * PDP_SIZE, 0, 0, 0, 1521 pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE * PDP_SIZE, 0, 0, 0,
1522 "pdppl", &pmap_pdp_allocator, IPL_NONE, 1522 "pdppl", &pmap_pdp_allocator, IPL_NONE,
1523 pmap_pdp_ctor, pmap_pdp_dtor, NULL); 1523 pmap_pdp_ctor, pmap_pdp_dtor, NULL);
1524#else /* PAE */ 1524#else /* PAE */
1525 pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE, 0, 0, 0, 1525 pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE, 0, 0, 0,
1526 "pdppl", NULL, IPL_NONE, pmap_pdp_ctor, pmap_pdp_dtor, NULL); 1526 "pdppl", NULL, IPL_NONE, pmap_pdp_ctor, pmap_pdp_dtor, NULL);
1527#endif /* PAE */ 1527#endif /* PAE */
1528 pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0, 1528 pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0,
1529 PR_LARGECACHE, "pvpl", &pool_allocator_meta, IPL_NONE, NULL, 1529 PR_LARGECACHE, "pvpl", &pool_allocator_meta, IPL_NONE, NULL,
1530 NULL, NULL); 1530 NULL, NULL);
1531 1531
1532 /* 1532 /*
1533 * ensure the TLB is sync'd with reality by flushing it... 1533 * ensure the TLB is sync'd with reality by flushing it...
1534 */ 1534 */
1535 1535
1536 tlbflushg(); 1536 tlbflushg();
1537 1537
1538 /* 1538 /*
1539 * calculate pmap_maxkvaddr from nkptp[]. 1539 * calculate pmap_maxkvaddr from nkptp[].
1540 */ 1540 */
1541 1541
1542 kva = VM_MIN_KERNEL_ADDRESS; 1542 kva = VM_MIN_KERNEL_ADDRESS;
1543 for (i = PTP_LEVELS - 1; i >= 1; i--) { 1543 for (i = PTP_LEVELS - 1; i >= 1; i--) {
1544 kva += nkptp[i] * nbpd[i]; 1544 kva += nkptp[i] * nbpd[i];
1545 } 1545 }
1546 pmap_maxkvaddr = kva; 1546 pmap_maxkvaddr = kva;
1547} 1547}
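
Much of the bootstrap code above (PTE_BASE, pl1_i(), vtopte()/kvtopte()) relies on the recursive page-directory slot: one directory entry points back at the directory itself, which makes every page table visible in one contiguous linear window and turns "find the PTE for this VA" into simple indexing. The sketch below illustrates the idea with two-level i386-style constants written out as assumptions (the slot number is hypothetical); the kernel's macros generalize the same trick to PAE and 4-level amd64 paging.

#include <stdint.h>
#include <stdio.h>

/* Assumed two-level constants: 4 KB pages, 1024-entry page directory,
 * recursive directory slot chosen arbitrarily for illustration. */
#define PAGE_SHIFT	12
#define PDIR_SLOT_SELF	831UL			/* hypothetical slot */
#define PTE_WINDOW	(PDIR_SLOT_SELF << 22)	/* 4 MB linear PTE window */

/* Virtual address of the leaf PTE that maps "va". */
static uintptr_t
example_vtopte(uintptr_t va)
{
	return PTE_WINDOW + (va >> PAGE_SHIFT) * sizeof(uint32_t);
}

int
main(void)
{
	printf("PTE for VA 0xc0100000 lives at VA 0x%lx\n",
	    (unsigned long)example_vtopte(0xc0100000UL));
	return 0;
}
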
1548 1548
1549#if defined(__x86_64__) 1549#if defined(__x86_64__)
1550/* 1550/*
1551 * Pre-allocate PTPs for low memory, so that 1:1 mappings for various 1551 * Pre-allocate PTPs for low memory, so that 1:1 mappings for various
1552 * trampoline code can be entered. 1552 * trampoline code can be entered.
1553 */ 1553 */
1554void 1554void
1555pmap_prealloc_lowmem_ptps(void) 1555pmap_prealloc_lowmem_ptps(void)
1556{ 1556{
1557 int level; 1557 int level;
1558 paddr_t newp; 1558 paddr_t newp;
1559#ifdef XEN 1559#ifdef XEN
1560 paddr_t pdes_pa; 1560 paddr_t pdes_pa;
1561 1561
1562 pdes_pa = pmap_pdirpa(pmap_kernel(), 0); 1562 pdes_pa = pmap_pdirpa(pmap_kernel(), 0);
1563 level = PTP_LEVELS; 1563 level = PTP_LEVELS;
1564 for (;;) { 1564 for (;;) {
1565 newp = avail_start; 1565 newp = avail_start;
1566 avail_start += PAGE_SIZE; 1566 avail_start += PAGE_SIZE;
1567 HYPERVISOR_update_va_mapping ((vaddr_t)early_zerop, 1567 HYPERVISOR_update_va_mapping ((vaddr_t)early_zerop,
1568 xpmap_ptom_masked(newp) | PG_u | PG_V | PG_RW, UVMF_INVLPG); 1568 xpmap_ptom_masked(newp) | PG_u | PG_V | PG_RW, UVMF_INVLPG);
1569 memset(early_zerop, 0, PAGE_SIZE); 1569 memset(early_zerop, 0, PAGE_SIZE);
1570 /* Mark R/O before installing */ 1570 /* Mark R/O before installing */
1571 HYPERVISOR_update_va_mapping ((vaddr_t)early_zerop, 1571 HYPERVISOR_update_va_mapping ((vaddr_t)early_zerop,
1572 xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG); 1572 xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG);
1573 if (newp < (NKL2_KIMG_ENTRIES * NBPD_L2)) 1573 if (newp < (NKL2_KIMG_ENTRIES * NBPD_L2))
1574 HYPERVISOR_update_va_mapping (newp + KERNBASE, 1574 HYPERVISOR_update_va_mapping (newp + KERNBASE,
1575 xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG); 1575 xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG);
1576 /* Update the pmap_kernel() L4 shadow */ 1576 /* Update the pmap_kernel() L4 shadow */
1577 xpq_queue_pte_update ( 1577 xpq_queue_pte_update (
1578 xpmap_ptom_masked(pdes_pa) 1578 xpmap_ptom_masked(pdes_pa)
1579 + (pl_i(0, level) * sizeof (pd_entry_t)), 1579 + (pl_i(0, level) * sizeof (pd_entry_t)),
1580 xpmap_ptom_masked(newp) | PG_RW | PG_u | PG_V); 1580 xpmap_ptom_masked(newp) | PG_RW | PG_u | PG_V);
1581 /* sync to per-cpu PD */ 1581 /* sync to per-cpu PD */
1582 xpq_queue_pte_update( 1582 xpq_queue_pte_update(
1583 xpmap_ptom_masked(cpu_info_primary.ci_kpm_pdirpa + 1583 xpmap_ptom_masked(cpu_info_primary.ci_kpm_pdirpa +
1584 pl_i(0, PTP_LEVELS) * 1584 pl_i(0, PTP_LEVELS) *
1585 sizeof(pd_entry_t)), 1585 sizeof(pd_entry_t)),
1586 pmap_kernel()->pm_pdir[pl_i(0, PTP_LEVELS)]); 1586 pmap_kernel()->pm_pdir[pl_i(0, PTP_LEVELS)]);
1587 pmap_pte_flush(); 1587 pmap_pte_flush();
1588 level--; 1588 level--;
1589 if (level <= 1) 1589 if (level <= 1)
1590 break; 1590 break;
1591 pdes_pa = newp; 1591 pdes_pa = newp;
1592 } 1592 }
1593#else /* XEN */ 1593#else /* XEN */
1594 pd_entry_t *pdes; 1594 pd_entry_t *pdes;
1595 1595
1596 pdes = pmap_kernel()->pm_pdir; 1596 pdes = pmap_kernel()->pm_pdir;
1597 level = PTP_LEVELS; 1597 level = PTP_LEVELS;
1598 for (;;) { 1598 for (;;) {
1599 newp = avail_start; 1599 newp = avail_start;
1600 avail_start += PAGE_SIZE; 1600 avail_start += PAGE_SIZE;
1601#ifdef __HAVE_DIRECT_MAP 1601#ifdef __HAVE_DIRECT_MAP
1602 memset((void *)PMAP_DIRECT_MAP(newp), 0, PAGE_SIZE); 1602 memset((void *)PMAP_DIRECT_MAP(newp), 0, PAGE_SIZE);
1603#else 1603#else
1604 pmap_pte_set(early_zero_pte, (newp & PG_FRAME) | PG_V | PG_RW); 1604 pmap_pte_set(early_zero_pte, (newp & PG_FRAME) | PG_V | PG_RW);
1605 pmap_pte_flush(); 1605 pmap_pte_flush();
1606 pmap_update_pg((vaddr_t)early_zerop); 1606 pmap_update_pg((vaddr_t)early_zerop);
1607 memset(early_zerop, 0, PAGE_SIZE); 1607 memset(early_zerop, 0, PAGE_SIZE);
1608#endif 1608#endif
1609 pdes[pl_i(0, level)] = (newp & PG_FRAME) | PG_V | PG_RW; 1609 pdes[pl_i(0, level)] = (newp & PG_FRAME) | PG_V | PG_RW;
1610 level--; 1610 level--;
1611 if (level <= 1) 1611 if (level <= 1)
1612 break; 1612 break;
1613 pdes = normal_pdes[level - 2]; 1613 pdes = normal_pdes[level - 2];
1614 } 1614 }
1615#endif /* XEN */ 1615#endif /* XEN */
1616} 1616}
1617#endif /* defined(__x86_64__) */ 1617#endif /* defined(__x86_64__) */
1618 1618
1619/* 1619/*
1620 * pmap_init: called from uvm_init, our job is to get the pmap 1620 * pmap_init: called from uvm_init, our job is to get the pmap
1621 * system ready to manage mappings... 1621 * system ready to manage mappings...
1622 */ 1622 */
1623 1623
1624void 1624void
1625pmap_init(void) 1625pmap_init(void)
1626{ 1626{
1627 int i; 1627 int i;
1628 1628
1629 for (i = 0; i < PV_HASH_SIZE; i++) { 1629 for (i = 0; i < PV_HASH_SIZE; i++) {
1630 SLIST_INIT(&pv_hash_heads[i].hh_list); 1630 SLIST_INIT(&pv_hash_heads[i].hh_list);
1631 } 1631 }
1632 for (i = 0; i < PV_HASH_LOCK_CNT; i++) { 1632 for (i = 0; i < PV_HASH_LOCK_CNT; i++) {
1633 mutex_init(&pv_hash_locks[i].lock, MUTEX_NODEBUG, IPL_VM); 1633 mutex_init(&pv_hash_locks[i].lock, MUTEX_NODEBUG, IPL_VM);
1634 } 1634 }
1635 1635
1636 pmap_tlb_init(); 1636 pmap_tlb_init();
1637 1637
1638 evcnt_attach_dynamic(&pmap_iobmp_evcnt, EVCNT_TYPE_MISC, 1638 evcnt_attach_dynamic(&pmap_iobmp_evcnt, EVCNT_TYPE_MISC,
1639 NULL, "x86", "io bitmap copy"); 1639 NULL, "x86", "io bitmap copy");
1640 evcnt_attach_dynamic(&pmap_ldt_evcnt, EVCNT_TYPE_MISC, 1640 evcnt_attach_dynamic(&pmap_ldt_evcnt, EVCNT_TYPE_MISC,
1641 NULL, "x86", "ldt sync"); 1641 NULL, "x86", "ldt sync");
1642 1642
1643 /* 1643 /*
1644 * done: pmap module is up (and ready for business) 1644 * done: pmap module is up (and ready for business)
1645 */ 1645 */
1646 1646
1647 pmap_initialized = true; 1647 pmap_initialized = true;
1648} 1648}
1649 1649
1650/* 1650/*
1651 * pmap_cpu_init_late: perform late per-CPU initialization. 1651 * pmap_cpu_init_late: perform late per-CPU initialization.
1652 */ 1652 */
1653 1653
1654#ifndef XEN 1654#ifndef XEN
1655void 1655void
1656pmap_cpu_init_late(struct cpu_info *ci) 1656pmap_cpu_init_late(struct cpu_info *ci)
1657{ 1657{
1658 /* 1658 /*
1659 * The BP already has its own PD page allocated during early 1659 * The BP already has its own PD page allocated during early
1660 * MD startup. 1660 * MD startup.
1661 */ 1661 */
1662 if (ci == &cpu_info_primary) 1662 if (ci == &cpu_info_primary)
1663 return; 1663 return;
1664 1664
1665#ifdef PAE 1665#ifdef PAE
1666 int ret; 1666 int ret;
1667 struct pglist pg; 1667 struct pglist pg;
1668 struct vm_page *vmap; 1668 struct vm_page *vmap;
1669 1669
1670 /* 1670 /*
1671 * Allocate a page for the per-CPU L3 PD. %cr3 being 32 bits, the PA must 1671 * Allocate a page for the per-CPU L3 PD. %cr3 being 32 bits, the PA must
1672 * reside below the 4GB boundary. 1672 * reside below the 4GB boundary.
1673 */ 1673 */
1674 ret = uvm_pglistalloc(PAGE_SIZE, 0, 0x100000000ULL, 32, 0, &pg, 1, 0); 1674 ret = uvm_pglistalloc(PAGE_SIZE, 0, 0x100000000ULL, 32, 0, &pg, 1, 0);
1675 vmap = TAILQ_FIRST(&pg); 1675 vmap = TAILQ_FIRST(&pg);
1676 1676
1677 if (ret != 0 || vmap == NULL) 1677 if (ret != 0 || vmap == NULL)
1678 panic("%s: failed to allocate L3 pglist for CPU %d (ret %d)\n", 1678 panic("%s: failed to allocate L3 pglist for CPU %d (ret %d)\n",
1679 __func__, cpu_index(ci), ret); 1679 __func__, cpu_index(ci), ret);
1680 1680
1681 ci->ci_pae_l3_pdirpa = vmap->phys_addr; 1681 ci->ci_pae_l3_pdirpa = vmap->phys_addr;
1682 1682
1683 ci->ci_pae_l3_pdir = (paddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 1683 ci->ci_pae_l3_pdir = (paddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
1684 UVM_KMF_VAONLY | UVM_KMF_NOWAIT); 1684 UVM_KMF_VAONLY | UVM_KMF_NOWAIT);
1685 if (ci->ci_pae_l3_pdir == NULL) 1685 if (ci->ci_pae_l3_pdir == NULL)
1686 panic("%s: failed to allocate L3 PD for CPU %d\n", 1686 panic("%s: failed to allocate L3 PD for CPU %d\n",
1687 __func__, cpu_index(ci)); 1687 __func__, cpu_index(ci));
1688 1688
1689 pmap_kenter_pa((vaddr_t)ci->ci_pae_l3_pdir, ci->ci_pae_l3_pdirpa, 1689 pmap_kenter_pa((vaddr_t)ci->ci_pae_l3_pdir, ci->ci_pae_l3_pdirpa,
1690 VM_PROT_READ | VM_PROT_WRITE, 0); 1690 VM_PROT_READ | VM_PROT_WRITE, 0);
1691 1691
1692 pmap_update(pmap_kernel()); 1692 pmap_update(pmap_kernel());
1693#endif 1693#endif
1694} 1694}
1695#endif 1695#endif
1696 1696
1697/* 1697/*
1698 * p v _ e n t r y f u n c t i o n s 1698 * p v _ e n t r y f u n c t i o n s
1699 */ 1699 */
1700 1700
1701/* 1701/*
1702 * pmap_free_pvs: free a list of pv_entrys 1702 * pmap_free_pvs: free a list of pv_entrys
1703 */ 1703 */
1704 1704
1705static void 1705static void
1706pmap_free_pvs(struct pv_entry *pve) 1706pmap_free_pvs(struct pv_entry *pve)
1707{ 1707{
1708 struct pv_entry *next; 1708 struct pv_entry *next;
1709 1709
1710 for ( /* null */ ; pve != NULL ; pve = next) { 1710 for ( /* null */ ; pve != NULL ; pve = next) {
1711 next = pve->pve_next; 1711 next = pve->pve_next;
1712 pool_cache_put(&pmap_pv_cache, pve); 1712 pool_cache_put(&pmap_pv_cache, pve);
1713 } 1713 }
1714} 1714}
1715 1715
1716/* 1716/*
1717 * main pv_entry manipulation functions: 1717 * main pv_entry manipulation functions:
1718 * pmap_enter_pv: enter a mapping onto a pv_head list 1718 * pmap_enter_pv: enter a mapping onto a pv_head list
1719 * pmap_remove_pv: remove a mapping from a pv_head list 1719 * pmap_remove_pv: remove a mapping from a pv_head list
1720 * 1720 *
1721 * NOTE: Both pmap_enter_pv and pmap_remove_pv expect the caller to lock  1721 * NOTE: Both pmap_enter_pv and pmap_remove_pv expect the caller to lock
1722 * the pvh before calling 1722 * the pvh before calling
1723 */ 1723 */
1724 1724
1725/* 1725/*
1726 * insert_pv: a helper of pmap_enter_pv 1726 * insert_pv: a helper of pmap_enter_pv
1727 */ 1727 */
1728 1728
1729static void 1729static void
1730insert_pv(struct pmap_page *pp, struct pv_entry *pve) 1730insert_pv(struct pmap_page *pp, struct pv_entry *pve)
1731{ 1731{
1732 struct pv_hash_head *hh; 1732 struct pv_hash_head *hh;
1733 kmutex_t *lock; 1733 kmutex_t *lock;
1734 u_int hash; 1734 u_int hash;
1735 1735
1736 hash = pvhash_hash(pve->pve_pte.pte_ptp, pve->pve_pte.pte_va); 1736 hash = pvhash_hash(pve->pve_pte.pte_ptp, pve->pve_pte.pte_va);
1737 lock = pvhash_lock(hash); 1737 lock = pvhash_lock(hash);
1738 hh = pvhash_head(hash); 1738 hh = pvhash_head(hash);
1739 mutex_spin_enter(lock); 1739 mutex_spin_enter(lock);
1740 SLIST_INSERT_HEAD(&hh->hh_list, pve, pve_hash); 1740 SLIST_INSERT_HEAD(&hh->hh_list, pve, pve_hash);
1741 mutex_spin_exit(lock); 1741 mutex_spin_exit(lock);
1742 1742
1743 LIST_INSERT_HEAD(&pp->pp_head.pvh_list, pve, pve_list); 1743 LIST_INSERT_HEAD(&pp->pp_head.pvh_list, pve, pve_list);
1744} 1744}
1745 1745
1746/* 1746/*
1747 * pmap_enter_pv: enter a mapping onto a pv_head list 1747 * pmap_enter_pv: enter a mapping onto a pv_head list
1748 * 1748 *
1749 * => caller should adjust ptp's wire_count before calling 1749 * => caller should adjust ptp's wire_count before calling
1750 */ 1750 */
1751 1751
1752static struct pv_entry * 1752static struct pv_entry *
1753pmap_enter_pv(struct pmap_page *pp, 1753pmap_enter_pv(struct pmap_page *pp,
1754 struct pv_entry *pve, /* preallocated pve for us to use */ 1754 struct pv_entry *pve, /* preallocated pve for us to use */
1755 struct pv_entry **sparepve, 1755 struct pv_entry **sparepve,
1756 struct vm_page *ptp, 1756 struct vm_page *ptp,
1757 vaddr_t va) 1757 vaddr_t va)
1758{ 1758{
1759 1759
1760 KASSERT(ptp == NULL || ptp->wire_count >= 2); 1760 KASSERT(ptp == NULL || ptp->wire_count >= 2);
1761 KASSERT(ptp == NULL || ptp->uobject != NULL); 1761 KASSERT(ptp == NULL || ptp->uobject != NULL);
1762 KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset); 1762 KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
1763 1763
1764 if ((pp->pp_flags & PP_EMBEDDED) == 0) { 1764 if ((pp->pp_flags & PP_EMBEDDED) == 0) {
1765 if (LIST_EMPTY(&pp->pp_head.pvh_list)) { 1765 if (LIST_EMPTY(&pp->pp_head.pvh_list)) {
1766 pp->pp_flags |= PP_EMBEDDED; 1766 pp->pp_flags |= PP_EMBEDDED;
1767 pp->pp_pte.pte_ptp = ptp; 1767 pp->pp_pte.pte_ptp = ptp;
1768 pp->pp_pte.pte_va = va; 1768 pp->pp_pte.pte_va = va;
1769 1769
1770 return pve; 1770 return pve;
1771 } 1771 }
1772 } else { 1772 } else {
1773 struct pv_entry *pve2; 1773 struct pv_entry *pve2;
1774 1774
1775 pve2 = *sparepve; 1775 pve2 = *sparepve;
1776 *sparepve = NULL; 1776 *sparepve = NULL;
1777 1777
1778 pve2->pve_pte = pp->pp_pte; 1778 pve2->pve_pte = pp->pp_pte;
1779 pp->pp_flags &= ~PP_EMBEDDED; 1779 pp->pp_flags &= ~PP_EMBEDDED;
1780 LIST_INIT(&pp->pp_head.pvh_list); 1780 LIST_INIT(&pp->pp_head.pvh_list);
1781 insert_pv(pp, pve2); 1781 insert_pv(pp, pve2);
1782 } 1782 }
1783 1783
1784 pve->pve_pte.pte_ptp = ptp; 1784 pve->pve_pte.pte_ptp = ptp;
1785 pve->pve_pte.pte_va = va; 1785 pve->pve_pte.pte_va = va;
1786 insert_pv(pp, pve); 1786 insert_pv(pp, pve);
1787 1787
1788 return NULL; 1788 return NULL;
1789} 1789}
1790 1790
1791/* 1791/*
1792 * pmap_remove_pv: try to remove a mapping from a pv_list 1792 * pmap_remove_pv: try to remove a mapping from a pv_list
1793 * 1793 *
1794 * => caller should adjust ptp's wire_count and free PTP if needed 1794 * => caller should adjust ptp's wire_count and free PTP if needed
1795 * => we return the removed pve 1795 * => we return the removed pve
1796 */ 1796 */
1797 1797
1798static struct pv_entry * 1798static struct pv_entry *
1799pmap_remove_pv(struct pmap_page *pp, struct vm_page *ptp, vaddr_t va) 1799pmap_remove_pv(struct pmap_page *pp, struct vm_page *ptp, vaddr_t va)
1800{ 1800{
1801 struct pv_hash_head *hh; 1801 struct pv_hash_head *hh;
1802 struct pv_entry *pve; 1802 struct pv_entry *pve;
1803 kmutex_t *lock; 1803 kmutex_t *lock;
1804 u_int hash; 1804 u_int hash;
1805 1805
1806 KASSERT(ptp == NULL || ptp->uobject != NULL); 1806 KASSERT(ptp == NULL || ptp->uobject != NULL);
1807 KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset); 1807 KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
1808 1808
1809 if ((pp->pp_flags & PP_EMBEDDED) != 0) { 1809 if ((pp->pp_flags & PP_EMBEDDED) != 0) {
1810 KASSERT(pp->pp_pte.pte_ptp == ptp); 1810 KASSERT(pp->pp_pte.pte_ptp == ptp);
1811 KASSERT(pp->pp_pte.pte_va == va); 1811 KASSERT(pp->pp_pte.pte_va == va);
1812 1812
1813 pp->pp_flags &= ~PP_EMBEDDED; 1813 pp->pp_flags &= ~PP_EMBEDDED;
1814 LIST_INIT(&pp->pp_head.pvh_list); 1814 LIST_INIT(&pp->pp_head.pvh_list);
1815 1815
1816 return NULL; 1816 return NULL;
1817 } 1817 }
1818 1818
1819 hash = pvhash_hash(ptp, va); 1819 hash = pvhash_hash(ptp, va);
1820 lock = pvhash_lock(hash); 1820 lock = pvhash_lock(hash);
1821 hh = pvhash_head(hash); 1821 hh = pvhash_head(hash);
1822 mutex_spin_enter(lock); 1822 mutex_spin_enter(lock);
1823 pve = pvhash_remove(hh, ptp, va); 1823 pve = pvhash_remove(hh, ptp, va);
1824 mutex_spin_exit(lock); 1824 mutex_spin_exit(lock);
1825 1825
1826 LIST_REMOVE(pve, pve_list); 1826 LIST_REMOVE(pve, pve_list);
1827 1827
1828 return pve; 1828 return pve;
1829} 1829}
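
pmap_enter_pv() and pmap_remove_pv() above implement the PP_EMBEDDED optimization: because most physical pages are mapped exactly once, the first (ptp, va) pair is stored inline in the page's pmap_page, and only additional mappings consume pv_entry nodes, which also go onto the pv hash. When a second mapping arrives, the embedded pair is demoted onto the list using the caller-supplied spare entry, so a page is always either embedded-only or list-only. A simplified model of that logic follows, with illustrative types and names rather than the kernel's.

#include <stdbool.h>
#include <stddef.h>

struct map_ref {			/* analogue of struct pv_pte */
	void		*ptp;		/* page-table page (NULL for kernel) */
	unsigned long	 va;		/* virtual address of the mapping */
};

struct pv_node {			/* analogue of struct pv_entry */
	struct pv_node	*next;
	struct map_ref	 ref;
};

struct page_meta {			/* analogue of struct pmap_page */
	bool		 embedded;	/* PP_EMBEDDED */
	struct map_ref	 first;		/* inline slot: no allocation */
	struct pv_node	*more;		/* overflow list (hashed in the kernel) */
};

/*
 * Record a new mapping of the page.  "node" and "*spare" are preallocated
 * by the caller, as in pmap_enter_pv(); the return value is an unused node
 * the caller may keep for later.
 */
static struct pv_node *
model_enter_pv(struct page_meta *pm, struct pv_node *node,
    struct pv_node **spare, void *ptp, unsigned long va)
{
	if (!pm->embedded && pm->more == NULL) {
		/* Common case: first mapping, store it inline. */
		pm->embedded = true;
		pm->first.ptp = ptp;
		pm->first.va = va;
		return node;
	}
	if (pm->embedded) {
		/* Demote the embedded mapping onto the list first. */
		struct pv_node *n2 = *spare;
		*spare = NULL;
		n2->ref = pm->first;
		n2->next = pm->more;
		pm->more = n2;
		pm->embedded = false;
	}
	node->ref.ptp = ptp;
	node->ref.va = va;
	node->next = pm->more;
	pm->more = node;
	return NULL;
}
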
1830 1830
1831/* 1831/*
1832 * p t p f u n c t i o n s 1832 * p t p f u n c t i o n s
1833 */ 1833 */
1834 1834
1835static inline struct vm_page * 1835static inline struct vm_page *
1836pmap_find_ptp(struct pmap *pmap, vaddr_t va, paddr_t pa, int level) 1836pmap_find_ptp(struct pmap *pmap, vaddr_t va, paddr_t pa, int level)
1837{ 1837{
1838 int lidx = level - 1; 1838 int lidx = level - 1;
1839 struct vm_page *pg; 1839 struct vm_page *pg;
1840 1840
1841 KASSERT(mutex_owned(pmap->pm_lock)); 1841 KASSERT(mutex_owned(pmap->pm_lock));
1842 1842
1843 if (pa != (paddr_t)-1 && pmap->pm_ptphint[lidx] && 1843 if (pa != (paddr_t)-1 && pmap->pm_ptphint[lidx] &&
1844 pa == VM_PAGE_TO_PHYS(pmap->pm_ptphint[lidx])) { 1844 pa == VM_PAGE_TO_PHYS(pmap->pm_ptphint[lidx])) {
1845 return (pmap->pm_ptphint[lidx]); 1845 return (pmap->pm_ptphint[lidx]);
1846 } 1846 }
1847 PMAP_SUBOBJ_LOCK(pmap, lidx); 1847 PMAP_SUBOBJ_LOCK(pmap, lidx);
1848 pg = uvm_pagelookup(&pmap->pm_obj[lidx], ptp_va2o(va, level)); 1848 pg = uvm_pagelookup(&pmap->pm_obj[lidx], ptp_va2o(va, level));
1849 PMAP_SUBOBJ_UNLOCK(pmap, lidx); 1849 PMAP_SUBOBJ_UNLOCK(pmap, lidx);
1850 1850
1851 KASSERT(pg == NULL || pg->wire_count >= 1); 1851 KASSERT(pg == NULL || pg->wire_count >= 1);
1852 return pg; 1852 return pg;
1853} 1853}
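
pmap_find_ptp() above is mostly a cache lookup: the last PTP used at each level is remembered in pm_ptphint[] and compared by physical address before falling back to the locked uvm_pagelookup(). A rough sketch of that hint pattern follows, with hypothetical types; note that the real code re-primes the hint in pmap_get_ptp() rather than in the lookup itself.

#include <stddef.h>

#define NLEVELS	3			/* PTP levels below the top */

struct page;				/* opaque stand-in for struct vm_page */

struct table {
	struct page	*hint[NLEVELS];	/* last PTP seen per level */
	/* slow, locked lookup keyed e.g. by ptp_va2o(va, level) */
	struct page	*(*lookup)(struct table *, unsigned long, int);
	/* cheap key extraction, e.g. VM_PAGE_TO_PHYS() */
	unsigned long	(*key_of)(struct page *);
};

static struct page *
find_cached(struct table *t, unsigned long key, int lvl)
{

	if (t->hint[lvl] != NULL && t->key_of(t->hint[lvl]) == key)
		return t->hint[lvl];		/* fast path, no lock taken */
	t->hint[lvl] = t->lookup(t, key, lvl);	/* slow path, re-prime hint */
	return t->hint[lvl];
}
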
1854 1854
1855static inline void 1855static inline void
1856pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level) 1856pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level)
1857{ 1857{
1858 lwp_t *l; 1858 lwp_t *l;
1859 int lidx; 1859 int lidx;
1860 struct uvm_object *obj; 1860 struct uvm_object *obj;
1861 1861
1862 KASSERT(ptp->wire_count == 1); 1862 KASSERT(ptp->wire_count == 1);
1863 1863
1864 lidx = level - 1; 1864 lidx = level - 1;
1865 1865
1866 obj = &pmap->pm_obj[lidx]; 1866 obj = &pmap->pm_obj[lidx];
1867 pmap_stats_update(pmap, -1, 0); 1867 pmap_stats_update(pmap, -1, 0);
1868 if (lidx != 0) 1868 if (lidx != 0)
1869 mutex_enter(obj->vmobjlock); 1869 mutex_enter(obj->vmobjlock);
1870 if (pmap->pm_ptphint[lidx] == ptp) 1870 if (pmap->pm_ptphint[lidx] == ptp)
1871 pmap->pm_ptphint[lidx] = TAILQ_FIRST(&obj->memq); 1871 pmap->pm_ptphint[lidx] = TAILQ_FIRST(&obj->memq);
1872 ptp->wire_count = 0; 1872 ptp->wire_count = 0;
1873 uvm_pagerealloc(ptp, NULL, 0); 1873 uvm_pagerealloc(ptp, NULL, 0);
1874 l = curlwp; 1874 l = curlwp;
1875 KASSERT((l->l_pflag & LP_INTR) == 0); 1875 KASSERT((l->l_pflag & LP_INTR) == 0);
1876 VM_PAGE_TO_PP(ptp)->pp_link = l->l_md.md_gc_ptp; 1876 VM_PAGE_TO_PP(ptp)->pp_link = l->l_md.md_gc_ptp;
1877 l->l_md.md_gc_ptp = ptp; 1877 l->l_md.md_gc_ptp = ptp;
1878 if (lidx != 0) 1878 if (lidx != 0)
1879 mutex_exit(obj->vmobjlock); 1879 mutex_exit(obj->vmobjlock);
1880} 1880}
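
pmap_freepage() does not hand the PTP back to UVM directly: it detaches the page from the pmap and links it onto the calling LWP's l_md.md_gc_ptp list, and the pages are only truly freed later (see pmap_free_ptps() further down), after the corresponding TLB shootdowns have been processed. A simplified sketch of that deferred-free pattern, with hypothetical types:

#include <stddef.h>

struct dpage {				/* stand-in for a vm_page/pmap_page */
	struct dpage	*link;		/* plays the role of pp_link */
};

struct thread {				/* stand-in for the lwp */
	struct dpage	*gc_list;	/* plays the role of l_md.md_gc_ptp */
};

/* Queue a page for later release: O(1), no allocator work. */
static void
defer_free(struct thread *t, struct dpage *pg)
{

	pg->link = t->gc_list;
	t->gc_list = pg;
}

/* Drain the list once the TLB shootdowns are known to have completed. */
static void
drain_deferred(struct thread *t, void (*really_free)(struct dpage *))
{
	struct dpage *pg;

	while ((pg = t->gc_list) != NULL) {
		t->gc_list = pg->link;
		really_free(pg);	/* cf. uvm_pagefree() in pmap_free_ptps() */
	}
}
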
1881 1881
1882static void 1882static void
1883pmap_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va, 1883pmap_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va,
1884 pt_entry_t *ptes, pd_entry_t * const *pdes) 1884 pt_entry_t *ptes, pd_entry_t * const *pdes)
1885{ 1885{
1886 unsigned long index; 1886 unsigned long index;
1887 int level; 1887 int level;
1888 vaddr_t invaladdr; 1888 vaddr_t invaladdr;
1889 pd_entry_t opde; 1889 pd_entry_t opde;
1890#ifdef XEN 1890#ifdef XEN
1891 struct pmap *curpmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map); 1891 struct pmap *curpmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map);
1892#ifdef MULTIPROCESSOR 1892#ifdef MULTIPROCESSOR
1893 vaddr_t invaladdr2; 1893 vaddr_t invaladdr2;
1894#endif 1894#endif
1895#endif 1895#endif
1896 1896
1897 KASSERT(pmap != pmap_kernel()); 1897 KASSERT(pmap != pmap_kernel());
1898 KASSERT(mutex_owned(pmap->pm_lock)); 1898 KASSERT(mutex_owned(pmap->pm_lock));
1899 KASSERT(kpreempt_disabled()); 1899 KASSERT(kpreempt_disabled());
1900 1900
1901 level = 1; 1901 level = 1;
1902 do { 1902 do {
1903 index = pl_i(va, level + 1); 1903 index = pl_i(va, level + 1);
1904 opde = pmap_pte_testset(&pdes[level - 1][index], 0); 1904 opde = pmap_pte_testset(&pdes[level - 1][index], 0);
1905#if defined(XEN) 1905#if defined(XEN)
1906# if defined(__x86_64__) 1906# if defined(__x86_64__)
1907 /* 1907 /*
1908 * If ptp is an L3 currently mapped in kernel space, 1908 * If ptp is an L3 currently mapped in kernel space,
1909 * clear it before freeing 1909 * clear it before freeing
1910 */ 1910 */
1911 if (pmap_pdirpa(pmap, 0) == curcpu()->ci_xen_current_user_pgd 1911 if (pmap_pdirpa(pmap, 0) == curcpu()->ci_xen_current_user_pgd
1912 && level == PTP_LEVELS - 1) { 1912 && level == PTP_LEVELS - 1) {
1913 pmap_pte_set(&pmap_kernel()->pm_pdir[index], 0); 1913 pmap_pte_set(&pmap_kernel()->pm_pdir[index], 0);
1914 /* 1914 /*
1915 * Update the per-cpu PD on all cpus the current 1915 * Update the per-cpu PD on all cpus the current
1916 * pmap is active on 1916 * pmap is active on
1917 */ 1917 */
1918 CPU_INFO_ITERATOR cii; 1918 CPU_INFO_ITERATOR cii;
1919 struct cpu_info *ci; 1919 struct cpu_info *ci;
1920 for (CPU_INFO_FOREACH(cii, ci)) { 1920 for (CPU_INFO_FOREACH(cii, ci)) {
1921 if (ci == NULL) { 1921 if (ci == NULL) {
1922 continue; 1922 continue;
1923 } 1923 }
1924 if (ci->ci_cpumask & pmap->pm_cpus) { 1924 if (ci->ci_cpumask & pmap->pm_cpus) {
1925 pmap_pte_set(&ci->ci_kpm_pdir[index], 0); 1925 pmap_pte_set(&ci->ci_kpm_pdir[index], 0);
1926 } 1926 }
1927 } 1927 }
1928 } 1928 }
1929# endif /*__x86_64__ */ 1929# endif /*__x86_64__ */
1930 invaladdr = level == 1 ? (vaddr_t)ptes : 1930 invaladdr = level == 1 ? (vaddr_t)ptes :
1931 (vaddr_t)pdes[level - 2]; 1931 (vaddr_t)pdes[level - 2];
1932 pmap_tlb_shootdown(curpmap, invaladdr + index * PAGE_SIZE, 1932 pmap_tlb_shootdown(curpmap, invaladdr + index * PAGE_SIZE,
1933 opde, TLBSHOOT_FREE_PTP1); 1933 opde, TLBSHOOT_FREE_PTP1);
1934# if defined(MULTIPROCESSOR) 1934# if defined(MULTIPROCESSOR)
1935 invaladdr2 = level == 1 ? (vaddr_t)PTE_BASE : 1935 invaladdr2 = level == 1 ? (vaddr_t)PTE_BASE :
1936 (vaddr_t)normal_pdes[level - 2]; 1936 (vaddr_t)normal_pdes[level - 2];
1937 if (pmap != curpmap || invaladdr != invaladdr2) { 1937 if (pmap != curpmap || invaladdr != invaladdr2) {
1938 pmap_tlb_shootdown(pmap, invaladdr2 + index * PAGE_SIZE, 1938 pmap_tlb_shootdown(pmap, invaladdr2 + index * PAGE_SIZE,
1939 opde, TLBSHOOT_FREE_PTP2); 1939 opde, TLBSHOOT_FREE_PTP2);
1940 } 1940 }
1941# endif /* MULTIPROCESSOR */ 1941# endif /* MULTIPROCESSOR */
1942#else /* XEN */ 1942#else /* XEN */
1943 invaladdr = level == 1 ? (vaddr_t)ptes : 1943 invaladdr = level == 1 ? (vaddr_t)ptes :
1944 (vaddr_t)pdes[level - 2]; 1944 (vaddr_t)pdes[level - 2];
1945 pmap_tlb_shootdown(pmap, invaladdr + index * PAGE_SIZE, 1945 pmap_tlb_shootdown(pmap, invaladdr + index * PAGE_SIZE,
1946 opde, TLBSHOOT_FREE_PTP1); 1946 opde, TLBSHOOT_FREE_PTP1);
1947#endif /* XEN */ 1947#endif /* XEN */
1948 pmap_freepage(pmap, ptp, level); 1948 pmap_freepage(pmap, ptp, level);
1949 if (level < PTP_LEVELS - 1) { 1949 if (level < PTP_LEVELS - 1) {
1950 ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1); 1950 ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1);
1951 ptp->wire_count--; 1951 ptp->wire_count--;
1952 if (ptp->wire_count > 1) 1952 if (ptp->wire_count > 1)
1953 break; 1953 break;
1954 } 1954 }
1955 } while (++level < PTP_LEVELS); 1955 } while (++level < PTP_LEVELS);
1956 pmap_pte_flush(); 1956 pmap_pte_flush();
1957} 1957}
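
The loop in pmap_free_ptp() works bottom-up: it frees the leaf PTP, then drops the parent's wire_count one level at a time and keeps going only while each parent has just become empty itself. A toy model of that control flow is below; the real code also clears the PDE and queues a TLB shootdown at every step, and the array values are made up for illustration.

#include <stdio.h>

#define PTP_LEVELS	4	/* levels 1..3 are PTPs, level 4 is the PDP */

int
main(void)
{
	/*
	 * wire[l] models ptp->wire_count at level l: 1 for the page
	 * itself plus one per entry still in use.  Index 0 is unused.
	 */
	int wire[PTP_LEVELS] = { 0, 1, 2, 3 };
	int level = 1;

	do {
		printf("free level %d page table page\n", level);
		if (level < PTP_LEVELS - 1) {
			if (--wire[level + 1] > 1) {
				printf("level %d still in use, stop\n",
				    level + 1);
				break;
			}
		}
	} while (++level < PTP_LEVELS);
	return 0;
}
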
1958 1958
1959/* 1959/*
1960 * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one) 1960 * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one)
1961 * 1961 *
1962 * => pmap should NOT be pmap_kernel() 1962 * => pmap should NOT be pmap_kernel()
1963 * => pmap should be locked 1963 * => pmap should be locked
1964 * => preemption should be disabled 1964 * => preemption should be disabled
1965 */ 1965 */
1966 1966
1967static struct vm_page * 1967static struct vm_page *
1968pmap_get_ptp(struct pmap *pmap, vaddr_t va, pd_entry_t * const *pdes) 1968pmap_get_ptp(struct pmap *pmap, vaddr_t va, pd_entry_t * const *pdes)
1969{ 1969{
1970 struct vm_page *ptp, *pptp; 1970 struct vm_page *ptp, *pptp;
1971 int i; 1971 int i;
1972 unsigned long index; 1972 unsigned long index;
1973 pd_entry_t *pva; 1973 pd_entry_t *pva;
1974 paddr_t ppa, pa; 1974 paddr_t ppa, pa;
1975 struct uvm_object *obj; 1975 struct uvm_object *obj;
1976 1976
1977 KASSERT(pmap != pmap_kernel()); 1977 KASSERT(pmap != pmap_kernel());
1978 KASSERT(mutex_owned(pmap->pm_lock)); 1978 KASSERT(mutex_owned(pmap->pm_lock));
1979 KASSERT(kpreempt_disabled()); 1979 KASSERT(kpreempt_disabled());
1980 1980
1981 ptp = NULL; 1981 ptp = NULL;
1982 pa = (paddr_t)-1; 1982 pa = (paddr_t)-1;
1983 1983
1984 /* 1984 /*
1985 * Loop through all page table levels seeing if we need to 1985 * Loop through all page table levels seeing if we need to
1986 * add a new page to that level. 1986 * add a new page to that level.
1987 */ 1987 */
1988 for (i = PTP_LEVELS; i > 1; i--) { 1988 for (i = PTP_LEVELS; i > 1; i--) {
1989 /* 1989 /*
1990 * Save values from previous round. 1990 * Save values from previous round.
1991 */ 1991 */
1992 pptp = ptp; 1992 pptp = ptp;
1993 ppa = pa; 1993 ppa = pa;
1994 1994
1995 index = pl_i(va, i); 1995 index = pl_i(va, i);
1996 pva = pdes[i - 2]; 1996 pva = pdes[i - 2];
1997 1997
1998 if (pmap_valid_entry(pva[index])) { 1998 if (pmap_valid_entry(pva[index])) {
1999 ppa = pmap_pte2pa(pva[index]); 1999 ppa = pmap_pte2pa(pva[index]);
2000 ptp = NULL; 2000 ptp = NULL;
2001 continue; 2001 continue;
2002 } 2002 }
2003 2003
2004 obj = &pmap->pm_obj[i-2]; 2004 obj = &pmap->pm_obj[i-2];
2005 PMAP_SUBOBJ_LOCK(pmap, i - 2); 2005 PMAP_SUBOBJ_LOCK(pmap, i - 2);
2006 ptp = uvm_pagealloc(obj, ptp_va2o(va, i - 1), NULL, 2006 ptp = uvm_pagealloc(obj, ptp_va2o(va, i - 1), NULL,
2007 UVM_PGA_USERESERVE|UVM_PGA_ZERO); 2007 UVM_PGA_USERESERVE|UVM_PGA_ZERO);
2008 PMAP_SUBOBJ_UNLOCK(pmap, i - 2); 2008 PMAP_SUBOBJ_UNLOCK(pmap, i - 2);
2009 2009
2010 if (ptp == NULL) 2010 if (ptp == NULL)
2011 return NULL; 2011 return NULL;
2012 2012
2013 ptp->flags &= ~PG_BUSY; /* never busy */ 2013 ptp->flags &= ~PG_BUSY; /* never busy */
2014 ptp->wire_count = 1; 2014 ptp->wire_count = 1;
2015 pmap->pm_ptphint[i - 2] = ptp; 2015 pmap->pm_ptphint[i - 2] = ptp;
2016 pa = VM_PAGE_TO_PHYS(ptp); 2016 pa = VM_PAGE_TO_PHYS(ptp);
2017 pmap_pte_set(&pva[index], (pd_entry_t) 2017 pmap_pte_set(&pva[index], (pd_entry_t)
2018 (pmap_pa2pte(pa) | PG_u | PG_RW | PG_V)); 2018 (pmap_pa2pte(pa) | PG_u | PG_RW | PG_V));
2019#if defined(XEN) && defined(__x86_64__) 2019#if defined(XEN) && defined(__x86_64__)
2020 /* 2020 /*
2021 * In Xen we must enter the mapping in kernel map too 2021 * In Xen we must enter the mapping in kernel map too
2022 * if pmap is curmap and modifying top level (PGD) 2022 * if pmap is curmap and modifying top level (PGD)
2023 */ 2023 */
2024 if(i == PTP_LEVELS && pmap != pmap_kernel()) { 2024 if(i == PTP_LEVELS && pmap != pmap_kernel()) {
2025 pmap_pte_set(&pmap_kernel()->pm_pdir[index], 2025 pmap_pte_set(&pmap_kernel()->pm_pdir[index],
2026 (pd_entry_t) (pmap_pa2pte(pa) 2026 (pd_entry_t) (pmap_pa2pte(pa)
2027 | PG_u | PG_RW | PG_V)); 2027 | PG_u | PG_RW | PG_V));
2028 /* 2028 /*
2029 * Update the per-cpu PD on all cpus the current 2029 * Update the per-cpu PD on all cpus the current
2030 * pmap is active on 2030 * pmap is active on
2031 */ 2031 */
2032 CPU_INFO_ITERATOR cii; 2032 CPU_INFO_ITERATOR cii;
2033 struct cpu_info *ci; 2033 struct cpu_info *ci;
2034 for (CPU_INFO_FOREACH(cii, ci)) { 2034 for (CPU_INFO_FOREACH(cii, ci)) {
2035 if (ci == NULL) { 2035 if (ci == NULL) {
2036 continue; 2036 continue;
2037 } 2037 }
2038 if (ci->ci_cpumask & pmap->pm_cpus) { 2038 if (ci->ci_cpumask & pmap->pm_cpus) {
2039 pmap_pte_set(&ci->ci_kpm_pdir[index], 2039 pmap_pte_set(&ci->ci_kpm_pdir[index],
2040 (pd_entry_t) (pmap_pa2pte(pa) | PG_u | PG_RW | PG_V)); 2040 (pd_entry_t) (pmap_pa2pte(pa) | PG_u | PG_RW | PG_V));
2041 } 2041 }
2042 } 2042 }
2043 } 2043 }
2044#endif /* XEN && __x86_64__ */ 2044#endif /* XEN && __x86_64__ */
2045 pmap_pte_flush(); 2045 pmap_pte_flush();
2046 pmap_stats_update(pmap, 1, 0); 2046 pmap_stats_update(pmap, 1, 0);
2047 /* 2047 /*
2048 * If we're not in the top level, increase the 2048 * If we're not in the top level, increase the
2049 * wire count of the parent page. 2049 * wire count of the parent page.
2050 */ 2050 */
2051 if (i < PTP_LEVELS) { 2051 if (i < PTP_LEVELS) {
2052 if (pptp == NULL) 2052 if (pptp == NULL)
2053 pptp = pmap_find_ptp(pmap, va, ppa, i); 2053 pptp = pmap_find_ptp(pmap, va, ppa, i);
2054#ifdef DIAGNOSTIC 2054#ifdef DIAGNOSTIC
2055 if (pptp == NULL) 2055 if (pptp == NULL)
2056 panic("pde page disappeared"); 2056 panic("pde page disappeared");
2057#endif 2057#endif
2058 pptp->wire_count++; 2058 pptp->wire_count++;
2059 } 2059 }
2060 } 2060 }
2061 2061
2062 /* 2062 /*
2063 * ptp is not NULL if we just allocated a new ptp. If it's 2063 * ptp is not NULL if we just allocated a new ptp. If it's
2064 * still NULL, we must look up the existing one. 2064 * still NULL, we must look up the existing one.
2065 */ 2065 */
2066 if (ptp == NULL) { 2066 if (ptp == NULL) {
2067 ptp = pmap_find_ptp(pmap, va, ppa, 1); 2067 ptp = pmap_find_ptp(pmap, va, ppa, 1);
2068#ifdef DIAGNOSTIC 2068#ifdef DIAGNOSTIC
2069 if (ptp == NULL) { 2069 if (ptp == NULL) {
2070 printf("va %" PRIxVADDR " ppa %" PRIxPADDR "\n", 2070 printf("va %" PRIxVADDR " ppa %" PRIxPADDR "\n",
2071 va, ppa); 2071 va, ppa);
2072 panic("pmap_get_ptp: unmanaged user PTP"); 2072 panic("pmap_get_ptp: unmanaged user PTP");
2073 } 2073 }
2074#endif 2074#endif
2075 } 2075 }
2076 2076
2077 pmap->pm_ptphint[0] = ptp; 2077 pmap->pm_ptphint[0] = ptp;
2078 return(ptp); 2078 return(ptp);
2079} 2079}
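
pmap_get_ptp() is the inverse walk, top-down: starting at the highest paging level it checks whether the directory entry covering va is already valid, and where it is not it allocates a zeroed PTP, installs the PDE and bumps the parent's wire count. A toy model of that allocation pass, with hypothetical arrays in place of real PDEs and vm_page wire counts:

#include <stdbool.h>
#include <stdio.h>

#define PTP_LEVELS	4

int
main(void)
{
	/* present[l]: a page table page already exists at level l. */
	bool present[PTP_LEVELS + 1] = { false, false, false, true, true };
	/* wire[l]: 1 for the PTP itself plus one per child installed. */
	int wire[PTP_LEVELS + 1] = { 0, 0, 0, 1, 1 };
	int i;

	for (i = PTP_LEVELS; i > 1; i--) {
		if (present[i - 1])
			continue;	/* PDE valid: nothing to allocate */
		printf("allocate level %d page table page\n", i - 1);
		present[i - 1] = true;
		wire[i - 1] = 1;	/* the new, empty table */
		if (i < PTP_LEVELS)
			wire[i]++;	/* parent gained a child */
	}
	printf("wire counts: L1=%d L2=%d L3=%d\n", wire[1], wire[2], wire[3]);
	return 0;
}
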
2080 2080
2081/* 2081/*
2082 * p m a p l i f e c y c l e f u n c t i o n s 2082 * p m a p l i f e c y c l e f u n c t i o n s
2083 */ 2083 */
2084 2084
2085/* 2085/*
2086 * pmap_pdp_ctor: constructor for the PDP cache. 2086 * pmap_pdp_ctor: constructor for the PDP cache.
2087 */ 2087 */
2088static int 2088static int
2089pmap_pdp_ctor(void *arg, void *v, int flags) 2089pmap_pdp_ctor(void *arg, void *v, int flags)
2090{ 2090{
2091 pd_entry_t *pdir = v; 2091 pd_entry_t *pdir = v;
2092 paddr_t pdirpa = 0; /* XXX: GCC */ 2092 paddr_t pdirpa = 0; /* XXX: GCC */
2093 vaddr_t object; 2093 vaddr_t object;
2094 int i; 2094 int i;
2095 2095
2096#if !defined(XEN) || !defined(__x86_64__) 2096#if !defined(XEN) || !defined(__x86_64__)
2097 int npde; 2097 int npde;
2098#endif 2098#endif
2099#ifdef XEN 2099#ifdef XEN
2100 int s; 2100 int s;
2101#endif 2101#endif
2102 2102
2103 /* 2103 /*
2104 * NOTE: The `pmaps_lock' is held when the PDP is allocated. 2104 * NOTE: The `pmaps_lock' is held when the PDP is allocated.
2105 */ 2105 */
2106 2106
2107#if defined(XEN) && defined(__x86_64__) 2107#if defined(XEN) && defined(__x86_64__)
2108 /* fetch the physical address of the page directory. */ 2108 /* fetch the physical address of the page directory. */
2109 (void) pmap_extract(pmap_kernel(), (vaddr_t) pdir, &pdirpa); 2109 (void) pmap_extract(pmap_kernel(), (vaddr_t) pdir, &pdirpa);
2110 2110
2111 /* zero init area */ 2111 /* zero init area */
2112 memset (pdir, 0, PAGE_SIZE); /* Xen wants a clean page */ 2112 memset (pdir, 0, PAGE_SIZE); /* Xen wants a clean page */
2113 /* 2113 /*
2114 * this pdir will NEVER be active in kernel mode 2114 * this pdir will NEVER be active in kernel mode
2115 * so mark recursive entry invalid 2115 * so mark recursive entry invalid
2116 */ 2116 */
2117 pdir[PDIR_SLOT_PTE] = pmap_pa2pte(pdirpa) | PG_u; 2117 pdir[PDIR_SLOT_PTE] = pmap_pa2pte(pdirpa) | PG_u;
2118 /* 2118 /*
2119 * PDP constructed this way won't be for kernel, 2119 * PDP constructed this way won't be for kernel,
2120 * hence we don't put kernel mappings on Xen. 2120 * hence we don't put kernel mappings on Xen.
2121 * But we need to make pmap_create() happy, so put a dummy (without 2121 * But we need to make pmap_create() happy, so put a dummy (without
2122 * PG_V) value at the right place. 2122 * PG_V) value at the right place.
2123 */ 2123 */
2124 pdir[PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1] - 1] = 2124 pdir[PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1] - 1] =
2125 (pd_entry_t)-1 & PG_FRAME; 2125 (pd_entry_t)-1 & PG_FRAME;
2126#else /* XEN && __x86_64__*/ 2126#else /* XEN && __x86_64__*/
2127 /* zero init area */ 2127 /* zero init area */
2128 memset(pdir, 0, PDIR_SLOT_PTE * sizeof(pd_entry_t)); 2128 memset(pdir, 0, PDIR_SLOT_PTE * sizeof(pd_entry_t));
2129 2129
2130 object = (vaddr_t)v; 2130 object = (vaddr_t)v;
2131 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) { 2131 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) {
2132 /* fetch the physical address of the page directory. */ 2132 /* fetch the physical address of the page directory. */
2133 (void) pmap_extract(pmap_kernel(), object, &pdirpa); 2133 (void) pmap_extract(pmap_kernel(), object, &pdirpa);
2134 /* put in recursive PDE to map the PTEs */ 2134 /* put in recursive PDE to map the PTEs */
2135 pdir[PDIR_SLOT_PTE + i] = pmap_pa2pte(pdirpa) | PG_V; 2135 pdir[PDIR_SLOT_PTE + i] = pmap_pa2pte(pdirpa) | PG_V;
2136#ifndef XEN 2136#ifndef XEN
2137 pdir[PDIR_SLOT_PTE + i] |= PG_KW; 2137 pdir[PDIR_SLOT_PTE + i] |= PG_KW;
2138#endif 2138#endif
2139 } 2139 }
2140 2140
2141 /* copy kernel's PDE */ 2141 /* copy kernel's PDE */
2142 npde = nkptp[PTP_LEVELS - 1]; 2142 npde = nkptp[PTP_LEVELS - 1];
2143 2143
2144 memcpy(&pdir[PDIR_SLOT_KERN], &PDP_BASE[PDIR_SLOT_KERN], 2144 memcpy(&pdir[PDIR_SLOT_KERN], &PDP_BASE[PDIR_SLOT_KERN],
2145 npde * sizeof(pd_entry_t)); 2145 npde * sizeof(pd_entry_t));
2146 2146
2147 /* zero the rest */ 2147 /* zero the rest */
2148 memset(&pdir[PDIR_SLOT_KERN + npde], 0, 2148 memset(&pdir[PDIR_SLOT_KERN + npde], 0,
2149 (NTOPLEVEL_PDES - (PDIR_SLOT_KERN + npde)) * sizeof(pd_entry_t)); 2149 (NTOPLEVEL_PDES - (PDIR_SLOT_KERN + npde)) * sizeof(pd_entry_t));
2150 2150
2151 if (VM_MIN_KERNEL_ADDRESS != KERNBASE) { 2151 if (VM_MIN_KERNEL_ADDRESS != KERNBASE) {
2152 int idx = pl_i(KERNBASE, PTP_LEVELS); 2152 int idx = pl_i(KERNBASE, PTP_LEVELS);
2153 2153
2154 pdir[idx] = PDP_BASE[idx]; 2154 pdir[idx] = PDP_BASE[idx];
2155 } 2155 }
2156 2156
2157#ifdef __HAVE_DIRECT_MAP 2157#ifdef __HAVE_DIRECT_MAP
2158 pdir[PDIR_SLOT_DIRECT] = PDP_BASE[PDIR_SLOT_DIRECT]; 2158 pdir[PDIR_SLOT_DIRECT] = PDP_BASE[PDIR_SLOT_DIRECT];
2159#endif 2159#endif
2160 2160
2161#endif /* XEN && __x86_64__*/ 2161#endif /* XEN && __x86_64__*/
2162#ifdef XEN 2162#ifdef XEN
2163 s = splvm(); 2163 s = splvm();
2164 object = (vaddr_t)v; 2164 object = (vaddr_t)v;
2165 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) { 2165 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) {
2166 (void) pmap_extract(pmap_kernel(), object, &pdirpa); 2166 (void) pmap_extract(pmap_kernel(), object, &pdirpa);
2167 /* FIXME: This should use pmap_protect() .. */ 2167 /* FIXME: This should use pmap_protect() .. */
2168 pmap_kenter_pa(object, pdirpa, VM_PROT_READ, 0); 2168 pmap_kenter_pa(object, pdirpa, VM_PROT_READ, 0);
2169 pmap_update(pmap_kernel()); 2169 pmap_update(pmap_kernel());
2170 /* 2170 /*
2171 * pin as L2/L4 page, we have to do the page with the 2171 * pin as L2/L4 page, we have to do the page with the
2172 * PDIR_SLOT_PTE entries last 2172 * PDIR_SLOT_PTE entries last
2173 */ 2173 */
2174#ifdef PAE 2174#ifdef PAE
2175 if (i == l2tol3(PDIR_SLOT_PTE)) 2175 if (i == l2tol3(PDIR_SLOT_PTE))
2176 continue; 2176 continue;
2177#endif 2177#endif
2178 2178
2179#ifdef __x86_64__ 2179#ifdef __x86_64__
2180 xpq_queue_pin_l4_table(xpmap_ptom_masked(pdirpa)); 2180 xpq_queue_pin_l4_table(xpmap_ptom_masked(pdirpa));
2181#else 2181#else
2182 xpq_queue_pin_l2_table(xpmap_ptom_masked(pdirpa)); 2182 xpq_queue_pin_l2_table(xpmap_ptom_masked(pdirpa));
2183#endif 2183#endif
2184 } 2184 }
2185#ifdef PAE 2185#ifdef PAE
2186 object = ((vaddr_t)pdir) + PAGE_SIZE * l2tol3(PDIR_SLOT_PTE); 2186 object = ((vaddr_t)pdir) + PAGE_SIZE * l2tol3(PDIR_SLOT_PTE);
2187 (void)pmap_extract(pmap_kernel(), object, &pdirpa); 2187 (void)pmap_extract(pmap_kernel(), object, &pdirpa);
2188 xpq_queue_pin_l2_table(xpmap_ptom_masked(pdirpa)); 2188 xpq_queue_pin_l2_table(xpmap_ptom_masked(pdirpa));
2189#endif 2189#endif
2190 splx(s); 2190 splx(s);
2191#endif /* XEN */ 2191#endif /* XEN */
2192 2192
2193 return (0); 2193 return (0);
2194} 2194}
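
pmap_pdp_ctor()/pmap_pdp_dtor() are wired into a pool cache, so a page directory that is released keeps its expensive setup (recursive PTE slots, copied kernel PDEs, and on Xen the read-only pinning) and can be handed out again without redoing it. The sketch below is a hypothetical stand-in for that cached-constructor contract, not the pool_cache(9) API itself.

#include <stdlib.h>

struct obj_cache {
	size_t	  size;
	void	 *spare;			/* one-slot stand-in cache */
	int	(*ctor)(void *arg, void *obj, int flags);
	void	(*dtor)(void *arg, void *obj);
	void	 *arg;
};

/* Get an object: reuse a constructed spare if possible, else build one. */
static void *
cache_get(struct obj_cache *c)
{
	void *obj;

	if (c->spare != NULL) {
		obj = c->spare;		/* still constructed from last put */
		c->spare = NULL;
		return obj;
	}
	obj = malloc(c->size);
	if (obj != NULL && c->ctor != NULL && c->ctor(c->arg, obj, 0) != 0) {
		free(obj);
		return NULL;
	}
	return obj;
}

/* Return an object: keep it constructed if there is room, else destruct. */
static void
cache_put(struct obj_cache *c, void *obj)
{

	if (c->spare == NULL) {
		c->spare = obj;
		return;
	}
	if (c->dtor != NULL)
		c->dtor(c->arg, obj);	/* cf. pmap_pdp_dtor() */
	free(obj);
}
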
2195 2195
2196/* 2196/*
2197 * pmap_pdp_dtor: destructor for the PDP cache. 2197 * pmap_pdp_dtor: destructor for the PDP cache.
2198 */ 2198 */
2199 2199
2200static void 2200static void
2201pmap_pdp_dtor(void *arg, void *v) 2201pmap_pdp_dtor(void *arg, void *v)
2202{ 2202{
2203#ifdef XEN 2203#ifdef XEN
2204 paddr_t pdirpa = 0; /* XXX: GCC */ 2204 paddr_t pdirpa = 0; /* XXX: GCC */
2205 vaddr_t object = (vaddr_t)v; 2205 vaddr_t object = (vaddr_t)v;
2206 int i; 2206 int i;
2207 int s = splvm(); 2207 int s = splvm();
2208 pt_entry_t *pte; 2208 pt_entry_t *pte;
2209 2209
2210 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) { 2210 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) {
2211 /* fetch the physical address of the page directory. */ 2211 /* fetch the physical address of the page directory. */
2212 (void) pmap_extract(pmap_kernel(), object, &pdirpa); 2212 (void) pmap_extract(pmap_kernel(), object, &pdirpa);
2213 /* unpin page table */ 2213 /* unpin page table */
2214 xpq_queue_unpin_table(xpmap_ptom_masked(pdirpa)); 2214 xpq_queue_unpin_table(xpmap_ptom_masked(pdirpa));
2215 } 2215 }
2216 object = (vaddr_t)v; 2216 object = (vaddr_t)v;
2217 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) { 2217 for (i = 0; i < PDP_SIZE; i++, object += PAGE_SIZE) {
2218 /* Set page RW again */ 2218 /* Set page RW again */
2219 pte = kvtopte(object); 2219 pte = kvtopte(object);
2220 xpq_queue_pte_update(xpmap_ptetomach(pte), *pte | PG_RW); 2220 xpq_queue_pte_update(xpmap_ptetomach(pte), *pte | PG_RW);
2221 xpq_queue_invlpg((vaddr_t)object); 2221 xpq_queue_invlpg((vaddr_t)object);
2222 } 2222 }
2223 splx(s); 2223 splx(s);
2224#endif /* XEN */ 2224#endif /* XEN */
2225} 2225}
2226 2226
2227#ifdef PAE 2227#ifdef PAE
2228 2228
2229/* pmap_pdp_alloc: Allocate a page for the pdp memory pool. */ 2229/* pmap_pdp_alloc: Allocate a page for the pdp memory pool. */
2230 2230
2231static void * 2231static void *
2232pmap_pdp_alloc(struct pool *pp, int flags) 2232pmap_pdp_alloc(struct pool *pp, int flags)
2233{ 2233{
2234 return (void *)uvm_km_alloc(kernel_map, 2234 return (void *)uvm_km_alloc(kernel_map,
2235 PAGE_SIZE * PDP_SIZE, PAGE_SIZE * PDP_SIZE, 2235 PAGE_SIZE * PDP_SIZE, PAGE_SIZE * PDP_SIZE,
2236 ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK) 2236 ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK)
2237 | UVM_KMF_WIRED); 2237 | UVM_KMF_WIRED);
2238} 2238}
2239 2239
2240/* 2240/*
2241 * pmap_pdp_free: free a PDP 2241 * pmap_pdp_free: free a PDP
2242 */ 2242 */
2243 2243
2244static void 2244static void
2245pmap_pdp_free(struct pool *pp, void *v) 2245pmap_pdp_free(struct pool *pp, void *v)
2246{ 2246{
2247 uvm_km_free(kernel_map, (vaddr_t)v, PAGE_SIZE * PDP_SIZE, 2247 uvm_km_free(kernel_map, (vaddr_t)v, PAGE_SIZE * PDP_SIZE,
2248 UVM_KMF_WIRED); 2248 UVM_KMF_WIRED);
2249} 2249}
2250#endif /* PAE */ 2250#endif /* PAE */
2251 2251
2252/* 2252/*
2253 * pmap_create: create a pmap 2253 * pmap_create: create a pmap
2254 * 2254 *
2255 * => note: old pmap interface took a "size" arg which allowed for 2255 * => note: old pmap interface took a "size" arg which allowed for
2256 * the creation of "software only" pmaps (not in bsd). 2256 * the creation of "software only" pmaps (not in bsd).
2257 */ 2257 */
2258 2258
2259struct pmap * 2259struct pmap *
2260pmap_create(void) 2260pmap_create(void)
2261{ 2261{
2262 struct pmap *pmap; 2262 struct pmap *pmap;
2263 int i; 2263 int i;
2264 2264
2265 pmap = pool_cache_get(&pmap_cache, PR_WAITOK); 2265 pmap = pool_cache_get(&pmap_cache, PR_WAITOK);
2266 2266
2267 /* init uvm_object */ 2267 /* init uvm_object */
2268 for (i = 0; i < PTP_LEVELS - 1; i++) { 2268 for (i = 0; i < PTP_LEVELS - 1; i++) {
2269 mutex_init(&pmap->pm_obj_lock[i], MUTEX_DEFAULT, IPL_NONE); 2269 mutex_init(&pmap->pm_obj_lock[i], MUTEX_DEFAULT, IPL_NONE);
2270 uvm_obj_init(&pmap->pm_obj[i], NULL, false, 1); 2270 uvm_obj_init(&pmap->pm_obj[i], NULL, false, 1);
2271 uvm_obj_setlock(&pmap->pm_obj[i], &pmap->pm_obj_lock[i]); 2271 uvm_obj_setlock(&pmap->pm_obj[i], &pmap->pm_obj_lock[i]);
2272 pmap->pm_ptphint[i] = NULL; 2272 pmap->pm_ptphint[i] = NULL;
2273 } 2273 }
2274 pmap->pm_stats.wired_count = 0; 2274 pmap->pm_stats.wired_count = 0;
2275 /* count the PDP allocd below */ 2275 /* count the PDP allocd below */
2276 pmap->pm_stats.resident_count = PDP_SIZE; 2276 pmap->pm_stats.resident_count = PDP_SIZE;
2277#if !defined(__x86_64__) 2277#if !defined(__x86_64__)
2278 pmap->pm_hiexec = 0; 2278 pmap->pm_hiexec = 0;
2279#endif /* !defined(__x86_64__) */ 2279#endif /* !defined(__x86_64__) */
2280 pmap->pm_flags = 0; 2280 pmap->pm_flags = 0;
2281 pmap->pm_cpus = 0; 2281 pmap->pm_cpus = 0;
2282 pmap->pm_kernel_cpus = 0; 2282 pmap->pm_kernel_cpus = 0;
2283 pmap->pm_gc_ptp = NULL; 2283 pmap->pm_gc_ptp = NULL;
2284 2284
2285 /* init the LDT */ 2285 /* init the LDT */
2286 pmap->pm_ldt = NULL; 2286 pmap->pm_ldt = NULL;
2287 pmap->pm_ldt_len = 0; 2287 pmap->pm_ldt_len = 0;
2288 pmap->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL); 2288 pmap->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL);
2289 2289
2290 /* allocate PDP */ 2290 /* allocate PDP */
2291 try_again: 2291 try_again:
2292 pmap->pm_pdir = pool_cache_get(&pmap_pdp_cache, PR_WAITOK); 2292 pmap->pm_pdir = pool_cache_get(&pmap_pdp_cache, PR_WAITOK);
2293 2293
2294 mutex_enter(&pmaps_lock); 2294 mutex_enter(&pmaps_lock);
2295 2295
2296 if (pmap->pm_pdir[PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1] - 1] == 0) { 2296 if (pmap->pm_pdir[PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1] - 1] == 0) {
2297 mutex_exit(&pmaps_lock); 2297 mutex_exit(&pmaps_lock);
2298 pool_cache_destruct_object(&pmap_pdp_cache, pmap->pm_pdir); 2298 pool_cache_destruct_object(&pmap_pdp_cache, pmap->pm_pdir);
2299 goto try_again; 2299 goto try_again;
2300 } 2300 }
2301 2301
2302 for (i = 0; i < PDP_SIZE; i++) 2302 for (i = 0; i < PDP_SIZE; i++)
2303 pmap->pm_pdirpa[i] = 2303 pmap->pm_pdirpa[i] =
2304 pmap_pte2pa(pmap->pm_pdir[PDIR_SLOT_PTE + i]); 2304 pmap_pte2pa(pmap->pm_pdir[PDIR_SLOT_PTE + i]);
2305 2305
2306 LIST_INSERT_HEAD(&pmaps, pmap, pm_list); 2306 LIST_INSERT_HEAD(&pmaps, pmap, pm_list);
2307 2307
2308 mutex_exit(&pmaps_lock); 2308 mutex_exit(&pmaps_lock);
2309 2309
2310 return (pmap); 2310 return (pmap);
2311} 2311}
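
The try_again loop in pmap_create() guards against a subtle race: a PDP taken from the cache may have been constructed before the kernel grew additional top-level PDEs, which is detected under pmaps_lock by the sentinel slot PDIR_SLOT_KERN + nkptp[top] - 1 still being zero; such a stale PDP is destructed and another one fetched. A simplified, self-contained sketch of that validate-under-lock-and-retry pattern, with a toy cache and sentinel rather than the real code:

#include <pthread.h>
#include <stdlib.h>

struct pdp {
	unsigned long	sentinel_pde;	/* 0: built before the kernel grew */
};

static pthread_mutex_t pmaps_mtx = PTHREAD_MUTEX_INITIALIZER;
static unsigned long kernel_pde_template = 0x1000 | 1;	/* arbitrary value */

/* Toy cache: here it always builds a fresh, up-to-date PDP. */
static struct pdp *
cache_get_pdp(void)
{
	struct pdp *pdp = malloc(sizeof(*pdp));

	if (pdp == NULL)
		abort();
	pdp->sentinel_pde = kernel_pde_template;
	return pdp;
}

static struct pdp *
get_valid_pdp(void)
{
	struct pdp *pdp;

	for (;;) {
		pdp = cache_get_pdp();
		pthread_mutex_lock(&pmaps_mtx);
		if (pdp->sentinel_pde != 0) {
			/*
			 * The real code links the new pmap onto the global
			 * pmaps list here, before dropping pmaps_lock, so it
			 * will see any future kernel PDE additions.
			 */
			pthread_mutex_unlock(&pmaps_mtx);
			return pdp;
		}
		pthread_mutex_unlock(&pmaps_mtx);
		free(pdp);	/* cf. pool_cache_destruct_object() */
	}
}

int
main(void)
{
	struct pdp *pdp = get_valid_pdp();

	free(pdp);
	return 0;
}
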
2312 2312
2313/* 2313/*
2314 * pmap_free_ptps: put a list of ptps back to the freelist. 2314 * pmap_free_ptps: put a list of ptps back to the freelist.
2315 */ 2315 */
2316 2316
2317static void 2317static void
2318pmap_free_ptps(struct vm_page *empty_ptps) 2318pmap_free_ptps(struct vm_page *empty_ptps)
2319{ 2319{
2320 struct vm_page *ptp; 2320 struct vm_page *ptp;
2321 struct pmap_page *pp; 2321 struct pmap_page *pp;
2322 2322
2323 while ((ptp = empty_ptps) != NULL) { 2323 while ((ptp = empty_ptps) != NULL) {
2324 pp = VM_PAGE_TO_PP(ptp); 2324 pp = VM_PAGE_TO_PP(ptp);
2325 empty_ptps = pp->pp_link; 2325 empty_ptps = pp->pp_link;
2326 LIST_INIT(&pp->pp_head.pvh_list); 2326 LIST_INIT(&pp->pp_head.pvh_list);
2327 uvm_pagefree(ptp); 2327 uvm_pagefree(ptp);
2328 } 2328 }
2329} 2329}
2330 2330
2331/* 2331/*
2332 * pmap_destroy: drop reference count on pmap. free pmap if 2332 * pmap_destroy: drop reference count on pmap. free pmap if
2333 * reference count goes to zero. 2333 * reference count goes to zero.
2334 */ 2334 */
2335 2335
2336void 2336void
2337pmap_destroy(struct pmap *pmap) 2337pmap_destroy(struct pmap *pmap)
2338{ 2338{
2339 int i; 2339 int i;
2340#ifdef DIAGNOSTIC 2340#ifdef DIAGNOSTIC
2341 struct cpu_info *ci; 2341 struct cpu_info *ci;
2342 CPU_INFO_ITERATOR cii; 2342 CPU_INFO_ITERATOR cii;
2343#endif /* DIAGNOSTIC */ 2343#endif /* DIAGNOSTIC */
2344 lwp_t *l; 2344 lwp_t *l;
2345 2345
2346 /* 2346 /*
2347 * If we have torn down this pmap, process deferred frees and 2347 * If we have torn down this pmap, process deferred frees and
2348 * invalidations. Free now if the system is low on memory. 2348 * invalidations. Free now if the system is low on memory.
2349 * Otherwise, free when the pmap is destroyed thus avoiding a 2349 * Otherwise, free when the pmap is destroyed thus avoiding a
2350 * TLB shootdown. 2350 * TLB shootdown.
2351 */ 2351 */
2352 l = curlwp; 2352 l = curlwp;
2353 if (__predict_false(l->l_md.md_gc_pmap == pmap)) { 2353 if (__predict_false(l->l_md.md_gc_pmap == pmap)) {
2354 if (uvmexp.free < uvmexp.freetarg) { 2354 if (uvmexp.free < uvmexp.freetarg) {
2355 pmap_update(pmap); 2355 pmap_update(pmap);
2356 } else { 2356 } else {
2357 KASSERT(pmap->pm_gc_ptp == NULL); 2357 KASSERT(pmap->pm_gc_ptp == NULL);
2358 pmap->pm_gc_ptp = l->l_md.md_gc_ptp; 2358 pmap->pm_gc_ptp = l->l_md.md_gc_ptp;
2359 l->l_md.md_gc_ptp = NULL; 2359 l->l_md.md_gc_ptp = NULL;
2360 l->l_md.md_gc_pmap = NULL; 2360 l->l_md.md_gc_pmap = NULL;
2361 } 2361 }
2362 } 2362 }
2363 2363
2364 /* 2364 /*
2365 * drop reference count 2365 * drop reference count
2366 */ 2366 */
2367 2367
2368 if (atomic_dec_uint_nv(&pmap->pm_obj[0].uo_refs) > 0) { 2368 if (atomic_dec_uint_nv(&pmap->pm_obj[0].uo_refs) > 0) {
2369 return; 2369 return;
2370 } 2370 }
2371 2371
2372#ifdef DIAGNOSTIC 2372#ifdef DIAGNOSTIC
2373 for (CPU_INFO_FOREACH(cii, ci)) 2373 for (CPU_INFO_FOREACH(cii, ci))