| @@ -1,1769 +1,1769 @@ | | | @@ -1,1769 +1,1769 @@ |
1 | /* $NetBSD: pmap.c,v 1.119 2011/04/14 16:00:21 yamt Exp $ */ | | 1 | /* $NetBSD: pmap.c,v 1.120 2011/04/27 07:42:11 plunky Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Copyright (c) 2007 Manuel Bouyer. | | 4 | * Copyright (c) 2007 Manuel Bouyer. |
5 | * | | 5 | * |
6 | * Redistribution and use in source and binary forms, with or without | | 6 | * Redistribution and use in source and binary forms, with or without |
7 | * modification, are permitted provided that the following conditions | | 7 | * modification, are permitted provided that the following conditions |
8 | * are met: | | 8 | * are met: |
9 | * 1. Redistributions of source code must retain the above copyright | | 9 | * 1. Redistributions of source code must retain the above copyright |
10 | * notice, this list of conditions and the following disclaimer. | | 10 | * notice, this list of conditions and the following disclaimer. |
11 | * 2. Redistributions in binary form must reproduce the above copyright | | 11 | * 2. Redistributions in binary form must reproduce the above copyright |
12 | * notice, this list of conditions and the following disclaimer in the | | 12 | * notice, this list of conditions and the following disclaimer in the |
13 | * documentation and/or other materials provided with the distribution. | | 13 | * documentation and/or other materials provided with the distribution. |
14 | * | | 14 | * |
15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | | 15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
16 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | | 16 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
17 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | | 17 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
18 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | | 18 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
19 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | | 19 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
20 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | | 20 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
21 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | | 21 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
22 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | | 22 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | | 23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
24 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | | 24 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | * | | 25 | * |
26 | */ | | 26 | */ |
27 | | | 27 | |
28 | /* | | 28 | /* |
29 | * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr> | | 29 | * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr> |
30 | * | | 30 | * |
31 | * Permission to use, copy, modify, and distribute this software for any | | 31 | * Permission to use, copy, modify, and distribute this software for any |
32 | * purpose with or without fee is hereby granted, provided that the above | | 32 | * purpose with or without fee is hereby granted, provided that the above |
33 | * copyright notice and this permission notice appear in all copies. | | 33 | * copyright notice and this permission notice appear in all copies. |
34 | * | | 34 | * |
35 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | | 35 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
36 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | | 36 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
37 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | | 37 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
38 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | | 38 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
39 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | | 39 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
40 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | | 40 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
41 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | | 41 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
42 | */ | | 42 | */ |
43 | | | 43 | |
44 | /* | | 44 | /* |
45 | * Copyright (c) 1997 Charles D. Cranor and Washington University. | | 45 | * Copyright (c) 1997 Charles D. Cranor and Washington University. |
46 | * All rights reserved. | | 46 | * All rights reserved. |
47 | * | | 47 | * |
48 | * Redistribution and use in source and binary forms, with or without | | 48 | * Redistribution and use in source and binary forms, with or without |
49 | * modification, are permitted provided that the following conditions | | 49 | * modification, are permitted provided that the following conditions |
50 | * are met: | | 50 | * are met: |
51 | * 1. Redistributions of source code must retain the above copyright | | 51 | * 1. Redistributions of source code must retain the above copyright |
52 | * notice, this list of conditions and the following disclaimer. | | 52 | * notice, this list of conditions and the following disclaimer. |
53 | * 2. Redistributions in binary form must reproduce the above copyright | | 53 | * 2. Redistributions in binary form must reproduce the above copyright |
54 | * notice, this list of conditions and the following disclaimer in the | | 54 | * notice, this list of conditions and the following disclaimer in the |
55 | * documentation and/or other materials provided with the distribution. | | 55 | * documentation and/or other materials provided with the distribution. |
56 | * | | 56 | * |
57 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | | 57 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
58 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | | 58 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
59 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | | 59 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
60 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | | 60 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
61 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | | 61 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
62 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | | 62 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
63 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | | 63 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
64 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | | 64 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
65 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | | 65 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
66 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | | 66 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
67 | */ | | 67 | */ |
68 | | | 68 | |
69 | /* | | 69 | /* |
70 | * Copyright 2001 (c) Wasabi Systems, Inc. | | 70 | * Copyright 2001 (c) Wasabi Systems, Inc. |
71 | * All rights reserved. | | 71 | * All rights reserved. |
72 | * | | 72 | * |
73 | * Written by Frank van der Linden for Wasabi Systems, Inc. | | 73 | * Written by Frank van der Linden for Wasabi Systems, Inc. |
74 | * | | 74 | * |
75 | * Redistribution and use in source and binary forms, with or without | | 75 | * Redistribution and use in source and binary forms, with or without |
76 | * modification, are permitted provided that the following conditions | | 76 | * modification, are permitted provided that the following conditions |
77 | * are met: | | 77 | * are met: |
78 | * 1. Redistributions of source code must retain the above copyright | | 78 | * 1. Redistributions of source code must retain the above copyright |
79 | * notice, this list of conditions and the following disclaimer. | | 79 | * notice, this list of conditions and the following disclaimer. |
80 | * 2. Redistributions in binary form must reproduce the above copyright | | 80 | * 2. Redistributions in binary form must reproduce the above copyright |
81 | * notice, this list of conditions and the following disclaimer in the | | 81 | * notice, this list of conditions and the following disclaimer in the |
82 | * documentation and/or other materials provided with the distribution. | | 82 | * documentation and/or other materials provided with the distribution. |
83 | * 3. All advertising materials mentioning features or use of this software | | 83 | * 3. All advertising materials mentioning features or use of this software |
84 | * must display the following acknowledgement: | | 84 | * must display the following acknowledgement: |
85 | * This product includes software developed for the NetBSD Project by | | 85 | * This product includes software developed for the NetBSD Project by |
86 | * Wasabi Systems, Inc. | | 86 | * Wasabi Systems, Inc. |
87 | * 4. The name of Wasabi Systems, Inc. may not be used to endorse | | 87 | * 4. The name of Wasabi Systems, Inc. may not be used to endorse |
88 | * or promote products derived from this software without specific prior | | 88 | * or promote products derived from this software without specific prior |
89 | * written permission. | | 89 | * written permission. |
90 | * | | 90 | * |
91 | * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND | | 91 | * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND |
92 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 92 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
93 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 93 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
94 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC | | 94 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC |
95 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 95 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
96 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 96 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
97 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 97 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
98 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 98 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
99 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 99 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
100 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 100 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
101 | * POSSIBILITY OF SUCH DAMAGE. | | 101 | * POSSIBILITY OF SUCH DAMAGE. |
102 | */ | | 102 | */ |
103 | | | 103 | |
104 | /* | | 104 | /* |
105 | * This is the i386 pmap modified and generalized to support x86-64 | | 105 | * This is the i386 pmap modified and generalized to support x86-64 |
106 | * as well. The idea is to hide the upper N levels of the page tables | | 106 | * as well. The idea is to hide the upper N levels of the page tables |
107 | * inside pmap_get_ptp, pmap_free_ptp and pmap_growkernel. The rest | | 107 | * inside pmap_get_ptp, pmap_free_ptp and pmap_growkernel. The rest |
108 | * is mostly untouched, except that it uses some more generalized | | 108 | * is mostly untouched, except that it uses some more generalized |
109 | * macros and interfaces. | | 109 | * macros and interfaces. |
110 | * | | 110 | * |
111 | * This pmap has been tested on the i386 as well, and it can be easily | | 111 | * This pmap has been tested on the i386 as well, and it can be easily |
112 | * adapted to PAE. | | 112 | * adapted to PAE. |
113 | * | | 113 | * |
114 | * fvdl@wasabisystems.com 18-Jun-2001 | | 114 | * fvdl@wasabisystems.com 18-Jun-2001 |
115 | */ | | 115 | */ |
116 | | | 116 | |
117 | /* | | 117 | /* |
118 | * pmap.c: i386 pmap module rewrite | | 118 | * pmap.c: i386 pmap module rewrite |
119 | * Chuck Cranor <chuck@netbsd> | | 119 | * Chuck Cranor <chuck@netbsd> |
120 | * 11-Aug-97 | | 120 | * 11-Aug-97 |
121 | * | | 121 | * |
122 | * history of this pmap module: in addition to my own input, i used | | 122 | * history of this pmap module: in addition to my own input, i used |
123 | * the following references for this rewrite of the i386 pmap: | | 123 | * the following references for this rewrite of the i386 pmap: |
124 | * | | 124 | * |
125 | * [1] the NetBSD i386 pmap. this pmap appears to be based on the | | 125 | * [1] the NetBSD i386 pmap. this pmap appears to be based on the |
126 | * BSD hp300 pmap done by Mike Hibler at University of Utah. | | 126 | * BSD hp300 pmap done by Mike Hibler at University of Utah. |
127 | * it was then ported to the i386 by William Jolitz of UUNET | | 127 | * it was then ported to the i386 by William Jolitz of UUNET |
128 | * Technologies, Inc. Then Charles M. Hannum of the NetBSD | | 128 | * Technologies, Inc. Then Charles M. Hannum of the NetBSD |
129 | * project fixed some bugs and provided some speed ups. | | 129 | * project fixed some bugs and provided some speed ups. |
130 | * | | 130 | * |
131 | * [2] the FreeBSD i386 pmap. this pmap seems to be the | | 131 | * [2] the FreeBSD i386 pmap. this pmap seems to be the |
132 | * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson | | 132 | * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson |
133 | * and David Greenman. | | 133 | * and David Greenman. |
134 | * | | 134 | * |
135 | * [3] the Mach pmap. this pmap, from CMU, seems to have migrated | | 135 | * [3] the Mach pmap. this pmap, from CMU, seems to have migrated |
136 | * between several processors. the VAX version was done by | | 136 | * between several processors. the VAX version was done by |
137 | * Avadis Tevanian, Jr., and Michael Wayne Young. the i386 | | 137 | * Avadis Tevanian, Jr., and Michael Wayne Young. the i386 |
138 | * version was done by Lance Berc, Mike Kupfer, Bob Baron, | | 138 | * version was done by Lance Berc, Mike Kupfer, Bob Baron, |
139 | * David Golub, and Richard Draves. the alpha version was | | 139 | * David Golub, and Richard Draves. the alpha version was |
140 | * done by Alessandro Forin (CMU/Mach) and Chris Demetriou | | 140 | * done by Alessandro Forin (CMU/Mach) and Chris Demetriou |
141 | * (NetBSD/alpha). | | 141 | * (NetBSD/alpha). |
142 | */ | | 142 | */ |
143 | | | 143 | |
144 | #include <sys/cdefs.h> | | 144 | #include <sys/cdefs.h> |
145 | __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.119 2011/04/14 16:00:21 yamt Exp $"); | | 145 | __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.120 2011/04/27 07:42:11 plunky Exp $"); |
146 | | | 146 | |
147 | #include "opt_user_ldt.h" | | 147 | #include "opt_user_ldt.h" |
148 | #include "opt_lockdebug.h" | | 148 | #include "opt_lockdebug.h" |
149 | #include "opt_multiprocessor.h" | | 149 | #include "opt_multiprocessor.h" |
150 | #include "opt_xen.h" | | 150 | #include "opt_xen.h" |
151 | #if !defined(__x86_64__) | | 151 | #if !defined(__x86_64__) |
152 | #include "opt_kstack_dr0.h" | | 152 | #include "opt_kstack_dr0.h" |
153 | #endif /* !defined(__x86_64__) */ | | 153 | #endif /* !defined(__x86_64__) */ |
154 | | | 154 | |
155 | #include <sys/param.h> | | 155 | #include <sys/param.h> |
156 | #include <sys/systm.h> | | 156 | #include <sys/systm.h> |
157 | #include <sys/proc.h> | | 157 | #include <sys/proc.h> |
158 | #include <sys/pool.h> | | 158 | #include <sys/pool.h> |
159 | #include <sys/kernel.h> | | 159 | #include <sys/kernel.h> |
160 | #include <sys/atomic.h> | | 160 | #include <sys/atomic.h> |
161 | #include <sys/cpu.h> | | 161 | #include <sys/cpu.h> |
162 | #include <sys/intr.h> | | 162 | #include <sys/intr.h> |
163 | #include <sys/xcall.h> | | 163 | #include <sys/xcall.h> |
164 | | | 164 | |
165 | #include <uvm/uvm.h> | | 165 | #include <uvm/uvm.h> |
166 | | | 166 | |
167 | #include <dev/isa/isareg.h> | | 167 | #include <dev/isa/isareg.h> |
168 | | | 168 | |
169 | #include <machine/specialreg.h> | | 169 | #include <machine/specialreg.h> |
170 | #include <machine/gdt.h> | | 170 | #include <machine/gdt.h> |
171 | #include <machine/isa_machdep.h> | | 171 | #include <machine/isa_machdep.h> |
172 | #include <machine/cpuvar.h> | | 172 | #include <machine/cpuvar.h> |
173 | | | 173 | |
174 | #include <x86/pmap.h> | | 174 | #include <x86/pmap.h> |
175 | #include <x86/pmap_pv.h> | | 175 | #include <x86/pmap_pv.h> |
176 | | | 176 | |
177 | #include <x86/i82489reg.h> | | 177 | #include <x86/i82489reg.h> |
178 | #include <x86/i82489var.h> | | 178 | #include <x86/i82489var.h> |
179 | | | 179 | |
180 | #ifdef XEN | | 180 | #ifdef XEN |
181 | #include <xen/xen3-public/xen.h> | | 181 | #include <xen/xen3-public/xen.h> |
182 | #include <xen/hypervisor.h> | | 182 | #include <xen/hypervisor.h> |
183 | #endif | | 183 | #endif |
184 | | | 184 | |
185 | /* flag to be used for kernel mappings: PG_u on Xen/amd64, 0 otherwise */ | | 185 | /* flag to be used for kernel mappings: PG_u on Xen/amd64, 0 otherwise */ |
186 | #if defined(XEN) && defined(__x86_64__) | | 186 | #if defined(XEN) && defined(__x86_64__) |
187 | #define PG_k PG_u | | 187 | #define PG_k PG_u |
188 | #else | | 188 | #else |
189 | #define PG_k 0 | | 189 | #define PG_k 0 |
190 | #endif | | 190 | #endif |
191 | | | 191 | |
192 | /* | | 192 | /* |
193 | * general info: | | 193 | * general info: |
194 | * | | 194 | * |
195 | * - for an explanation of how the i386 MMU hardware works see | | 195 | * - for an explanation of how the i386 MMU hardware works see |
196 | * the comments in <machine/pte.h>. | | 196 | * the comments in <machine/pte.h>. |
197 | * | | 197 | * |
198 | * - for an explanation of the general memory structure used by | | 198 | * - for an explanation of the general memory structure used by |
199 | * this pmap (including the recursive mapping), see the comments | | 199 | * this pmap (including the recursive mapping), see the comments |
200 | * in <machine/pmap.h>. | | 200 | * in <machine/pmap.h>. |
201 | * | | 201 | * |
202 | * this file contains the code for the "pmap module." the module's | | 202 | * this file contains the code for the "pmap module." the module's |
203 | * job is to manage the hardware's virtual to physical address mappings. | | 203 | * job is to manage the hardware's virtual to physical address mappings. |
204 | * note that there are two levels of mapping in the VM system: | | 204 | * note that there are two levels of mapping in the VM system: |
205 | * | | 205 | * |
206 | * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's | | 206 | * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's |
207 | * to map ranges of virtual address space to objects/files. for | | 207 | * to map ranges of virtual address space to objects/files. for |
208 | * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only | | 208 | * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only |
209 | * to the file /bin/ls starting at offset zero." note that | | 209 | * to the file /bin/ls starting at offset zero." note that |
210 | * the upper layer mapping is not concerned with how individual | | 210 | * the upper layer mapping is not concerned with how individual |
211 | * vm_pages are mapped. | | 211 | * vm_pages are mapped. |
212 | * | | 212 | * |
213 | * [2] the lower layer of the VM system (the pmap) maintains the mappings | | 213 | * [2] the lower layer of the VM system (the pmap) maintains the mappings |
214 | * from virtual addresses. it is concerned with which vm_page is | | 214 | * from virtual addresses. it is concerned with which vm_page is |
215 | * mapped where. for example, when you run /bin/ls and start | | 215 | * mapped where. for example, when you run /bin/ls and start |
216 | * at page 0x1000 the fault routine may lookup the correct page | | 216 | * at page 0x1000 the fault routine may lookup the correct page |
217 | * of the /bin/ls file and then ask the pmap layer to establish | | 217 | * of the /bin/ls file and then ask the pmap layer to establish |
218 | * a mapping for it. | | 218 | * a mapping for it. |
219 | * | | 219 | * |
220 | * note that information in the lower layer of the VM system can be | | 220 | * note that information in the lower layer of the VM system can be |
221 | * thrown away since it can easily be reconstructed from the info | | 221 | * thrown away since it can easily be reconstructed from the info |
222 | * in the upper layer. | | 222 | * in the upper layer. |
223 | * | | 223 | * |
224 | * data structures we use include: | | 224 | * data structures we use include: |
225 | * | | 225 | * |
226 | * - struct pmap: describes the address space of one thread | | 226 | * - struct pmap: describes the address space of one thread |
227 | * - struct pv_entry: describes one <PMAP,VA> mapping of a PA | | 227 | * - struct pv_entry: describes one <PMAP,VA> mapping of a PA |
228 | * - struct pv_head: there is one pv_head per managed page of | | 228 | * - struct pv_head: there is one pv_head per managed page of |
229 | * physical memory. the pv_head points to a list of pv_entry | | 229 | * physical memory. the pv_head points to a list of pv_entry |
230 | * structures which describe all the <PMAP,VA> pairs that this | | 230 | * structures which describe all the <PMAP,VA> pairs that this |
231 | * page is mapped in. this is critical for page based operations | | 231 | * page is mapped in. this is critical for page based operations |
232 | * such as pmap_page_protect() [change protection on _all_ mappings | | 232 | * such as pmap_page_protect() [change protection on _all_ mappings |
233 | * of a page] | | 233 | * of a page] |
234 | */ | | 234 | */ |
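As a concrete illustration of the pv_head/pv_entry relationship described above, the sketch below (editor's example, not part of pmap.c) walks every <PMAP,VA> mapping of a managed page using the VM_PAGE_TO_PP(), pp_lock()/pp_unlock() and pv_pte_first()/pv_pte_next() helpers that appear further down in this file; handle_one_mapping() is a hypothetical stand-in for the per-mapping work.

/*
 * Editor's sketch: iterate over all <PMAP,VA> mappings of one
 * managed page, the way pmap_page_protect()-style code must.
 * handle_one_mapping() is hypothetical.
 */
static void
example_walk_page_mappings(struct vm_page *pg)
{
	struct pmap_page *pp = VM_PAGE_TO_PP(pg);
	struct pv_pte *pvpte;

	pp_lock(pp);
	for (pvpte = pv_pte_first(pp); pvpte != NULL;
	    pvpte = pv_pte_next(pp, pvpte)) {
		/* each pv_pte names one mapping: its PTP and its VA */
		handle_one_mapping(pvpte->pte_ptp, pvpte->pte_va);
	}
	pp_unlock(pp);
}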
235 | | | 235 | |
236 | /* | | 236 | /* |
237 | * memory allocation | | 237 | * memory allocation |
238 | * | | 238 | * |
239 | * - there are three data structures that we must dynamically allocate: | | 239 | * - there are three data structures that we must dynamically allocate: |
240 | * | | 240 | * |
241 | * [A] new process' page directory page (PDP) | | 241 | * [A] new process' page directory page (PDP) |
242 | * - plan 1: done at pmap_create() we use | | 242 | * - plan 1: done at pmap_create() we use |
243 | * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this | | 243 | * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this |
244 | * allocation. | | 244 | * allocation. |
245 | * | | 245 | * |
246 | * if we are low in free physical memory then we sleep in | | 246 | * if we are low in free physical memory then we sleep in |
247 | * uvm_km_alloc -- in this case this is ok since we are creating | | 247 | * uvm_km_alloc -- in this case this is ok since we are creating |
248 | * a new pmap and should not be holding any locks. | | 248 | * a new pmap and should not be holding any locks. |
249 | * | | 249 | * |
250 | * if the kernel is totally out of virtual space | | 250 | * if the kernel is totally out of virtual space |
251 | * (i.e. uvm_km_alloc returns NULL), then we panic. | | 251 | * (i.e. uvm_km_alloc returns NULL), then we panic. |
252 | * | | 252 | * |
253 | * [B] new page table pages (PTP) | | 253 | * [B] new page table pages (PTP) |
254 | * - call uvm_pagealloc() | | 254 | * - call uvm_pagealloc() |
255 | * => success: zero page, add to pm_pdir | | 255 | * => success: zero page, add to pm_pdir |
256 | * => failure: we are out of free vm_pages, let pmap_enter() | | 256 | * => failure: we are out of free vm_pages, let pmap_enter() |
257 | * tell UVM about it. | | 257 | * tell UVM about it. |
258 | * | | 258 | * |
259 | * note: for kernel PTPs, we start with NKPTP of them. as we map | | 259 | * note: for kernel PTPs, we start with NKPTP of them. as we map |
260 | * kernel memory (at uvm_map time) we check to see if we've grown | | 260 | * kernel memory (at uvm_map time) we check to see if we've grown |
261 | * the kernel pmap. if so, we call the optional function | | 261 | * the kernel pmap. if so, we call the optional function |
262 | * pmap_growkernel() to grow the kernel PTPs in advance. | | 262 | * pmap_growkernel() to grow the kernel PTPs in advance. |
263 | * | | 263 | * |
264 | * [C] pv_entry structures | | 264 | * [C] pv_entry structures |
265 | */ | | 265 | */ |
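A minimal sketch of case [B] above (editor's example, not the real pmap_get_ptp()): the PTP comes from uvm_pagealloc() pre-zeroed, and an allocation failure is simply propagated so pmap_enter() can tell UVM. The "obj" and "off" parameters are placeholders for the per-level page-table uvm_object and offset the real code derives from the VA.

/*
 * Editor's sketch: allocate one page table page as described in [B].
 * "obj" and "off" are placeholders for the pmap's PTP uvm_object and
 * the offset derived from the faulting VA.
 */
static struct vm_page *
example_alloc_ptp(struct uvm_object *obj, voff_t off)
{
	struct vm_page *ptp;

	ptp = uvm_pagealloc(obj, off, NULL, UVM_PGA_USERESERVE | UVM_PGA_ZERO);
	if (ptp == NULL)
		return NULL;	/* out of vm_pages: let pmap_enter() tell UVM */
	ptp->flags &= ~PG_BUSY;	/* PTPs are never busy */
	return ptp;
}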
266 | | | 266 | |
267 | /* | | 267 | /* |
268 | * locking | | 268 | * locking |
269 | * | | 269 | * |
270 | * we have the following locks that we must contend with: | | 270 | * we have the following locks that we must contend with: |
271 | * | | 271 | * |
272 | * mutexes: | | 272 | * mutexes: |
273 | * | | 273 | * |
274 | * - pmap lock (per pmap, part of uvm_object) | | 274 | * - pmap lock (per pmap, part of uvm_object) |
275 | * this lock protects the fields in the pmap structure including | | 275 | * this lock protects the fields in the pmap structure including |
276 | * the non-kernel PDEs in the PDP, and the PTEs. it also locks | | 276 | * the non-kernel PDEs in the PDP, and the PTEs. it also locks |
277 | * in the alternate PTE space (since that is determined by the | | 277 | * in the alternate PTE space (since that is determined by the |
278 | * entry in the PDP). | | 278 | * entry in the PDP). |
279 | * | | 279 | * |
280 | * - pvh_lock (per pv_head) | | 280 | * - pvh_lock (per pv_head) |
281 | * this lock protects the pv_entry list which is chained off the | | 281 | * this lock protects the pv_entry list which is chained off the |
282 | * pv_head structure for a specific managed PA. it is locked | | 282 | * pv_head structure for a specific managed PA. it is locked |
283 | * when traversing the list (e.g. adding/removing mappings, | | 283 | * when traversing the list (e.g. adding/removing mappings, |
284 | * syncing R/M bits, etc.) | | 284 | * syncing R/M bits, etc.) |
285 | * | | 285 | * |
286 | * - pmaps_lock | | 286 | * - pmaps_lock |
287 | * this lock protects the list of active pmaps (headed by "pmaps"). | | 287 | * this lock protects the list of active pmaps (headed by "pmaps"). |
288 | * we lock it when adding or removing pmaps from this list. | | 288 | * we lock it when adding or removing pmaps from this list. |
289 | * | | 289 | * |
290 | * tlb shootdown | | 290 | * tlb shootdown |
291 | * | | 291 | * |
292 | * tlb shootdowns are hard interrupts that operate outside the spl | | 292 | * tlb shootdowns are hard interrupts that operate outside the spl |
293 | * framework: they don't need to be blocked provided that the pmap module | | 293 | * framework: they don't need to be blocked provided that the pmap module |
294 | * gets the order of events correct. the calls are made by talking directly | | 294 | * gets the order of events correct. the calls are made by talking directly |
295 | * to the lapic. the stubs to handle the interrupts are quite short and do | | 295 | * to the lapic. the stubs to handle the interrupts are quite short and do |
296 | * one of the following: invalidate a single page, a range of pages, all | | 296 | * one of the following: invalidate a single page, a range of pages, all |
297 | * user tlb entries or the entire tlb. | | 297 | * user tlb entries or the entire tlb. |
298 | * | | 298 | * |
299 | * the cpus synchronize with each other using pmap_mbox structures which are | | 299 | * the cpus synchronize with each other using pmap_mbox structures which are |
300 | * aligned on 64-byte cache lines. tlb shootdowns against the kernel pmap | | 300 | * aligned on 64-byte cache lines. tlb shootdowns against the kernel pmap |
301 | * use a global mailbox and are generated using a broadcast ipi (broadcast | | 301 | * use a global mailbox and are generated using a broadcast ipi (broadcast |
302 | * to all but the sending cpu). shootdowns against regular pmaps use | | 302 | * to all but the sending cpu). shootdowns against regular pmaps use |
303 | * per-cpu mailboxes and are multicast. kernel and user shootdowns can | | 303 | * per-cpu mailboxes and are multicast. kernel and user shootdowns can |
304 | * execute simultaneously, as can shootdowns within different multithreaded | | 304 | * execute simultaneously, as can shootdowns within different multithreaded |
305 | * processes. TODO: | | 305 | * processes. TODO: |
306 | * | | 306 | * |
307 | * 1. figure out which waitpoints can be deferred to pmap_update(). | | 307 | * 1. figure out which waitpoints can be deferred to pmap_update(). |
308 | * 2. see if there is a cheap way to batch some updates. | | 308 | * 2. see if there is a cheap way to batch some updates. |
309 | */ | | 309 | */ |
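For a caller, the shootdown machinery described above reduces to "post the invalidation, then wait for the mailboxes". A minimal sketch follows (editor's example, not part of the file), using the same pmap_tlb_shootdown()/pmap_tlb_shootwait() pair and the (vaddr_t)-1 "whole pmap" convention that pmap_apte_flush() uses later in this file.

/*
 * Editor's sketch: invalidate all TLB entries for "pmap" on every
 * CPU currently using it, then spin until each CPU has processed
 * its shootdown mailbox.
 */
static void
example_shootdown_whole_pmap(struct pmap *pmap)
{
	kpreempt_disable();
	pmap_tlb_shootdown(pmap, (vaddr_t)-1LL, 0, 0);
	pmap_tlb_shootwait();
	kpreempt_enable();
}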
310 | | | 310 | |
311 | const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER; | | 311 | const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER; |
312 | const int ptp_shifts[] = PTP_SHIFT_INITIALIZER; | | 312 | const int ptp_shifts[] = PTP_SHIFT_INITIALIZER; |
313 | const long nkptpmax[] = NKPTPMAX_INITIALIZER; | | 313 | const long nkptpmax[] = NKPTPMAX_INITIALIZER; |
314 | const long nbpd[] = NBPD_INITIALIZER; | | 314 | const long nbpd[] = NBPD_INITIALIZER; |
315 | pd_entry_t * const normal_pdes[] = PDES_INITIALIZER; | | 315 | pd_entry_t * const normal_pdes[] = PDES_INITIALIZER; |
316 | pd_entry_t * const alternate_pdes[] = APDES_INITIALIZER; | | 316 | pd_entry_t * const alternate_pdes[] = APDES_INITIALIZER; |
317 | | | 317 | |
318 | long nkptp[] = NKPTP_INITIALIZER; | | 318 | long nkptp[] = NKPTP_INITIALIZER; |
319 | | | 319 | |
320 | static kmutex_t pmaps_lock; | | 320 | static kmutex_t pmaps_lock; |
321 | | | 321 | |
322 | static vaddr_t pmap_maxkvaddr; | | 322 | static vaddr_t pmap_maxkvaddr; |
323 | | | 323 | |
324 | #define COUNT(x) /* nothing */ | | 324 | #define COUNT(x) /* nothing */ |
325 | | | 325 | |
326 | /* | | 326 | /* |
327 | * XXX kludge: dummy locking to make KASSERTs in uvm_page.c comfortable. | | 327 | * XXX kludge: dummy locking to make KASSERTs in uvm_page.c comfortable. |
328 | * actual locking is done by pm_lock. | | 328 | * actual locking is done by pm_lock. |
329 | */ | | 329 | */ |
330 | #if defined(DIAGNOSTIC) | | 330 | #if defined(DIAGNOSTIC) |
331 | #define PMAP_SUBOBJ_LOCK(pm, idx) \ | | 331 | #define PMAP_SUBOBJ_LOCK(pm, idx) \ |
332 | KASSERT(mutex_owned(&(pm)->pm_lock)); \ | | 332 | KASSERT(mutex_owned(&(pm)->pm_lock)); \ |
333 | if ((idx) != 0) \ | | 333 | if ((idx) != 0) \ |
334 | mutex_enter(&(pm)->pm_obj[(idx)].vmobjlock) | | 334 | mutex_enter(&(pm)->pm_obj[(idx)].vmobjlock) |
335 | #define PMAP_SUBOBJ_UNLOCK(pm, idx) \ | | 335 | #define PMAP_SUBOBJ_UNLOCK(pm, idx) \ |
336 | KASSERT(mutex_owned(&(pm)->pm_lock)); \ | | 336 | KASSERT(mutex_owned(&(pm)->pm_lock)); \ |
337 | if ((idx) != 0) \ | | 337 | if ((idx) != 0) \ |
338 | mutex_exit(&(pm)->pm_obj[(idx)].vmobjlock) | | 338 | mutex_exit(&(pm)->pm_obj[(idx)].vmobjlock) |
339 | #else /* defined(DIAGNOSTIC) */ | | 339 | #else /* defined(DIAGNOSTIC) */ |
340 | #define PMAP_SUBOBJ_LOCK(pm, idx) /* nothing */ | | 340 | #define PMAP_SUBOBJ_LOCK(pm, idx) /* nothing */ |
341 | #define PMAP_SUBOBJ_UNLOCK(pm, idx) /* nothing */ | | 341 | #define PMAP_SUBOBJ_UNLOCK(pm, idx) /* nothing */ |
342 | #endif /* defined(DIAGNOSTIC) */ | | 342 | #endif /* defined(DIAGNOSTIC) */ |
343 | | | 343 | |
344 | /* | | 344 | /* |
345 | * Misc. event counters. | | 345 | * Misc. event counters. |
346 | */ | | 346 | */ |
347 | struct evcnt pmap_iobmp_evcnt; | | 347 | struct evcnt pmap_iobmp_evcnt; |
348 | struct evcnt pmap_ldt_evcnt; | | 348 | struct evcnt pmap_ldt_evcnt; |
349 | | | 349 | |
350 | /* | | 350 | /* |
351 | * Global TLB shootdown mailbox. | | 351 | * Global TLB shootdown mailbox. |
352 | */ | | 352 | */ |
353 | struct evcnt pmap_tlb_evcnt __aligned(64); | | 353 | struct evcnt pmap_tlb_evcnt __aligned(64); |
354 | struct pmap_mbox pmap_mbox __aligned(64); | | 354 | struct pmap_mbox pmap_mbox __aligned(64); |
355 | | | 355 | |
356 | /* | | 356 | /* |
357 | * PAT | | 357 | * PAT |
358 | */ | | 358 | */ |
359 | #define PATENTRY(n, type) (type << ((n) * 8)) | | 359 | #define PATENTRY(n, type) (type << ((n) * 8)) |
360 | #define PAT_UC 0x0ULL | | 360 | #define PAT_UC 0x0ULL |
361 | #define PAT_WC 0x1ULL | | 361 | #define PAT_WC 0x1ULL |
362 | #define PAT_WT 0x4ULL | | 362 | #define PAT_WT 0x4ULL |
363 | #define PAT_WP 0x5ULL | | 363 | #define PAT_WP 0x5ULL |
364 | #define PAT_WB 0x6ULL | | 364 | #define PAT_WB 0x6ULL |
365 | #define PAT_UCMINUS 0x7ULL | | 365 | #define PAT_UCMINUS 0x7ULL |
366 | | | 366 | |
367 | static bool cpu_pat_enabled = false; | | 367 | static bool cpu_pat_enabled = false; |
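Each PATENTRY() fills one of the eight 8-bit slots of the PAT MSR, so a complete PAT configuration is just the OR of eight entries. The sketch below is an editor's example of one plausible layout; the wrmsr() and MSR_CR_PAT names are assumptions about the machine-dependent MSR interface, not taken from this diff.

/*
 * Editor's sketch: compose a PAT value from the macros above and
 * load it.  wrmsr() and MSR_CR_PAT are assumed to come from the
 * x86 machine headers.
 */
static void
example_load_pat(void)
{
	uint64_t pat;

	pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WT) |
	    PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) |
	    PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WT) |
	    PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_WC);
	wrmsr(MSR_CR_PAT, pat);
	cpu_pat_enabled = true;
}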
368 | | | 368 | |
369 | | | 369 | |
370 | /* | | 370 | /* |
371 | * Per-CPU data. The pmap mailbox is cache intensive so gets its | | 371 | * Per-CPU data. The pmap mailbox is cache intensive so gets its |
372 | * own line. Note that the mailbox must be the first item. | | 372 | * own line. Note that the mailbox must be the first item. |
373 | */ | | 373 | */ |
374 | struct pmap_cpu { | | 374 | struct pmap_cpu { |
375 | /* TLB shootdown */ | | 375 | /* TLB shootdown */ |
376 | struct pmap_mbox pc_mbox; | | 376 | struct pmap_mbox pc_mbox; |
377 | }; | | 377 | }; |
378 | | | 378 | |
379 | union { | | 379 | union { |
380 | struct pmap_cpu pc; | | 380 | struct pmap_cpu pc; |
381 | uint8_t padding[64]; | | 381 | uint8_t padding[64]; |
382 | } pmap_cpu[MAXCPUS] __aligned(64); | | 382 | } pmap_cpu[MAXCPUS] __aligned(64); |
383 | | | 383 | |
384 | /* | | 384 | /* |
385 | * global data structures | | 385 | * global data structures |
386 | */ | | 386 | */ |
387 | | | 387 | |
388 | static struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ | | 388 | static struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ |
389 | struct pmap *const kernel_pmap_ptr = &kernel_pmap_store; | | 389 | struct pmap *const kernel_pmap_ptr = &kernel_pmap_store; |
390 | | | 390 | |
391 | /* | | 391 | /* |
392 | * pmap_pg_g: if our processor supports PG_G in the PTE then we | | 392 | * pmap_pg_g: if our processor supports PG_G in the PTE then we |
393 | * set pmap_pg_g to PG_G (otherwise it is zero). | | 393 | * set pmap_pg_g to PG_G (otherwise it is zero). |
394 | */ | | 394 | */ |
395 | | | 395 | |
396 | int pmap_pg_g = 0; | | 396 | int pmap_pg_g = 0; |
397 | | | 397 | |
398 | /* | | 398 | /* |
399 | * pmap_largepages: if our processor supports PG_PS and we are | | 399 | * pmap_largepages: if our processor supports PG_PS and we are |
400 | * using it, this is set to true. | | 400 | * using it, this is set to true. |
401 | */ | | 401 | */ |
402 | | | 402 | |
403 | int pmap_largepages; | | 403 | int pmap_largepages; |
404 | | | 404 | |
405 | /* | | 405 | /* |
406 | * i386 physical memory comes in a big contig chunk with a small | | 406 | * i386 physical memory comes in a big contig chunk with a small |
407 | * hole toward the front of it... the following two paddr_t's | | 407 | * hole toward the front of it... the following two paddr_t's |
408 | * (shared with machdep.c) describe the physical address space | | 408 | * (shared with machdep.c) describe the physical address space |
409 | * of this machine. | | 409 | * of this machine. |
410 | */ | | 410 | */ |
411 | paddr_t avail_start; /* PA of first available physical page */ | | 411 | paddr_t avail_start; /* PA of first available physical page */ |
412 | paddr_t avail_end; /* PA of last available physical page */ | | 412 | paddr_t avail_end; /* PA of last available physical page */ |
413 | | | 413 | |
414 | #ifdef XEN | | 414 | #ifdef XEN |
415 | #ifdef __x86_64__ | | 415 | #ifdef __x86_64__ |
416 | /* Dummy PGD for user cr3, used between pmap_deactivate() and pmap_activate() */ | | 416 | /* Dummy PGD for user cr3, used between pmap_deactivate() and pmap_activate() */ |
417 | static paddr_t xen_dummy_user_pgd; | | 417 | static paddr_t xen_dummy_user_pgd; |
418 | #endif /* __x86_64__ */ | | 418 | #endif /* __x86_64__ */ |
419 | paddr_t pmap_pa_start; /* PA of first physical page for this domain */ | | 419 | paddr_t pmap_pa_start; /* PA of first physical page for this domain */ |
420 | paddr_t pmap_pa_end; /* PA of last physical page for this domain */ | | 420 | paddr_t pmap_pa_end; /* PA of last physical page for this domain */ |
421 | #endif /* XEN */ | | 421 | #endif /* XEN */ |
422 | | | 422 | |
423 | #define VM_PAGE_TO_PP(pg) (&(pg)->mdpage.mp_pp) | | 423 | #define VM_PAGE_TO_PP(pg) (&(pg)->mdpage.mp_pp) |
424 | | | 424 | |
425 | #define pp_lock(pp) mutex_spin_enter(&(pp)->pp_lock) | | 425 | #define pp_lock(pp) mutex_spin_enter(&(pp)->pp_lock) |
426 | #define pp_unlock(pp) mutex_spin_exit(&(pp)->pp_lock) | | 426 | #define pp_unlock(pp) mutex_spin_exit(&(pp)->pp_lock) |
427 | #define pp_locked(pp) mutex_owned(&(pp)->pp_lock) | | 427 | #define pp_locked(pp) mutex_owned(&(pp)->pp_lock) |
428 | | | 428 | |
429 | #define PV_HASH_SIZE 32768 | | 429 | #define PV_HASH_SIZE 32768 |
430 | #define PV_HASH_LOCK_CNT 32 | | 430 | #define PV_HASH_LOCK_CNT 32 |
431 | | | 431 | |
432 | struct pv_hash_lock { | | 432 | struct pv_hash_lock { |
433 | kmutex_t lock; | | 433 | kmutex_t lock; |
434 | } __aligned(CACHE_LINE_SIZE) pv_hash_locks[PV_HASH_LOCK_CNT] | | 434 | } __aligned(CACHE_LINE_SIZE) pv_hash_locks[PV_HASH_LOCK_CNT] |
435 | __aligned(CACHE_LINE_SIZE); | | 435 | __aligned(CACHE_LINE_SIZE); |
436 | | | 436 | |
437 | struct pv_hash_head { | | 437 | struct pv_hash_head { |
438 | SLIST_HEAD(, pv_entry) hh_list; | | 438 | SLIST_HEAD(, pv_entry) hh_list; |
439 | } pv_hash_heads[PV_HASH_SIZE]; | | 439 | } pv_hash_heads[PV_HASH_SIZE]; |
440 | | | 440 | |
441 | static u_int | | 441 | static u_int |
442 | pvhash_hash(struct vm_page *ptp, vaddr_t va) | | 442 | pvhash_hash(struct vm_page *ptp, vaddr_t va) |
443 | { | | 443 | { |
444 | | | 444 | |
445 | return (uintptr_t)ptp / sizeof(*ptp) + (va >> PAGE_SHIFT); | | 445 | return (uintptr_t)ptp / sizeof(*ptp) + (va >> PAGE_SHIFT); |
446 | } | | 446 | } |
447 | | | 447 | |
448 | static struct pv_hash_head * | | 448 | static struct pv_hash_head * |
449 | pvhash_head(u_int hash) | | 449 | pvhash_head(u_int hash) |
450 | { | | 450 | { |
451 | | | 451 | |
452 | return &pv_hash_heads[hash % PV_HASH_SIZE]; | | 452 | return &pv_hash_heads[hash % PV_HASH_SIZE]; |
453 | } | | 453 | } |
454 | | | 454 | |
455 | static kmutex_t * | | 455 | static kmutex_t * |
456 | pvhash_lock(u_int hash) | | 456 | pvhash_lock(u_int hash) |
457 | { | | 457 | { |
458 | | | 458 | |
459 | return &pv_hash_locks[hash % PV_HASH_LOCK_CNT].lock; | | 459 | return &pv_hash_locks[hash % PV_HASH_LOCK_CNT].lock; |
460 | } | | 460 | } |
461 | | | 461 | |
462 | static struct pv_entry * | | 462 | static struct pv_entry * |
463 | pvhash_remove(struct pv_hash_head *hh, struct vm_page *ptp, vaddr_t va) | | 463 | pvhash_remove(struct pv_hash_head *hh, struct vm_page *ptp, vaddr_t va) |
464 | { | | 464 | { |
465 | struct pv_entry *pve; | | 465 | struct pv_entry *pve; |
466 | struct pv_entry *prev; | | 466 | struct pv_entry *prev; |
467 | | | 467 | |
468 | prev = NULL; | | 468 | prev = NULL; |
469 | SLIST_FOREACH(pve, &hh->hh_list, pve_hash) { | | 469 | SLIST_FOREACH(pve, &hh->hh_list, pve_hash) { |
470 | if (pve->pve_pte.pte_ptp == ptp && | | 470 | if (pve->pve_pte.pte_ptp == ptp && |
471 | pve->pve_pte.pte_va == va) { | | 471 | pve->pve_pte.pte_va == va) { |
472 | if (prev != NULL) { | | 472 | if (prev != NULL) { |
473 | SLIST_REMOVE_AFTER(prev, pve_hash); | | 473 | SLIST_REMOVE_AFTER(prev, pve_hash); |
474 | } else { | | 474 | } else { |
475 | SLIST_REMOVE_HEAD(&hh->hh_list, pve_hash); | | 475 | SLIST_REMOVE_HEAD(&hh->hh_list, pve_hash); |
476 | } | | 476 | } |
477 | break; | | 477 | break; |
478 | } | | 478 | } |
479 | prev = pve; | | 479 | prev = pve; |
480 | } | | 480 | } |
481 | return pve; | | 481 | return pve; |
482 | } | | 482 | } |
483 | | | 483 | |
484 | /* | | 484 | /* |
485 | * other data structures | | 485 | * other data structures |
486 | */ | | 486 | */ |
487 | | | 487 | |
488 | static pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */ | | 488 | static pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */ |
489 | static bool pmap_initialized = false; /* pmap_init done yet? */ | | 489 | static bool pmap_initialized = false; /* pmap_init done yet? */ |
490 | | | 490 | |
491 | /* | | 491 | /* |
492 | * the following two vaddr_t's are used during system startup | | 492 | * the following two vaddr_t's are used during system startup |
493 | * to keep track of how much of the kernel's VM space we have used. | | 493 | * to keep track of how much of the kernel's VM space we have used. |
494 | * once the system is started, the management of the remaining kernel | | 494 | * once the system is started, the management of the remaining kernel |
495 | * VM space is turned over to the kernel_map vm_map. | | 495 | * VM space is turned over to the kernel_map vm_map. |
496 | */ | | 496 | */ |
497 | | | 497 | |
498 | static vaddr_t virtual_avail; /* VA of first free KVA */ | | 498 | static vaddr_t virtual_avail; /* VA of first free KVA */ |
499 | static vaddr_t virtual_end; /* VA of last free KVA */ | | 499 | static vaddr_t virtual_end; /* VA of last free KVA */ |
500 | | | 500 | |
501 | /* | | 501 | /* |
502 | * linked list of all non-kernel pmaps | | 502 | * linked list of all non-kernel pmaps |
503 | */ | | 503 | */ |
504 | | | 504 | |
505 | static struct pmap_head pmaps; | | 505 | static struct pmap_head pmaps; |
506 | | | 506 | |
507 | /* | | 507 | /* |
508 | * pool that pmap structures are allocated from | | 508 | * pool that pmap structures are allocated from |
509 | */ | | 509 | */ |
510 | | | 510 | |
511 | static struct pool_cache pmap_cache; | | 511 | static struct pool_cache pmap_cache; |
512 | | | 512 | |
513 | /* | | 513 | /* |
514 | * pv_entry cache | | 514 | * pv_entry cache |
515 | */ | | 515 | */ |
516 | | | 516 | |
517 | static struct pool_cache pmap_pv_cache; | | 517 | static struct pool_cache pmap_pv_cache; |
518 | | | 518 | |
519 | /* | | 519 | /* |
520 | * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a | | 520 | * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a |
521 | * maxcpus*NPTECL array of PTE's, to avoid cache line thrashing | | 521 | * maxcpus*NPTECL array of PTE's, to avoid cache line thrashing |
522 | * due to false sharing. | | 522 | * due to false sharing. |
523 | */ | | 523 | */ |
524 | | | 524 | |
525 | #ifdef MULTIPROCESSOR | | 525 | #ifdef MULTIPROCESSOR |
526 | #define PTESLEW(pte, id) ((pte)+(id)*NPTECL) | | 526 | #define PTESLEW(pte, id) ((pte)+(id)*NPTECL) |
527 | #define VASLEW(va,id) ((va)+(id)*NPTECL*PAGE_SIZE) | | 527 | #define VASLEW(va,id) ((va)+(id)*NPTECL*PAGE_SIZE) |
528 | #else | | 528 | #else |
529 | #define PTESLEW(pte, id) (pte) | | 529 | #define PTESLEW(pte, id) (pte) |
530 | #define VASLEW(va,id) (va) | | 530 | #define VASLEW(va,id) (va) |
531 | #endif | | 531 | #endif |
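In other words, each CPU gets its own group of NPTECL special PTE/VA slots, so concurrent users never share a cache line. Below is a short editor's sketch (not part of the file) of how a page-zeroing caller would pick its per-CPU slot from the zero_pte/zerop pair declared just after this; cpu_number() is the usual per-CPU id.

/*
 * Editor's sketch: select this CPU's private zeroing PTE and VA so
 * that CPUs do not false-share the special-VA cache lines.
 */
static void
example_pick_zero_slot(pt_entry_t **ptep, char **vap)
{
	int id = cpu_number();

	*ptep = PTESLEW(zero_pte, id);
	*vap = VASLEW(zerop, id);
}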
532 | | | 532 | |
533 | /* | | 533 | /* |
534 | * special VAs and the PTEs that map them | | 534 | * special VAs and the PTEs that map them |
535 | */ | | 535 | */ |
536 | static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte, *early_zero_pte; | | 536 | static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte, *early_zero_pte; |
537 | static char *csrcp, *cdstp, *zerop, *ptpp, *early_zerop; | | 537 | static char *csrcp, *cdstp, *zerop, *ptpp, *early_zerop; |
538 | | | 538 | |
539 | int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, u_int); | | 539 | int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, u_int); |
540 | | | 540 | |
541 | /* | | 541 | /* |
542 | * pool and cache that PDPs are allocated from | | 542 | * pool and cache that PDPs are allocated from |
543 | */ | | 543 | */ |
544 | | | 544 | |
545 | static struct pool_cache pmap_pdp_cache; | | 545 | static struct pool_cache pmap_pdp_cache; |
546 | int pmap_pdp_ctor(void *, void *, int); | | 546 | int pmap_pdp_ctor(void *, void *, int); |
547 | void pmap_pdp_dtor(void *, void *); | | 547 | void pmap_pdp_dtor(void *, void *); |
548 | #ifdef PAE | | 548 | #ifdef PAE |
549 | /* need to allocate items of 4 pages */ | | 549 | /* need to allocate items of 4 pages */ |
550 | void *pmap_pdp_alloc(struct pool *, int); | | 550 | void *pmap_pdp_alloc(struct pool *, int); |
551 | void pmap_pdp_free(struct pool *, void *); | | 551 | void pmap_pdp_free(struct pool *, void *); |
552 | static struct pool_allocator pmap_pdp_allocator = { | | 552 | static struct pool_allocator pmap_pdp_allocator = { |
553 | .pa_alloc = pmap_pdp_alloc, | | 553 | .pa_alloc = pmap_pdp_alloc, |
554 | .pa_free = pmap_pdp_free, | | 554 | .pa_free = pmap_pdp_free, |
555 | .pa_pagesz = PAGE_SIZE * PDP_SIZE, | | 555 | .pa_pagesz = PAGE_SIZE * PDP_SIZE, |
556 | }; | | 556 | }; |
557 | #endif /* PAE */ | | 557 | #endif /* PAE */ |
558 | | | 558 | |
559 | void *vmmap; /* XXX: used by mem.c... it should really uvm_map_reserve it */ | | 559 | void *vmmap; /* XXX: used by mem.c... it should really uvm_map_reserve it */ |
560 | | | 560 | |
561 | extern vaddr_t idt_vaddr; /* we allocate IDT early */ | | 561 | extern vaddr_t idt_vaddr; /* we allocate IDT early */ |
562 | extern paddr_t idt_paddr; | | 562 | extern paddr_t idt_paddr; |
563 | | | 563 | |
564 | #ifdef _LP64 | | 564 | #ifdef _LP64 |
565 | extern vaddr_t lo32_vaddr; | | 565 | extern vaddr_t lo32_vaddr; |
566 | extern vaddr_t lo32_paddr; | | 566 | extern vaddr_t lo32_paddr; |
567 | #endif | | 567 | #endif |
568 | | | 568 | |
569 | extern int end; | | 569 | extern int end; |
570 | | | 570 | |
571 | #ifdef i386 | | 571 | #ifdef i386 |
572 | /* stuff to fix the pentium f00f bug */ | | 572 | /* stuff to fix the pentium f00f bug */ |
573 | extern vaddr_t pentium_idt_vaddr; | | 573 | extern vaddr_t pentium_idt_vaddr; |
574 | #endif | | 574 | #endif |
575 | | | 575 | |
576 | | | 576 | |
577 | /* | | 577 | /* |
578 | * local prototypes | | 578 | * local prototypes |
579 | */ | | 579 | */ |
580 | | | 580 | |
581 | static struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t, | | 581 | static struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t, |
582 | pd_entry_t * const *); | | 582 | pd_entry_t * const *); |
583 | static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int); | | 583 | static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int); |
584 | static void pmap_freepage(struct pmap *, struct vm_page *, int); | | 584 | static void pmap_freepage(struct pmap *, struct vm_page *, int); |
585 | static void pmap_free_ptp(struct pmap *, struct vm_page *, | | 585 | static void pmap_free_ptp(struct pmap *, struct vm_page *, |
586 | vaddr_t, pt_entry_t *, | | 586 | vaddr_t, pt_entry_t *, |
587 | pd_entry_t * const *); | | 587 | pd_entry_t * const *); |
588 | static bool pmap_is_curpmap(struct pmap *); | | 588 | static bool pmap_is_curpmap(struct pmap *); |
589 | static bool pmap_is_active(struct pmap *, struct cpu_info *, bool); | | 589 | static bool pmap_is_active(struct pmap *, struct cpu_info *, bool); |
590 | static bool pmap_remove_pte(struct pmap *, struct vm_page *, | | 590 | static bool pmap_remove_pte(struct pmap *, struct vm_page *, |
591 | pt_entry_t *, vaddr_t, | | 591 | pt_entry_t *, vaddr_t, |
592 | struct pv_entry **); | | 592 | struct pv_entry **); |
593 | static pt_entry_t pmap_remove_ptes(struct pmap *, struct vm_page *, | | 593 | static pt_entry_t pmap_remove_ptes(struct pmap *, struct vm_page *, |
594 | vaddr_t, vaddr_t, vaddr_t, | | 594 | vaddr_t, vaddr_t, vaddr_t, |
595 | struct pv_entry **); | | 595 | struct pv_entry **); |
596 | | | 596 | |
597 | static void pmap_unmap_apdp(void); | | 597 | static void pmap_unmap_apdp(void); |
598 | static bool pmap_get_physpage(vaddr_t, int, paddr_t *); | | 598 | static bool pmap_get_physpage(vaddr_t, int, paddr_t *); |
599 | static void pmap_alloc_level(pd_entry_t * const *, vaddr_t, int, | | 599 | static void pmap_alloc_level(pd_entry_t * const *, vaddr_t, int, |
600 | long *); | | 600 | long *); |
601 | | | 601 | |
602 | static bool pmap_reactivate(struct pmap *); | | 602 | static bool pmap_reactivate(struct pmap *); |
603 | | | 603 | |
604 | /* | | 604 | /* |
605 | * p m a p h e l p e r f u n c t i o n s | | 605 | * p m a p h e l p e r f u n c t i o n s |
606 | */ | | 606 | */ |
607 | | | 607 | |
608 | static inline void | | 608 | static inline void |
609 | pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff) | | 609 | pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff) |
610 | { | | 610 | { |
611 | | | 611 | |
612 | if (pmap == pmap_kernel()) { | | 612 | if (pmap == pmap_kernel()) { |
613 | atomic_add_long(&pmap->pm_stats.resident_count, resid_diff); | | 613 | atomic_add_long(&pmap->pm_stats.resident_count, resid_diff); |
614 | atomic_add_long(&pmap->pm_stats.wired_count, wired_diff); | | 614 | atomic_add_long(&pmap->pm_stats.wired_count, wired_diff); |
615 | } else { | | 615 | } else { |
616 | KASSERT(mutex_owned(&pmap->pm_lock)); | | 616 | KASSERT(mutex_owned(&pmap->pm_lock)); |
617 | pmap->pm_stats.resident_count += resid_diff; | | 617 | pmap->pm_stats.resident_count += resid_diff; |
618 | pmap->pm_stats.wired_count += wired_diff; | | 618 | pmap->pm_stats.wired_count += wired_diff; |
619 | } | | 619 | } |
620 | } | | 620 | } |
621 | | | 621 | |
622 | static inline void | | 622 | static inline void |
623 | pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte) | | 623 | pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte) |
624 | { | | 624 | { |
625 | int resid_diff = ((npte & PG_V) ? 1 : 0) - ((opte & PG_V) ? 1 : 0); | | 625 | int resid_diff = ((npte & PG_V) ? 1 : 0) - ((opte & PG_V) ? 1 : 0); |
626 | int wired_diff = ((npte & PG_W) ? 1 : 0) - ((opte & PG_W) ? 1 : 0); | | 626 | int wired_diff = ((npte & PG_W) ? 1 : 0) - ((opte & PG_W) ? 1 : 0); |
627 | | | 627 | |
628 | KASSERT((npte & (PG_V | PG_W)) != PG_W); | | 628 | KASSERT((npte & (PG_V | PG_W)) != PG_W); |
629 | KASSERT((opte & (PG_V | PG_W)) != PG_W); | | 629 | KASSERT((opte & (PG_V | PG_W)) != PG_W); |
630 | | | 630 | |
631 | pmap_stats_update(pmap, resid_diff, wired_diff); | | 631 | pmap_stats_update(pmap, resid_diff, wired_diff); |
632 | } | | 632 | } |
633 | | | 633 | |
634 | /* | | 634 | /* |
635 | * ptp_to_pmap: lookup pmap by ptp | | 635 | * ptp_to_pmap: lookup pmap by ptp |
636 | */ | | 636 | */ |
637 | | | 637 | |
638 | static struct pmap * | | 638 | static struct pmap * |
639 | ptp_to_pmap(struct vm_page *ptp) | | 639 | ptp_to_pmap(struct vm_page *ptp) |
640 | { | | 640 | { |
641 | struct pmap *pmap; | | 641 | struct pmap *pmap; |
642 | | | 642 | |
643 | if (ptp == NULL) { | | 643 | if (ptp == NULL) { |
644 | return pmap_kernel(); | | 644 | return pmap_kernel(); |
645 | } | | 645 | } |
646 | pmap = (struct pmap *)ptp->uobject; | | 646 | pmap = (struct pmap *)ptp->uobject; |
647 | KASSERT(pmap != NULL); | | 647 | KASSERT(pmap != NULL); |
648 | KASSERT(&pmap->pm_obj[0] == ptp->uobject); | | 648 | KASSERT(&pmap->pm_obj[0] == ptp->uobject); |
649 | return pmap; | | 649 | return pmap; |
650 | } | | 650 | } |
651 | | | 651 | |
652 | static inline struct pv_pte * | | 652 | static inline struct pv_pte * |
653 | pve_to_pvpte(struct pv_entry *pve) | | 653 | pve_to_pvpte(struct pv_entry *pve) |
654 | { | | 654 | { |
655 | | | 655 | |
656 | KASSERT((void *)&pve->pve_pte == (void *)pve); | | 656 | KASSERT((void *)&pve->pve_pte == (void *)pve); |
657 | return &pve->pve_pte; | | 657 | return &pve->pve_pte; |
658 | } | | 658 | } |
659 | | | 659 | |
660 | static inline struct pv_entry * | | 660 | static inline struct pv_entry * |
661 | pvpte_to_pve(struct pv_pte *pvpte) | | 661 | pvpte_to_pve(struct pv_pte *pvpte) |
662 | { | | 662 | { |
663 | struct pv_entry *pve = (void *)pvpte; | | 663 | struct pv_entry *pve = (void *)pvpte; |
664 | | | 664 | |
665 | KASSERT(pve_to_pvpte(pve) == pvpte); | | 665 | KASSERT(pve_to_pvpte(pve) == pvpte); |
666 | return pve; | | 666 | return pve; |
667 | } | | 667 | } |
668 | | | 668 | |
669 | /* | | 669 | /* |
670 | * pv_pte_first, pv_pte_next: PV list iterator. | | 670 | * pv_pte_first, pv_pte_next: PV list iterator. |
671 | */ | | 671 | */ |
672 | | | 672 | |
673 | static struct pv_pte * | | 673 | static struct pv_pte * |
674 | pv_pte_first(struct pmap_page *pp) | | 674 | pv_pte_first(struct pmap_page *pp) |
675 | { | | 675 | { |
676 | | | 676 | |
677 | KASSERT(pp_locked(pp)); | | 677 | KASSERT(pp_locked(pp)); |
678 | if ((pp->pp_flags & PP_EMBEDDED) != 0) { | | 678 | if ((pp->pp_flags & PP_EMBEDDED) != 0) { |
679 | return &pp->pp_pte; | | 679 | return &pp->pp_pte; |
680 | } | | 680 | } |
681 | return pve_to_pvpte(LIST_FIRST(&pp->pp_head.pvh_list)); | | 681 | return pve_to_pvpte(LIST_FIRST(&pp->pp_head.pvh_list)); |
682 | } | | 682 | } |
683 | | | 683 | |
684 | static struct pv_pte * | | 684 | static struct pv_pte * |
685 | pv_pte_next(struct pmap_page *pp, struct pv_pte *pvpte) | | 685 | pv_pte_next(struct pmap_page *pp, struct pv_pte *pvpte) |
686 | { | | 686 | { |
687 | | | 687 | |
688 | KASSERT(pvpte != NULL); | | 688 | KASSERT(pvpte != NULL); |
689 | KASSERT(pp_locked(pp)); | | 689 | KASSERT(pp_locked(pp)); |
690 | if (pvpte == &pp->pp_pte) { | | 690 | if (pvpte == &pp->pp_pte) { |
691 | KASSERT((pp->pp_flags & PP_EMBEDDED) != 0); | | 691 | KASSERT((pp->pp_flags & PP_EMBEDDED) != 0); |
692 | return NULL; | | 692 | return NULL; |
693 | } | | 693 | } |
694 | KASSERT((pp->pp_flags & PP_EMBEDDED) == 0); | | 694 | KASSERT((pp->pp_flags & PP_EMBEDDED) == 0); |
695 | return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list)); | | 695 | return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list)); |
696 | } | | 696 | } |
697 | | | 697 | |
698 | /* | | 698 | /* |
699 | * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]? | | 699 | * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]? |
700 | * of course the kernel is always loaded | | 700 | * of course the kernel is always loaded |
701 | */ | | 701 | */ |
702 | | | 702 | |
703 | inline static bool | | 703 | inline static bool |
704 | pmap_is_curpmap(struct pmap *pmap) | | 704 | pmap_is_curpmap(struct pmap *pmap) |
705 | { | | 705 | { |
706 | #if defined(XEN) && defined(__x86_64__) | | 706 | #if defined(XEN) && defined(__x86_64__) |
707 | /* | | 707 | /* |
708 | * Only kernel pmap is physically loaded. | | 708 | * Only kernel pmap is physically loaded. |
709 | * User PGD may be active, but TLB will be flushed | | 709 | * User PGD may be active, but TLB will be flushed |
710 | * with HYPERVISOR_iret anyway, so let's say no | | 710 | * with HYPERVISOR_iret anyway, so let's say no |
711 | */ | | 711 | */ |
712 | return(pmap == pmap_kernel()); | | 712 | return(pmap == pmap_kernel()); |
713 | #else /* XEN && __x86_64__*/ | | 713 | #else /* XEN && __x86_64__*/ |
714 | return((pmap == pmap_kernel()) || | | 714 | return((pmap == pmap_kernel()) || |
715 | (pmap == curcpu()->ci_pmap)); | | 715 | (pmap == curcpu()->ci_pmap)); |
716 | #endif | | 716 | #endif |
717 | } | | 717 | } |
718 | | | 718 | |
719 | /* | | 719 | /* |
720 | * pmap_is_active: is this pmap loaded into the specified processor's %cr3? | | 720 | * pmap_is_active: is this pmap loaded into the specified processor's %cr3? |
721 | */ | | 721 | */ |
722 | | | 722 | |
723 | inline static bool | | 723 | inline static bool |
724 | pmap_is_active(struct pmap *pmap, struct cpu_info *ci, bool kernel) | | 724 | pmap_is_active(struct pmap *pmap, struct cpu_info *ci, bool kernel) |
725 | { | | 725 | { |
726 | | | 726 | |
727 | return (pmap == pmap_kernel() || | | 727 | return (pmap == pmap_kernel() || |
728 | (pmap->pm_cpus & ci->ci_cpumask) != 0 || | | 728 | (pmap->pm_cpus & ci->ci_cpumask) != 0 || |
729 | (kernel && (pmap->pm_kernel_cpus & ci->ci_cpumask) != 0)); | | 729 | (kernel && (pmap->pm_kernel_cpus & ci->ci_cpumask) != 0)); |
730 | } | | 730 | } |
731 | | | 731 | |
732 | static void | | 732 | static void |
733 | pmap_apte_flush(struct pmap *pmap) | | 733 | pmap_apte_flush(struct pmap *pmap) |
734 | { | | 734 | { |
735 | | | 735 | |
736 | KASSERT(kpreempt_disabled()); | | 736 | KASSERT(kpreempt_disabled()); |
737 | | | 737 | |
738 | /* | | 738 | /* |
739 | * Flush the APTE mapping from all other CPUs that | | 739 | * Flush the APTE mapping from all other CPUs that |
740 | 	 * are using the pmap we are using (whose APTE space | | 740 | 	 * are using the pmap we are using (whose APTE space |
741 | * is the one we've just modified). | | 741 | * is the one we've just modified). |
742 | * | | 742 | * |
743 | * XXXthorpej -- find a way to defer the IPI. | | 743 | * XXXthorpej -- find a way to defer the IPI. |
744 | */ | | 744 | */ |
745 | pmap_tlb_shootdown(pmap, (vaddr_t)-1LL, 0, 0); | | 745 | pmap_tlb_shootdown(pmap, (vaddr_t)-1LL, 0, 0); |
746 | pmap_tlb_shootwait(); | | 746 | pmap_tlb_shootwait(); |
747 | } | | 747 | } |
748 | | | 748 | |
749 | /* | | 749 | /* |
750 | * Unmap the content of APDP PDEs | | 750 | * Unmap the content of APDP PDEs |
751 | */ | | 751 | */ |
752 | static void | | 752 | static void |
753 | pmap_unmap_apdp(void) | | 753 | pmap_unmap_apdp(void) |
754 | { | | 754 | { |
755 | int i; | | 755 | int i; |
756 | | | 756 | |
757 | for (i = 0; i < PDP_SIZE; i++) { | | 757 | for (i = 0; i < PDP_SIZE; i++) { |
758 | pmap_pte_set(APDP_PDE+i, 0); | | 758 | pmap_pte_set(APDP_PDE+i, 0); |
759 | #if defined (XEN) && defined (PAE) | | 759 | #if defined (XEN) && defined (PAE) |
760 | /* clear shadow entries too */ | | 760 | /* clear shadow entries too */ |
761 | pmap_pte_set(APDP_PDE_SHADOW+i, 0); | | 761 | pmap_pte_set(APDP_PDE_SHADOW+i, 0); |
762 | #endif | | 762 | #endif |
763 | } | | 763 | } |
764 | } | | 764 | } |
765 | | | 765 | |
766 | /* | | 766 | /* |
767 | * Add a reference to the specified pmap. | | 767 | * Add a reference to the specified pmap. |
768 | */ | | 768 | */ |
769 | | | 769 | |
770 | inline void | | 770 | void |
771 | pmap_reference(struct pmap *pmap) | | 771 | pmap_reference(struct pmap *pmap) |
772 | { | | 772 | { |
773 | | | 773 | |
774 | atomic_inc_uint(&pmap->pm_obj[0].uo_refs); | | 774 | atomic_inc_uint(&pmap->pm_obj[0].uo_refs); |
775 | } | | 775 | } |
776 | | | 776 | |
777 | /* | | 777 | /* |
778 | * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in | | 778 | * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in |
779 | * | | 779 | * |
780 | * => we lock enough pmaps to keep things locked in | | 780 | * => we lock enough pmaps to keep things locked in |
781 | * => must be undone with pmap_unmap_ptes before returning | | 781 | * => must be undone with pmap_unmap_ptes before returning |
782 | */ | | 782 | */ |
783 | | | 783 | |
784 | void | | 784 | void |
785 | pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2, | | 785 | pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2, |
786 | pd_entry_t **ptepp, pd_entry_t * const **pdeppp) | | 786 | pd_entry_t **ptepp, pd_entry_t * const **pdeppp) |
787 | { | | 787 | { |
788 | pd_entry_t opde, npde; | | 788 | pd_entry_t opde, npde; |
789 | struct pmap *ourpmap; | | 789 | struct pmap *ourpmap; |
790 | struct cpu_info *ci; | | 790 | struct cpu_info *ci; |
791 | struct lwp *l; | | 791 | struct lwp *l; |
792 | bool iscurrent; | | 792 | bool iscurrent; |
793 | uint64_t ncsw; | | 793 | uint64_t ncsw; |
794 | #ifdef XEN | | 794 | #ifdef XEN |
795 | int s, i; | | 795 | int s, i; |
796 | #endif | | 796 | #endif |
797 | | | 797 | |
798 | /* the kernel's pmap is always accessible */ | | 798 | /* the kernel's pmap is always accessible */ |
799 | if (pmap == pmap_kernel()) { | | 799 | if (pmap == pmap_kernel()) { |
800 | *pmap2 = NULL; | | 800 | *pmap2 = NULL; |
801 | *ptepp = PTE_BASE; | | 801 | *ptepp = PTE_BASE; |
802 | *pdeppp = normal_pdes; | | 802 | *pdeppp = normal_pdes; |
803 | return; | | 803 | return; |
804 | } | | 804 | } |
805 | KASSERT(kpreempt_disabled()); | | 805 | KASSERT(kpreempt_disabled()); |
806 | | | 806 | |
807 | retry: | | 807 | retry: |
808 | l = curlwp; | | 808 | l = curlwp; |
809 | ncsw = l->l_ncsw; | | 809 | ncsw = l->l_ncsw; |
810 | ourpmap = NULL; | | 810 | ourpmap = NULL; |
811 | ci = curcpu(); | | 811 | ci = curcpu(); |
812 | #if defined(XEN) && defined(__x86_64__) | | 812 | #if defined(XEN) && defined(__x86_64__) |
813 | /* | | 813 | /* |
814 | * curmap can only be pmap_kernel so at this point | | 814 | * curmap can only be pmap_kernel so at this point |
815 | * pmap_is_curpmap is always false | | 815 | * pmap_is_curpmap is always false |
816 | */ | | 816 | */ |
817 | iscurrent = 0; | | 817 | iscurrent = 0; |
818 | ourpmap = pmap_kernel(); | | 818 | ourpmap = pmap_kernel(); |
819 | #else /* XEN && __x86_64__*/ | | 819 | #else /* XEN && __x86_64__*/ |
820 | if (ci->ci_want_pmapload && | | 820 | if (ci->ci_want_pmapload && |
821 | vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) { | | 821 | vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) { |
822 | pmap_load(); | | 822 | pmap_load(); |
823 | if (l->l_ncsw != ncsw) | | 823 | if (l->l_ncsw != ncsw) |
824 | goto retry; | | 824 | goto retry; |
825 | } | | 825 | } |
826 | iscurrent = pmap_is_curpmap(pmap); | | 826 | iscurrent = pmap_is_curpmap(pmap); |
827 | /* if curpmap then we are always mapped */ | | 827 | /* if curpmap then we are always mapped */ |
828 | if (iscurrent) { | | 828 | if (iscurrent) { |
829 | mutex_enter(&pmap->pm_lock); | | 829 | mutex_enter(&pmap->pm_lock); |
830 | *pmap2 = NULL; | | 830 | *pmap2 = NULL; |
831 | *ptepp = PTE_BASE; | | 831 | *ptepp = PTE_BASE; |
832 | *pdeppp = normal_pdes; | | 832 | *pdeppp = normal_pdes; |
833 | goto out; | | 833 | goto out; |
834 | } | | 834 | } |
835 | ourpmap = ci->ci_pmap; | | 835 | ourpmap = ci->ci_pmap; |
836 | #endif /* XEN && __x86_64__ */ | | 836 | #endif /* XEN && __x86_64__ */ |
837 | | | 837 | |
838 | /* need to lock both curpmap and pmap: use ordered locking */ | | 838 | /* need to lock both curpmap and pmap: use ordered locking */ |
839 | pmap_reference(ourpmap); | | 839 | pmap_reference(ourpmap); |
840 | if ((uintptr_t) pmap < (uintptr_t) ourpmap) { | | 840 | if ((uintptr_t) pmap < (uintptr_t) ourpmap) { |
841 | mutex_enter(&pmap->pm_lock); | | 841 | mutex_enter(&pmap->pm_lock); |
842 | mutex_enter(&ourpmap->pm_lock); | | 842 | mutex_enter(&ourpmap->pm_lock); |
843 | } else { | | 843 | } else { |
844 | mutex_enter(&ourpmap->pm_lock); | | 844 | mutex_enter(&ourpmap->pm_lock); |
845 | mutex_enter(&pmap->pm_lock); | | 845 | mutex_enter(&pmap->pm_lock); |
846 | } | | 846 | } |
847 | | | 847 | |
848 | if (l->l_ncsw != ncsw) | | 848 | if (l->l_ncsw != ncsw) |
849 | goto unlock_and_retry; | | 849 | goto unlock_and_retry; |
850 | | | 850 | |
851 | /* need to load a new alternate pt space into curpmap? */ | | 851 | /* need to load a new alternate pt space into curpmap? */ |
852 | COUNT(apdp_pde_map); | | 852 | COUNT(apdp_pde_map); |
853 | opde = *APDP_PDE; | | 853 | opde = *APDP_PDE; |
854 | if (!pmap_valid_entry(opde) || | | 854 | if (!pmap_valid_entry(opde) || |
855 | pmap_pte2pa(opde) != pmap_pdirpa(pmap, 0)) { | | 855 | pmap_pte2pa(opde) != pmap_pdirpa(pmap, 0)) { |
856 | #ifdef XEN | | 856 | #ifdef XEN |
857 | s = splvm(); | | 857 | s = splvm(); |
858 | /* Make recursive entry usable in user PGD */ | | 858 | /* Make recursive entry usable in user PGD */ |
859 | for (i = 0; i < PDP_SIZE; i++) { | | 859 | for (i = 0; i < PDP_SIZE; i++) { |
860 | npde = pmap_pa2pte( | | 860 | npde = pmap_pa2pte( |
861 | pmap_pdirpa(pmap, i * NPDPG)) | PG_k | PG_V; | | 861 | pmap_pdirpa(pmap, i * NPDPG)) | PG_k | PG_V; |
862 | xpq_queue_pte_update( | | 862 | xpq_queue_pte_update( |
863 | xpmap_ptom(pmap_pdirpa(pmap, PDIR_SLOT_PTE + i)), | | 863 | xpmap_ptom(pmap_pdirpa(pmap, PDIR_SLOT_PTE + i)), |
864 | npde); | | 864 | npde); |
865 | xpq_queue_pte_update(xpmap_ptetomach(&APDP_PDE[i]), | | 865 | xpq_queue_pte_update(xpmap_ptetomach(&APDP_PDE[i]), |
866 | npde); | | 866 | npde); |
867 | #ifdef PAE | | 867 | #ifdef PAE |
868 | /* update shadow entry too */ | | 868 | /* update shadow entry too */ |
869 | xpq_queue_pte_update( | | 869 | xpq_queue_pte_update( |
870 | xpmap_ptetomach(&APDP_PDE_SHADOW[i]), npde); | | 870 | xpmap_ptetomach(&APDP_PDE_SHADOW[i]), npde); |
871 | #endif /* PAE */ | | 871 | #endif /* PAE */ |
872 | xpq_queue_invlpg( | | 872 | xpq_queue_invlpg( |
873 | (vaddr_t)&pmap->pm_pdir[PDIR_SLOT_PTE + i]); | | 873 | (vaddr_t)&pmap->pm_pdir[PDIR_SLOT_PTE + i]); |
874 | } | | 874 | } |
875 | if (pmap_valid_entry(opde)) | | 875 | if (pmap_valid_entry(opde)) |
876 | pmap_apte_flush(ourpmap); | | 876 | pmap_apte_flush(ourpmap); |
877 | splx(s); | | 877 | splx(s); |
878 | #else /* XEN */ | | 878 | #else /* XEN */ |
879 | int i; | | 879 | int i; |
880 | for (i = 0; i < PDP_SIZE; i++) { | | 880 | for (i = 0; i < PDP_SIZE; i++) { |
881 | npde = pmap_pa2pte( | | 881 | npde = pmap_pa2pte( |
882 | pmap_pdirpa(pmap, i * NPDPG)) | PG_RW | PG_V; | | 882 | pmap_pdirpa(pmap, i * NPDPG)) | PG_RW | PG_V; |
883 | pmap_pte_set(APDP_PDE+i, npde); | | 883 | pmap_pte_set(APDP_PDE+i, npde); |
884 | } | | 884 | } |
885 | pmap_pte_flush(); | | 885 | pmap_pte_flush(); |
886 | if (pmap_valid_entry(opde)) | | 886 | if (pmap_valid_entry(opde)) |
887 | pmap_apte_flush(ourpmap); | | 887 | pmap_apte_flush(ourpmap); |
888 | #endif /* XEN */ | | 888 | #endif /* XEN */ |
889 | } | | 889 | } |
890 | *pmap2 = ourpmap; | | 890 | *pmap2 = ourpmap; |
891 | *ptepp = APTE_BASE; | | 891 | *ptepp = APTE_BASE; |
892 | *pdeppp = alternate_pdes; | | 892 | *pdeppp = alternate_pdes; |
893 | KASSERT(l->l_ncsw == ncsw); | | 893 | KASSERT(l->l_ncsw == ncsw); |
894 | #if !defined(XEN) || !defined(__x86_64__) | | 894 | #if !defined(XEN) || !defined(__x86_64__) |
895 | out: | | 895 | out: |
896 | #endif | | 896 | #endif |
897 | /* | | 897 | /* |
898 | * might have blocked, need to retry? | | 898 | * might have blocked, need to retry? |
899 | */ | | 899 | */ |
900 | if (l->l_ncsw != ncsw) { | | 900 | if (l->l_ncsw != ncsw) { |
901 | unlock_and_retry: | | 901 | unlock_and_retry: |
902 | if (ourpmap != NULL) { | | 902 | if (ourpmap != NULL) { |
903 | mutex_exit(&ourpmap->pm_lock); | | 903 | mutex_exit(&ourpmap->pm_lock); |
904 | pmap_destroy(ourpmap); | | 904 | pmap_destroy(ourpmap); |
905 | } | | 905 | } |
906 | mutex_exit(&pmap->pm_lock); | | 906 | mutex_exit(&pmap->pm_lock); |
907 | goto retry; | | 907 | goto retry; |
908 | } | | 908 | } |
909 | | | 909 | |
910 | return; | | 910 | return; |
911 | } | | 911 | } |
912 | | | 912 | |
913 | /* | | 913 | /* |
914 | * pmap_unmap_ptes: unlock the PTE mapping of "pmap" | | 914 | * pmap_unmap_ptes: unlock the PTE mapping of "pmap" |
915 | */ | | 915 | */ |
916 | | | 916 | |
917 | void | | 917 | void |
918 | pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2) | | 918 | pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2) |
919 | { | | 919 | { |
920 | | | 920 | |
921 | if (pmap == pmap_kernel()) { | | 921 | if (pmap == pmap_kernel()) { |
922 | return; | | 922 | return; |
923 | } | | 923 | } |
924 | KASSERT(kpreempt_disabled()); | | 924 | KASSERT(kpreempt_disabled()); |
925 | if (pmap2 == NULL) { | | 925 | if (pmap2 == NULL) { |
926 | mutex_exit(&pmap->pm_lock); | | 926 | mutex_exit(&pmap->pm_lock); |
927 | } else { | | 927 | } else { |
928 | #if defined(XEN) && defined(__x86_64__) | | 928 | #if defined(XEN) && defined(__x86_64__) |
929 | KASSERT(pmap2 == pmap_kernel()); | | 929 | KASSERT(pmap2 == pmap_kernel()); |
930 | #else | | 930 | #else |
931 | KASSERT(curcpu()->ci_pmap == pmap2); | | 931 | KASSERT(curcpu()->ci_pmap == pmap2); |
932 | #endif | | 932 | #endif |
933 | #if defined(MULTIPROCESSOR) | | 933 | #if defined(MULTIPROCESSOR) |
934 | pmap_unmap_apdp(); | | 934 | pmap_unmap_apdp(); |
935 | pmap_pte_flush(); | | 935 | pmap_pte_flush(); |
936 | pmap_apte_flush(pmap2); | | 936 | pmap_apte_flush(pmap2); |
937 | #endif | | 937 | #endif |
938 | COUNT(apdp_pde_unmap); | | 938 | COUNT(apdp_pde_unmap); |
939 | mutex_exit(&pmap->pm_lock); | | 939 | mutex_exit(&pmap->pm_lock); |
940 | mutex_exit(&pmap2->pm_lock); | | 940 | mutex_exit(&pmap2->pm_lock); |
941 | pmap_destroy(pmap2); | | 941 | pmap_destroy(pmap2); |
942 | } | | 942 | } |
943 | } | | 943 | } |
944 | | | 944 | |
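/*
 * A minimal usage sketch for the pair above: the caller disables
 * preemption, maps the target pmap's PTEs, works on them, and unmaps
 * them before returning; "pmap" stands for any user pmap here:
 *
 *	struct pmap *pmap2;
 *	pd_entry_t *ptes;
 *	pd_entry_t * const *pdes;
 *
 *	kpreempt_disable();
 *	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
 *	... inspect or modify ptes[] / pdes[] for "pmap" ...
 *	pmap_unmap_ptes(pmap, pmap2);
 *	kpreempt_enable();
 */
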
945 | inline static void | | 945 | inline static void |
946 | pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte) | | 946 | pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte) |
947 | { | | 947 | { |
948 | | | 948 | |
949 | #if !defined(__x86_64__) | | 949 | #if !defined(__x86_64__) |
950 | if (curproc == NULL || curproc->p_vmspace == NULL || | | 950 | if (curproc == NULL || curproc->p_vmspace == NULL || |
951 | pm != vm_map_pmap(&curproc->p_vmspace->vm_map)) | | 951 | pm != vm_map_pmap(&curproc->p_vmspace->vm_map)) |
952 | return; | | 952 | return; |
953 | | | 953 | |
954 | if ((opte ^ npte) & PG_X) | | 954 | if ((opte ^ npte) & PG_X) |
955 | pmap_update_pg(va); | | 955 | pmap_update_pg(va); |
956 | | | 956 | |
957 | /* | | 957 | /* |
958 | 	 * Executability was removed on the last executable change. | | 958 | 	 * Executability was removed on the last executable change. |
959 | 	 * We cannot recompute the exact code segment limit here because | | 959 | 	 * We cannot recompute the exact code segment limit here because |
960 | 	 * of locking constraints on the vm map, so reset the code segment | | 960 | 	 * of locking constraints on the vm map, so reset the code segment |
961 | 	 * to something conservative and let the trap handler set the right limit. | | 961 | 	 * to something conservative and let the trap handler set the right limit. |
962 | */ | | 962 | */ |
963 | | | 963 | |
964 | if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) { | | 964 | if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) { |
965 | struct trapframe *tf = curlwp->l_md.md_regs; | | 965 | struct trapframe *tf = curlwp->l_md.md_regs; |
966 | | | 966 | |
967 | tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); | | 967 | tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); |
968 | pm->pm_hiexec = I386_MAX_EXE_ADDR; | | 968 | pm->pm_hiexec = I386_MAX_EXE_ADDR; |
969 | } | | 969 | } |
970 | #endif /* !defined(__x86_64__) */ | | 970 | #endif /* !defined(__x86_64__) */ |
971 | } | | 971 | } |
972 | | | 972 | |
973 | #if !defined(__x86_64__) | | 973 | #if !defined(__x86_64__) |
974 | /* | | 974 | /* |
975 |  * Fix up the code segment to cover all potential executable mappings. | | 975 |  * Fix up the code segment to cover all potential executable mappings. |
976 |  * Returns 0 if no changes to the code segment were made. | | 976 |  * Returns 0 if no changes to the code segment were made. |
977 | */ | | 977 | */ |
978 | | | 978 | |
979 | int | | 979 | int |
980 | pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb) | | 980 | pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb) |
981 | { | | 981 | { |
982 | struct vm_map_entry *ent; | | 982 | struct vm_map_entry *ent; |
983 | struct pmap *pm = vm_map_pmap(map); | | 983 | struct pmap *pm = vm_map_pmap(map); |
984 | vaddr_t va = 0; | | 984 | vaddr_t va = 0; |
985 | | | 985 | |
986 | vm_map_lock_read(map); | | 986 | vm_map_lock_read(map); |
987 | for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) { | | 987 | for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) { |
988 | | | 988 | |
989 | /* | | 989 | /* |
990 | * This entry has greater va than the entries before. | | 990 | * This entry has greater va than the entries before. |
991 | * We need to make it point to the last page, not past it. | | 991 | * We need to make it point to the last page, not past it. |
992 | */ | | 992 | */ |
993 | | | 993 | |
994 | if (ent->protection & VM_PROT_EXECUTE) | | 994 | if (ent->protection & VM_PROT_EXECUTE) |
995 | va = trunc_page(ent->end) - PAGE_SIZE; | | 995 | va = trunc_page(ent->end) - PAGE_SIZE; |
996 | } | | 996 | } |
997 | vm_map_unlock_read(map); | | 997 | vm_map_unlock_read(map); |
998 | if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL)) | | 998 | if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL)) |
999 | return (0); | | 999 | return (0); |
1000 | | | 1000 | |
1001 | pm->pm_hiexec = va; | | 1001 | pm->pm_hiexec = va; |
1002 | if (pm->pm_hiexec > I386_MAX_EXE_ADDR) { | | 1002 | if (pm->pm_hiexec > I386_MAX_EXE_ADDR) { |
1003 | tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); | | 1003 | tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); |
1004 | } else { | | 1004 | } else { |
1005 | tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); | | 1005 | tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); |
1006 | return (0); | | 1006 | return (0); |
1007 | } | | 1007 | } |
1008 | return (1); | | 1008 | return (1); |
1009 | } | | 1009 | } |
1010 | #endif /* !defined(__x86_64__) */ | | 1010 | #endif /* !defined(__x86_64__) */ |
1011 | | | 1011 | |
1012 | void | | 1012 | void |
1013 | pat_init(struct cpu_info *ci) | | 1013 | pat_init(struct cpu_info *ci) |
1014 | { | | 1014 | { |
1015 | uint64_t pat; | | 1015 | uint64_t pat; |
1016 | | | 1016 | |
1017 | if (!(ci->ci_feat_val[0] & CPUID_PAT)) | | 1017 | if (!(ci->ci_feat_val[0] & CPUID_PAT)) |
1018 | return; | | 1018 | return; |
1019 | | | 1019 | |
1020 | /* We change WT to WC. Leave all other entries the default values. */ | | 1020 | /* We change WT to WC. Leave all other entries the default values. */ |
1021 | pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) | | | 1021 | pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) | |
1022 | PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) | | | 1022 | PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) | |
1023 | PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) | | | 1023 | PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) | |
1024 | PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC); | | 1024 | PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC); |
1025 | | | 1025 | |
1026 | wrmsr(MSR_CR_PAT, pat); | | 1026 | wrmsr(MSR_CR_PAT, pat); |
1027 | cpu_pat_enabled = true; | | 1027 | cpu_pat_enabled = true; |
1028 | aprint_debug_dev(ci->ci_dev, "PAT enabled\n"); | | 1028 | aprint_debug_dev(ci->ci_dev, "PAT enabled\n"); |
1029 | } | | 1029 | } |
1030 | | | 1030 | |
1031 | static pt_entry_t | | 1031 | static pt_entry_t |
1032 | pmap_pat_flags(u_int flags) | | 1032 | pmap_pat_flags(u_int flags) |
1033 | { | | 1033 | { |
1034 | u_int cacheflags = (flags & PMAP_CACHE_MASK); | | 1034 | u_int cacheflags = (flags & PMAP_CACHE_MASK); |
1035 | | | 1035 | |
1036 | if (!cpu_pat_enabled) { | | 1036 | if (!cpu_pat_enabled) { |
1037 | switch (cacheflags) { | | 1037 | switch (cacheflags) { |
1038 | case PMAP_NOCACHE: | | 1038 | case PMAP_NOCACHE: |
1039 | case PMAP_NOCACHE_OVR: | | 1039 | case PMAP_NOCACHE_OVR: |
1040 | /* results in PGC_UCMINUS on cpus which have | | 1040 | /* results in PGC_UCMINUS on cpus which have |
1041 | * the cpuid PAT but PAT "disabled" | | 1041 | * the cpuid PAT but PAT "disabled" |
1042 | */ | | 1042 | */ |
1043 | return PG_N; | | 1043 | return PG_N; |
1044 | default: | | 1044 | default: |
1045 | return 0; | | 1045 | return 0; |
1046 | } | | 1046 | } |
1047 | } | | 1047 | } |
1048 | | | 1048 | |
1049 | switch (cacheflags) { | | 1049 | switch (cacheflags) { |
1050 | case PMAP_NOCACHE: | | 1050 | case PMAP_NOCACHE: |
1051 | return PGC_UC; | | 1051 | return PGC_UC; |
1052 | case PMAP_WRITE_COMBINE: | | 1052 | case PMAP_WRITE_COMBINE: |
1053 | return PGC_WC; | | 1053 | return PGC_WC; |
1054 | case PMAP_WRITE_BACK: | | 1054 | case PMAP_WRITE_BACK: |
1055 | return PGC_WB; | | 1055 | return PGC_WB; |
1056 | case PMAP_NOCACHE_OVR: | | 1056 | case PMAP_NOCACHE_OVR: |
1057 | return PGC_UCMINUS; | | 1057 | return PGC_UCMINUS; |
1058 | } | | 1058 | } |
1059 | | | 1059 | |
1060 | return 0; | | 1060 | return 0; |
1061 | } | | 1061 | } |
1062 | | | 1062 | |
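/*
 * An illustrative sketch (va and pa are placeholders): a caller that
 * wants a write-combined kernel mapping passes one of the
 * PMAP_CACHE_MASK flags to pmap_kenter_pa(), which routes it through
 * pmap_pat_flags() above; without PAT the first switch simply falls
 * back to the default (cached) behaviour for this flag:
 *
 *	pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE,
 *	    PMAP_WRITE_COMBINE);
 */
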
1063 | /* | | 1063 | /* |
1064 | * p m a p k e n t e r f u n c t i o n s | | 1064 | * p m a p k e n t e r f u n c t i o n s |
1065 | * | | 1065 | * |
1066 | * functions to quickly enter/remove pages from the kernel address | | 1066 | * functions to quickly enter/remove pages from the kernel address |
1067 | * space. pmap_kremove is exported to MI kernel. we make use of | | 1067 | * space. pmap_kremove is exported to MI kernel. we make use of |
1068 | * the recursive PTE mappings. | | 1068 | * the recursive PTE mappings. |
1069 | */ | | 1069 | */ |
1070 | | | 1070 | |
1071 | /* | | 1071 | /* |
1072 | * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking | | 1072 | * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking |
1073 | * | | 1073 | * |
1074 | * => no need to lock anything, assume va is already allocated | | 1074 | * => no need to lock anything, assume va is already allocated |
1075 | * => should be faster than normal pmap enter function | | 1075 | * => should be faster than normal pmap enter function |
1076 | */ | | 1076 | */ |
1077 | | | 1077 | |
1078 | void | | 1078 | void |
1079 | pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) | | 1079 | pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) |
1080 | { | | 1080 | { |
1081 | pt_entry_t *pte, opte, npte; | | 1081 | pt_entry_t *pte, opte, npte; |
1082 | | | 1082 | |
1083 | KASSERT(!(prot & ~VM_PROT_ALL)); | | 1083 | KASSERT(!(prot & ~VM_PROT_ALL)); |
1084 | | | 1084 | |
1085 | if (va < VM_MIN_KERNEL_ADDRESS) | | 1085 | if (va < VM_MIN_KERNEL_ADDRESS) |
1086 | pte = vtopte(va); | | 1086 | pte = vtopte(va); |
1087 | else | | 1087 | else |
1088 | pte = kvtopte(va); | | 1088 | pte = kvtopte(va); |
1089 | #ifdef DOM0OPS | | 1089 | #ifdef DOM0OPS |
1090 | if (pa < pmap_pa_start || pa >= pmap_pa_end) { | | 1090 | if (pa < pmap_pa_start || pa >= pmap_pa_end) { |
1091 | #ifdef DEBUG | | 1091 | #ifdef DEBUG |
1092 | printk("pmap_kenter_pa: pa 0x%" PRIx64 " for va 0x%" PRIx64 | | 1092 | printk("pmap_kenter_pa: pa 0x%" PRIx64 " for va 0x%" PRIx64 |
1093 | " outside range\n", (int64_t)pa, (int64_t)va); | | 1093 | " outside range\n", (int64_t)pa, (int64_t)va); |
1094 | #endif /* DEBUG */ | | 1094 | #endif /* DEBUG */ |
1095 | npte = pa; | | 1095 | npte = pa; |
1096 | } else | | 1096 | } else |
1097 | #endif /* DOM0OPS */ | | 1097 | #endif /* DOM0OPS */ |
1098 | npte = pmap_pa2pte(pa); | | 1098 | npte = pmap_pa2pte(pa); |
1099 | npte |= protection_codes[prot] | PG_k | PG_V | pmap_pg_g; | | 1099 | npte |= protection_codes[prot] | PG_k | PG_V | pmap_pg_g; |
1100 | npte |= pmap_pat_flags(flags); | | 1100 | npte |= pmap_pat_flags(flags); |
1101 | opte = pmap_pte_testset(pte, npte); /* zap! */ | | 1101 | opte = pmap_pte_testset(pte, npte); /* zap! */ |
1102 | #if defined(DIAGNOSTIC) | | 1102 | #if defined(DIAGNOSTIC) |
1103 | /* XXX For now... */ | | 1103 | /* XXX For now... */ |
1104 | if (opte & PG_PS) | | 1104 | if (opte & PG_PS) |
1105 | panic("pmap_kenter_pa: PG_PS"); | | 1105 | panic("pmap_kenter_pa: PG_PS"); |
1106 | #endif | | 1106 | #endif |
1107 | if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) { | | 1107 | if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) { |
1108 | /* This should not happen, so no need to batch updates. */ | | 1108 | /* This should not happen, so no need to batch updates. */ |
1109 | kpreempt_disable(); | | 1109 | kpreempt_disable(); |
1110 | pmap_tlb_shootdown(pmap_kernel(), va, 0, opte); | | 1110 | pmap_tlb_shootdown(pmap_kernel(), va, 0, opte); |
1111 | kpreempt_enable(); | | 1111 | kpreempt_enable(); |
1112 | } | | 1112 | } |
1113 | } | | 1113 | } |
1114 | | | 1114 | |
1115 | void | | 1115 | void |
1116 | pmap_emap_enter(vaddr_t va, paddr_t pa, vm_prot_t prot) | | 1116 | pmap_emap_enter(vaddr_t va, paddr_t pa, vm_prot_t prot) |
1117 | { | | 1117 | { |
1118 | pt_entry_t *pte, opte, npte; | | 1118 | pt_entry_t *pte, opte, npte; |
1119 | | | 1119 | |
1120 | KASSERT((prot & ~VM_PROT_ALL) == 0); | | 1120 | KASSERT((prot & ~VM_PROT_ALL) == 0); |
1121 | pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va); | | 1121 | pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va); |
1122 | | | 1122 | |
1123 | #ifdef DOM0OPS | | 1123 | #ifdef DOM0OPS |
1124 | if (pa < pmap_pa_start || pa >= pmap_pa_end) { | | 1124 | if (pa < pmap_pa_start || pa >= pmap_pa_end) { |
1125 | npte = pa; | | 1125 | npte = pa; |
1126 | } else | | 1126 | } else |
1127 | #endif | | 1127 | #endif |
1128 | npte = pmap_pa2pte(pa); | | 1128 | npte = pmap_pa2pte(pa); |
1129 | | | 1129 | |
1131 | npte |= protection_codes[prot] | PG_k | PG_V; | | 1131 | npte |= protection_codes[prot] | PG_k | PG_V; |
1132 | opte = pmap_pte_testset(pte, npte); | | 1132 | opte = pmap_pte_testset(pte, npte); |
1133 | } | | 1133 | } |
1134 | | | 1134 | |
1135 | /* | | 1135 | /* |
1136 | * pmap_emap_sync: perform TLB flush or pmap load, if it was deferred. | | 1136 | * pmap_emap_sync: perform TLB flush or pmap load, if it was deferred. |
1137 | */ | | 1137 | */ |
1138 | void | | 1138 | void |
1139 | pmap_emap_sync(bool canload) | | 1139 | pmap_emap_sync(bool canload) |
1140 | { | | 1140 | { |
1141 | struct cpu_info *ci = curcpu(); | | 1141 | struct cpu_info *ci = curcpu(); |
1142 | struct pmap *pmap; | | 1142 | struct pmap *pmap; |
1143 | | | 1143 | |
1144 | KASSERT(kpreempt_disabled()); | | 1144 | KASSERT(kpreempt_disabled()); |
1145 | if (__predict_true(ci->ci_want_pmapload && canload)) { | | 1145 | if (__predict_true(ci->ci_want_pmapload && canload)) { |
1146 | /* | | 1146 | /* |
1147 | 		 * XXX: Hint for pmap_reactivate(), which may then skip the | | 1147 | 		 * XXX: Hint for pmap_reactivate(), which may then skip the |
1148 | 		 * TLB flush if the state has not changed. | | 1148 | 		 * TLB flush if the state has not changed. |
1149 | */ | | 1149 | */ |
1150 | pmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map); | | 1150 | pmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map); |
1151 | if (__predict_false(pmap == ci->ci_pmap)) { | | 1151 | if (__predict_false(pmap == ci->ci_pmap)) { |
1152 | const uint32_t cpumask = ci->ci_cpumask; | | 1152 | const uint32_t cpumask = ci->ci_cpumask; |
1153 | atomic_and_32(&pmap->pm_cpus, ~cpumask); | | 1153 | atomic_and_32(&pmap->pm_cpus, ~cpumask); |
1154 | } | | 1154 | } |
1155 | pmap_load(); | | 1155 | pmap_load(); |
1156 | KASSERT(ci->ci_want_pmapload == 0); | | 1156 | KASSERT(ci->ci_want_pmapload == 0); |
1157 | } else { | | 1157 | } else { |
1158 | tlbflush(); | | 1158 | tlbflush(); |
1159 | } | | 1159 | } |
1160 | | | 1160 | |
1161 | } | | 1161 | } |
1162 | | | 1162 | |
1163 | void | | 1163 | void |
1164 | pmap_emap_remove(vaddr_t sva, vsize_t len) | | 1164 | pmap_emap_remove(vaddr_t sva, vsize_t len) |
1165 | { | | 1165 | { |
1166 | 	pt_entry_t *pte, xpte = 0; | | 1166 | 	pt_entry_t *pte, xpte = 0; |
1167 | vaddr_t va, eva = sva + len; | | 1167 | vaddr_t va, eva = sva + len; |
1168 | | | 1168 | |
1169 | for (va = sva; va < eva; va += PAGE_SIZE) { | | 1169 | for (va = sva; va < eva; va += PAGE_SIZE) { |
1170 | pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va); | | 1170 | pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va); |
1171 | xpte |= pmap_pte_testset(pte, 0); | | 1171 | xpte |= pmap_pte_testset(pte, 0); |
1172 | } | | 1172 | } |
1173 | } | | 1173 | } |
1174 | | | 1174 | |
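/*
 * A rough usage sketch for the ephemeral-mapping helpers above (va and
 * pa are placeholders; the real consumers live outside this file): the
 * mapping is entered without a TLB shootdown, synced locally, used and
 * then removed, all with preemption disabled:
 *
 *	kpreempt_disable();
 *	pmap_emap_enter(va, pa, VM_PROT_READ);
 *	pmap_emap_sync(true);	... deferred flush or pmap_load ...
 *	... access the mapping at va ...
 *	pmap_emap_remove(va, PAGE_SIZE);
 *	kpreempt_enable();
 */
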
1175 | __weak_alias(pmap_kenter_ma, pmap_kenter_pa); | | 1175 | __weak_alias(pmap_kenter_ma, pmap_kenter_pa); |
1176 | | | 1176 | |
1177 | #if defined(__x86_64__) | | 1177 | #if defined(__x86_64__) |
1178 | /* | | 1178 | /* |
1179 | * Change protection for a virtual address. Local for a CPU only, don't | | 1179 | * Change protection for a virtual address. Local for a CPU only, don't |
1180 | * care about TLB shootdowns. | | 1180 | * care about TLB shootdowns. |
1181 | * | | 1181 | * |
1182 | * => must be called with preemption disabled | | 1182 | * => must be called with preemption disabled |
1183 | */ | | 1183 | */ |
1184 | void | | 1184 | void |
1185 | pmap_changeprot_local(vaddr_t va, vm_prot_t prot) | | 1185 | pmap_changeprot_local(vaddr_t va, vm_prot_t prot) |
1186 | { | | 1186 | { |
1187 | pt_entry_t *pte, opte, npte; | | 1187 | pt_entry_t *pte, opte, npte; |
1188 | | | 1188 | |
1189 | KASSERT(kpreempt_disabled()); | | 1189 | KASSERT(kpreempt_disabled()); |
1190 | | | 1190 | |
1191 | if (va < VM_MIN_KERNEL_ADDRESS) | | 1191 | if (va < VM_MIN_KERNEL_ADDRESS) |
1192 | pte = vtopte(va); | | 1192 | pte = vtopte(va); |
1193 | else | | 1193 | else |
1194 | pte = kvtopte(va); | | 1194 | pte = kvtopte(va); |
1195 | | | 1195 | |
1196 | npte = opte = *pte; | | 1196 | npte = opte = *pte; |
1197 | | | 1197 | |
1198 | if ((prot & VM_PROT_WRITE) != 0) | | 1198 | if ((prot & VM_PROT_WRITE) != 0) |
1199 | npte |= PG_RW; | | 1199 | npte |= PG_RW; |
1200 | else | | 1200 | else |
1201 | npte &= ~PG_RW; | | 1201 | npte &= ~PG_RW; |
1202 | | | 1202 | |
1203 | if (opte != npte) { | | 1203 | if (opte != npte) { |
1204 | pmap_pte_set(pte, npte); | | 1204 | pmap_pte_set(pte, npte); |
1205 | pmap_pte_flush(); | | 1205 | pmap_pte_flush(); |
1206 | invlpg(va); | | 1206 | invlpg(va); |
1207 | } | | 1207 | } |
1208 | } | | 1208 | } |
1209 | #endif /* defined(__x86_64__) */ | | 1209 | #endif /* defined(__x86_64__) */ |
1210 | | | 1210 | |
1211 | /* | | 1211 | /* |
1212 | * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking | | 1212 | * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking |
1213 | * | | 1213 | * |
1214 | * => no need to lock anything | | 1214 | * => no need to lock anything |
1215 | * => caller must dispose of any vm_page mapped in the va range | | 1215 | * => caller must dispose of any vm_page mapped in the va range |
1216 | * => note: not an inline function | | 1216 | * => note: not an inline function |
1217 | * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE | | 1217 | * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE |
1218 | * => we assume kernel only unmaps valid addresses and thus don't bother | | 1218 | * => we assume kernel only unmaps valid addresses and thus don't bother |
1219 | * checking the valid bit before doing TLB flushing | | 1219 | * checking the valid bit before doing TLB flushing |
1220 | * => must be followed by call to pmap_update() before reuse of page | | 1220 | * => must be followed by call to pmap_update() before reuse of page |
1221 | */ | | 1221 | */ |
1222 | | | 1222 | |
1223 | void | | 1223 | void |
1224 | pmap_kremove(vaddr_t sva, vsize_t len) | | 1224 | pmap_kremove(vaddr_t sva, vsize_t len) |
1225 | { | | 1225 | { |
1226 | pt_entry_t *pte, xpte; | | 1226 | pt_entry_t *pte, xpte; |
1227 | vaddr_t va, eva; | | 1227 | vaddr_t va, eva; |
1228 | | | 1228 | |
1229 | eva = sva + len; | | 1229 | eva = sva + len; |
1230 | xpte = 0; | | 1230 | xpte = 0; |
1231 | | | 1231 | |
1232 | for (va = sva; va < eva; va += PAGE_SIZE) { | | 1232 | for (va = sva; va < eva; va += PAGE_SIZE) { |
1233 | if (va < VM_MIN_KERNEL_ADDRESS) | | 1233 | if (va < VM_MIN_KERNEL_ADDRESS) |
1234 | pte = vtopte(va); | | 1234 | pte = vtopte(va); |
1235 | else | | 1235 | else |
1236 | pte = kvtopte(va); | | 1236 | pte = kvtopte(va); |
1237 | xpte |= pmap_pte_testset(pte, 0); /* zap! */ | | 1237 | xpte |= pmap_pte_testset(pte, 0); /* zap! */ |
1238 | #if defined(DIAGNOSTIC) | | 1238 | #if defined(DIAGNOSTIC) |
1239 | /* XXX For now... */ | | 1239 | /* XXX For now... */ |
1240 | if (xpte & PG_PS) | | 1240 | if (xpte & PG_PS) |
1241 | panic("pmap_kremove: PG_PS"); | | 1241 | panic("pmap_kremove: PG_PS"); |
1242 | if (xpte & PG_PVLIST) | | 1242 | if (xpte & PG_PVLIST) |
1243 | panic("pmap_kremove: PG_PVLIST mapping for 0x%lx", | | 1243 | panic("pmap_kremove: PG_PVLIST mapping for 0x%lx", |
1244 | va); | | 1244 | va); |
1245 | #endif | | 1245 | #endif |
1246 | } | | 1246 | } |
1247 | if ((xpte & (PG_V | PG_U)) == (PG_V | PG_U)) { | | 1247 | if ((xpte & (PG_V | PG_U)) == (PG_V | PG_U)) { |
1248 | kpreempt_disable(); | | 1248 | kpreempt_disable(); |
1249 | pmap_tlb_shootdown(pmap_kernel(), sva, eva, xpte); | | 1249 | pmap_tlb_shootdown(pmap_kernel(), sva, eva, xpte); |
1250 | kpreempt_enable(); | | 1250 | kpreempt_enable(); |
1251 | } | | 1251 | } |
1252 | } | | 1252 | } |
1253 | | | 1253 | |
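/*
 * A minimal usage sketch (va and pa are placeholders): mappings entered
 * with pmap_kenter_pa() carry no pv_entry tracking, so they are torn
 * down with pmap_kremove(), and per the notes above the page may only
 * be reused after pmap_update():
 *
 *	pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
 *	... use the PAGE_SIZE mapping at va ...
 *	pmap_kremove(va, PAGE_SIZE);
 *	pmap_update(pmap_kernel());
 */
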
1254 | /* | | 1254 | /* |
1255 | * p m a p i n i t f u n c t i o n s | | 1255 | * p m a p i n i t f u n c t i o n s |
1256 | * | | 1256 | * |
1257 | * pmap_bootstrap and pmap_init are called during system startup | | 1257 | * pmap_bootstrap and pmap_init are called during system startup |
1258 | * to init the pmap module. pmap_bootstrap() does a low level | | 1258 | * to init the pmap module. pmap_bootstrap() does a low level |
1259 | * init just to get things rolling. pmap_init() finishes the job. | | 1259 | * init just to get things rolling. pmap_init() finishes the job. |
1260 | */ | | 1260 | */ |
1261 | | | 1261 | |
1262 | /* | | 1262 | /* |
1263 | * pmap_bootstrap: get the system in a state where it can run with VM | | 1263 | * pmap_bootstrap: get the system in a state where it can run with VM |
1264 | * properly enabled (called before main()). the VM system is | | 1264 | * properly enabled (called before main()). the VM system is |
1265 | * fully init'd later... | | 1265 | * fully init'd later... |
1266 | * | | 1266 | * |
1267 | * => on i386, locore.s has already enabled the MMU by allocating | | 1267 | * => on i386, locore.s has already enabled the MMU by allocating |
1268 | * a PDP for the kernel, and nkpde PTP's for the kernel. | | 1268 | * a PDP for the kernel, and nkpde PTP's for the kernel. |
1269 | * => kva_start is the first free virtual address in kernel space | | 1269 | * => kva_start is the first free virtual address in kernel space |
1270 | */ | | 1270 | */ |
1271 | | | 1271 | |
1272 | void | | 1272 | void |
1273 | pmap_bootstrap(vaddr_t kva_start) | | 1273 | pmap_bootstrap(vaddr_t kva_start) |
1274 | { | | 1274 | { |
1275 | struct pmap *kpm; | | 1275 | struct pmap *kpm; |
1276 | pt_entry_t *pte; | | 1276 | pt_entry_t *pte; |
1277 | int i; | | 1277 | int i; |
1278 | vaddr_t kva; | | 1278 | vaddr_t kva; |
1279 | #ifndef XEN | | 1279 | #ifndef XEN |
1280 | unsigned long p1i; | | 1280 | unsigned long p1i; |
1281 | vaddr_t kva_end; | | 1281 | vaddr_t kva_end; |
1282 | #endif | | 1282 | #endif |
1283 | | | 1283 | |
1284 | pt_entry_t pg_nx = (cpu_feature[2] & CPUID_NOX ? PG_NX : 0); | | 1284 | pt_entry_t pg_nx = (cpu_feature[2] & CPUID_NOX ? PG_NX : 0); |
1285 | | | 1285 | |
1286 | /* | | 1286 | /* |
1287 | * set up our local static global vars that keep track of the | | 1287 | * set up our local static global vars that keep track of the |
1288 | * usage of KVM before kernel_map is set up | | 1288 | * usage of KVM before kernel_map is set up |
1289 | */ | | 1289 | */ |
1290 | | | 1290 | |
1291 | virtual_avail = kva_start; /* first free KVA */ | | 1291 | virtual_avail = kva_start; /* first free KVA */ |
1292 | virtual_end = VM_MAX_KERNEL_ADDRESS; /* last KVA */ | | 1292 | virtual_end = VM_MAX_KERNEL_ADDRESS; /* last KVA */ |
1293 | | | 1293 | |
1294 | /* | | 1294 | /* |
1295 | * set up protection_codes: we need to be able to convert from | | 1295 | * set up protection_codes: we need to be able to convert from |
1296 | * a MI protection code (some combo of VM_PROT...) to something | | 1296 | * a MI protection code (some combo of VM_PROT...) to something |
1297 | * we can jam into a i386 PTE. | | 1297 | * we can jam into a i386 PTE. |
1298 | */ | | 1298 | */ |
1299 | | | 1299 | |
1300 | protection_codes[VM_PROT_NONE] = pg_nx; /* --- */ | | 1300 | protection_codes[VM_PROT_NONE] = pg_nx; /* --- */ |
1301 | protection_codes[VM_PROT_EXECUTE] = PG_RO | PG_X; /* --x */ | | 1301 | protection_codes[VM_PROT_EXECUTE] = PG_RO | PG_X; /* --x */ |
1302 | protection_codes[VM_PROT_READ] = PG_RO | pg_nx; /* -r- */ | | 1302 | protection_codes[VM_PROT_READ] = PG_RO | pg_nx; /* -r- */ |
1303 | protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO | PG_X;/* -rx */ | | 1303 | protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO | PG_X;/* -rx */ |
1304 | protection_codes[VM_PROT_WRITE] = PG_RW | pg_nx; /* w-- */ | | 1304 | protection_codes[VM_PROT_WRITE] = PG_RW | pg_nx; /* w-- */ |
1305 | protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW | PG_X;/* w-x */ | | 1305 | protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW | PG_X;/* w-x */ |
1306 | protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW | pg_nx; | | 1306 | protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW | pg_nx; |
1307 | /* wr- */ | | 1307 | /* wr- */ |
1308 | protection_codes[VM_PROT_ALL] = PG_RW | PG_X; /* wrx */ | | 1308 | protection_codes[VM_PROT_ALL] = PG_RW | PG_X; /* wrx */ |
1309 | | | 1309 | |
1310 | /* | | 1310 | /* |
1311 | * now we init the kernel's pmap | | 1311 | * now we init the kernel's pmap |
1312 | * | | 1312 | * |
1313 | * the kernel pmap's pm_obj is not used for much. however, in | | 1313 | * the kernel pmap's pm_obj is not used for much. however, in |
1314 | * user pmaps the pm_obj contains the list of active PTPs. | | 1314 | * user pmaps the pm_obj contains the list of active PTPs. |
1315 | * the pm_obj currently does not have a pager. it might be possible | | 1315 | * the pm_obj currently does not have a pager. it might be possible |
1316 | * to add a pager that would allow a process to read-only mmap its | | 1316 | * to add a pager that would allow a process to read-only mmap its |
1317 | * own page tables (fast user level vtophys?). this may or may not | | 1317 | * own page tables (fast user level vtophys?). this may or may not |
1318 | * be useful. | | 1318 | * be useful. |
1319 | */ | | 1319 | */ |
1320 | | | 1320 | |
1321 | kpm = pmap_kernel(); | | 1321 | kpm = pmap_kernel(); |
1322 | for (i = 0; i < PTP_LEVELS - 1; i++) { | | 1322 | for (i = 0; i < PTP_LEVELS - 1; i++) { |
1323 | UVM_OBJ_INIT(&kpm->pm_obj[i], NULL, 1); | | 1323 | UVM_OBJ_INIT(&kpm->pm_obj[i], NULL, 1); |
1324 | kpm->pm_ptphint[i] = NULL; | | 1324 | kpm->pm_ptphint[i] = NULL; |
1325 | } | | 1325 | } |
1326 | memset(&kpm->pm_list, 0, sizeof(kpm->pm_list)); /* pm_list not used */ | | 1326 | memset(&kpm->pm_list, 0, sizeof(kpm->pm_list)); /* pm_list not used */ |
1327 | | | 1327 | |
1328 | kpm->pm_pdir = (pd_entry_t *)(PDPpaddr + KERNBASE); | | 1328 | kpm->pm_pdir = (pd_entry_t *)(PDPpaddr + KERNBASE); |
1329 | for (i = 0; i < PDP_SIZE; i++) | | 1329 | for (i = 0; i < PDP_SIZE; i++) |
1330 | kpm->pm_pdirpa[i] = PDPpaddr + PAGE_SIZE * i; | | 1330 | kpm->pm_pdirpa[i] = PDPpaddr + PAGE_SIZE * i; |
1331 | | | 1331 | |
1332 | kpm->pm_stats.wired_count = kpm->pm_stats.resident_count = | | 1332 | kpm->pm_stats.wired_count = kpm->pm_stats.resident_count = |
1333 | x86_btop(kva_start - VM_MIN_KERNEL_ADDRESS); | | 1333 | x86_btop(kva_start - VM_MIN_KERNEL_ADDRESS); |
1334 | | | 1334 | |
1335 | /* | | 1335 | /* |
1336 | * the above is just a rough estimate and not critical to the proper | | 1336 | * the above is just a rough estimate and not critical to the proper |
1337 | * operation of the system. | | 1337 | * operation of the system. |
1338 | */ | | 1338 | */ |
1339 | | | 1339 | |
1340 | #ifndef XEN | | 1340 | #ifndef XEN |
1341 | /* | | 1341 | /* |
1342 | * Begin to enable global TLB entries if they are supported. | | 1342 | * Begin to enable global TLB entries if they are supported. |
1343 | * The G bit has no effect until the CR4_PGE bit is set in CR4, | | 1343 | * The G bit has no effect until the CR4_PGE bit is set in CR4, |
1344 | * which happens in cpu_init(), which is run on each cpu | | 1344 | * which happens in cpu_init(), which is run on each cpu |
1345 | * (and happens later) | | 1345 | * (and happens later) |
1346 | */ | | 1346 | */ |
1347 | | | 1347 | |
1348 | if (cpu_feature[0] & CPUID_PGE) { | | 1348 | if (cpu_feature[0] & CPUID_PGE) { |
1349 | pmap_pg_g = PG_G; /* enable software */ | | 1349 | pmap_pg_g = PG_G; /* enable software */ |
1350 | | | 1350 | |
1351 | /* add PG_G attribute to already mapped kernel pages */ | | 1351 | /* add PG_G attribute to already mapped kernel pages */ |
1352 | if (KERNBASE == VM_MIN_KERNEL_ADDRESS) { | | 1352 | if (KERNBASE == VM_MIN_KERNEL_ADDRESS) { |
1353 | kva_end = virtual_avail; | | 1353 | kva_end = virtual_avail; |
1354 | } else { | | 1354 | } else { |
1355 | extern vaddr_t eblob, esym; | | 1355 | extern vaddr_t eblob, esym; |
1356 | kva_end = (vaddr_t)&end; | | 1356 | kva_end = (vaddr_t)&end; |
1357 | if (esym > kva_end) | | 1357 | if (esym > kva_end) |
1358 | kva_end = esym; | | 1358 | kva_end = esym; |
1359 | if (eblob > kva_end) | | 1359 | if (eblob > kva_end) |
1360 | kva_end = eblob; | | 1360 | kva_end = eblob; |
1361 | kva_end = roundup(kva_end, PAGE_SIZE); | | 1361 | kva_end = roundup(kva_end, PAGE_SIZE); |
1362 | } | | 1362 | } |
1363 | for (kva = KERNBASE; kva < kva_end; kva += PAGE_SIZE) { | | 1363 | for (kva = KERNBASE; kva < kva_end; kva += PAGE_SIZE) { |
1364 | p1i = pl1_i(kva); | | 1364 | p1i = pl1_i(kva); |
1365 | if (pmap_valid_entry(PTE_BASE[p1i])) | | 1365 | if (pmap_valid_entry(PTE_BASE[p1i])) |
1366 | PTE_BASE[p1i] |= PG_G; | | 1366 | PTE_BASE[p1i] |= PG_G; |
1367 | } | | 1367 | } |
1368 | } | | 1368 | } |
1369 | | | 1369 | |
1370 | /* | | 1370 | /* |
1371 | * enable large pages if they are supported. | | 1371 | * enable large pages if they are supported. |
1372 | */ | | 1372 | */ |
1373 | | | 1373 | |
1374 | if (cpu_feature[0] & CPUID_PSE) { | | 1374 | if (cpu_feature[0] & CPUID_PSE) { |
1375 | paddr_t pa; | | 1375 | paddr_t pa; |
1376 | pd_entry_t *pde; | | 1376 | pd_entry_t *pde; |
1377 | extern char __data_start; | | 1377 | extern char __data_start; |
1378 | | | 1378 | |
1379 | lcr4(rcr4() | CR4_PSE); /* enable hardware (via %cr4) */ | | 1379 | lcr4(rcr4() | CR4_PSE); /* enable hardware (via %cr4) */ |
1380 | pmap_largepages = 1; /* enable software */ | | 1380 | pmap_largepages = 1; /* enable software */ |
1381 | | | 1381 | |
1382 | /* | | 1382 | /* |
1383 | * the TLB must be flushed after enabling large pages | | 1383 | * the TLB must be flushed after enabling large pages |
1384 | * on Pentium CPUs, according to section 3.6.2.2 of | | 1384 | * on Pentium CPUs, according to section 3.6.2.2 of |
1385 | * "Intel Architecture Software Developer's Manual, | | 1385 | * "Intel Architecture Software Developer's Manual, |
1386 | * Volume 3: System Programming". | | 1386 | * Volume 3: System Programming". |
1387 | */ | | 1387 | */ |
1388 | tlbflush(); | | 1388 | tlbflush(); |
1389 | | | 1389 | |
1390 | /* | | 1390 | /* |
1391 | * now, remap the kernel text using large pages. we | | 1391 | * now, remap the kernel text using large pages. we |
1392 | * assume that the linker has properly aligned the | | 1392 | * assume that the linker has properly aligned the |
1393 | * .data segment to a NBPD_L2 boundary. | | 1393 | * .data segment to a NBPD_L2 boundary. |
1394 | */ | | 1394 | */ |
1395 | kva_end = rounddown((vaddr_t)&__data_start, NBPD_L1); | | 1395 | kva_end = rounddown((vaddr_t)&__data_start, NBPD_L1); |
1396 | for (pa = 0, kva = KERNBASE; kva + NBPD_L2 <= kva_end; | | 1396 | for (pa = 0, kva = KERNBASE; kva + NBPD_L2 <= kva_end; |
1397 | kva += NBPD_L2, pa += NBPD_L2) { | | 1397 | kva += NBPD_L2, pa += NBPD_L2) { |
1398 | pde = &L2_BASE[pl2_i(kva)]; | | 1398 | pde = &L2_BASE[pl2_i(kva)]; |
1399 | *pde = pa | pmap_pg_g | PG_PS | | | 1399 | *pde = pa | pmap_pg_g | PG_PS | |
1400 | PG_KR | PG_V; /* zap! */ | | 1400 | PG_KR | PG_V; /* zap! */ |
1401 | tlbflush(); | | 1401 | tlbflush(); |
1402 | } | | 1402 | } |
1403 | #if defined(DEBUG) | | 1403 | #if defined(DEBUG) |
1404 | aprint_normal("kernel text is mapped with %" PRIuPSIZE " large " | | 1404 | aprint_normal("kernel text is mapped with %" PRIuPSIZE " large " |
1405 | "pages and %" PRIuPSIZE " normal pages\n", | | 1405 | "pages and %" PRIuPSIZE " normal pages\n", |
1406 | howmany(kva - KERNBASE, NBPD_L2), | | 1406 | howmany(kva - KERNBASE, NBPD_L2), |
1407 | howmany((vaddr_t)&__data_start - kva, NBPD_L1)); | | 1407 | howmany((vaddr_t)&__data_start - kva, NBPD_L1)); |
1408 | #endif /* defined(DEBUG) */ | | 1408 | #endif /* defined(DEBUG) */ |
1409 | } | | 1409 | } |
1410 | #endif /* !XEN */ | | 1410 | #endif /* !XEN */ |
1411 | | | 1411 | |
1412 | if (VM_MIN_KERNEL_ADDRESS != KERNBASE) { | | 1412 | if (VM_MIN_KERNEL_ADDRESS != KERNBASE) { |
1413 | /* | | 1413 | /* |
1414 | * zero_pte is stuck at the end of mapped space for the kernel | | 1414 | * zero_pte is stuck at the end of mapped space for the kernel |
1415 | * image (disjunct from kva space). This is done so that it | | 1415 | * image (disjunct from kva space). This is done so that it |
1416 | * can safely be used in pmap_growkernel (pmap_get_physpage), | | 1416 | * can safely be used in pmap_growkernel (pmap_get_physpage), |
1417 | * when it's called for the first time. | | 1417 | * when it's called for the first time. |
1418 | * XXXfvdl fix this for MULTIPROCESSOR later. | | 1418 | * XXXfvdl fix this for MULTIPROCESSOR later. |
1419 | */ | | 1419 | */ |
1420 | | | 1420 | |
1421 | early_zerop = (void *)(KERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2); | | 1421 | early_zerop = (void *)(KERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2); |
1422 | early_zero_pte = PTE_BASE + pl1_i((vaddr_t)early_zerop); | | 1422 | early_zero_pte = PTE_BASE + pl1_i((vaddr_t)early_zerop); |
1423 | } | | 1423 | } |
1424 | | | 1424 | |
1425 | /* | | 1425 | /* |
1426 | * now we allocate the "special" VAs which are used for tmp mappings | | 1426 | * now we allocate the "special" VAs which are used for tmp mappings |
1427 | * by the pmap (and other modules). we allocate the VAs by advancing | | 1427 | * by the pmap (and other modules). we allocate the VAs by advancing |
1428 | * virtual_avail (note that there are no pages mapped at these VAs). | | 1428 | * virtual_avail (note that there are no pages mapped at these VAs). |
1429 | * we find the PTE that maps the allocated VA via the linear PTE | | 1429 | * we find the PTE that maps the allocated VA via the linear PTE |
1430 | * mapping. | | 1430 | * mapping. |
1431 | */ | | 1431 | */ |
1432 | | | 1432 | |
1433 | pte = PTE_BASE + pl1_i(virtual_avail); | | 1433 | pte = PTE_BASE + pl1_i(virtual_avail); |
1434 | | | 1434 | |
1435 | #ifdef MULTIPROCESSOR | | 1435 | #ifdef MULTIPROCESSOR |
1436 | /* | | 1436 | /* |
1437 | * Waste some VA space to avoid false sharing of cache lines | | 1437 | * Waste some VA space to avoid false sharing of cache lines |
1438 | * for page table pages: Give each possible CPU a cache line | | 1438 | * for page table pages: Give each possible CPU a cache line |
1439 | * of PTE's (8) to play with, though we only need 4. We could | | 1439 | * of PTE's (8) to play with, though we only need 4. We could |
1440 | * recycle some of this waste by putting the idle stacks here | | 1440 | * recycle some of this waste by putting the idle stacks here |
1441 | * as well; we could waste less space if we knew the largest | | 1441 | * as well; we could waste less space if we knew the largest |
1442 | * CPU ID beforehand. | | 1442 | * CPU ID beforehand. |
1443 | */ | | 1443 | */ |
1444 | csrcp = (char *) virtual_avail; csrc_pte = pte; | | 1444 | csrcp = (char *) virtual_avail; csrc_pte = pte; |
1445 | | | 1445 | |
1446 | cdstp = (char *) virtual_avail+PAGE_SIZE; cdst_pte = pte+1; | | 1446 | cdstp = (char *) virtual_avail+PAGE_SIZE; cdst_pte = pte+1; |
1447 | | | 1447 | |
1448 | zerop = (char *) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2; | | 1448 | zerop = (char *) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2; |
1449 | | | 1449 | |
1450 | ptpp = (char *) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3; | | 1450 | ptpp = (char *) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3; |
1451 | | | 1451 | |
1452 | virtual_avail += PAGE_SIZE * maxcpus * NPTECL; | | 1452 | virtual_avail += PAGE_SIZE * maxcpus * NPTECL; |
1453 | pte += maxcpus * NPTECL; | | 1453 | pte += maxcpus * NPTECL; |
1454 | #else | | 1454 | #else |
1455 | csrcp = (void *) virtual_avail; csrc_pte = pte; /* allocate */ | | 1455 | csrcp = (void *) virtual_avail; csrc_pte = pte; /* allocate */ |
1456 | virtual_avail += PAGE_SIZE; pte++; /* advance */ | | 1456 | virtual_avail += PAGE_SIZE; pte++; /* advance */ |
1457 | | | 1457 | |
1458 | cdstp = (void *) virtual_avail; cdst_pte = pte; | | 1458 | cdstp = (void *) virtual_avail; cdst_pte = pte; |
1459 | virtual_avail += PAGE_SIZE; pte++; | | 1459 | virtual_avail += PAGE_SIZE; pte++; |
1460 | | | 1460 | |
1461 | zerop = (void *) virtual_avail; zero_pte = pte; | | 1461 | zerop = (void *) virtual_avail; zero_pte = pte; |
1462 | virtual_avail += PAGE_SIZE; pte++; | | 1462 | virtual_avail += PAGE_SIZE; pte++; |
1463 | | | 1463 | |
1464 | ptpp = (void *) virtual_avail; ptp_pte = pte; | | 1464 | ptpp = (void *) virtual_avail; ptp_pte = pte; |
1465 | virtual_avail += PAGE_SIZE; pte++; | | 1465 | virtual_avail += PAGE_SIZE; pte++; |
1466 | #endif | | 1466 | #endif |
1467 | | | 1467 | |
1468 | if (VM_MIN_KERNEL_ADDRESS == KERNBASE) { | | 1468 | if (VM_MIN_KERNEL_ADDRESS == KERNBASE) { |
1469 | early_zerop = zerop; | | 1469 | early_zerop = zerop; |
1470 | early_zero_pte = zero_pte; | | 1470 | early_zero_pte = zero_pte; |
1471 | } | | 1471 | } |
1472 | | | 1472 | |
1473 | /* | | 1473 | /* |
1474 | * Nothing after this point actually needs pte; | | 1474 | * Nothing after this point actually needs pte; |
1475 | */ | | 1475 | */ |
1476 | pte = (void *)0xdeadbeef; | | 1476 | pte = (void *)0xdeadbeef; |
1477 | | | 1477 | |
1478 | /* XXX: vmmap used by mem.c... should be uvm_map_reserve */ | | 1478 | /* XXX: vmmap used by mem.c... should be uvm_map_reserve */ |
1479 | /* XXXfvdl PTEs not needed here */ | | 1479 | /* XXXfvdl PTEs not needed here */ |
1480 | vmmap = (char *)virtual_avail; /* don't need pte */ | | 1480 | vmmap = (char *)virtual_avail; /* don't need pte */ |
1481 | virtual_avail += PAGE_SIZE; pte++; | | 1481 | virtual_avail += PAGE_SIZE; pte++; |
1482 | | | 1482 | |
1483 | #ifdef XEN | | 1483 | #ifdef XEN |
1484 | #ifdef __x86_64__ | | 1484 | #ifdef __x86_64__ |
1485 | /* | | 1485 | /* |
1486 | * We want a dummy page directory for Xen: | | 1486 | * We want a dummy page directory for Xen: |
1487 | 	 * when deactivating a pmap, Xen will still consider it active. | | 1487 | 	 * when deactivating a pmap, Xen will still consider it active. |
1488 | 	 * So we point the user PGD at this one to lift all protection on | | 1488 | 	 * So we point the user PGD at this one to lift all protection on |
1489 | 	 * the now-inactive page table set. | | 1489 | 	 * the now-inactive page table set. |
1490 | */ | | 1490 | */ |
1491 | xen_dummy_user_pgd = avail_start; | | 1491 | xen_dummy_user_pgd = avail_start; |
1492 | avail_start += PAGE_SIZE; | | 1492 | avail_start += PAGE_SIZE; |
1493 | | | 1493 | |
1494 | 	/* Zero-fill it; the fewer checks Xen has to make, the better */ | | 1494 | 	/* Zero-fill it; the fewer checks Xen has to make, the better */ |
1495 | memset((void *) (xen_dummy_user_pgd + KERNBASE), 0, PAGE_SIZE); | | 1495 | memset((void *) (xen_dummy_user_pgd + KERNBASE), 0, PAGE_SIZE); |
1496 | /* Mark read-only */ | | 1496 | /* Mark read-only */ |
1497 | HYPERVISOR_update_va_mapping(xen_dummy_user_pgd + KERNBASE, | | 1497 | HYPERVISOR_update_va_mapping(xen_dummy_user_pgd + KERNBASE, |
1498 | pmap_pa2pte(xen_dummy_user_pgd) | PG_u | PG_V, UVMF_INVLPG); | | 1498 | pmap_pa2pte(xen_dummy_user_pgd) | PG_u | PG_V, UVMF_INVLPG); |
1499 | /* Pin as L4 */ | | 1499 | /* Pin as L4 */ |
1500 | xpq_queue_pin_l4_table(xpmap_ptom_masked(xen_dummy_user_pgd)); | | 1500 | xpq_queue_pin_l4_table(xpmap_ptom_masked(xen_dummy_user_pgd)); |
1501 | #endif /* __x86_64__ */ | | 1501 | #endif /* __x86_64__ */ |
1502 | idt_vaddr = virtual_avail; /* don't need pte */ | | 1502 | idt_vaddr = virtual_avail; /* don't need pte */ |
1503 | idt_paddr = avail_start; /* steal a page */ | | 1503 | idt_paddr = avail_start; /* steal a page */ |
1504 | /* | | 1504 | /* |
1505 | 	 * Xen requires one more page, as we can't store the | | 1505 | 	 * Xen requires one more page, as we can't store the |
1506 | 	 * GDT and LDT on the same page | | 1506 | 	 * GDT and LDT on the same page |
1507 | */ | | 1507 | */ |
1508 | virtual_avail += 3 * PAGE_SIZE; | | 1508 | virtual_avail += 3 * PAGE_SIZE; |
1509 | avail_start += 3 * PAGE_SIZE; | | 1509 | avail_start += 3 * PAGE_SIZE; |
1510 | #else /* XEN */ | | 1510 | #else /* XEN */ |
1511 | idt_vaddr = virtual_avail; /* don't need pte */ | | 1511 | idt_vaddr = virtual_avail; /* don't need pte */ |
1512 | idt_paddr = avail_start; /* steal a page */ | | 1512 | idt_paddr = avail_start; /* steal a page */ |
1513 | #if defined(__x86_64__) | | 1513 | #if defined(__x86_64__) |
1514 | virtual_avail += 2 * PAGE_SIZE; pte += 2; | | 1514 | virtual_avail += 2 * PAGE_SIZE; pte += 2; |
1515 | avail_start += 2 * PAGE_SIZE; | | 1515 | avail_start += 2 * PAGE_SIZE; |
1516 | #else /* defined(__x86_64__) */ | | 1516 | #else /* defined(__x86_64__) */ |
1517 | virtual_avail += PAGE_SIZE; pte++; | | 1517 | virtual_avail += PAGE_SIZE; pte++; |
1518 | avail_start += PAGE_SIZE; | | 1518 | avail_start += PAGE_SIZE; |
1519 | /* pentium f00f bug stuff */ | | 1519 | /* pentium f00f bug stuff */ |
1520 | pentium_idt_vaddr = virtual_avail; /* don't need pte */ | | 1520 | pentium_idt_vaddr = virtual_avail; /* don't need pte */ |
1521 | virtual_avail += PAGE_SIZE; pte++; | | 1521 | virtual_avail += PAGE_SIZE; pte++; |
1522 | #endif /* defined(__x86_64__) */ | | 1522 | #endif /* defined(__x86_64__) */ |
1523 | #endif /* XEN */ | | 1523 | #endif /* XEN */ |
1524 | | | 1524 | |
1525 | #ifdef _LP64 | | 1525 | #ifdef _LP64 |
1526 | /* | | 1526 | /* |
1527 | * Grab a page below 4G for things that need it (i.e. | | 1527 | * Grab a page below 4G for things that need it (i.e. |
1528 | * having an initial %cr3 for the MP trampoline). | | 1528 | * having an initial %cr3 for the MP trampoline). |
1529 | */ | | 1529 | */ |
1530 | lo32_vaddr = virtual_avail; | | 1530 | lo32_vaddr = virtual_avail; |
1531 | virtual_avail += PAGE_SIZE; pte++; | | 1531 | virtual_avail += PAGE_SIZE; pte++; |
1532 | lo32_paddr = avail_start; | | 1532 | lo32_paddr = avail_start; |
1533 | avail_start += PAGE_SIZE; | | 1533 | avail_start += PAGE_SIZE; |
1534 | #endif | | 1534 | #endif |
1535 | | | 1535 | |
1536 | /* | | 1536 | /* |
1537 | * now we reserve some VM for mapping pages when doing a crash dump | | 1537 | * now we reserve some VM for mapping pages when doing a crash dump |
1538 | */ | | 1538 | */ |
1539 | | | 1539 | |
1540 | virtual_avail = reserve_dumppages(virtual_avail); | | 1540 | virtual_avail = reserve_dumppages(virtual_avail); |
1541 | | | 1541 | |
1542 | /* | | 1542 | /* |
1543 | * init the static-global locks and global lists. | | 1543 | * init the static-global locks and global lists. |
1544 | * | | 1544 | * |
1545 | * => pventry::pvh_lock (initialized elsewhere) must also be | | 1545 | * => pventry::pvh_lock (initialized elsewhere) must also be |
1546 | * a spin lock, again at IPL_VM to prevent deadlock, and | | 1546 | * a spin lock, again at IPL_VM to prevent deadlock, and |
1547 | * again is never taken from interrupt context. | | 1547 | * again is never taken from interrupt context. |
1548 | */ | | 1548 | */ |
1549 | | | 1549 | |
1550 | mutex_init(&pmaps_lock, MUTEX_DEFAULT, IPL_NONE); | | 1550 | mutex_init(&pmaps_lock, MUTEX_DEFAULT, IPL_NONE); |
1551 | LIST_INIT(&pmaps); | | 1551 | LIST_INIT(&pmaps); |
1552 | pmap_cpu_init_early(curcpu()); | | 1552 | pmap_cpu_init_early(curcpu()); |
1553 | | | 1553 | |
1554 | /* | | 1554 | /* |
1555 | * initialize caches. | | 1555 | * initialize caches. |
1556 | */ | | 1556 | */ |
1557 | | | 1557 | |
1558 | pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap), 0, 0, 0, | | 1558 | pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap), 0, 0, 0, |
1559 | "pmappl", NULL, IPL_NONE, NULL, NULL, NULL); | | 1559 | "pmappl", NULL, IPL_NONE, NULL, NULL, NULL); |
1560 | #ifdef PAE | | 1560 | #ifdef PAE |
1561 | pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE * PDP_SIZE, 0, 0, 0, | | 1561 | pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE * PDP_SIZE, 0, 0, 0, |
1562 | "pdppl", &pmap_pdp_allocator, IPL_NONE, | | 1562 | "pdppl", &pmap_pdp_allocator, IPL_NONE, |
1563 | pmap_pdp_ctor, pmap_pdp_dtor, NULL); | | 1563 | pmap_pdp_ctor, pmap_pdp_dtor, NULL); |
1564 | #else /* PAE */ | | 1564 | #else /* PAE */ |
1565 | pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE, 0, 0, 0, | | 1565 | pool_cache_bootstrap(&pmap_pdp_cache, PAGE_SIZE, 0, 0, 0, |
1566 | "pdppl", NULL, IPL_NONE, pmap_pdp_ctor, pmap_pdp_dtor, NULL); | | 1566 | "pdppl", NULL, IPL_NONE, pmap_pdp_ctor, pmap_pdp_dtor, NULL); |
1567 | #endif /* PAE */ | | 1567 | #endif /* PAE */ |
1568 | pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0, | | 1568 | pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0, |
1569 | PR_LARGECACHE, "pvpl", &pool_allocator_meta, IPL_NONE, NULL, | | 1569 | PR_LARGECACHE, "pvpl", &pool_allocator_meta, IPL_NONE, NULL, |
1570 | NULL, NULL); | | 1570 | NULL, NULL); |
1571 | | | 1571 | |
1572 | /* | | 1572 | /* |
1573 | * ensure the TLB is sync'd with reality by flushing it... | | 1573 | * ensure the TLB is sync'd with reality by flushing it... |
1574 | */ | | 1574 | */ |
1575 | | | 1575 | |
1576 | tlbflush(); | | 1576 | tlbflush(); |
1577 | | | 1577 | |
1578 | /* | | 1578 | /* |
1579 | * calculate pmap_maxkvaddr from nkptp[]. | | 1579 | * calculate pmap_maxkvaddr from nkptp[]. |
1580 | */ | | 1580 | */ |
1581 | | | 1581 | |
1582 | kva = VM_MIN_KERNEL_ADDRESS; | | 1582 | kva = VM_MIN_KERNEL_ADDRESS; |
1583 | for (i = PTP_LEVELS - 1; i >= 1; i--) { | | 1583 | for (i = PTP_LEVELS - 1; i >= 1; i--) { |
1584 | kva += nkptp[i] * nbpd[i]; | | 1584 | kva += nkptp[i] * nbpd[i]; |
1585 | } | | 1585 | } |
1586 | pmap_maxkvaddr = kva; | | 1586 | pmap_maxkvaddr = kva; |
1587 | } | | 1587 | } |
1588 | | | 1588 | |
1589 | #if defined(__x86_64__) | | 1589 | #if defined(__x86_64__) |
1590 | /* | | 1590 | /* |
1591 | * Pre-allocate PTPs for low memory, so that 1:1 mappings for various | | 1591 | * Pre-allocate PTPs for low memory, so that 1:1 mappings for various |
1592 | * trampoline code can be entered. | | 1592 | * trampoline code can be entered. |
1593 | */ | | 1593 | */ |
1594 | void | | 1594 | void |
1595 | pmap_prealloc_lowmem_ptps(void) | | 1595 | pmap_prealloc_lowmem_ptps(void) |
1596 | { | | 1596 | { |
1597 | #ifdef XEN | | 1597 | #ifdef XEN |
1598 | int level; | | 1598 | int level; |
1599 | paddr_t newp; | | 1599 | paddr_t newp; |
1600 | paddr_t pdes_pa; | | 1600 | paddr_t pdes_pa; |
1601 | | | 1601 | |
1602 | pdes_pa = pmap_pdirpa(pmap_kernel(), 0); | | 1602 | pdes_pa = pmap_pdirpa(pmap_kernel(), 0); |
1603 | level = PTP_LEVELS; | | 1603 | level = PTP_LEVELS; |
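     | /* | |      | /* |
     |  * For each level, take a fresh page from avail_start, zero it | |      |  * For each level, take a fresh page from avail_start, zero it |
     |  * through the early_zerop window, downgrade that window to | |      |  * through the early_zerop window, downgrade that window to |
     |  * read-only (Xen will not accept a page-table page that still | |      |  * read-only (Xen will not accept a page-table page that still |
     |  * has a writable mapping), then hook the page into its parent | |      |  * has a writable mapping), then hook the page into its parent |
     |  * directory's slot for VA 0. | |      |  * directory's slot for VA 0. |
     |  */ | |      |  */ |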
1604 | for (;;) { | | 1604 | for (;;) { |
1605 | newp = avail_start; | | 1605 | newp = avail_start; |
1606 | avail_start += PAGE_SIZE; | | 1606 | avail_start += PAGE_SIZE; |
1607 | HYPERVISOR_update_va_mapping ((vaddr_t)early_zerop, | | 1607 | HYPERVISOR_update_va_mapping ((vaddr_t)early_zerop, |
1608 | xpmap_ptom_masked(newp) | PG_u | PG_V | PG_RW, UVMF_INVLPG); | | 1608 | xpmap_ptom_masked(newp) | PG_u | PG_V | PG_RW, UVMF_INVLPG); |
1609 | memset((void *)early_zerop, 0, PAGE_SIZE); | | 1609 | memset((void *)early_zerop, 0, PAGE_SIZE); |
1610 | /* Mark R/O before installing */ | | 1610 | /* Mark R/O before installing */ |
1611 | HYPERVISOR_update_va_mapping ((vaddr_t)early_zerop, | | 1611 | HYPERVISOR_update_va_mapping ((vaddr_t)early_zerop, |
1612 | xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG); | | 1612 | xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG); |
1613 | if (newp < (NKL2_KIMG_ENTRIES * NBPD_L2)) | | 1613 | if (newp < (NKL2_KIMG_ENTRIES * NBPD_L2)) |
1614 | HYPERVISOR_update_va_mapping (newp + KERNBASE, | | 1614 | HYPERVISOR_update_va_mapping (newp + KERNBASE, |
1615 | xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG); | | 1615 | xpmap_ptom_masked(newp) | PG_u | PG_V, UVMF_INVLPG); |
1616 | xpq_queue_pte_update ( | | 1616 | xpq_queue_pte_update ( |
1617 | xpmap_ptom_masked(pdes_pa) | | 1617 | xpmap_ptom_masked(pdes_pa) |
1618 | + (pl_i(0, level) * sizeof (pd_entry_t)), | | 1618 | + (pl_i(0, level) * sizeof (pd_entry_t)), |
1619 | xpmap_ptom_masked(newp) | PG_RW | PG_u | PG_V); | | 1619 | xpmap_ptom_masked(newp) | PG_RW | PG_u | PG_V); |
1620 | level--; | | 1620 | level--; |
1621 | if (level <= 1) | | 1621 | if (level <= 1) |
1622 | break; | | 1622 | break; |
1623 | pdes_pa = newp; | | 1623 | pdes_pa = newp; |
1624 | } | | 1624 | } |
1625 | #else /* XEN */ | | 1625 | #else /* XEN */ |
1626 | pd_entry_t *pdes; | | 1626 | pd_entry_t *pdes; |
1627 | int level; | | 1627 | int level; |
1628 | paddr_t newp; | | 1628 | paddr_t newp; |
1629 | | | 1629 | |
1630 | pdes = pmap_kernel()->pm_pdir; | | 1630 | pdes = pmap_kernel()->pm_pdir; |
1631 | level = PTP_LEVELS; | | 1631 | level = PTP_LEVELS; |
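     | /* | |      | /* |
     |  * Walk down from the top level, allocating and zeroing one PTP | |      |  * Walk down from the top level, allocating and zeroing one PTP |
     |  * per level and installing it in the entry covering VA 0, so | |      |  * per level and installing it in the entry covering VA 0, so |
     |  * that the low 1:1 mappings later need only L1 PTE updates. | |      |  * that the low 1:1 mappings later need only L1 PTE updates. |
     |  */ | |      |  */ |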
1632 | for (;;) { | | 1632 | for (;;) { |
1633 | newp = avail_start; | | 1633 | newp = avail_start; |
1634 | avail_start += PAGE_SIZE; | | 1634 | avail_start += PAGE_SIZE; |
1635 | *early_zero_pte = (newp & PG_FRAME) | PG_V | PG_RW; | | 1635 | *early_zero_pte = (newp & PG_FRAME) | PG_V | PG_RW; |
1636 | pmap_update_pg((vaddr_t)early_zerop); | | 1636 | pmap_update_pg((vaddr_t)early_zerop); |
1637 | memset(early_zerop, 0, PAGE_SIZE); | | 1637 | memset(early_zerop, 0, PAGE_SIZE); |
1638 | pdes[pl_i(0, level)] = (newp & PG_FRAME) | PG_V | PG_RW; | | 1638 | pdes[pl_i(0, level)] = (newp & PG_FRAME) | PG_V | PG_RW; |
1639 | level--; | | 1639 | level--; |
1640 | if (level <= 1) | | 1640 | if (level <= 1) |
1641 | break; | | 1641 | break; |
1642 | pdes = normal_pdes[level - 2]; | | 1642 | pdes = normal_pdes[level - 2]; |
1643 | } | | 1643 | } |
1644 | #endif /* XEN */ | | 1644 | #endif /* XEN */ |
1645 | } | | 1645 | } |
1646 | #endif /* defined(__x86_64__) */ | | 1646 | #endif /* defined(__x86_64__) */ |
1647 | | | 1647 | |
1648 | /* | | 1648 | /* |
1649 | * pmap_init: called from uvm_init, our job is to get the pmap | | 1649 | * pmap_init: called from uvm_init, our job is to get the pmap |
1650 | * system ready to manage mappings... | | 1650 | * system ready to manage mappings... |
1651 | */ | | 1651 | */ |
1652 | | | 1652 | |
1653 | void | | 1653 | void |
1654 | pmap_init(void) | | 1654 | pmap_init(void) |
1655 | { | | 1655 | { |
1656 | int i; | | 1656 | int i; |
1657 | | | 1657 | |
1658 | for (i = 0; i < PV_HASH_SIZE; i++) { | | 1658 | for (i = 0; i < PV_HASH_SIZE; i++) { |
1659 | SLIST_INIT(&pv_hash_heads[i].hh_list); | | 1659 | SLIST_INIT(&pv_hash_heads[i].hh_list); |
1660 | } | | 1660 | } |
1661 | for (i = 0; i < PV_HASH_LOCK_CNT; i++) { | | 1661 | for (i = 0; i < PV_HASH_LOCK_CNT; i++) { |
1662 | mutex_init(&pv_hash_locks[i].lock, MUTEX_NODEBUG, IPL_VM); | | 1662 | mutex_init(&pv_hash_locks[i].lock, MUTEX_NODEBUG, IPL_VM); |
1663 | } | | 1663 | } |
1664 | | | 1664 | |
1665 | /* | | 1665 | /* |
1666 | * done: pmap module is up (and ready for business) | | 1666 | * done: pmap module is up (and ready for business) |
1667 | */ | | 1667 | */ |
1668 | | | 1668 | |
1669 | pmap_initialized = true; | | 1669 | pmap_initialized = true; |
1670 | } | | 1670 | } |
1671 | | | 1671 | |
1672 | /* | | 1672 | /* |
1673 | * pmap_cpu_init_early: perform early per-CPU initialization. | | 1673 | * pmap_cpu_init_early: perform early per-CPU initialization. |
1674 | */ | | 1674 | */ |
1675 | | | 1675 | |
1676 | void | | 1676 | void |
1677 | pmap_cpu_init_early(struct cpu_info *ci) | | 1677 | pmap_cpu_init_early(struct cpu_info *ci) |
1678 | { | | 1678 | { |
1679 | struct pmap_cpu *pc; | | 1679 | struct pmap_cpu *pc; |
1680 | static uint8_t pmap_cpu_alloc; | | 1680 | static uint8_t pmap_cpu_alloc; |
1681 | | | 1681 | |
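     | /* | |      | /* |
     |  * Hand out the next slot of the static pmap_cpu[] array. | |      |  * Hand out the next slot of the static pmap_cpu[] array. |
     |  * This assumes CPUs are brought up one at a time, so the | |      |  * This assumes CPUs are brought up one at a time, so the |
     |  * unlocked counter is safe here. | |      |  * unlocked counter is safe here. |
     |  */ | |      |  */ |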
1682 | pc = &pmap_cpu[pmap_cpu_alloc++].pc; | | 1682 | pc = &pmap_cpu[pmap_cpu_alloc++].pc; |
1683 | ci->ci_pmap_cpu = pc; | | 1683 | ci->ci_pmap_cpu = pc; |
1684 | } | | 1684 | } |
1685 | | | 1685 | |
1686 | /* | | 1686 | /* |
1687 | * pmap_cpu_init_late: perform late per-CPU initialization. | | 1687 | * pmap_cpu_init_late: perform late per-CPU initialization. |
1688 | */ | | 1688 | */ |
1689 | | | 1689 | |
1690 | void | | 1690 | void |
1691 | pmap_cpu_init_late(struct cpu_info *ci) | | 1691 | pmap_cpu_init_late(struct cpu_info *ci) |
1692 | { | | 1692 | { |
1693 | | | 1693 | |
1694 | if (ci == &cpu_info_primary) { | | 1694 | if (ci == &cpu_info_primary) { |
1695 | evcnt_attach_dynamic(&pmap_tlb_evcnt, EVCNT_TYPE_INTR, | | 1695 | evcnt_attach_dynamic(&pmap_tlb_evcnt, EVCNT_TYPE_INTR, |
1696 | NULL, "global", "TLB IPI"); | | 1696 | NULL, "global", "TLB IPI"); |
1697 | evcnt_attach_dynamic(&pmap_iobmp_evcnt, EVCNT_TYPE_MISC, | | 1697 | evcnt_attach_dynamic(&pmap_iobmp_evcnt, EVCNT_TYPE_MISC, |
1698 | NULL, "x86", "io bitmap copy"); | | 1698 | NULL, "x86", "io bitmap copy"); |
1699 | evcnt_attach_dynamic(&pmap_ldt_evcnt, EVCNT_TYPE_MISC, | | 1699 | evcnt_attach_dynamic(&pmap_ldt_evcnt, EVCNT_TYPE_MISC, |
1700 | NULL, "x86", "ldt sync"); | | 1700 | NULL, "x86", "ldt sync"); |
1701 | } | | 1701 | } |
1702 | | | 1702 | |
1703 | evcnt_attach_dynamic(&ci->ci_tlb_evcnt, EVCNT_TYPE_MISC, | | 1703 | evcnt_attach_dynamic(&ci->ci_tlb_evcnt, EVCNT_TYPE_MISC, |
1704 | NULL, device_xname(ci->ci_dev), "TLB IPI"); | | 1704 | NULL, device_xname(ci->ci_dev), "TLB IPI"); |
1705 | | | 1705 | |
1706 | #ifdef PAE | | 1706 | #ifdef PAE |
1707 | int ret; | | 1707 | int ret; |
1708 | struct pglist pg; | | 1708 | struct pglist pg; |
1709 | struct vm_page *vmap; | | 1709 | struct vm_page *vmap; |
1710 | | | 1710 | |
1711 | /* The BP already has its own L3 page, allocated in locore.S. */ | | 1711 | /* The BP already has its own L3 page, allocated in locore.S. */ |
1712 | if (ci == &cpu_info_primary) | | 1712 | if (ci == &cpu_info_primary) |
1713 | return; | | 1713 | return; |
1714 | | | 1714 | |
1715 | /* | | 1715 | /* |
1716 | * Allocate a page for the per-CPU L3 PD.  %cr3 being only 32 bits, the PA | | 1716 | * Allocate a page for the per-CPU L3 PD.  %cr3 being only 32 bits, the PA |
1717 | * must reside below the 4GB boundary. | | 1717 | * must reside below the 4GB boundary. |
1718 | */ | | 1718 | */ |
1719 | ret = uvm_pglistalloc(PAGE_SIZE, 0, 0x100000000ULL, 32, 0, &pg, 1, 0); | | 1719 | ret = uvm_pglistalloc(PAGE_SIZE, 0, 0x100000000ULL, 32, 0, &pg, 1, 0); |
1720 | vmap = TAILQ_FIRST(&pg); | | 1720 | vmap = TAILQ_FIRST(&pg); |
1721 | | | 1721 | |
1722 | if (ret != 0 || vmap == NULL) | | 1722 | if (ret != 0 || vmap == NULL) |
1723 | panic("%s: failed to allocate L3 pglist for CPU %d (ret %d)\n", | | 1723 | panic("%s: failed to allocate L3 pglist for CPU %d (ret %d)\n", |
1724 | __func__, cpu_index(ci), ret); | | 1724 | __func__, cpu_index(ci), ret); |
1725 | | | 1725 | |
1726 | ci->ci_pae_l3_pdirpa = vmap->phys_addr; | | 1726 | ci->ci_pae_l3_pdirpa = vmap->phys_addr; |
1727 | | | 1727 | |
1728 | ci->ci_pae_l3_pdir = (paddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, | | 1728 | ci->ci_pae_l3_pdir = (paddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, |
1729 | UVM_KMF_VAONLY | UVM_KMF_NOWAIT); | | 1729 | UVM_KMF_VAONLY | UVM_KMF_NOWAIT); |
1730 | if (ci->ci_pae_l3_pdir == NULL) | | 1730 | if (ci->ci_pae_l3_pdir == NULL) |
1731 | panic("%s: failed to allocate L3 PD for CPU %d\n", | | 1731 | panic("%s: failed to allocate L3 PD for CPU %d\n", |
1732 | __func__, cpu_index(ci)); | | 1732 | __func__, cpu_index(ci)); |
1733 | | | 1733 | |
1734 | pmap_kenter_pa((vaddr_t)ci->ci_pae_l3_pdir, ci->ci_pae_l3_pdirpa, | | 1734 | pmap_kenter_pa((vaddr_t)ci->ci_pae_l3_pdir, ci->ci_pae_l3_pdirpa, |
1735 | VM_PROT_READ | VM_PROT_WRITE, 0); | | 1735 | VM_PROT_READ | VM_PROT_WRITE, 0); |
1736 | | | 1736 | |
1737 | pmap_update(pmap_kernel()); | | 1737 | pmap_update(pmap_kernel()); |
1738 | #endif | | 1738 | #endif |
1739 | } | | 1739 | } |
1740 | | | 1740 | |
1741 | /* | | 1741 | /* |
1742 | * p v _ e n t r y f u n c t i o n s | | 1742 | * p v _ e n t r y f u n c t i o n s |
1743 | */ | | 1743 | */ |
1744 | | | 1744 | |
1745 | /* | | 1745 | /* |
1746 | * pmap_free_pvs: free a list of pv_entrys | | 1746 | * pmap_free_pvs: free a list of pv_entrys |
1747 | */ | | 1747 | */ |
1748 | | | 1748 | |
1749 | static void | | 1749 | static void |
1750 | pmap_free_pvs(struct pv_entry *pve) | | 1750 | pmap_free_pvs(struct pv_entry *pve) |
1751 | { | | 1751 | { |
1752 | struct pv_entry *next; | | 1752 | struct pv_entry *next; |
1753 | | | 1753 | |
1754 | for ( /* null */ ; pve != NULL ; pve = next) { | | 1754 | for ( /* null */ ; pve != NULL ; pve = next) { |
1755 | next = pve->pve_next; | | 1755 | next = pve->pve_next; |
1756 | pool_cache_put(&pmap_pv_cache, pve); | | 1756 | pool_cache_put(&pmap_pv_cache, pve); |
1757 | } | | 1757 | } |
1758 | } | | 1758 | } |
1759 | | | 1759 | |
1760 | /* | | 1760 | /* |
1761 | * main pv_entry manipulation functions: | | 1761 | * main pv_entry manipulation functions: |
1762 | * pmap_enter_pv: enter a mapping onto a pv_head list | | 1762 | * pmap_enter_pv: enter a mapping onto a pv_head list |
1763 | * pmap_remove_pv: remove a mapping from a pv_head list | | 1763 | * pmap_remove_pv: remove a mapping from a pv_head list |
1764 | * | | 1764 | * |
1765 | * NOTE: Both pmap_enter_pv and pmap_remove_pv expect the caller to lock | | 1765 | * NOTE: Both pmap_enter_pv and pmap_remove_pv expect the caller to lock |
1766 | * the pvh before calling them. | | 1766 | * the pvh before calling them. |
1767 | */ | | 1767 | */ |
1768 | | | 1768 | |
1769 | /* | | 1769 | /* |