/*	$NetBSD: pmap.c,v 1.404 2020/09/01 11:24:14 bouyer Exp $	*/

/*
 * Copyright (c) 2008, 2010, 2016, 2017, 2019, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran, and by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2007 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Copyright 2001 (c) Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.404 2020/09/01 11:24:14 bouyer Exp $");

#include "opt_user_ldt.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include "opt_xen.h"
#include "opt_svs.h"
#include "opt_kaslr.h"

#define	__MUTEX_PRIVATE	/* for assertions */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pool.h>
#include <sys/kernel.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <sys/xcall.h>
#include <sys/kcore.h>
#include <sys/kmem.h>
#include <sys/asan.h>
#include <sys/msan.h>
#include <sys/entropy.h>

#include <uvm/uvm.h>
#include <uvm/pmap/pmap_pvt.h>

#include <dev/isa/isareg.h>

#include <machine/specialreg.h>
#include <machine/gdt.h>
#include <machine/isa_machdep.h>
#include <machine/cpuvar.h>
#include <machine/cputypes.h>

#include <x86/pmap.h>
#include <x86/pmap_pv.h>

#include <x86/i82489reg.h>
#include <x86/i82489var.h>

#ifdef XEN
#include <xen/include/public/xen.h>
#include <xen/hypervisor.h>
#endif

/*
 * general info:
 *
 *  - for an explanation of how the x86 MMU hardware works see
 *    the comments in <machine/pte.h>.
 *
 *  - for an explanation of the general memory structure used by
 *    this pmap (including the recursive mapping), see the comments
 *    in <machine/pmap.h>.
 *
 * this file contains the code for the "pmap module."  the module's
 * job is to manage the hardware's virtual to physical address mappings.
 * note that there are two levels of mapping in the VM system:
 *
 * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's
 *     to map ranges of virtual address space to objects/files.  for
 *     example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only
 *     to the file /bin/ls starting at offset zero."  note that
 *     the upper layer mapping is not concerned with how individual
 *     vm_pages are mapped.
 *
 * [2] the lower layer of the VM system (the pmap) maintains the mappings
 *     from virtual addresses.  it is concerned with which vm_page is
 *     mapped where.  for example, when you run /bin/ls and start
 *     at page 0x1000 the fault routine may look up the correct page
 *     of the /bin/ls file and then ask the pmap layer to establish
 *     a mapping for it.
 *
 * note that information in the lower layer of the VM system can be
 * thrown away since it can easily be reconstructed from the info
 * in the upper layer.
 *
 * data structures we use include:
 *
 *  - struct pmap: describes the address space of one thread
 *  - struct pmap_page: describes one pv-tracked page, without
 *    necessarily a corresponding vm_page
 *  - struct pv_entry: describes one <PMAP,VA> mapping of a PA
 *  - pmap_page::pp_pvlist: there is one list per pv-tracked page of
 *    physical memory.  the pp_pvlist points to a list of pv_entry
 *    structures which describe all the <PMAP,VA> pairs that this
 *    page is mapped in.  this is critical for page-based operations
 *    such as pmap_page_protect() [change protection on _all_ mappings
 *    of a page]; see the sketch below.
 */
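
/*
 * Editorial sketch (not from the original source) of the PV tracking
 * described above.  One pv-tracked physical page can be mapped by several
 * pmaps at once, and the pmap_page's PV list records every <PMAP,VA>
 * pair; the addresses and pmap names below are made up:
 *
 *	pmap_page for PA 0x12345000
 *	    pp_pvlist --> pv_entry { pmap of proc A, VA 0x00001000 }
 *	              --> pv_entry { pmap of proc B, VA 0x7f7ff000 }
 *
 * a page-based operation such as pmap_page_protect() walks this list to
 * reach every mapping of the page, whichever pmap owns it.
 */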

/*
 * Locking
 *
 * We have the following locks that we must deal with, listed in the order
 * that they are acquired:
 *
 * pg->uobject->vmobjlock, pg->uanon->an_lock
 *
 *	For managed pages, these per-object locks are taken by the VM system
 *	before calling into the pmap module - either a read or write hold.
 *	The lock hold prevents pages from changing identity while the pmap is
 *	operating on them.  For example, the same lock is held across a call
 *	to pmap_remove() and the following call to pmap_update(), so that a
 *	page does not gain a new identity while its TLB visibility is stale.
 *
 * pmap->pm_lock
 *
 *	This lock protects the fields in the pmap structure including the
 *	non-kernel PDEs in the PDP, the PTEs, and PTPs and connected data
 *	structures.  For modifying unmanaged kernel PTEs it is not needed as
 *	kernel PDEs are never freed, and the kernel is expected to be
 *	self-consistent (and the lock can't be taken for unmanaged kernel
 *	PTEs, because they can be modified from interrupt context).
 *
 * pmaps_lock
 *
 *	This lock protects the list of active pmaps (headed by "pmaps").
 *	It's acquired when adding or removing pmaps or adjusting kernel PDEs.
 *
 * pp_lock
 *
 *	This per-page lock protects PV entry lists and the embedded PV entry
 *	in each vm_page, allowing for concurrent operation on pages by
 *	different pmaps.  This is a spin mutex at IPL_VM, because at the
 *	points it is taken context switching is usually not tolerable, and
 *	spin mutexes must block out interrupts that could take kernel_lock.
 */
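
/*
 * Illustrative lock-ordering sketch (an editorial addition, not part of
 * the original source): a page-centric operation would acquire the locks
 * above in order, roughly as below.  "pp" and "pmap" stand for whichever
 * pmap_page and pmap the operation reached; error handling is omitted:
 *
 *	rw_enter(pg->uobject->vmobjlock, RW_WRITER);	taken by the UVM caller
 *	mutex_enter(&pmap->pm_lock);			per-pmap state
 *	mutex_spin_enter(&pp->pp_lock);			PV list of the page
 *	... examine or rewrite PTEs / PV entries ...
 *	mutex_spin_exit(&pp->pp_lock);
 *	mutex_exit(&pmap->pm_lock);
 *	rw_exit(pg->uobject->vmobjlock);		released by the UVM caller
 */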

/* uvm_object is abused here to index pmap_pages; make assertions happy. */
#ifdef DIAGNOSTIC
#define PMAP_DUMMY_LOCK(pm)	rw_enter(&(pm)->pm_dummy_lock, RW_WRITER)
#define PMAP_DUMMY_UNLOCK(pm)	rw_exit(&(pm)->pm_dummy_lock)
#else
#define PMAP_DUMMY_LOCK(pm)
#define PMAP_DUMMY_UNLOCK(pm)
#endif

static const struct uvm_pagerops pmap_pager = {
	/* nothing */
};

const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER;
const vaddr_t ptp_frames[] = PTP_FRAME_INITIALIZER;
const int ptp_shifts[] = PTP_SHIFT_INITIALIZER;
const long nkptpmax[] = NKPTPMAX_INITIALIZER;
const long nbpd[] = NBPD_INITIALIZER;
#ifdef i386
pd_entry_t * const normal_pdes[] = PDES_INITIALIZER;
#else
pd_entry_t *normal_pdes[3];
#endif

long nkptp[] = NKPTP_INITIALIZER;

struct pmap_head pmaps;
kmutex_t pmaps_lock __cacheline_aligned;

struct pcpu_area *pcpuarea __read_mostly;

static vaddr_t pmap_maxkvaddr;

/*
 * Misc. event counters.
 */
struct evcnt pmap_iobmp_evcnt;
struct evcnt pmap_ldt_evcnt;

/*
 * PAT
 */
static bool cpu_pat_enabled __read_mostly = false;

/*
 * Global data structures
 */

static struct pmap kernel_pmap_store __cacheline_aligned; /* kernel's pmap */
struct pmap *const kernel_pmap_ptr = &kernel_pmap_store;
static rb_tree_t pmap_kernel_rb __cacheline_aligned;

struct bootspace bootspace __read_mostly;
struct slotspace slotspace __read_mostly;

/* Set to PTE_NX if supported. */
pd_entry_t pmap_pg_nx __read_mostly = 0;

/* Set to PTE_G if supported. */
pd_entry_t pmap_pg_g __read_mostly = 0;

/* Set to true if large pages are supported. */
int pmap_largepages __read_mostly = 0;

paddr_t lowmem_rsvd __read_mostly;
paddr_t avail_start __read_mostly; /* PA of first available physical page */
paddr_t avail_end __read_mostly; /* PA of last available physical page */

#ifdef XENPV
paddr_t pmap_pa_start;	/* PA of first physical page for this domain */
paddr_t pmap_pa_end;	/* PA of last physical page for this domain */
#endif

#define	VM_PAGE_TO_PP(pg)	(&(pg)->mdpage.mp_pp)
#define PMAP_CHECK_PP(pp) \
    KASSERTMSG((pp)->pp_lock.mtx_ipl._ipl == IPL_VM, "bad pmap_page %p", pp)

/*
 * Other data structures
 */

static pt_entry_t protection_codes[8] __read_mostly;

static bool pmap_initialized __read_mostly = false; /* pmap_init done yet? */

/*
 * The following two vaddr_t's are used during system startup to keep track of
 * how much of the kernel's VM space we have used.  Once the system is started,
 * the management of the remaining kernel VM space is turned over to the
 * kernel_map vm_map.
 */
static vaddr_t virtual_avail __read_mostly;	/* VA of first free KVA */
static vaddr_t virtual_end __read_mostly;	/* VA of last free KVA */

#ifndef XENPV
/*
 * LAPIC virtual address, and fake physical address.
 */
volatile vaddr_t local_apic_va __read_mostly;
paddr_t local_apic_pa __read_mostly;
#endif

/*
 * pool that pmap structures are allocated from
 */
struct pool_cache pmap_cache;
static int  pmap_ctor(void *, void *, int);
static void pmap_dtor(void *, void *);

/*
 * pv_page cache
 */
static struct pool_cache pmap_pvp_cache;

#ifdef __HAVE_DIRECT_MAP
vaddr_t pmap_direct_base __read_mostly;
vaddr_t pmap_direct_end __read_mostly;
#endif

#ifndef __HAVE_DIRECT_MAP
/*
 * Special VAs and the PTEs that map them
 */
static pt_entry_t *early_zero_pte;
static void pmap_vpage_cpualloc(struct cpu_info *);
#ifdef XENPV
char *early_zerop; /* also referenced from xen_locore() */
#else
static char *early_zerop;
#endif
#endif

int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, u_int);

/* PDP pool and its callbacks */
static struct pool pmap_pdp_pool;
static void pmap_pdp_init(pd_entry_t *);
static void pmap_pdp_fini(pd_entry_t *);

#ifdef PAE
/* need to allocate items of 4 pages */
static void *pmap_pdp_alloc(struct pool *, int);
static void pmap_pdp_free(struct pool *, void *);
static struct pool_allocator pmap_pdp_allocator = {
	.pa_alloc = pmap_pdp_alloc,
	.pa_free = pmap_pdp_free,
	.pa_pagesz = PAGE_SIZE * PDP_SIZE,
};
#endif

extern vaddr_t idt_vaddr;
extern paddr_t idt_paddr;
extern vaddr_t gdt_vaddr;
extern paddr_t gdt_paddr;
extern vaddr_t ldt_vaddr;
extern paddr_t ldt_paddr;

#ifdef i386
/* stuff to fix the pentium f00f bug */
extern vaddr_t pentium_idt_vaddr;
#endif

/* Array of freshly allocated PTPs, for pmap_get_ptp(). */
struct pmap_ptparray {
	struct vm_page *pg[PTP_LEVELS + 1];
	bool alloced[PTP_LEVELS + 1];
};

/*
 * PV entries are allocated in page-sized chunks and cached per-pmap to
 * avoid intense pressure on memory allocators.
 */

struct pv_page {
	LIST_HEAD(, pv_entry) pvp_pves;
	LIST_ENTRY(pv_page) pvp_list;
	long pvp_nfree;
	struct pmap *pvp_pmap;
};

#define	PVE_PER_PVP	((PAGE_SIZE / sizeof(struct pv_entry)) - 1)

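/*
 * Worked example for PVE_PER_PVP (editorial, with illustrative sizes;
 * the "- 1" appears to reserve one entry-sized slot for the struct
 * pv_page header at the start of the page).  If PAGE_SIZE were 4096 and
 * sizeof(struct pv_entry) were 64 - both depend on the architecture and
 * kernel options - a pv_page would carry 4096 / 64 - 1 = 63 free PV
 * entries.
 */
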
/*
 * PV tree prototypes
 */

static int	pmap_compare_key(void *, const void *, const void *);
static int	pmap_compare_nodes(void *, const void *, const void *);

/* Red-black tree */
static const rb_tree_ops_t pmap_rbtree_ops = {
	.rbto_compare_nodes = pmap_compare_nodes,
	.rbto_compare_key = pmap_compare_key,
	.rbto_node_offset = offsetof(struct pv_entry, pve_rb),
	.rbto_context = NULL
};
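
/*
 * Sketch of how these ops are meant to be used (editorial; the tree
 * handle and lookup site below are illustrative, not quoted from this
 * file).  rb_tree_init(9) attaches the ops table to a tree, and lookups
 * key on the mapped VA, matching pmap_compare_key() above:
 *
 *	rb_tree_t rb;
 *	struct pv_entry *pve;
 *
 *	rb_tree_init(&rb, &pmap_rbtree_ops);
 *	rb_tree_insert_node(&rb, pve);
 *	pve = rb_tree_find_node(&rb, (void *)(uintptr_t)va);
 */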

/*
 * Local prototypes
 */

#ifdef __HAVE_PCPU_AREA
static void pmap_init_pcpu(void);
#endif
#ifdef __HAVE_DIRECT_MAP
static void pmap_init_directmap(struct pmap *);
#endif
#if !defined(XENPV)
static void pmap_remap_global(void);
#endif
#ifndef XENPV
static void pmap_init_lapic(void);
static void pmap_remap_largepages(void);
#endif

static int pmap_get_ptp(struct pmap *, struct pmap_ptparray *, vaddr_t, int,
    struct vm_page **);
static void pmap_unget_ptp(struct pmap *, struct pmap_ptparray *);
static void pmap_install_ptp(struct pmap *, struct pmap_ptparray *, vaddr_t,
    pd_entry_t * const *);
static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, int);
static void pmap_freepage(struct pmap *, struct vm_page *, int);
static void pmap_free_ptp(struct pmap *, struct vm_page *, vaddr_t,
    pt_entry_t *, pd_entry_t * const *);
static bool pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
    vaddr_t);
static void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t, vaddr_t,
    vaddr_t);
static int pmap_pvp_ctor(void *, void *, int);
static void pmap_pvp_dtor(void *, void *);
static struct pv_entry *pmap_alloc_pv(struct pmap *);
static void pmap_free_pv(struct pmap *, struct pv_entry *);
static void pmap_drain_pv(struct pmap *);

static void pmap_alloc_level(struct pmap *, vaddr_t, long *);

static void pmap_load1(struct lwp *, struct pmap *, struct pmap *);
static void pmap_reactivate(struct pmap *);

/*
 * p m a p   h e l p e r   f u n c t i o n s
 */

static inline void
pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff)
{

	KASSERT(cold || mutex_owned(&pmap->pm_lock));
	pmap->pm_stats.resident_count += resid_diff;
	pmap->pm_stats.wired_count += wired_diff;
}

static inline void
pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte)
{
	int resid_diff = ((npte & PTE_P) ? 1 : 0) - ((opte & PTE_P) ? 1 : 0);
	int wired_diff = ((npte & PTE_WIRED) ? 1 : 0) -
	    ((opte & PTE_WIRED) ? 1 : 0);

	KASSERT((npte & (PTE_P | PTE_WIRED)) != PTE_WIRED);
	KASSERT((opte & (PTE_P | PTE_WIRED)) != PTE_WIRED);

	pmap_stats_update(pmap, resid_diff, wired_diff);
}

/*
 * ptp_to_pmap: lookup pmap by ptp
 */
static inline struct pmap *
ptp_to_pmap(struct vm_page *ptp)
{
	struct pmap *pmap;

	if (ptp == NULL) {
		return pmap_kernel();
	}
	pmap = (struct pmap *)ptp->uobject;
	KASSERT(pmap != NULL);
	KASSERT(&pmap->pm_obj[0] == ptp->uobject);
	return pmap;
}

static inline struct pv_pte *
pve_to_pvpte(struct pv_entry *pve)
{

	if (pve == NULL)
		return NULL;
	KASSERT((void *)&pve->pve_pte == (void *)pve);
	return &pve->pve_pte;
}

static inline struct pv_entry *
pvpte_to_pve(struct pv_pte *pvpte)
{
	struct pv_entry *pve = (void *)pvpte;

	KASSERT(pve_to_pvpte(pve) == pvpte);
	return pve;
}

/*
 * Return true if the pmap page has an embedded PV entry.
 */
static inline bool
pv_pte_embedded(struct pmap_page *pp)
{

	KASSERT(mutex_owned(&pp->pp_lock));
	return (bool)((vaddr_t)pp->pp_pte.pte_ptp | pp->pp_pte.pte_va);
}

/*
 * pv_pte_first, pv_pte_next: PV list iterator.
 */
static inline struct pv_pte *
pv_pte_first(struct pmap_page *pp)
{

	KASSERT(mutex_owned(&pp->pp_lock));
	if (pv_pte_embedded(pp)) {
		return &pp->pp_pte;
	}
	return pve_to_pvpte(LIST_FIRST(&pp->pp_pvlist));
}

static inline struct pv_pte *
pv_pte_next(struct pmap_page *pp, struct pv_pte *pvpte)
{

	KASSERT(mutex_owned(&pp->pp_lock));
	KASSERT(pvpte != NULL);
	if (pvpte == &pp->pp_pte) {
		return pve_to_pvpte(LIST_FIRST(&pp->pp_pvlist));
	}
	return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list));
}
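
/*
 * Canonical use of the iterator pair above (an editorial sketch of the
 * shape callers take while holding pp->pp_lock):
 *
 *	for (pvpte = pv_pte_first(pp); pvpte != NULL;
 *	    pvpte = pv_pte_next(pp, pvpte)) {
 *		... visit one <PMAP,VA> mapping of the page ...
 *	}
 *
 * note the embedded pp_pte is yielded first, then the pp_pvlist entries.
 */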

static inline uint8_t
pmap_pte_to_pp_attrs(pt_entry_t pte)
{
	uint8_t ret = 0;
	if (pte & PTE_D)
		ret |= PP_ATTRS_D;
	if (pte & PTE_A)
		ret |= PP_ATTRS_A;
	if (pte & PTE_W)
		ret |= PP_ATTRS_W;
	return ret;
}

static inline pt_entry_t
pmap_pp_attrs_to_pte(uint8_t attrs)
{
	pt_entry_t pte = 0;
	if (attrs & PP_ATTRS_D)
		pte |= PTE_D;
	if (attrs & PP_ATTRS_A)
		pte |= PTE_A;
	if (attrs & PP_ATTRS_W)
		pte |= PTE_W;
	return pte;
}

/*
 * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]?
 * of course the kernel is always loaded
 */
bool
pmap_is_curpmap(struct pmap *pmap)
{
	return ((pmap == pmap_kernel()) || (pmap == curcpu()->ci_pmap));
}

inline void
pmap_reference(struct pmap *pmap)
{

	atomic_inc_uint(&pmap->pm_obj[0].uo_refs);
}

/*
 * rbtree: compare two nodes.
 */
static int
pmap_compare_nodes(void *context, const void *n1, const void *n2)
{
	const struct pv_entry *pve1 = n1;
	const struct pv_entry *pve2 = n2;

	KASSERT(pve1->pve_pte.pte_ptp == pve2->pve_pte.pte_ptp);

	if (pve1->pve_pte.pte_va < pve2->pve_pte.pte_va) {
		return -1;
	}
	if (pve1->pve_pte.pte_va > pve2->pve_pte.pte_va) {
		return 1;
	}
	return 0;
}

/*
 * rbtree: compare a node and a key.
 */
static int
pmap_compare_key(void *context, const void *n, const void *k)
{
	const struct pv_entry *pve = n;
	const vaddr_t key = (vaddr_t)k;

	if (pve->pve_pte.pte_va < key) {
		return -1;
	}
	if (pve->pve_pte.pte_va > key) {
		return 1;
	}
	return 0;
}

/*
 * pmap_ptp_range_set: abuse ptp->uanon to record minimum VA of PTE
 */
static inline void
pmap_ptp_range_set(struct vm_page *ptp, vaddr_t va)
{
	vaddr_t *min = (vaddr_t *)&ptp->uanon;

	if (va < *min) {
		*min = va;
	}
}

/*
 * pmap_ptp_range_clip: abuse ptp->uanon to clip range of PTEs to remove
 */
static inline void
pmap_ptp_range_clip(struct vm_page *ptp, vaddr_t *startva, pt_entry_t **pte)
{
	vaddr_t sclip;

	if (ptp == NULL) {
		return;
	}

	sclip = (vaddr_t)ptp->uanon;
	sclip = (*startva < sclip ? sclip : *startva);
	*pte += (sclip - *startva) / PAGE_SIZE;
	*startva = sclip;
}

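/*
 * Worked example for the clipping above (editorial; addresses made up,
 * assuming 4 KiB pages): if the PTP recorded a minimum mapped VA of
 * 0x5000 and the caller asks to start at *startva = 0x2000, then
 * sclip = 0x5000, *pte advances by (0x5000 - 0x2000) / 4096 = 3 slots,
 * and *startva becomes 0x5000 - the three never-mapped PTEs are skipped.
 */
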
/*
 * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
 *
 * there are several pmaps involved.  some or all of them might be same.
 *
 *  - the pmap given by the first argument
 *	our caller wants to access this pmap's PTEs.
 *
 *  - pmap_kernel()
 *	the kernel pmap.  note that it only contains the kernel part
 *	of the address space which is shared by any pmap.  ie. any
 *	pmap can be used instead of pmap_kernel() for our purpose.
 *
 *  - ci->ci_pmap
 *	pmap currently loaded on the cpu.
 *
 *  - vm_map_pmap(&curproc->p_vmspace->vm_map)
 *	current process' pmap.
 *
 * => caller must lock pmap first (if not the kernel pmap)
 * => must be undone with pmap_unmap_ptes before returning
 * => disables kernel preemption
 */
void
pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2, pd_entry_t **ptepp,
    pd_entry_t * const **pdeppp)
{
	struct pmap *curpmap;
	struct cpu_info *ci;
	lwp_t *l;

	kpreempt_disable();

	/* The kernel's pmap is always accessible. */
	if (pmap == pmap_kernel()) {
		*pmap2 = NULL;
		*ptepp = PTE_BASE;
		*pdeppp = normal_pdes;
		return;
	}

	KASSERT(mutex_owned(&pmap->pm_lock));

	l = curlwp;
	ci = l->l_cpu;
	curpmap = ci->ci_pmap;
	if (pmap == curpmap) {
		/*
		 * Already on the CPU: make it valid.  This is very
		 * often the case during exit(), when we have switched
		 * to the kernel pmap in order to destroy a user pmap.
		 */
		if (__predict_false(ci->ci_tlbstate != TLBSTATE_VALID)) {
			pmap_reactivate(pmap);
		}
		*pmap2 = NULL;
	} else {
		/*
		 * Toss current pmap from CPU and install new pmap, but keep
		 * a reference to the old one.  Dropping the reference can
		 * block as it needs to take locks, so defer that to
		 * pmap_unmap_ptes().
		 */
		pmap_reference(pmap);
		pmap_load1(l, pmap, curpmap);
		*pmap2 = curpmap;
	}
	KASSERT(ci->ci_tlbstate == TLBSTATE_VALID);
#ifdef DIAGNOSTIC
	pmap->pm_ncsw = lwp_pctr();
#endif
	*ptepp = PTE_BASE;

#if defined(XENPV) && defined(__x86_64__)
	KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] == L4_BASE);
	ci->ci_normal_pdes[PTP_LEVELS - 2] = pmap->pm_pdir;
	*pdeppp = ci->ci_normal_pdes;
#else
	*pdeppp = normal_pdes;
#endif
}

/*
 * pmap_unmap_ptes: unlock the PTE mapping of "pmap"
 *
 * => we cannot tolerate context switches while mapped in: assert this.
 * => reenables kernel preemption.
 * => does not unlock pmap.
 */
void
pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2)
{
	struct cpu_info *ci;
	struct pmap *mypmap;
	struct lwp *l;

	KASSERT(kpreempt_disabled());

	/* The kernel's pmap is always accessible. */
	if (pmap == pmap_kernel()) {
		kpreempt_enable();
		return;
	}

	l = curlwp;
	ci = l->l_cpu;

	KASSERT(mutex_owned(&pmap->pm_lock));
	KASSERT(pmap->pm_ncsw == lwp_pctr());

#if defined(XENPV) && defined(__x86_64__)
	KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] != L4_BASE);
	ci->ci_normal_pdes[PTP_LEVELS - 2] = L4_BASE;
#endif

	/* If not our own pmap, mark whatever's on the CPU now as lazy. */
	KASSERT(ci->ci_tlbstate == TLBSTATE_VALID);
	mypmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
	if (ci->ci_pmap == mypmap) {
		ci->ci_want_pmapload = 0;
	} else {
		ci->ci_want_pmapload = (mypmap != pmap_kernel());
		ci->ci_tlbstate = TLBSTATE_LAZY;
	}

	/* Now safe to re-enable preemption. */
	kpreempt_enable();

	/* Toss reference to other pmap taken earlier. */
	if (pmap2 != NULL) {
		pmap_destroy(pmap2);
	}
}
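
/*
 * Illustrative sketch (editorial addition; pmap_example_peek_pte() is
 * hypothetical): the canonical calling pattern for the pair above, for
 * a non-kernel pmap, assuming the relevant PDEs are valid (cf.
 * pmap_pdes_valid()).
 *
 *	static pt_entry_t
 *	pmap_example_peek_pte(struct pmap *pmap, vaddr_t va)
 *	{
 *		struct pmap *pmap2;
 *		pt_entry_t *ptes, pte;
 *		pd_entry_t * const *pdes;
 *
 *		mutex_enter(&pmap->pm_lock);	// lock pmap first
 *		pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
 *		pte = ptes[pl1_i(va)];	// PTEs now visible through ptes[]
 *		pmap_unmap_ptes(pmap, pmap2);	// before unlocking pmap
 *		mutex_exit(&pmap->pm_lock);
 *		return pte;
 *	}
 */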

inline static void
pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte)
{

#if !defined(__x86_64__)
	if (curproc == NULL || curproc->p_vmspace == NULL ||
	    pm != vm_map_pmap(&curproc->p_vmspace->vm_map))
		return;

	if ((opte ^ npte) & PTE_X)
		pmap_update_pg(va);

	/*
	 * Executability was removed from the last executable mapping.
	 * Ideally we would shrink the code segment limit right here, but
	 * we can't because of locking constraints on the vm map.  Instead,
	 * reset the code segment to something conservative and let the
	 * trap handler deal with setting the right limit.
	 */

	if ((opte & PTE_X) && (npte & PTE_X) == 0 && va == pm->pm_hiexec) {
		struct trapframe *tf = curlwp->l_md.md_regs;

		tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
		pm->pm_hiexec = I386_MAX_EXE_ADDR;
	}
#endif /* !defined(__x86_64__) */
}

#if !defined(__x86_64__)
/*
 * Fixup the code segment to cover all potential executable mappings.
 * Returns 0 if no changes to the code segment were made.
 */
int
pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb)
{
	struct vm_map_entry *ent;
	struct pmap *pm = vm_map_pmap(map);
	vaddr_t va = 0;

	vm_map_lock_read(map);
	for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) {
		/*
		 * This entry has a greater va than the entries before.
		 * We need to make it point to the last page, not past it.
		 */
		if (ent->protection & VM_PROT_EXECUTE)
			va = trunc_page(ent->end) - PAGE_SIZE;
	}
	vm_map_unlock_read(map);
	if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL))
		return 0;

	pm->pm_hiexec = va;
	if (pm->pm_hiexec > I386_MAX_EXE_ADDR) {
		tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
	} else {
		tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
		return 0;
	}
	return 1;
}
#endif /* !defined(__x86_64__) */

void
pat_init(struct cpu_info *ci)
{
	uint64_t pat;

	if (!(ci->ci_feat_val[0] & CPUID_PAT))
		return;

	/* We change WT to WC. Leave all other entries the default values. */
	pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) |
	    PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) |
	    PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) |
	    PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC);

	wrmsr(MSR_CR_PAT, pat);
	cpu_pat_enabled = true;
}
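
/*
 * Editorial note: the PAT value programmed above yields the layout
 * below; entries 1 and 5 are WT in the architectural power-on default,
 * and are repurposed here for write-combining.
 *
 *	entry:	0	1	2	3	4	5	6	7
 *	type:	WB	WC	UC-	UC	WB	WC	UC-	UC
 */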

static pt_entry_t
pmap_pat_flags(u_int flags)
{
	u_int cacheflags = (flags & PMAP_CACHE_MASK);

	if (!cpu_pat_enabled) {
		switch (cacheflags) {
		case PMAP_NOCACHE:
		case PMAP_NOCACHE_OVR:
			/*
			 * Results in PGC_UCMINUS on cpus which have
			 * the cpuid PAT but PAT "disabled".
			 */
			return PTE_PCD;
		default:
			return 0;
		}
	}

	switch (cacheflags) {
	case PMAP_NOCACHE:
		return PGC_UC;
	case PMAP_WRITE_COMBINE:
		return PGC_WC;
	case PMAP_WRITE_BACK:
		return PGC_WB;
	case PMAP_NOCACHE_OVR:
		return PGC_UCMINUS;
	}

	return 0;
}

/*
 * p m a p   k e n t e r   f u n c t i o n s
 *
 * functions to quickly enter/remove pages from the kernel address
 * space.  pmap_kremove is exported to the MI kernel.  we make use of
 * the recursive PTE mappings.
 */

/*
 * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking
 *
 * => no need to lock anything, assume va is already allocated
 * => should be faster than normal pmap enter function
 */
void
pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
{
	pt_entry_t *pte, opte, npte;

	KASSERT(!(prot & ~VM_PROT_ALL));

	if (va < VM_MIN_KERNEL_ADDRESS)
		pte = vtopte(va);
	else
		pte = kvtopte(va);
#if defined(XENPV) && defined(DOM0OPS)
	if (pa < pmap_pa_start || pa >= pmap_pa_end) {
#ifdef DEBUG
		printf_nolog("%s: pa %#" PRIxPADDR " for va %#" PRIxVADDR
		    " outside range\n", __func__, pa, va);
#endif /* DEBUG */
		npte = pa;
	} else
#endif /* XENPV && DOM0OPS */
		npte = pmap_pa2pte(pa);
	npte |= protection_codes[prot] | PTE_P | pmap_pg_g;
	npte |= pmap_pat_flags(flags);
	opte = pmap_pte_testset(pte, npte); /* zap! */

	/*
	 * XXX: make sure we are not dealing with a large page, since the only
	 * large pages created are for the kernel image, and they should never
	 * be kentered.
	 */
	KASSERTMSG(!(opte & PTE_PS), "PTE_PS va=%#"PRIxVADDR, va);

	if ((opte & (PTE_P | PTE_A)) == (PTE_P | PTE_A)) {
		/* This should not happen. */
		printf_nolog("%s: mapping already present\n", __func__);
		kpreempt_disable();
		pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER);
		kpreempt_enable();
	}
}

__strict_weak_alias(pmap_kenter_ma, pmap_kenter_pa);

#if defined(__x86_64__)
/*
 * Change protection for a virtual address.  Local to the current CPU
 * only; no TLB shootdowns are issued.
 *
 * => must be called with preemption disabled
 */
void
pmap_changeprot_local(vaddr_t va, vm_prot_t prot)
{
	pt_entry_t *pte, opte, npte;

	KASSERT(kpreempt_disabled());

	if (va < VM_MIN_KERNEL_ADDRESS)
		pte = vtopte(va);
	else
		pte = kvtopte(va);

	npte = opte = *pte;

	if ((prot & VM_PROT_WRITE) != 0)
		npte |= PTE_W;
	else
		npte &= ~(PTE_W|PTE_D);

	if (opte != npte) {
		pmap_pte_set(pte, npte);
		pmap_pte_flush();
		invlpg(va);
	}
}
#endif /* defined(__x86_64__) */

/*
 * pmap_kremove: remove kernel mapping(s) without R/M (pv_entry) tracking
 *
 * => no need to lock anything
 * => caller must dispose of any vm_page mapped in the va range
 * => note: not an inline function
 * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE
 * => we assume kernel only unmaps valid addresses and thus don't bother
 *    checking the valid bit before doing TLB flushing
 * => must be followed by call to pmap_update() before reuse of page
 */
static void
pmap_kremove1(vaddr_t sva, vsize_t len, bool localonly)
{
	pt_entry_t *pte, opte;
	vaddr_t va, eva;

	eva = sva + len;

	kpreempt_disable();
	for (va = sva; va < eva; va += PAGE_SIZE) {
		pte = kvtopte(va);
		opte = pmap_pte_testset(pte, 0); /* zap! */
		if ((opte & (PTE_P | PTE_A)) == (PTE_P | PTE_A) && !localonly) {
			pmap_tlb_shootdown(pmap_kernel(), va, opte,
			    TLBSHOOT_KREMOVE);
		}
		KASSERTMSG((opte & PTE_PS) == 0,
		    "va %#" PRIxVADDR " is a large page", va);
		KASSERTMSG((opte & PTE_PVLIST) == 0,
		    "va %#" PRIxVADDR " is a pv tracked page", va);
	}
	if (localonly) {
		tlbflushg();
	}
	kpreempt_enable();
}

void
pmap_kremove(vaddr_t sva, vsize_t len)
{

	pmap_kremove1(sva, len, false);
}

/*
 * pmap_kremove_local: like pmap_kremove(), but only worry about
 * TLB invalidations on the current CPU.  this is only intended
 * for use while writing kernel crash dumps, either after panic
 * or via reboot -d.
 */
void
pmap_kremove_local(vaddr_t sva, vsize_t len)
{

	pmap_kremove1(sva, len, true);
}
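
/*
 * Illustrative sketch (editorial addition, not from the original
 * source): a typical temporary kernel mapping using the pair above.
 * "va" is assumed to be a page-aligned KVA the caller already owns.
 *
 *	pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
 *	// ... access the page through va ...
 *	pmap_kremove(va, PAGE_SIZE);
 *	pmap_update(pmap_kernel());	// required before reusing the va
 */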

/*
 * p m a p   i n i t   f u n c t i o n s
 *
 * pmap_bootstrap and pmap_init are called during system startup
 * to init the pmap module.  pmap_bootstrap() does a low level
 * init just to get things rolling.  pmap_init() finishes the job.
 */

/*
 * pmap_bootstrap_valloc: allocate a virtual address in the bootstrap area.
 * This function is to be used before any VM system has been set up.
 *
 * The va is taken from virtual_avail.
 */
static vaddr_t
pmap_bootstrap_valloc(size_t npages)
{
	vaddr_t va = virtual_avail;
	virtual_avail += npages * PAGE_SIZE;
	return va;
}

/*
 * [A large stretch of the file is elided here; the fragment below is
 * the tail of pmap_remove_pte().]
 */
	return true;
}

static void
pmap_remove_locked(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
{
	pt_entry_t *ptes;
	pd_entry_t pde;
	pd_entry_t * const *pdes;
	bool result;
	vaddr_t blkendva, va = sva;
	struct vm_page *ptp;
	struct pmap *pmap2;
	int lvl;

	KASSERT(mutex_owned(&pmap->pm_lock));

	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);

	/*
	 * removing one page?  take shortcut function.
	 */
	if (va + PAGE_SIZE == eva) {
		if (pmap_pdes_valid(va, pdes, &pde, &lvl)) {
			KASSERT(lvl == 1);

			/* Get PTP if non-kernel mapping. */
			if (pmap != pmap_kernel()) {
				ptp = pmap_find_ptp(pmap, va, 1);
				KASSERTMSG(ptp != NULL,
				    "%s: unmanaged PTP detected", __func__);
			} else {
				/* Never free kernel PTPs. */
				ptp = NULL;
			}

			result = pmap_remove_pte(pmap, ptp,
			    &ptes[pl1_i(va)], va);

			/*
			 * if mapping removed and the PTP is no longer
			 * being used, free it!
			 */
			if (result && ptp && ptp->wire_count <= 1)
				pmap_free_ptp(pmap, ptp, va, ptes, pdes);
		}
	} else for (/* null */ ; va < eva ; va = blkendva) {
		/* determine range of block */
		blkendva = x86_round_pdr(va + 1);
		if (blkendva > eva)
			blkendva = eva;

		if (!pmap_pdes_valid(va, pdes, &pde, &lvl)) {
			/* Skip a range corresponding to an invalid pde. */
			blkendva = (va & ptp_frames[lvl - 1]) + nbpd[lvl - 1];
			continue;
		}
		KASSERT(lvl == 1);

		/* Get PTP if non-kernel mapping. */
		if (pmap != pmap_kernel()) {
			ptp = pmap_find_ptp(pmap, va, 1);
			KASSERTMSG(ptp != NULL, "%s: unmanaged PTP detected",
			    __func__);
		} else {
			/* Never free kernel PTPs. */
			ptp = NULL;
		}

		pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[pl1_i(va)], va,
		    blkendva);

		/* If PTP is no longer being used, free it. */
		if (ptp && ptp->wire_count <= 1) {
			pmap_free_ptp(pmap, ptp, va, ptes, pdes);
		}
	}
	pmap_unmap_ptes(pmap, pmap2);
	pmap_drain_pv(pmap);
}

/*
 * pmap_remove: mapping removal function.
 *
 * => caller should not be holding any pmap locks
 */
void
pmap_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
{
	if (__predict_false(pmap->pm_remove != NULL)) {
		(*pmap->pm_remove)(pmap, sva, eva);
		return;
	}

	mutex_enter(&pmap->pm_lock);
	pmap_remove_locked(pmap, sva, eva);
	mutex_exit(&pmap->pm_lock);
}
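
/*
 * Editorial note: MI callers typically batch removals and flush once
 * afterwards, e.g.:
 *
 *	pmap_remove(pmap, sva, eva);
 *	pmap_update(pmap);	// push any deferred TLB shootdowns
 *
 * The pm_remove hook above lets an alternative pmap flavour (e.g. the
 * EPT variant used for hardware virtualization) substitute its own
 * removal routine.
 */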

/*
 * pmap_sync_pv: clear pte bits and return the old value of the pp_attrs.
 *
 * => The 'clearbits' parameter is either ~0 or PP_ATTRS_...
 * => Caller should disable kernel preemption.
 * => Issues tlb shootdowns if necessary.
 */
static int
pmap_sync_pv(struct pv_pte *pvpte, paddr_t pa, int clearbits, uint8_t *oattrs,
    pt_entry_t *optep)
{
	struct pmap *pmap;
	struct vm_page *ptp;
	vaddr_t va;
	pt_entry_t *ptep;
	pt_entry_t opte;
	pt_entry_t npte;
	pt_entry_t expect;
	bool need_shootdown;

	ptp = pvpte->pte_ptp;
	va = pvpte->pte_va;
	KASSERT(ptp == NULL || ptp->uobject != NULL);
	KASSERT(ptp == NULL || ptp_va2o(va, 1) == ptp->offset);
	pmap = ptp_to_pmap(ptp);
	KASSERT(kpreempt_disabled());

	if (__predict_false(pmap->pm_sync_pv != NULL)) {
		return (*pmap->pm_sync_pv)(ptp, va, pa, clearbits, oattrs,
		    optep);
	}

	expect = pmap_pa2pte(pa) | PTE_P;

	if (clearbits != ~0) {
		KASSERT((clearbits & ~(PP_ATTRS_D|PP_ATTRS_A|PP_ATTRS_W)) == 0);
		clearbits = pmap_pp_attrs_to_pte(clearbits);
	}

	ptep = pmap_map_pte(pmap, ptp, va);
	do {
		opte = *ptep;
		KASSERT((opte & (PTE_D | PTE_A)) != PTE_D);
		KASSERT((opte & (PTE_A | PTE_P)) != PTE_A);
		KASSERT(opte == 0 || (opte & PTE_P) != 0);
		if ((opte & (PTE_FRAME | PTE_P)) != expect) {
			/*
			 * We lost a race with a V->P operation like
			 * pmap_remove().  Wait for the competitor to
			 * finish reflecting the pte bits into pp_attrs.
			 */
			pmap_unmap_pte();
			return EAGAIN;
		}

		/*
		 * Check if there's anything to do on this PTE.
		 */
		if ((opte & clearbits) == 0) {
			need_shootdown = false;
			break;
		}

		/*
		 * We need a shootdown if the PTE has been cached in the
		 * TLB (PTE_A), unless we are clearing only the PTE_W bit
		 * and the page is not cached as writable (PTE_D clear).
		 */
		need_shootdown = (opte & PTE_A) != 0 &&
		    !(clearbits == PTE_W && (opte & PTE_D) == 0);

		npte = opte & ~clearbits;

		/*
		 * If we need a shootdown anyway, clear PTE_A and PTE_D.
		 */
		if (need_shootdown) {
			npte &= ~(PTE_A | PTE_D);
		}
		KASSERT((npte & (PTE_D | PTE_A)) != PTE_D);
		KASSERT((npte & (PTE_A | PTE_P)) != PTE_A);
		KASSERT(npte == 0 || (opte & PTE_P) != 0);
	} while (pmap_pte_cas(ptep, opte, npte) != opte);

	if (need_shootdown) {
		pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_SYNC_PV);
	}
	pmap_unmap_pte();

	*oattrs = pmap_pte_to_pp_attrs(opte);
	if (optep != NULL)
		*optep = opte;
	return 0;
}
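
/*
 * Editorial note: the callers below exercise pmap_sync_pv() in all
 * three of its modes: clearbits == ~0 (pmap_pp_remove()) zaps the
 * mapping outright, a PP_ATTRS_* mask (pmap_pp_clear_attrs()) strips
 * only the requested bits, and 0 (pmap_test_attrs()) merely samples
 * the current bits.
 */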

static void
pmap_pp_remove_ent(struct pmap *pmap, struct vm_page *ptp, pt_entry_t opte,
    vaddr_t va)
{
	struct pmap *pmap2;
	pt_entry_t *ptes;
	pd_entry_t * const *pdes;

	KASSERT(mutex_owned(&pmap->pm_lock));

	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
	pmap_stats_update_bypte(pmap, 0, opte);
	ptp->wire_count--;
	if (ptp->wire_count <= 1) {
		pmap_free_ptp(pmap, ptp, va, ptes, pdes);
	}
	pmap_unmap_ptes(pmap, pmap2);
}

static void
pmap_pp_remove(struct pmap_page *pp, paddr_t pa)
{
	struct pv_pte *pvpte;
	struct vm_page *ptp;
	uintptr_t sum;
	uint8_t oattrs;
	bool locked;

	/*
	 * Do an unlocked check to see if the page has no mappings, e.g.
	 * when pmap_remove_all() was called before amap_wipeout() for a
	 * process-private amap - a common case.  The page being removed
	 * must be on the way out, so we don't have to worry about
	 * concurrent attempts to enter it (otherwise the caller either
	 * doesn't care or has screwed up).
	 */
	sum = (uintptr_t)atomic_load_relaxed(&pp->pp_pte.pte_va);
	sum |= (uintptr_t)atomic_load_relaxed(&pp->pp_pte.pte_ptp);
	sum |= (uintptr_t)atomic_load_relaxed(&pp->pp_pvlist.lh_first);
	if (sum == 0) {
		return;
	}

	kpreempt_disable();
	for (;;) {
		struct pmap *pmap;
		struct pv_entry *pve;
		pt_entry_t opte;
		vaddr_t va;

		mutex_spin_enter(&pp->pp_lock);
		if ((pvpte = pv_pte_first(pp)) == NULL) {
			mutex_spin_exit(&pp->pp_lock);
			break;
		}

		/*
		 * Add a reference to the pmap before clearing the pte.
		 * Otherwise the pmap can disappear behind us.
		 */
		ptp = pvpte->pte_ptp;
		pmap = ptp_to_pmap(ptp);
		KASSERT(pmap->pm_obj[0].uo_refs > 0);
		if (ptp != NULL) {
			pmap_reference(pmap);
		}

		/*
		 * Now try to lock it.  We need a direct handoff between
		 * pp_lock and pm_lock to know the pv_entry is kept intact
		 * and kept associated with this pmap.  If that can't be
		 * had, wait for the pmap's lock to become free and then
		 * retry.
		 */
		locked = mutex_tryenter(&pmap->pm_lock);
		mutex_spin_exit(&pp->pp_lock);
		if (!locked) {
			mutex_enter(&pmap->pm_lock);
			/* nothing, just wait for it */
			mutex_exit(&pmap->pm_lock);
			if (ptp != NULL) {
				pmap_destroy(pmap);
			}
			continue;
		}
		va = pvpte->pte_va;

		KASSERTMSG(pmap->pm_stats.resident_count > PDP_SIZE,
		    "va %lx pmap %p ptp %p is empty", va, pmap, ptp);
		KASSERTMSG(ptp == NULL || (ptp->flags & PG_FREE) == 0,
		    "va %lx pmap %p ptp %p is free", va, pmap, ptp);
		KASSERTMSG(ptp == NULL || ptp->wire_count > 1,
		    "va %lx pmap %p ptp %p is empty", va, pmap, ptp);

#ifdef DEBUG
		pmap_check_pv(pmap, ptp, pp, pvpte->pte_va, true);
		rb_tree_t *tree = (ptp != NULL ?
		    &VM_PAGE_TO_PP(ptp)->pp_rb : &pmap_kernel_rb);
		pve = pmap_treelookup_pv(pmap, ptp, tree, va);
		if (pve == NULL) {
			KASSERTMSG(&pp->pp_pte == pvpte,
			    "va %lx pmap %p ptp %p pvpte %p pve %p oops 1",
			    va, pmap, ptp, pvpte, pve);
		} else {
			KASSERTMSG(&pve->pve_pte == pvpte,
			    "va %lx pmap %p ptp %p pvpte %p pve %p oops 2",
			    va, pmap, ptp, pvpte, pve);
		}
#endif

		if (pmap_sync_pv(pvpte, pa, ~0, &oattrs, &opte)) {
			panic("pmap_pp_remove: mapping not present");
		}

		pve = pmap_lookup_pv(pmap, ptp, pp, va);
		pmap_remove_pv(pmap, pp, ptp, va, pve, oattrs);

		/* Update the PTP reference count.  Free if last reference. */
		if (ptp != NULL) {
			KASSERT(pmap != pmap_kernel());
			pmap_tlb_shootnow();
			if (__predict_false(pmap->pm_pp_remove_ent != NULL)) {
				(*pmap->pm_pp_remove_ent)(pmap, ptp, opte, va);
			} else {
				pmap_pp_remove_ent(pmap, ptp, opte, va);
			}
		} else {
			KASSERT(pmap == pmap_kernel());
			pmap_stats_update_bypte(pmap, 0, opte);
		}
		pmap_tlb_shootnow();
		pmap_drain_pv(pmap);
		mutex_exit(&pmap->pm_lock);
		if (ptp != NULL) {
			pmap_destroy(pmap);
		}
	}
	kpreempt_enable();
}

/*
 * pmap_page_remove: remove a managed vm_page from all pmaps that map it
 *
 * => R/M bits are sync'd back to attrs
 */
void
pmap_page_remove(struct vm_page *pg)
{
	struct pmap_page *pp;
	paddr_t pa;

	pp = VM_PAGE_TO_PP(pg);
	pa = VM_PAGE_TO_PHYS(pg);
	pmap_pp_remove(pp, pa);
}

/*
 * pmap_pv_remove: remove an unmanaged pv-tracked page from all pmaps
 * that map it
 */
void
pmap_pv_remove(paddr_t pa)
{
	struct pmap_page *pp;

	pp = pmap_pv_tracked(pa);
	if (pp == NULL)
		panic("%s: page not pv-tracked: %#"PRIxPADDR, __func__, pa);
	pmap_pp_remove(pp, pa);
}

/*
 * p m a p   a t t r i b u t e   f u n c t i o n s
 * functions that test/change managed page's attributes
 * since a page can be mapped multiple times we must check each PTE that
 * maps it by going down the pv lists.
 */

/*
 * pmap_test_attrs: test a page's attributes
 */
bool
pmap_test_attrs(struct vm_page *pg, unsigned testbits)
{
	struct pmap_page *pp;
	struct pv_pte *pvpte;
	struct pmap *pmap;
	uint8_t oattrs;
	u_int result;
	paddr_t pa;

	pp = VM_PAGE_TO_PP(pg);
	if ((pp->pp_attrs & testbits) != 0) {
		return true;
	}
	pa = VM_PAGE_TO_PHYS(pg);
startover:
	mutex_spin_enter(&pp->pp_lock);
	for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) {
		if ((pp->pp_attrs & testbits) != 0) {
			break;
		}
		if (pmap_sync_pv(pvpte, pa, 0, &oattrs, NULL)) {
			/*
			 * raced with a V->P operation.  wait for the other
			 * side to finish by acquiring pmap's lock.  if we
			 * don't wait, updates to pp_attrs by the other side
			 * may go unseen.
			 */
			pmap = ptp_to_pmap(pvpte->pte_ptp);
			pmap_reference(pmap);
			mutex_spin_exit(&pp->pp_lock);
			mutex_enter(&pmap->pm_lock);
			/* nothing. */
			mutex_exit(&pmap->pm_lock);
			pmap_destroy(pmap);
			goto startover;
		}
		pp->pp_attrs |= oattrs;
	}
	result = pp->pp_attrs & testbits;
	mutex_spin_exit(&pp->pp_lock);

	/*
	 * note that we will exit the for loop early, with pvpte non-NULL,
	 * if we have found the bits we are testing for.
	 */

	return result != 0;
}

static bool
pmap_pp_clear_attrs(struct pmap_page *pp, paddr_t pa, unsigned clearbits)
{
	struct pv_pte *pvpte;
	struct pmap *pmap;
	uint8_t oattrs;
	u_int result;

startover:
	mutex_spin_enter(&pp->pp_lock);
	for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) {
		if (pmap_sync_pv(pvpte, pa, clearbits, &oattrs, NULL)) {
			/*
			 * raced with a V->P operation.  wait for the other
			 * side to finish by acquiring pmap's lock.  it is
			 * probably unmapping the page, and it will be gone
			 * when the loop is restarted.
			 */
4530 | pmap = ptp_to_pmap(pvpte->pte_ptp); | | 4530 | pmap = ptp_to_pmap(pvpte->pte_ptp); |
4531 | pmap_reference(pmap); | | 4531 | pmap_reference(pmap); |
4532 | mutex_spin_exit(&pp->pp_lock); | | 4532 | mutex_spin_exit(&pp->pp_lock); |
4533 | mutex_enter(&pmap->pm_lock); | | 4533 | mutex_enter(&pmap->pm_lock); |
4534 | /* nothing. */ | | 4534 | /* nothing. */ |
4535 | mutex_exit(&pmap->pm_lock); | | 4535 | mutex_exit(&pmap->pm_lock); |
4536 | pmap_destroy(pmap); | | 4536 | pmap_destroy(pmap); |
4537 | goto startover; | | 4537 | goto startover; |
4538 | } | | 4538 | } |
4539 | pp->pp_attrs |= oattrs; | | 4539 | pp->pp_attrs |= oattrs; |
4540 | } | | 4540 | } |
4541 | result = pp->pp_attrs & clearbits; | | 4541 | result = pp->pp_attrs & clearbits; |
4542 | pp->pp_attrs &= ~clearbits; | | 4542 | pp->pp_attrs &= ~clearbits; |
4543 | pmap_tlb_shootnow(); | | 4543 | pmap_tlb_shootnow(); |
4544 | mutex_spin_exit(&pp->pp_lock); | | 4544 | mutex_spin_exit(&pp->pp_lock); |
4545 | | | 4545 | |
4546 | return result != 0; | | 4546 | return result != 0; |
4547 | } | | 4547 | } |
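| | | | |
| /* | | | /* |
| * Both attribute walkers above resolve a race with a concurrent | | | * Both attribute walkers above resolve a race with a concurrent |
| * V->P operation the same way: take a reference on the owning | | | * V->P operation the same way: take a reference on the owning |
| * pmap, drop pp_lock, enter and immediately exit pm_lock to | | | * pmap, drop pp_lock, enter and immediately exit pm_lock to |
| * serialize behind the in-flight operation, drop the reference, | | | * serialize behind the in-flight operation, drop the reference, |
| * and rescan from "startover". | | | * and rescan from "startover". |
| */ | | | */ |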
4548 | | | 4548 | |
4549 | /* | | 4549 | /* |
4550 | * pmap_clear_attrs: clear the specified attribute for a page. | | 4550 | * pmap_clear_attrs: clear the specified attribute for a page. |
4551 | * | | 4551 | * |
4552 | * => we return true if we cleared one of the bits we were asked to | | 4552 | * => we return true if we cleared one of the bits we were asked to |
4553 | */ | | 4553 | */ |
4554 | bool | | 4554 | bool |
4555 | pmap_clear_attrs(struct vm_page *pg, unsigned clearbits) | | 4555 | pmap_clear_attrs(struct vm_page *pg, unsigned clearbits) |
4556 | { | | 4556 | { |
4557 | struct pmap_page *pp; | | 4557 | struct pmap_page *pp; |
4558 | paddr_t pa; | | 4558 | paddr_t pa; |
4559 | | | 4559 | |
4560 | pp = VM_PAGE_TO_PP(pg); | | 4560 | pp = VM_PAGE_TO_PP(pg); |
4561 | pa = VM_PAGE_TO_PHYS(pg); | | 4561 | pa = VM_PAGE_TO_PHYS(pg); |
4562 | | | 4562 | |
4563 | /* | | 4563 | /* |
4564 | * If this is a new page, assert it has no mappings and simply zap | | 4564 | * If this is a new page, assert it has no mappings and simply zap |
4565 | * the stored attributes without taking any locks. | | 4565 | * the stored attributes without taking any locks. |
4566 | */ | | 4566 | */ |
4567 | if ((pg->flags & PG_FAKE) != 0) { | | 4567 | if ((pg->flags & PG_FAKE) != 0) { |
4568 | KASSERT(atomic_load_relaxed(&pp->pp_pte.pte_va) == 0); | | 4568 | KASSERT(atomic_load_relaxed(&pp->pp_pte.pte_va) == 0); |
4569 | KASSERT(atomic_load_relaxed(&pp->pp_pte.pte_ptp) == NULL); | | 4569 | KASSERT(atomic_load_relaxed(&pp->pp_pte.pte_ptp) == NULL); |
4570 | KASSERT(atomic_load_relaxed(&pp->pp_pvlist.lh_first) == NULL); | | 4570 | KASSERT(atomic_load_relaxed(&pp->pp_pvlist.lh_first) == NULL); |
4571 | atomic_store_relaxed(&pp->pp_attrs, 0); | | 4571 | atomic_store_relaxed(&pp->pp_attrs, 0); |
4572 | return false; | | 4572 | return false; |
4573 | } else { | | 4573 | } else { |
4574 | return pmap_pp_clear_attrs(pp, pa, clearbits); | | 4574 | return pmap_pp_clear_attrs(pp, pa, clearbits); |
4575 | } | | 4575 | } |
4576 | } | | 4576 | } |
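| | | | |
| /* | | | /* |
| * For example (assuming the PP_ATTRS_* macros from pmap.h), the | | | * For example (assuming the PP_ATTRS_* macros from pmap.h), the |
| * MI wrappers reduce to calls such as: | | | * MI wrappers reduce to calls such as: |
| * | | | * |
| *	pmap_clear_attrs(pg, PP_ATTRS_D);	(pmap_clear_modify) | | | *	pmap_clear_attrs(pg, PP_ATTRS_D);	(pmap_clear_modify) |
| *	pmap_clear_attrs(pg, PP_ATTRS_A);	(pmap_clear_reference) | | | *	pmap_clear_attrs(pg, PP_ATTRS_A);	(pmap_clear_reference) |
| * | | | * |
| * with the return value reporting whether the bit had been set. | | | * with the return value reporting whether the bit had been set. |
| */ | | | */ |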
4577 | | | 4577 | |
4578 | /* | | 4578 | /* |
4579 | * pmap_pv_clear_attrs: clear the specified attributes for an unmanaged | | 4579 | * pmap_pv_clear_attrs: clear the specified attributes for an unmanaged |
4580 | * pv-tracked page. | | 4580 | * pv-tracked page. |
4581 | */ | | 4581 | */ |
4582 | bool | | 4582 | bool |
4583 | pmap_pv_clear_attrs(paddr_t pa, unsigned clearbits) | | 4583 | pmap_pv_clear_attrs(paddr_t pa, unsigned clearbits) |
4584 | { | | 4584 | { |
4585 | struct pmap_page *pp; | | 4585 | struct pmap_page *pp; |
4586 | | | 4586 | |
4587 | pp = pmap_pv_tracked(pa); | | 4587 | pp = pmap_pv_tracked(pa); |
4588 | if (pp == NULL) | | 4588 | if (pp == NULL) |
4589 | panic("%s: page not pv-tracked: %#"PRIxPADDR, __func__, pa); | | 4589 | panic("%s: page not pv-tracked: %#"PRIxPADDR, __func__, pa); |
4590 | | | 4590 | |
4591 | return pmap_pp_clear_attrs(pp, pa, clearbits); | | 4591 | return pmap_pp_clear_attrs(pp, pa, clearbits); |
4592 | } | | 4592 | } |
4593 | | | 4593 | |
4594 | /* | | 4594 | /* |
4595 | * p m a p p r o t e c t i o n f u n c t i o n s | | 4595 | * p m a p p r o t e c t i o n f u n c t i o n s |
4596 | */ | | 4596 | */ |
4597 | | | 4597 | |
4598 | /* | | 4598 | /* |
4599 | * pmap_page_protect: change the protection of all recorded mappings | | 4599 | * pmap_page_protect: change the protection of all recorded mappings |
4600 | * of a managed page | | 4600 | * of a managed page |
4601 | * | | 4601 | * |
4602 | * => NOTE: this is an inline function in pmap.h | | 4602 | * => NOTE: this is an inline function in pmap.h |
4603 | */ | | 4603 | */ |
4604 | | | 4604 | |
4605 | /* see pmap.h */ | | 4605 | /* see pmap.h */ |
4606 | | | 4606 | |
4607 | /* | | 4607 | /* |
4608 | * pmap_pv_protect: change the protection of all recorded mappings | | 4608 | * pmap_pv_protect: change the protection of all recorded mappings |
4609 | * of an unmanaged pv-tracked page | | 4609 | * of an unmanaged pv-tracked page |
4610 | * | | 4610 | * |
4611 | * => NOTE: this is an inline function in pmap.h | | 4611 | * => NOTE: this is an inline function in pmap.h |
4612 | */ | | 4612 | */ |
4613 | | | 4613 | |
4614 | /* see pmap.h */ | | 4614 | /* see pmap.h */ |
4615 | | | 4615 | |
4616 | /* | | 4616 | /* |
4617 | * pmap_protect: set the protection of the pages in a pmap | | 4617 | * pmap_protect: set the protection of the pages in a pmap |
4618 | * | | 4618 | * |
4619 | * => NOTE: this is an inline function in pmap.h | | 4619 | * => NOTE: this is an inline function in pmap.h |
4620 | */ | | 4620 | */ |
4621 | | | 4621 | |
4622 | /* see pmap.h */ | | 4622 | /* see pmap.h */ |
4623 | | | 4623 | |
4624 | /* | | 4624 | /* |
4625 | * pmap_write_protect: write-protect pages in a pmap. | | 4625 | * pmap_write_protect: write-protect pages in a pmap. |
4626 | * | | 4626 | * |
4627 | * Note for Xen-amd64. Xen automatically adds PTE_U to the kernel pages, but we | | 4627 | * Note for Xen-amd64. Xen automatically adds PTE_U to the kernel pages, but we |
4628 | * don't need to remove this bit when re-entering the PTEs here: Xen tracks the | | 4628 | * don't need to remove this bit when re-entering the PTEs here: Xen tracks the |
4629 | * kernel pages with a reserved bit (_PAGE_GUEST_KERNEL), so even if PTE_U is | | 4629 | * kernel pages with a reserved bit (_PAGE_GUEST_KERNEL), so even if PTE_U is |
4630 | * present the page will still be considered as a kernel page, and the privilege | | 4630 | * present the page will still be considered as a kernel page, and the privilege |
4631 | * separation will be enforced correctly. | | 4631 | * separation will be enforced correctly. |
4632 | */ | | 4632 | */ |
4633 | void | | 4633 | void |
4634 | pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) | | 4634 | pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) |
4635 | { | | 4635 | { |
4636 | pt_entry_t bit_rem, bit_put; | | 4636 | pt_entry_t bit_rem, bit_put; |
4637 | pt_entry_t *ptes; | | 4637 | pt_entry_t *ptes; |
4638 | pt_entry_t * const *pdes; | | 4638 | pt_entry_t * const *pdes; |
4639 | struct pmap *pmap2; | | 4639 | struct pmap *pmap2; |
4640 | vaddr_t blockend, va; | | 4640 | vaddr_t blockend, va; |
4641 | int lvl, i; | | 4641 | int lvl, i; |
4642 | | | 4642 | |
4643 | if (__predict_false(pmap->pm_write_protect != NULL)) { | | 4643 | if (__predict_false(pmap->pm_write_protect != NULL)) { |
4644 | (*pmap->pm_write_protect)(pmap, sva, eva, prot); | | 4644 | (*pmap->pm_write_protect)(pmap, sva, eva, prot); |
4645 | return; | | 4645 | return; |
4646 | } | | 4646 | } |
4647 | | | 4647 | |
4648 | bit_rem = 0; | | 4648 | bit_rem = 0; |
4649 | if (!(prot & VM_PROT_WRITE)) | | 4649 | if (!(prot & VM_PROT_WRITE)) |
4650 | bit_rem = PTE_W; | | 4650 | bit_rem = PTE_W; |
4651 | | | 4651 | |
4652 | bit_put = 0; | | 4652 | bit_put = 0; |
4653 | if (!(prot & VM_PROT_EXECUTE)) | | 4653 | if (!(prot & VM_PROT_EXECUTE)) |
4654 | bit_put = pmap_pg_nx; | | 4654 | bit_put = pmap_pg_nx; |
4655 | | | 4655 | |
4656 | sva &= ~PAGE_MASK; | | 4656 | sva &= ~PAGE_MASK; |
4657 | eva &= ~PAGE_MASK; | | 4657 | eva &= ~PAGE_MASK; |
4658 | | | 4658 | |
4659 | /* | | 4659 | /* |
4660 | * Acquire pmap. No need to lock the kernel pmap as we won't | | 4660 | * Acquire pmap. No need to lock the kernel pmap as we won't |
4661 | * be touching PV entries or stats, and kernel PDEs aren't | | 4661 | * be touching PV entries or stats, and kernel PDEs aren't |
4662 | * freed. | | 4662 | * freed. |
4663 | */ | | 4663 | */ |
4664 | if (pmap != pmap_kernel()) { | | 4664 | if (pmap != pmap_kernel()) { |
4665 | mutex_enter(&pmap->pm_lock); | | 4665 | mutex_enter(&pmap->pm_lock); |
4666 | } | | 4666 | } |
4667 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); | | 4667 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); |
4668 | | | 4668 | |
4669 | for (va = sva ; va < eva; va = blockend) { | | 4669 | for (va = sva ; va < eva; va = blockend) { |
4670 | pt_entry_t *spte, *epte; | | 4670 | pt_entry_t *spte, *epte; |
4671 | | | 4671 | |
4672 | blockend = x86_round_pdr(va + 1); | | 4672 | blockend = x86_round_pdr(va + 1); |
4673 | if (blockend > eva) | | 4673 | if (blockend > eva) |
4674 | blockend = eva; | | 4674 | blockend = eva; |
4675 | | | 4675 | |
4676 | /* Is it a valid block? */ | | 4676 | /* Is it a valid block? */ |
4677 | if (!pmap_pdes_valid(va, pdes, NULL, &lvl)) { | | 4677 | if (!pmap_pdes_valid(va, pdes, NULL, &lvl)) { |
4678 | continue; | | 4678 | continue; |
4679 | } | | 4679 | } |
4680 | KASSERT(va < VM_MAXUSER_ADDRESS || va >= VM_MAX_ADDRESS); | | 4680 | KASSERT(va < VM_MAXUSER_ADDRESS || va >= VM_MAX_ADDRESS); |
4681 | KASSERT(lvl == 1); | | 4681 | KASSERT(lvl == 1); |
4682 | | | 4682 | |
4683 | spte = &ptes[pl1_i(va)]; | | 4683 | spte = &ptes[pl1_i(va)]; |
4684 | epte = &ptes[pl1_i(blockend)]; | | 4684 | epte = &ptes[pl1_i(blockend)]; |
4685 | | | 4685 | |
4686 | for (i = 0; spte < epte; spte++, i++) { | | 4686 | for (i = 0; spte < epte; spte++, i++) { |
4687 | pt_entry_t opte, npte; | | 4687 | pt_entry_t opte, npte; |
4688 | | | 4688 | |
4689 | do { | | 4689 | do { |
4690 | opte = *spte; | | 4690 | opte = *spte; |
4691 | if (!pmap_valid_entry(opte)) { | | 4691 | if (!pmap_valid_entry(opte)) { |
4692 | goto next; | | 4692 | goto next; |
4693 | } | | 4693 | } |
4694 | npte = (opte & ~bit_rem) | bit_put; | | 4694 | npte = (opte & ~bit_rem) | bit_put; |
4695 | } while (pmap_pte_cas(spte, opte, npte) != opte); | | 4695 | } while (pmap_pte_cas(spte, opte, npte) != opte); |
4696 | | | 4696 | |
4697 | if ((opte & PTE_D) != 0) { | | 4697 | if ((opte & PTE_D) != 0) { |
4698 | vaddr_t tva = va + x86_ptob(i); | | 4698 | vaddr_t tva = va + x86_ptob(i); |
4699 | pmap_tlb_shootdown(pmap, tva, opte, | | 4699 | pmap_tlb_shootdown(pmap, tva, opte, |
4700 | TLBSHOOT_WRITE_PROTECT); | | 4700 | TLBSHOOT_WRITE_PROTECT); |
4701 | } | | 4701 | } |
4702 | next:; | | 4702 | next:; |
4703 | } | | 4703 | } |
4704 | } | | 4704 | } |
4705 | | | 4705 | |
4706 | /* Release pmap. */ | | 4706 | /* Release pmap. */ |
4707 | pmap_unmap_ptes(pmap, pmap2); | | 4707 | pmap_unmap_ptes(pmap, pmap2); |
4708 | if (pmap != pmap_kernel()) { | | 4708 | if (pmap != pmap_kernel()) { |
4709 | mutex_exit(&pmap->pm_lock); | | 4709 | mutex_exit(&pmap->pm_lock); |
4710 | } | | 4710 | } |
4711 | } | | 4711 | } |
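| | | | |
| /* | | | /* |
| * Illustrative use of the prot->bit translation above: a call | | | * Illustrative use of the prot->bit translation above: a call |
| * such as | | | * such as |
| * | | | * |
| *	pmap_write_protect(pmap_kernel(), sva, eva, VM_PROT_READ); | | | *	pmap_write_protect(pmap_kernel(), sva, eva, VM_PROT_READ); |
| * | | | * |
| * strips PTE_W and sets the NX bit (when pmap_pg_nx is non-zero) | | | * strips PTE_W and sets the NX bit (when pmap_pg_nx is non-zero) |
| * on every present PTE in [sva, eva), while passing | | | * on every present PTE in [sva, eva), while passing |
| * VM_PROT_READ | VM_PROT_EXECUTE would remove only write access. | | | * VM_PROT_READ | VM_PROT_EXECUTE would remove only write access. |
| */ | | | */ |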
4712 | | | 4712 | |
4713 | /* | | 4713 | /* |
4714 | * pmap_unwire: clear the wired bit in the PTE. | | 4714 | * pmap_unwire: clear the wired bit in the PTE. |
4715 | * | | 4715 | * |
4716 | * => Mapping should already be present. | | 4716 | * => Mapping should already be present. |
4717 | */ | | 4717 | */ |
4718 | void | | 4718 | void |
4719 | pmap_unwire(struct pmap *pmap, vaddr_t va) | | 4719 | pmap_unwire(struct pmap *pmap, vaddr_t va) |
4720 | { | | 4720 | { |
4721 | pt_entry_t *ptes, *ptep, opte; | | 4721 | pt_entry_t *ptes, *ptep, opte; |
4722 | pd_entry_t * const *pdes; | | 4722 | pd_entry_t * const *pdes; |
4723 | struct pmap *pmap2; | | 4723 | struct pmap *pmap2; |
4724 | int lvl; | | 4724 | int lvl; |
4725 | | | 4725 | |
4726 | if (__predict_false(pmap->pm_unwire != NULL)) { | | 4726 | if (__predict_false(pmap->pm_unwire != NULL)) { |
4727 | (*pmap->pm_unwire)(pmap, va); | | 4727 | (*pmap->pm_unwire)(pmap, va); |
4728 | return; | | 4728 | return; |
4729 | } | | 4729 | } |
4730 | | | 4730 | |
4731 | /* | | 4731 | /* |
4732 | * Acquire pmap. Need to lock the kernel pmap only to protect the | | 4732 | * Acquire pmap. Need to lock the kernel pmap only to protect the |
4733 | * statistics. | | 4733 | * statistics. |
4734 | */ | | 4734 | */ |
4735 | mutex_enter(&pmap->pm_lock); | | 4735 | mutex_enter(&pmap->pm_lock); |
4736 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); | | 4736 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); |
4737 | | | 4737 | |
4738 | if (!pmap_pdes_valid(va, pdes, NULL, &lvl)) { | | 4738 | if (!pmap_pdes_valid(va, pdes, NULL, &lvl)) { |
4739 | panic("%s: invalid PDE va=%#" PRIxVADDR, __func__, va); | | 4739 | panic("%s: invalid PDE va=%#" PRIxVADDR, __func__, va); |
4740 | } | | 4740 | } |
4741 | KASSERT(lvl == 1); | | 4741 | KASSERT(lvl == 1); |
4742 | | | 4742 | |
4743 | ptep = &ptes[pl1_i(va)]; | | 4743 | ptep = &ptes[pl1_i(va)]; |
4744 | opte = *ptep; | | 4744 | opte = *ptep; |
4745 | KASSERT(pmap_valid_entry(opte)); | | 4745 | KASSERT(pmap_valid_entry(opte)); |
4746 | | | 4746 | |
4747 | if (opte & PTE_WIRED) { | | 4747 | if (opte & PTE_WIRED) { |
4748 | pt_entry_t npte = opte & ~PTE_WIRED; | | 4748 | pt_entry_t npte = opte & ~PTE_WIRED; |
4749 | | | 4749 | |
4750 | opte = pmap_pte_testset(ptep, npte); | | 4750 | opte = pmap_pte_testset(ptep, npte); |
4751 | pmap_stats_update_bypte(pmap, npte, opte); | | 4751 | pmap_stats_update_bypte(pmap, npte, opte); |
4752 | } else { | | 4752 | } else { |
4753 | printf("%s: wiring for pmap %p va %#" PRIxVADDR | | 4753 | printf("%s: wiring for pmap %p va %#" PRIxVADDR |
4754 | " did not change!\n", __func__, pmap, va); | | 4754 | " did not change!\n", __func__, pmap, va); |
4755 | } | | 4755 | } |
4756 | | | 4756 | |
4757 | /* Release pmap. */ | | 4757 | /* Release pmap. */ |
4758 | pmap_unmap_ptes(pmap, pmap2); | | 4758 | pmap_unmap_ptes(pmap, pmap2); |
4759 | mutex_exit(&pmap->pm_lock); | | 4759 | mutex_exit(&pmap->pm_lock); |
4760 | } | | 4760 | } |
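| | | | |
| /* | | | /* |
| * PTE_WIRED is a software-only PTE bit (an assumption from the | | | * PTE_WIRED is a software-only PTE bit (an assumption from the |
| * x86 pte headers), so clearing it needs no TLB shootdown; only | | | * x86 pte headers), so clearing it needs no TLB shootdown; only |
| * the wired-count statistics change, via pmap_stats_update_bypte(). | | | * the wired-count statistics change, via pmap_stats_update_bypte(). |
| */ | | | */ |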
4761 | | | 4761 | |
4762 | /* | | 4762 | /* |
4763 | * pmap_copy: copy mappings from one pmap to another | | 4763 | * pmap_copy: copy mappings from one pmap to another |
4764 | * | | 4764 | * |
4765 | * => optional function | | 4765 | * => optional function |
4766 | * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) | | 4766 | * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) |
4767 | */ | | 4767 | */ |
4768 | | | 4768 | |
4769 | /* | | 4769 | /* |
4770 | * defined as macro in pmap.h | | 4770 | * defined as macro in pmap.h |
4771 | */ | | 4771 | */ |
4772 | | | 4772 | |
4773 | __strict_weak_alias(pmap_enter, pmap_enter_default); | | 4773 | __strict_weak_alias(pmap_enter, pmap_enter_default); |
4774 | | | 4774 | |
4775 | int | | 4775 | int |
4776 | pmap_enter_default(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, | | 4776 | pmap_enter_default(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, |
4777 | u_int flags) | | 4777 | u_int flags) |
4778 | { | | 4778 | { |
4779 | if (__predict_false(pmap->pm_enter != NULL)) { | | 4779 | if (__predict_false(pmap->pm_enter != NULL)) { |
4780 | return (*pmap->pm_enter)(pmap, va, pa, prot, flags); | | 4780 | return (*pmap->pm_enter)(pmap, va, pa, prot, flags); |
4781 | } | | 4781 | } |
4782 | | | 4782 | |
4783 | return pmap_enter_ma(pmap, va, pa, pa, prot, flags, 0); | | 4783 | return pmap_enter_ma(pmap, va, pa, pa, prot, flags, 0); |
4784 | } | | 4784 | } |
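| | | | |
| /* | | | /* |
| * Because of the weak alias above, a plain pmap_enter() call | | | * Because of the weak alias above, a plain pmap_enter() call |
| * resolves to pmap_enter_default() unless a stronger definition | | | * resolves to pmap_enter_default() unless a stronger definition |
| * overrides it (the Xen pmap provides its own); the default maps | | | * overrides it (the Xen pmap provides its own); the default maps |
| * with ma == pa and domid 0, as shown above. | | | * with ma == pa and domid 0, as shown above. |
| */ | | | */ |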
4785 | | | 4785 | |
4786 | /* | | 4786 | /* |
4787 | * pmap_enter: enter a mapping into a pmap | | 4787 | * pmap_enter: enter a mapping into a pmap |
4788 | * | | 4788 | * |
4789 | * => must be done "now" ... no lazy-evaluation | | 4789 | * => must be done "now" ... no lazy-evaluation |
4790 | */ | | 4790 | */ |
4791 | int | | 4791 | int |
4792 | pmap_enter_ma(struct pmap *pmap, vaddr_t va, paddr_t ma, paddr_t pa, | | 4792 | pmap_enter_ma(struct pmap *pmap, vaddr_t va, paddr_t ma, paddr_t pa, |
4793 | vm_prot_t prot, u_int flags, int domid) | | 4793 | vm_prot_t prot, u_int flags, int domid) |
4794 | { | | 4794 | { |
4795 | pt_entry_t *ptes, opte, npte; | | 4795 | pt_entry_t *ptes, opte, npte; |
4796 | pt_entry_t *ptep; | | 4796 | pt_entry_t *ptep; |
4797 | pd_entry_t * const *pdes; | | 4797 | pd_entry_t * const *pdes; |
4798 | struct vm_page *ptp; | | 4798 | struct vm_page *ptp; |
4799 | struct vm_page *new_pg, *old_pg; | | 4799 | struct vm_page *new_pg, *old_pg; |
4800 | struct pmap_page *new_pp, *old_pp; | | 4800 | struct pmap_page *new_pp, *old_pp; |
4801 | struct pv_entry *old_pve, *new_pve; | | 4801 | struct pv_entry *old_pve, *new_pve; |
4802 | bool wired = (flags & PMAP_WIRED) != 0; | | 4802 | bool wired = (flags & PMAP_WIRED) != 0; |
4803 | struct pmap *pmap2; | | 4803 | struct pmap *pmap2; |
4804 | struct pmap_ptparray pt; | | 4804 | struct pmap_ptparray pt; |
4805 | int error; | | 4805 | int error; |
4806 | bool getptp, samepage, new_embedded; | | 4806 | bool getptp, samepage, new_embedded; |
4807 | rb_tree_t *tree; | | 4807 | rb_tree_t *tree; |
4808 | | | 4808 | |
4809 | KASSERT(pmap_initialized); | | 4809 | KASSERT(pmap_initialized); |
4810 | KASSERT(va < VM_MAX_KERNEL_ADDRESS); | | 4810 | KASSERT(va < VM_MAX_KERNEL_ADDRESS); |
4811 | KASSERTMSG(va != (vaddr_t)PDP_BASE, "%s: trying to map va=%#" | | 4811 | KASSERTMSG(va != (vaddr_t)PDP_BASE, "%s: trying to map va=%#" |
4812 | PRIxVADDR " over PDP!", __func__, va); | | 4812 | PRIxVADDR " over PDP!", __func__, va); |
4813 | KASSERTMSG(va < VM_MIN_KERNEL_ADDRESS || | | 4813 | KASSERTMSG(va < VM_MIN_KERNEL_ADDRESS || |
4814 | pmap_valid_entry(pmap->pm_pdir[pl_i(va, PTP_LEVELS)]), | | 4814 | pmap_valid_entry(pmap->pm_pdir[pl_i(va, PTP_LEVELS)]), |
4815 | "%s: missing kernel PTP for va=%#" PRIxVADDR, __func__, va); | | 4815 | "%s: missing kernel PTP for va=%#" PRIxVADDR, __func__, va); |
4816 | | | 4816 | |
4817 | #ifdef XENPV | | 4817 | #ifdef XENPV |
4818 | KASSERT(domid == DOMID_SELF || pa == 0); | | 4818 | KASSERT(domid == DOMID_SELF || pa == 0); |
4819 | #endif | | 4819 | #endif |
4820 | | | 4820 | |
4821 | npte = ma | protection_codes[prot] | PTE_P; | | 4821 | npte = ma | protection_codes[prot] | PTE_P; |
4822 | npte |= pmap_pat_flags(flags); | | 4822 | npte |= pmap_pat_flags(flags); |
4823 | if (wired) | | 4823 | if (wired) |
4824 | npte |= PTE_WIRED; | | 4824 | npte |= PTE_WIRED; |
4825 | if (va < VM_MAXUSER_ADDRESS) | | 4825 | if (va < VM_MAXUSER_ADDRESS) |
4826 | npte |= PTE_U; | | 4826 | npte |= PTE_U; |
4827 | | | 4827 | |
4828 | if (pmap == pmap_kernel()) | | 4828 | if (pmap == pmap_kernel()) |
4829 | npte |= pmap_pg_g; | | 4829 | npte |= pmap_pg_g; |
4830 | if (flags & VM_PROT_ALL) { | | 4830 | if (flags & VM_PROT_ALL) { |
4831 | npte |= PTE_A; | | 4831 | npte |= PTE_A; |
4832 | if (flags & VM_PROT_WRITE) { | | 4832 | if (flags & VM_PROT_WRITE) { |
4833 | KASSERT((npte & PTE_W) != 0); | | 4833 | KASSERT((npte & PTE_W) != 0); |
4834 | npte |= PTE_D; | | 4834 | npte |= PTE_D; |
4835 | } | | 4835 | } |
4836 | } | | 4836 | } |
4837 | | | 4837 | |
4838 | #ifdef XENPV | | 4838 | #ifdef XENPV |
4839 | if (domid != DOMID_SELF) | | 4839 | if (domid != DOMID_SELF) |
4840 | new_pg = NULL; | | 4840 | new_pg = NULL; |
4841 | else | | 4841 | else |
4842 | #endif | | 4842 | #endif |
4843 | new_pg = PHYS_TO_VM_PAGE(pa); | | 4843 | new_pg = PHYS_TO_VM_PAGE(pa); |
4844 | | | 4844 | |
4845 | if (new_pg != NULL) { | | 4845 | if (new_pg != NULL) { |
4846 | /* This is a managed page */ | | 4846 | /* This is a managed page */ |
4847 | npte |= PTE_PVLIST; | | 4847 | npte |= PTE_PVLIST; |
4848 | new_pp = VM_PAGE_TO_PP(new_pg); | | 4848 | new_pp = VM_PAGE_TO_PP(new_pg); |
4849 | PMAP_CHECK_PP(new_pp); | | 4849 | PMAP_CHECK_PP(new_pp); |
4850 | } else if ((new_pp = pmap_pv_tracked(pa)) != NULL) { | | 4850 | } else if ((new_pp = pmap_pv_tracked(pa)) != NULL) { |
4851 | /* This is an unmanaged pv-tracked page */ | | 4851 | /* This is an unmanaged pv-tracked page */ |
4852 | npte |= PTE_PVLIST; | | 4852 | npte |= PTE_PVLIST; |
4853 | PMAP_CHECK_PP(new_pp); | | 4853 | PMAP_CHECK_PP(new_pp); |
4854 | } else { | | 4854 | } else { |
4855 | new_pp = NULL; | | 4855 | new_pp = NULL; |
4856 | } | | 4856 | } |
4857 | | | 4857 | |
4858 | /* Begin by locking the pmap. */ | | 4858 | /* Begin by locking the pmap. */ |
4859 | mutex_enter(&pmap->pm_lock); | | 4859 | mutex_enter(&pmap->pm_lock); |
4860 | | | 4860 | |
4861 | /* Look up the PTP. Allocate if none present. */ | | 4861 | /* Look up the PTP. Allocate if none present. */ |
4862 | ptp = NULL; | | 4862 | ptp = NULL; |
4863 | getptp = false; | | 4863 | getptp = false; |
4864 | if (pmap != pmap_kernel()) { | | 4864 | if (pmap != pmap_kernel()) { |
4865 | ptp = pmap_find_ptp(pmap, va, 1); | | 4865 | ptp = pmap_find_ptp(pmap, va, 1); |
4866 | if (ptp == NULL) { | | 4866 | if (ptp == NULL) { |
4867 | getptp = true; | | 4867 | getptp = true; |
4868 | error = pmap_get_ptp(pmap, &pt, va, flags, &ptp); | | 4868 | error = pmap_get_ptp(pmap, &pt, va, flags, &ptp); |
4869 | if (error != 0) { | | 4869 | if (error != 0) { |
4870 | if (flags & PMAP_CANFAIL) { | | 4870 | if (flags & PMAP_CANFAIL) { |
4871 | mutex_exit(&pmap->pm_lock); | | 4871 | mutex_exit(&pmap->pm_lock); |
4872 | return error; | | 4872 | return error; |
4873 | } | | 4873 | } |
4874 | panic("%s: get ptp failed, error=%d", __func__, | | 4874 | panic("%s: get ptp failed, error=%d", __func__, |
4875 | error); | | 4875 | error); |
4876 | } | | 4876 | } |
4877 | } | | 4877 | } |
4878 | tree = &VM_PAGE_TO_PP(ptp)->pp_rb; | | 4878 | tree = &VM_PAGE_TO_PP(ptp)->pp_rb; |
4879 | } else { | | 4879 | } else { |
4880 | /* Embedded PV entries rely on this. */ | | 4880 | /* Embedded PV entries rely on this. */ |
4881 | KASSERT(va != 0); | | 4881 | KASSERT(va != 0); |
4882 | tree = &pmap_kernel_rb; | | 4882 | tree = &pmap_kernel_rb; |
4883 | } | | 4883 | } |
4884 | | | 4884 | |
4885 | /* | | 4885 | /* |
4886 | * Look up the old PV entry at this VA (if any), and insert a new PV | | 4886 | * Look up the old PV entry at this VA (if any), and insert a new PV |
4887 | * entry if required for the new mapping. Temporarily track the old | | 4887 | * entry if required for the new mapping. Temporarily track the old |
4888 | * and new mappings concurrently. Only after the old mapping is | | 4888 | * and new mappings concurrently. Only after the old mapping is |
4889 | * evicted from the pmap will we remove its PV entry. Otherwise, | | 4889 | * evicted from the pmap will we remove its PV entry. Otherwise, |
4890 | * our picture of modified/accessed state for either page could get | | 4890 | * our picture of modified/accessed state for either page could get |
4891 | * out of sync (we need any P->V operation for either page to stall | | 4891 | * out of sync (we need any P->V operation for either page to stall |
4892 | * on pmap->pm_lock until done here). | | 4892 | * on pmap->pm_lock until done here). |
4893 | */ | | 4893 | */ |
4894 | new_pve = NULL; | | 4894 | new_pve = NULL; |
4895 | old_pve = NULL; | | 4895 | old_pve = NULL; |
4896 | samepage = false; | | 4896 | samepage = false; |
4897 | new_embedded = false; | | 4897 | new_embedded = false; |
4898 | | | 4898 | |
4899 | if (new_pp != NULL) { | | 4899 | if (new_pp != NULL) { |
4900 | error = pmap_enter_pv(pmap, new_pp, ptp, va, &new_pve, | | 4900 | error = pmap_enter_pv(pmap, new_pp, ptp, va, &new_pve, |
4901 | &old_pve, &samepage, &new_embedded, tree); | | 4901 | &old_pve, &samepage, &new_embedded, tree); |
4902 | | | 4902 | |
4903 | /* | | 4903 | /* |
4904 | * If a new pv_entry was needed and none was available, we | | 4904 | * If a new pv_entry was needed and none was available, we |
4905 | * can go no further. | | 4905 | * can go no further. |
4906 | */ | | 4906 | */ |
4907 | if (error != 0) { | | 4907 | if (error != 0) { |
4908 | if (flags & PMAP_CANFAIL) { | | 4908 | if (flags & PMAP_CANFAIL) { |
4909 | if (getptp) { | | 4909 | if (getptp) { |
4910 | pmap_unget_ptp(pmap, &pt); | | 4910 | pmap_unget_ptp(pmap, &pt); |
4911 | } | | 4911 | } |
4912 | mutex_exit(&pmap->pm_lock); | | 4912 | mutex_exit(&pmap->pm_lock); |
4913 | return error; | | 4913 | return error; |
4914 | } | | 4914 | } |
4915 | panic("%s: alloc pve failed", __func__); | | 4915 | panic("%s: alloc pve failed", __func__); |
4916 | } | | 4916 | } |
4917 | } else { | | 4917 | } else { |
4918 | old_pve = pmap_treelookup_pv(pmap, ptp, tree, va); | | 4918 | old_pve = pmap_treelookup_pv(pmap, ptp, tree, va); |
4919 | } | | 4919 | } |
4920 | | | 4920 | |
4921 | /* Map PTEs into address space. */ | | 4921 | /* Map PTEs into address space. */ |
4922 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); | | 4922 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); |
4923 | | | 4923 | |
4924 | /* Install any newly allocated PTPs. */ | | 4924 | /* Install any newly allocated PTPs. */ |
4925 | if (getptp) { | | 4925 | if (getptp) { |
4926 | pmap_install_ptp(pmap, &pt, va, pdes); | | 4926 | pmap_install_ptp(pmap, &pt, va, pdes); |
4927 | } | | 4927 | } |
4928 | | | 4928 | |
4929 | /* Check if there is an existing mapping. */ | | 4929 | /* Check if there is an existing mapping. */ |
4930 | ptep = &ptes[pl1_i(va)]; | | 4930 | ptep = &ptes[pl1_i(va)]; |
4931 | opte = *ptep; | | 4931 | opte = *ptep; |
4932 | bool have_oldpa = pmap_valid_entry(opte); | | 4932 | bool have_oldpa = pmap_valid_entry(opte); |
4933 | paddr_t oldpa = pmap_pte2pa(opte); | | 4933 | paddr_t oldpa = pmap_pte2pa(opte); |
4934 | | | 4934 | |
4935 | /* | | 4935 | /* |
4936 | * Update the pte. | | 4936 | * Update the pte. |
4937 | */ | | 4937 | */ |
4938 | do { | | 4938 | do { |
4939 | opte = *ptep; | | 4939 | opte = *ptep; |
4940 | | | 4940 | |
4941 | /* | | 4941 | /* |
4942 | * if the same page, inherit PTE_A and PTE_D. | | 4942 | * if the same page, inherit PTE_A and PTE_D. |
4943 | */ | | 4943 | */ |
4944 | if (((opte ^ npte) & (PTE_FRAME | PTE_P)) == 0) { | | 4944 | if (((opte ^ npte) & (PTE_FRAME | PTE_P)) == 0) { |
4945 | npte |= opte & (PTE_A | PTE_D); | | 4945 | npte |= opte & (PTE_A | PTE_D); |
4946 | } | | 4946 | } |
4947 | #if defined(XENPV) | | 4947 | #if defined(XENPV) |
4948 | if (domid != DOMID_SELF) { | | 4948 | if (domid != DOMID_SELF) { |
4949 | /* pmap_pte_cas with error handling */ | | 4949 | /* pmap_pte_cas with error handling */ |
4950 | int s = splvm(); | | 4950 | int s = splvm(); |
4951 | if (opte != *ptep) { | | 4951 | if (opte != *ptep) { |
4952 | splx(s); | | 4952 | splx(s); |
4953 | continue; | | 4953 | continue; |
4954 | } | | 4954 | } |
4955 | error = xpq_update_foreign( | | 4955 | error = xpq_update_foreign( |
4956 | vtomach((vaddr_t)ptep), npte, domid, flags); | | 4956 | vtomach((vaddr_t)ptep), npte, domid, flags); |
4957 | splx(s); | | 4957 | splx(s); |
4958 | if (error) { | | 4958 | if (error) { |
4959 | /* Undo pv_entry tracking - oof. */ | | 4959 | /* Undo pv_entry tracking - oof. */ |
4960 | if (new_pp != NULL) { | | 4960 | if (new_pp != NULL) { |
4961 | mutex_spin_enter(&new_pp->pp_lock); | | 4961 | mutex_spin_enter(&new_pp->pp_lock); |
4962 | if (new_pve != NULL) { | | 4962 | if (new_pve != NULL) { |
4963 | LIST_REMOVE(new_pve, pve_list); | | 4963 | LIST_REMOVE(new_pve, pve_list); |
4964 | KASSERT(pmap->pm_pve == NULL); | | 4964 | KASSERT(pmap->pm_pve == NULL); |
4965 | pmap->pm_pve = new_pve; | | 4965 | pmap->pm_pve = new_pve; |
4966 | } else if (new_embedded) { | | 4966 | } else if (new_embedded) { |
4967 | new_pp->pp_pte.pte_ptp = NULL; | | 4967 | new_pp->pp_pte.pte_ptp = NULL; |
4968 | new_pp->pp_pte.pte_va = 0; | | 4968 | new_pp->pp_pte.pte_va = 0; |
4969 | } | | 4969 | } |
4970 | mutex_spin_exit(&new_pp->pp_lock); | | 4970 | mutex_spin_exit(&new_pp->pp_lock); |
4971 | } | | 4971 | } |
4972 | pmap_unmap_ptes(pmap, pmap2); | | 4972 | pmap_unmap_ptes(pmap, pmap2); |
4973 | /* Free new PTP. */ | | 4973 | /* Free new PTP. */ |
4974 | if (ptp != NULL && ptp->wire_count <= 1) { | | 4974 | if (ptp != NULL && ptp->wire_count <= 1) { |
4975 | pmap_free_ptp(pmap, ptp, va, ptes, | | 4975 | pmap_free_ptp(pmap, ptp, va, ptes, |
4976 | pdes); | | 4976 | pdes); |
4977 | } | | 4977 | } |
4978 | mutex_exit(&pmap->pm_lock); | | 4978 | mutex_exit(&pmap->pm_lock); |
4979 | return error; | | 4979 | return error; |
4980 | } | | 4980 | } |
4981 | break; | | 4981 | break; |
4982 | } | | 4982 | } |
4983 | #endif /* defined(XENPV) */ | | 4983 | #endif /* defined(XENPV) */ |
4984 | } while (pmap_pte_cas(ptep, opte, npte) != opte); | | 4984 | } while (pmap_pte_cas(ptep, opte, npte) != opte); |
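| | | | |
| /* | | | /* |
| * The loop above is the usual optimistic-update idiom: | | | * The loop above is the usual optimistic-update idiom: |
| * | | | * |
| *	do { | | | *	do { |
| *		opte = *ptep; | | | *		opte = *ptep; |
| *		(compute npte from opte) | | | *		(compute npte from opte) |
| *	} while (pmap_pte_cas(ptep, opte, npte) != opte); | | | *	} while (pmap_pte_cas(ptep, opte, npte) != opte); |
| * | | | * |
| * On XENPV a foreign domain's PTE cannot be written with a plain | | | * On XENPV a foreign domain's PTE cannot be written with a plain |
| * CAS, so the compare is done by hand under splvm() and the store | | | * CAS, so the compare is done by hand under splvm() and the store |
| * becomes an xpq_update_foreign() hypercall. | | | * becomes an xpq_update_foreign() hypercall. |
| */ | | | */ |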
4985 | | | 4985 | |
4986 | /* | | 4986 | /* |
4987 | * Done with the PTEs: they can now be unmapped. | | 4987 | * Done with the PTEs: they can now be unmapped. |
4988 | */ | | 4988 | */ |
4989 | pmap_unmap_ptes(pmap, pmap2); | | 4989 | pmap_unmap_ptes(pmap, pmap2); |
4990 | | | 4990 | |
4991 | /* | | 4991 | /* |
4992 | * Update statistics and PTP's reference count. | | 4992 | * Update statistics and PTP's reference count. |
4993 | */ | | 4993 | */ |
4994 | pmap_stats_update_bypte(pmap, npte, opte); | | 4994 | pmap_stats_update_bypte(pmap, npte, opte); |
4995 | if (ptp != NULL) { | | 4995 | if (ptp != NULL) { |
4996 | if (!have_oldpa) { | | 4996 | if (!have_oldpa) { |
4997 | ptp->wire_count++; | | 4997 | ptp->wire_count++; |
4998 | } | | 4998 | } |
4999 | /* Remember minimum VA in PTP. */ | | 4999 | /* Remember minimum VA in PTP. */ |
5000 | pmap_ptp_range_set(ptp, va); | | 5000 | pmap_ptp_range_set(ptp, va); |
5001 | } | | 5001 | } |
5002 | KASSERT(ptp == NULL || ptp->wire_count > 1); | | 5002 | KASSERT(ptp == NULL || ptp->wire_count > 1); |
5003 | | | 5003 | |
5004 | /* | | 5004 | /* |
5005 | * If the same page, we can skip pv_entry handling. | | 5005 | * If the same page, we can skip pv_entry handling. |
5006 | */ | | 5006 | */ |
5007 | if (((opte ^ npte) & (PTE_FRAME | PTE_P)) == 0) { | | 5007 | if (((opte ^ npte) & (PTE_FRAME | PTE_P)) == 0) { |
5008 | KASSERT(((opte ^ npte) & PTE_PVLIST) == 0); | | 5008 | KASSERT(((opte ^ npte) & PTE_PVLIST) == 0); |
5009 | if ((npte & PTE_PVLIST) != 0) { | | 5009 | if ((npte & PTE_PVLIST) != 0) { |
5010 | KASSERT(samepage); | | 5010 | KASSERT(samepage); |
5011 | pmap_check_pv(pmap, ptp, new_pp, va, true); | | 5011 | pmap_check_pv(pmap, ptp, new_pp, va, true); |
5012 | } | | 5012 | } |
5013 | goto same_pa; | | 5013 | goto same_pa; |
5014 | } else if ((npte & PTE_PVLIST) != 0) { | | 5014 | } else if ((npte & PTE_PVLIST) != 0) { |
5015 | KASSERT(!samepage); | | 5015 | KASSERT(!samepage); |
5016 | } | | 5016 | } |
5017 | | | 5017 | |
5018 | /* | | 5018 | /* |
5019 | * If old page is pv-tracked, remove pv_entry from its list. | | 5019 | * If old page is pv-tracked, remove pv_entry from its list. |
5020 | */ | | 5020 | */ |
5021 | if ((~opte & (PTE_P | PTE_PVLIST)) == 0) { | | 5021 | if ((~opte & (PTE_P | PTE_PVLIST)) == 0) { |
5022 | if ((old_pg = PHYS_TO_VM_PAGE(oldpa)) != NULL) { | | 5022 | if ((old_pg = PHYS_TO_VM_PAGE(oldpa)) != NULL) { |
5023 | old_pp = VM_PAGE_TO_PP(old_pg); | | 5023 | old_pp = VM_PAGE_TO_PP(old_pg); |
5024 | } else if ((old_pp = pmap_pv_tracked(oldpa)) == NULL) { | | 5024 | } else if ((old_pp = pmap_pv_tracked(oldpa)) == NULL) { |
5025 | panic("%s: PTE_PVLIST with pv-untracked page" | | 5025 | panic("%s: PTE_PVLIST with pv-untracked page" |
5026 | " va = %#"PRIxVADDR | | 5026 | " va = %#"PRIxVADDR |
5027 | " pa = %#" PRIxPADDR " (%#" PRIxPADDR ")", | | 5027 | " pa = %#" PRIxPADDR " (%#" PRIxPADDR ")", |
5028 | __func__, va, oldpa, atop(pa)); | | 5028 | __func__, va, oldpa, atop(pa)); |
5029 | } | | 5029 | } |
5030 | | | 5030 | |
5031 | pmap_remove_pv(pmap, old_pp, ptp, va, old_pve, | | 5031 | pmap_remove_pv(pmap, old_pp, ptp, va, old_pve, |
5032 | pmap_pte_to_pp_attrs(opte)); | | 5032 | pmap_pte_to_pp_attrs(opte)); |
5033 | } else { | | 5033 | } else { |
5034 | KASSERT(old_pve == NULL); | | 5034 | KASSERT(old_pve == NULL); |
5035 | KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL); | | 5035 | KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL); |
5036 | } | | 5036 | } |
5037 | | | 5037 | |
5038 | /* | | 5038 | /* |
5039 | * If new page is dynamically PV tracked, insert to tree. | | 5039 | * If new page is dynamically PV tracked, insert to tree. |
5040 | */ | | 5040 | */ |
5041 | if (new_pve != NULL) { | | 5041 | if (new_pve != NULL) { |
5042 | KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL); | | 5042 | KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL); |
5043 | old_pve = rb_tree_insert_node(tree, new_pve); | | 5043 | old_pve = rb_tree_insert_node(tree, new_pve); |
5044 | KASSERT(old_pve == new_pve); | | 5044 | KASSERT(old_pve == new_pve); |
5045 | pmap_check_pv(pmap, ptp, new_pp, va, true); | | 5045 | pmap_check_pv(pmap, ptp, new_pp, va, true); |
5046 | } | | 5046 | } |
5047 | | | 5047 | |
5048 | same_pa: | | 5048 | same_pa: |
5049 | /* | | 5049 | /* |
5050 | * Shoot down the TLB if necessary. | | 5050 | * Shoot down the TLB if necessary. |
5051 | */ | | 5051 | */ |
5052 | | | 5052 | |
5053 | if ((~opte & (PTE_P | PTE_A)) == 0 && | | 5053 | if ((~opte & (PTE_P | PTE_A)) == 0 && |
5054 | ((opte ^ npte) & (PTE_FRAME | PTE_W)) != 0) { | | 5054 | ((opte ^ npte) & (PTE_FRAME | PTE_W)) != 0) { |
5055 | pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_ENTER); | | 5055 | pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_ENTER); |
5056 | } | | 5056 | } |
5057 | pmap_drain_pv(pmap); | | 5057 | pmap_drain_pv(pmap); |
5058 | mutex_exit(&pmap->pm_lock); | | 5058 | mutex_exit(&pmap->pm_lock); |
5059 | return 0; | | 5059 | return 0; |
5060 | } | | 5060 | } |
5061 | | | 5061 | |
5062 | #if defined(XEN) && defined(DOM0OPS) | | 5062 | #if defined(XEN) && defined(DOM0OPS) |
5063 | | | 5063 | |
5064 | struct pmap_data_gnt { | | 5064 | struct pmap_data_gnt { |
5065 | SLIST_ENTRY(pmap_data_gnt) pd_gnt_list; | | 5065 | SLIST_ENTRY(pmap_data_gnt) pd_gnt_list; |
5066 | vaddr_t pd_gnt_sva; | | 5066 | vaddr_t pd_gnt_sva; |
5067 | vaddr_t pd_gnt_eva; /* range covered by this gnt */ | | 5067 | vaddr_t pd_gnt_eva; /* range covered by this gnt */ |
5068 | int pd_gnt_refs; /* ref counter */ | | 5068 | int pd_gnt_refs; /* ref counter */ |
5069 | struct gnttab_map_grant_ref pd_gnt_ops[1]; /* variable length */ | | 5069 | struct gnttab_map_grant_ref pd_gnt_ops[1]; /* variable length */ |
5070 | }; | | 5070 | }; |
5071 | SLIST_HEAD(pmap_data_gnt_head, pmap_data_gnt); | | 5071 | SLIST_HEAD(pmap_data_gnt_head, pmap_data_gnt); |
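| | | | |
| /* | | | /* |
| * pd_gnt_ops[1] uses the pre-C99 variable-length trailing array | | | * pd_gnt_ops[1] uses the pre-C99 variable-length trailing array |
| * idiom: a record covering N pages is sized as | | | * idiom: a record covering N pages is sized as |
| * | | | * |
| *	sizeof(struct pmap_data_gnt) + | | | *	sizeof(struct pmap_data_gnt) + |
| *	    (N - 1) * sizeof(struct gnttab_map_grant_ref) | | | *	    (N - 1) * sizeof(struct gnttab_map_grant_ref) |
| * | | | * |
| * which is how pmap_alloc_gnt() and pmap_free_gnt() below compute | | | * which is how pmap_alloc_gnt() and pmap_free_gnt() below compute |
| * their kmem_alloc()/kmem_free() sizes. | | | * their kmem_alloc()/kmem_free() sizes. |
| */ | | | */ |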
5072 | | | 5072 | |
5073 | static void pmap_remove_gnt(struct pmap *, vaddr_t, vaddr_t); | | 5073 | static void pmap_remove_gnt(struct pmap *, vaddr_t, vaddr_t); |
5074 | | | 5074 | |
5075 | static struct pmap_data_gnt * | | 5075 | static struct pmap_data_gnt * |
5076 | pmap_find_gnt(struct pmap *pmap, vaddr_t sva, vaddr_t eva) | | 5076 | pmap_find_gnt(struct pmap *pmap, vaddr_t sva, vaddr_t eva) |
5077 | { | | 5077 | { |
5078 | struct pmap_data_gnt_head *headp; | | 5078 | struct pmap_data_gnt_head *headp; |
5079 | struct pmap_data_gnt *pgnt; | | 5079 | struct pmap_data_gnt *pgnt; |
5080 | | | 5080 | |
5081 | KASSERT(mutex_owned(&pmap->pm_lock)); | | 5081 | KASSERT(mutex_owned(&pmap->pm_lock)); |
5082 | headp = pmap->pm_data; | | 5082 | headp = pmap->pm_data; |
5083 | KASSERT(headp != NULL); | | 5083 | KASSERT(headp != NULL); |
5084 | SLIST_FOREACH(pgnt, headp, pd_gnt_list) { | | 5084 | SLIST_FOREACH(pgnt, headp, pd_gnt_list) { |
5085 | if (pgnt->pd_gnt_sva >= sva && pgnt->pd_gnt_sva <= eva) | | 5085 | if (pgnt->pd_gnt_sva <= sva && eva <= pgnt->pd_gnt_eva) |
5086 | return pgnt; | | 5086 | return pgnt; |
5087 | /* check that we're not overlapping part of a region */ | | 5087 | /* check that we're not overlapping part of a region */ |
5088 | KASSERT(pgnt->pd_gnt_sva >= eva || pgnt->pd_gnt_eva <= sva); | | 5088 | KASSERT(pgnt->pd_gnt_sva >= eva || pgnt->pd_gnt_eva <= sva); |
5089 | } | | 5089 | } |
5090 | return NULL; | | 5090 | return NULL; |
5091 | } | | 5091 | } |
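| | | | |
| /* | | | /* |
| * The revised match test (right column) requires [sva, eva) to be | | | * The revised match test (right column) requires [sva, eva) to be |
| * fully contained in [pd_gnt_sva, pd_gnt_eva); the old test (left | | | * fully contained in [pd_gnt_sva, pd_gnt_eva); the old test (left |
| * column) matched whenever the region's start merely fell inside | | | * column) matched whenever the region's start merely fell inside |
| * the lookup range, even if the region did not cover all of it. | | | * the lookup range, even if the region did not cover all of it. |
| */ | | | */ |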
5092 | | | 5092 | |
5093 | static void | | 5093 | static void |
5094 | pmap_alloc_gnt(struct pmap *pmap, vaddr_t sva, int nentries, | | 5094 | pmap_alloc_gnt(struct pmap *pmap, vaddr_t sva, int nentries, |
5095 | const struct gnttab_map_grant_ref *ops) | | 5095 | const struct gnttab_map_grant_ref *ops) |
5096 | { | | 5096 | { |
5097 | struct pmap_data_gnt_head *headp; | | 5097 | struct pmap_data_gnt_head *headp; |
5098 | struct pmap_data_gnt *pgnt; | | 5098 | struct pmap_data_gnt *pgnt; |
5099 | vaddr_t eva = sva + nentries * PAGE_SIZE; | | 5099 | vaddr_t eva = sva + nentries * PAGE_SIZE; |
5100 | KASSERT(mutex_owned(&pmap->pm_lock)); | | 5100 | KASSERT(mutex_owned(&pmap->pm_lock)); |
5101 | KASSERT(nentries >= 1); | | 5101 | KASSERT(nentries >= 1); |
5102 | if (pmap->pm_remove == NULL) { | | 5102 | if (pmap->pm_remove == NULL) { |
5103 | pmap->pm_remove = pmap_remove_gnt; | | 5103 | pmap->pm_remove = pmap_remove_gnt; |
5104 | KASSERT(pmap->pm_data == NULL); | | 5104 | KASSERT(pmap->pm_data == NULL); |
5105 | headp = kmem_alloc(sizeof(*headp), KM_SLEEP); | | 5105 | headp = kmem_alloc(sizeof(*headp), KM_SLEEP); |
5106 | SLIST_INIT(headp); | | 5106 | SLIST_INIT(headp); |
5107 | pmap->pm_data = headp; | | 5107 | pmap->pm_data = headp; |
5108 | } else { | | 5108 | } else { |
5109 | KASSERT(pmap->pm_remove == pmap_remove_gnt); | | 5109 | KASSERT(pmap->pm_remove == pmap_remove_gnt); |
5110 | KASSERT(pmap->pm_data != NULL); | | 5110 | KASSERT(pmap->pm_data != NULL); |
5111 | headp = pmap->pm_data; | | 5111 | headp = pmap->pm_data; |
5112 | } | | 5112 | } |
5113 | | | 5113 | |
5114 | pgnt = pmap_find_gnt(pmap, sva, eva); | | 5114 | pgnt = pmap_find_gnt(pmap, sva, eva); |
5115 | if (pgnt != NULL) { | | 5115 | if (pgnt != NULL) { |
5116 | KASSERT(pgnt->pd_gnt_sva == sva); | | 5116 | KASSERT(pgnt->pd_gnt_sva == sva); |
5117 | KASSERT(pgnt->pd_gnt_eva == eva); | | 5117 | KASSERT(pgnt->pd_gnt_eva == eva); |
5118 | return; | | 5118 | return; |
5119 | } | | 5119 | } |
5120 | | | 5120 | |
5121 | /* new entry */ | | 5121 | /* new entry */ |
5122 | pgnt = kmem_alloc(sizeof(*pgnt) + | | 5122 | pgnt = kmem_alloc(sizeof(*pgnt) + |
5123 | (nentries - 1) * sizeof(struct gnttab_map_grant_ref), KM_SLEEP); | | 5123 | (nentries - 1) * sizeof(struct gnttab_map_grant_ref), KM_SLEEP); |
5124 | pgnt->pd_gnt_sva = sva; | | 5124 | pgnt->pd_gnt_sva = sva; |
5125 | pgnt->pd_gnt_eva = eva; | | 5125 | pgnt->pd_gnt_eva = eva; |
5126 | pgnt->pd_gnt_refs = 0; | | 5126 | pgnt->pd_gnt_refs = 0; |
5127 | memcpy(pgnt->pd_gnt_ops, ops, | | 5127 | memcpy(pgnt->pd_gnt_ops, ops, |
5128 | sizeof(struct gnttab_map_grant_ref) * nentries); | | 5128 | sizeof(struct gnttab_map_grant_ref) * nentries); |
5129 | SLIST_INSERT_HEAD(headp, pgnt, pd_gnt_list); | | 5129 | SLIST_INSERT_HEAD(headp, pgnt, pd_gnt_list); |
5130 | } | | 5130 | } |
5131 | | | 5131 | |
5132 | static void | | 5132 | static void |
5133 | pmap_free_gnt(struct pmap *pmap, struct pmap_data_gnt *pgnt) | | 5133 | pmap_free_gnt(struct pmap *pmap, struct pmap_data_gnt *pgnt) |
5134 | { | | 5134 | { |
5135 | struct pmap_data_gnt_head *headp = pmap->pm_data; | | 5135 | struct pmap_data_gnt_head *headp = pmap->pm_data; |
5136 | int nentries = (pgnt->pd_gnt_eva - pgnt->pd_gnt_sva) / PAGE_SIZE; | | 5136 | int nentries = (pgnt->pd_gnt_eva - pgnt->pd_gnt_sva) / PAGE_SIZE; |
5137 | KASSERT(nentries >= 1); | | 5137 | KASSERT(nentries >= 1); |
5138 | KASSERT(mutex_owned(&pmap->pm_lock)); | | 5138 | KASSERT(mutex_owned(&pmap->pm_lock)); |
5139 | KASSERT(pgnt->pd_gnt_refs == 0); | | 5139 | KASSERT(pgnt->pd_gnt_refs == 0); |
5140 | SLIST_REMOVE(headp, pgnt, pmap_data_gnt, pd_gnt_list); | | 5140 | SLIST_REMOVE(headp, pgnt, pmap_data_gnt, pd_gnt_list); |
5141 | kmem_free(pgnt, sizeof(*pgnt) + | | 5141 | kmem_free(pgnt, sizeof(*pgnt) + |
5142 | (nentries - 1) * sizeof(struct gnttab_map_grant_ref)); | | 5142 | (nentries - 1) * sizeof(struct gnttab_map_grant_ref)); |
5143 | if (SLIST_EMPTY(headp)) { | | 5143 | if (SLIST_EMPTY(headp)) { |
5144 | kmem_free(headp, sizeof(*headp)); | | 5144 | kmem_free(headp, sizeof(*headp)); |
5145 | pmap->pm_data = NULL; | | 5145 | pmap->pm_data = NULL; |
5146 | pmap->pm_remove = NULL; | | 5146 | pmap->pm_remove = NULL; |
5147 | } | | 5147 | } |
5148 | } | | 5148 | } |
5149 | | | 5149 | |
5150 | /* | | 5150 | /* |
5151 | * pmap_enter_gnt: enter a grant entry into a pmap | | 5151 | * pmap_enter_gnt: enter a grant entry into a pmap |
5152 | * | | 5152 | * |
5153 | * => must be done "now" ... no lazy-evaluation | | 5153 | * => must be done "now" ... no lazy-evaluation |
5154 | */ | | 5154 | */ |
5155 | int | | 5155 | int |
5156 | pmap_enter_gnt(struct pmap *pmap, vaddr_t va, vaddr_t sva, int nentries, | | 5156 | pmap_enter_gnt(struct pmap *pmap, vaddr_t va, vaddr_t sva, int nentries, |
5157 | const struct gnttab_map_grant_ref *oops) | | 5157 | const struct gnttab_map_grant_ref *oops) |
5158 | { | | 5158 | { |
5159 | struct pmap_data_gnt *pgnt; | | 5159 | struct pmap_data_gnt *pgnt; |
5160 | pt_entry_t *ptes, opte; | | 5160 | pt_entry_t *ptes, opte; |
5161 | pt_entry_t *ptep; | | 5161 | pt_entry_t *ptep; |
5162 | pd_entry_t * const *pdes; | | 5162 | pd_entry_t * const *pdes; |
5163 | struct vm_page *ptp; | | 5163 | struct vm_page *ptp; |
5164 | struct vm_page *old_pg; | | 5164 | struct vm_page *old_pg; |
5165 | struct pmap_page *old_pp; | | 5165 | struct pmap_page *old_pp; |
5166 | struct pv_entry *old_pve; | | 5166 | struct pv_entry *old_pve; |
5167 | struct pmap *pmap2; | | 5167 | struct pmap *pmap2; |
5168 | struct pmap_ptparray pt; | | 5168 | struct pmap_ptparray pt; |
5169 | int error; | | 5169 | int error; |
5170 | bool getptp; | | 5170 | bool getptp; |
5171 | rb_tree_t *tree; | | 5171 | rb_tree_t *tree; |
5172 | struct gnttab_map_grant_ref *op; | | 5172 | struct gnttab_map_grant_ref *op; |
5173 | int ret; | | 5173 | int ret; |
5174 | int idx; | | 5174 | int idx; |
5175 | | | 5175 | |
5176 | KASSERT(pmap_initialized); | | 5176 | KASSERT(pmap_initialized); |
5177 | KASSERT(va < VM_MAX_KERNEL_ADDRESS); | | 5177 | KASSERT(va < VM_MAX_KERNEL_ADDRESS); |
5178 | KASSERTMSG(va != (vaddr_t)PDP_BASE, "%s: trying to map va=%#" | | 5178 | KASSERTMSG(va != (vaddr_t)PDP_BASE, "%s: trying to map va=%#" |
5179 | PRIxVADDR " over PDP!", __func__, va); | | 5179 | PRIxVADDR " over PDP!", __func__, va); |
5180 | KASSERT(pmap != pmap_kernel()); | | 5180 | KASSERT(pmap != pmap_kernel()); |
5181 | | | 5181 | |
5182 | /* Begin by locking the pmap. */ | | 5182 | /* Begin by locking the pmap. */ |
5183 | mutex_enter(&pmap->pm_lock); | | 5183 | mutex_enter(&pmap->pm_lock); |
5184 | pmap_alloc_gnt(pmap, sva, nentries, oops); | | 5184 | pmap_alloc_gnt(pmap, sva, nentries, oops); |
5185 | | | 5185 | |
5186 | pgnt = pmap_find_gnt(pmap, va, va + PAGE_SIZE); | | 5186 | pgnt = pmap_find_gnt(pmap, va, va + PAGE_SIZE); |
5187 | KASSERT(pgnt != NULL); | | 5187 | KASSERT(pgnt != NULL); |
5188 | | | 5188 | |
5189 | /* Look up the PTP. Allocate if none present. */ | | 5189 | /* Look up the PTP. Allocate if none present. */ |
5190 | ptp = NULL; | | 5190 | ptp = NULL; |
5191 | getptp = false; | | 5191 | getptp = false; |
5192 | ptp = pmap_find_ptp(pmap, va, 1); | | 5192 | ptp = pmap_find_ptp(pmap, va, 1); |
5193 | if (ptp == NULL) { | | 5193 | if (ptp == NULL) { |
5194 | getptp = true; | | 5194 | getptp = true; |
5195 | error = pmap_get_ptp(pmap, &pt, va, PMAP_CANFAIL, &ptp); | | 5195 | error = pmap_get_ptp(pmap, &pt, va, PMAP_CANFAIL, &ptp); |
5196 | if (error != 0) { | | 5196 | if (error != 0) { |
5197 | mutex_exit(&pmap->pm_lock); | | 5197 | mutex_exit(&pmap->pm_lock); |
5198 | return error; | | 5198 | return error; |
5199 | } | | 5199 | } |
5200 | } | | 5200 | } |
5201 | tree = &VM_PAGE_TO_PP(ptp)->pp_rb; | | 5201 | tree = &VM_PAGE_TO_PP(ptp)->pp_rb; |
5202 | | | 5202 | |
5203 | /* | | 5203 | /* |
5204 | * Look up the old PV entry at this VA (if any), and insert a new PV | | 5204 | * Look up the old PV entry at this VA (if any), and insert a new PV |
5205 | * entry if required for the new mapping. Temporarily track the old | | 5205 | * entry if required for the new mapping. Temporarily track the old |
5206 | * and new mappings concurrently. Only after the old mapping is | | 5206 | * and new mappings concurrently. Only after the old mapping is |
5207 | * evicted from the pmap will we remove its PV entry. Otherwise, | | 5207 | * evicted from the pmap will we remove its PV entry. Otherwise, |
5208 | * our picture of modified/accessed state for either page could get | | 5208 | * our picture of modified/accessed state for either page could get |
5209 | * out of sync (we need any P->V operation for either page to stall | | 5209 | * out of sync (we need any P->V operation for either page to stall |
5210 | * on pmap->pm_lock until done here). | | 5210 | * on pmap->pm_lock until done here). |
5211 | */ | | 5211 | */ |
5212 | old_pve = NULL; | | 5212 | old_pve = NULL; |
5213 | | | 5213 | |
5214 | old_pve = pmap_treelookup_pv(pmap, ptp, tree, va); | | 5214 | old_pve = pmap_treelookup_pv(pmap, ptp, tree, va); |
5215 | | | 5215 | |
5216 | /* Map PTEs into address space. */ | | 5216 | /* Map PTEs into address space. */ |
5217 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); | | 5217 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); |
5218 | | | 5218 | |
5219 | /* Install any newly allocated PTPs. */ | | 5219 | /* Install any newly allocated PTPs. */ |
5220 | if (getptp) { | | 5220 | if (getptp) { |
5221 | pmap_install_ptp(pmap, &pt, va, pdes); | | 5221 | pmap_install_ptp(pmap, &pt, va, pdes); |
5222 | } | | 5222 | } |
5223 | | | 5223 | |
5224 | /* Check if there is an existing mapping. */ | | 5224 | /* Check if there is an existing mapping. */ |
5225 | ptep = &ptes[pl1_i(va)]; | | 5225 | ptep = &ptes[pl1_i(va)]; |
5226 | opte = *ptep; | | 5226 | opte = *ptep; |
5227 | bool have_oldpa = pmap_valid_entry(opte); | | 5227 | bool have_oldpa = pmap_valid_entry(opte); |
5228 | paddr_t oldpa = pmap_pte2pa(opte); | | 5228 | paddr_t oldpa = pmap_pte2pa(opte); |
5229 | | | 5229 | |
5230 | /* | | 5230 | /* |
5231 | * Update the pte. | | 5231 | * Update the pte. |
5232 | */ | | 5232 | */ |
5233 | | | 5233 | |
5234 | idx = (va - pgnt->pd_gnt_sva) / PAGE_SIZE; | | 5234 | idx = (va - pgnt->pd_gnt_sva) / PAGE_SIZE; |
5235 | op = &pgnt->pd_gnt_ops[idx]; | | 5235 | op = &pgnt->pd_gnt_ops[idx]; |
5236 | | | 5236 | |
5237 | op->host_addr = xpmap_ptetomach(ptep); | | 5237 | op->host_addr = xpmap_ptetomach(ptep); |
5238 | op->dev_bus_addr = 0; | | 5238 | op->dev_bus_addr = 0; |
5239 | op->status = GNTST_general_error; | | 5239 | op->status = GNTST_general_error; |
5240 | ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, 1); | | 5240 | ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, 1); |
5241 | if (__predict_false(ret)) { | | 5241 | if (__predict_false(ret)) { |
5242 | printf("%s: GNTTABOP_map_grant_ref failed: %d\n", | | 5242 | printf("%s: GNTTABOP_map_grant_ref failed: %d\n", |
5243 | __func__, ret); | | 5243 | __func__, ret); |
5244 | op->status = GNTST_general_error; | | 5244 | op->status = GNTST_general_error; |
5245 | } | | 5245 | } |
5246 | for (int d = 0; d < 256 && op->status == GNTST_eagain; d++) { | | 5246 | for (int d = 0; d < 256 && op->status == GNTST_eagain; d++) { |
5247 | kpause("gntmap", false, mstohz(1), NULL); | | 5247 | kpause("gntmap", false, mstohz(1), NULL); |
5248 | ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, 1); | | 5248 | ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, 1); |
5249 | if (__predict_false(ret)) { | | 5249 | if (__predict_false(ret)) { |
5250 | printf("%s: GNTTABOP_map_grant_ref failed: %d\n", | | 5250 | printf("%s: GNTTABOP_map_grant_ref failed: %d\n", |
5251 | __func__, ret); | | 5251 | __func__, ret); |
5252 | op->status = GNTST_general_error; | | 5252 | op->status = GNTST_general_error; |
5253 | } | | 5253 | } |
5254 | } | | 5254 | } |
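| /* | | | /* |
| * GNTST_eagain is the hypervisor asking for a retry: the loop | | | * GNTST_eagain is the hypervisor asking for a retry: the loop |
| * above re-issues the map operation up to 256 times, pausing | | | * above re-issues the map operation up to 256 times, pausing |
| * mstohz(1) ticks between attempts, before giving up with the | | | * mstohz(1) ticks between attempts, before giving up with the |
| * last status. | | | * last status. |
| */ | | | */ |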
5255 | if (__predict_false(op->status != GNTST_okay)) { | | 5255 | if (__predict_false(op->status != GNTST_okay)) { |
5256 | printf("%s: GNTTABOP_map_grant_ref status: %d\n", | | 5256 | printf("%s: GNTTABOP_map_grant_ref status: %d\n", |
5257 | __func__, op->status); | | 5257 | __func__, op->status); |
5258 | if (ptp != NULL) { | | 5258 | if (ptp != NULL) { |
5259 | if (have_oldpa) { | | 5259 | if (have_oldpa) { |
5260 | ptp->wire_count--; | | 5260 | ptp->wire_count--; |
5261 | } | | 5261 | } |
5262 | } | | 5262 | } |
5263 | } else { | | 5263 | } else { |
5264 | pgnt->pd_gnt_refs++; | | 5264 | pgnt->pd_gnt_refs++; |
5265 | if (ptp != NULL) { | | 5265 | if (ptp != NULL) { |
5266 | if (!have_oldpa) { | | 5266 | if (!have_oldpa) { |
5267 | ptp->wire_count++; | | 5267 | ptp->wire_count++; |
5268 | } | | 5268 | } |
5269 | /* Remember minimum VA in PTP. */ | | 5269 | /* Remember minimum VA in PTP. */ |
5270 | pmap_ptp_range_set(ptp, va); | | 5270 | pmap_ptp_range_set(ptp, va); |
5271 | } | | 5271 | } |
5272 | } | | 5272 | } |
5273 | | | 5273 | |
5274 | /* | | 5274 | /* |
5275 | * Done with the PTEs: they can now be unmapped. | | 5275 | * Done with the PTEs: they can now be unmapped. |
5276 | */ | | 5276 | */ |
5277 | pmap_unmap_ptes(pmap, pmap2); | | 5277 | pmap_unmap_ptes(pmap, pmap2); |
5278 | | | 5278 | |
5279 | /* | | 5279 | /* |
5280 | * Update statistics and PTP's reference count. | | 5280 | * Update statistics and PTP's reference count. |
5281 | */ | | 5281 | */ |
5282 | pmap_stats_update_bypte(pmap, 0, opte); | | 5282 | pmap_stats_update_bypte(pmap, 0, opte); |
5283 | KASSERT(ptp == NULL || ptp->wire_count >= 1); | | 5283 | KASSERT(ptp == NULL || ptp->wire_count >= 1); |
5284 | | | 5284 | |
5285 | /* | | 5285 | /* |
5286 | * If old page is pv-tracked, remove pv_entry from its list. | | 5286 | * If old page is pv-tracked, remove pv_entry from its list. |
5287 | */ | | 5287 | */ |
5288 | if ((~opte & (PTE_P | PTE_PVLIST)) == 0) { | | 5288 | if ((~opte & (PTE_P | PTE_PVLIST)) == 0) { |
5289 | if ((old_pg = PHYS_TO_VM_PAGE(oldpa)) != NULL) { | | 5289 | if ((old_pg = PHYS_TO_VM_PAGE(oldpa)) != NULL) { |
5290 | old_pp = VM_PAGE_TO_PP(old_pg); | | 5290 | old_pp = VM_PAGE_TO_PP(old_pg); |
5291 | } else if ((old_pp = pmap_pv_tracked(oldpa)) == NULL) { | | 5291 | } else if ((old_pp = pmap_pv_tracked(oldpa)) == NULL) { |
5292 | panic("%s: PTE_PVLIST with pv-untracked page" | | 5292 | panic("%s: PTE_PVLIST with pv-untracked page" |
5293 | " va = %#"PRIxVADDR " pa = %#" PRIxPADDR, | | 5293 | " va = %#"PRIxVADDR " pa = %#" PRIxPADDR, |
5294 | __func__, va, oldpa); | | 5294 | __func__, va, oldpa); |
5295 | } | | 5295 | } |
5296 | | | 5296 | |
5297 | pmap_remove_pv(pmap, old_pp, ptp, va, old_pve, | | 5297 | pmap_remove_pv(pmap, old_pp, ptp, va, old_pve, |
5298 | pmap_pte_to_pp_attrs(opte)); | | 5298 | pmap_pte_to_pp_attrs(opte)); |
5299 | } else { | | 5299 | } else { |
5300 | KASSERT(old_pve == NULL); | | 5300 | KASSERT(old_pve == NULL); |
5301 | KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL); | | 5301 | KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL); |
5302 | } | | 5302 | } |
5303 | | | 5303 | |
5304 | pmap_drain_pv(pmap); | | 5304 | pmap_drain_pv(pmap); |
5305 | mutex_exit(&pmap->pm_lock); | | 5305 | mutex_exit(&pmap->pm_lock); |
5306 | return op->status; | | 5306 | return op->status; |
5307 | } | | 5307 | } |
5308 | | | 5308 | |
5309 | /* | | 5309 | /* |
5310 | * pmap_remove_gnt: grant mapping removal function. | | 5310 | * pmap_remove_gnt: grant mapping removal function. |
5311 | * | | 5311 | * |
5312 | * => caller should not be holding any pmap locks | | 5312 | * => caller should not be holding any pmap locks |
5313 | */ | | 5313 | */ |
5314 | static void | | 5314 | static void |
5315 | pmap_remove_gnt(struct pmap *pmap, vaddr_t sva, vaddr_t eva) | | 5315 | pmap_remove_gnt(struct pmap *pmap, vaddr_t sva, vaddr_t eva) |
5316 | { | | 5316 | { |
5317 | struct pmap_data_gnt *pgnt; | | 5317 | struct pmap_data_gnt *pgnt; |
5318 | pt_entry_t *ptes; | | 5318 | pt_entry_t *ptes; |
5319 | pd_entry_t pde; | | 5319 | pd_entry_t pde; |
5320 | pd_entry_t * const *pdes; | | 5320 | pd_entry_t * const *pdes; |
5321 | struct vm_page *ptp; | | 5321 | struct vm_page *ptp; |
5322 | struct pmap *pmap2; | | 5322 | struct pmap *pmap2; |
5323 | vaddr_t va; | | 5323 | vaddr_t va; |
5324 | int lvl; | | 5324 | int lvl; |
5325 | int idx; | | 5325 | int idx; |
5326 | struct gnttab_map_grant_ref *op; | | 5326 | struct gnttab_map_grant_ref *op; |
5327 | struct gnttab_unmap_grant_ref unmap_op; | | 5327 | struct gnttab_unmap_grant_ref unmap_op; |
5328 | int ret; | | 5328 | int ret; |
5329 | | | 5329 | |
5330 | KASSERT(pmap != pmap_kernel()); | | 5330 | KASSERT(pmap != pmap_kernel()); |
5331 | KASSERT(pmap->pm_remove == pmap_remove_gnt); | | 5331 | KASSERT(pmap->pm_remove == pmap_remove_gnt); |
5332 | | | 5332 | |
5333 | mutex_enter(&pmap->pm_lock); | | 5333 | mutex_enter(&pmap->pm_lock); |
5334 | for (va = sva; va < eva; va += PAGE_SIZE) { | | 5334 | for (va = sva; va < eva; va += PAGE_SIZE) { |
5335 | pgnt = pmap_find_gnt(pmap, va, va + PAGE_SIZE); | | 5335 | pgnt = pmap_find_gnt(pmap, va, va + PAGE_SIZE); |
5336 | if (pgnt == NULL) { | | 5336 | if (pgnt == NULL) { |
5337 | pmap_remove_locked(pmap, sva, eva); | | 5337 | pmap_remove_locked(pmap, sva, eva); |
5338 | continue; | | 5338 | continue; |
5339 | } | | 5339 | } |
5340 | | | 5340 | |
5341 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); | | 5341 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); |
5342 | if (!pmap_pdes_valid(va, pdes, &pde, &lvl)) { | | 5342 | if (!pmap_pdes_valid(va, pdes, &pde, &lvl)) { |
5343 | panic("pmap_remove_gnt pdes not valid"); | | 5343 | panic("pmap_remove_gnt pdes not valid"); |
5344 | } | | 5344 | } |
5345 | | | 5345 | |
5346 | idx = (va - pgnt->pd_gnt_sva) / PAGE_SIZE; | | 5346 | idx = (va - pgnt->pd_gnt_sva) / PAGE_SIZE; |
5347 | op = &pgnt->pd_gnt_ops[idx]; | | 5347 | op = &pgnt->pd_gnt_ops[idx]; |
5348 | KASSERT(lvl == 1); | | 5348 | KASSERT(lvl == 1); |
5349 | KASSERT(op->status == GNTST_okay); | | 5349 | KASSERT(op->status == GNTST_okay); |
5350 | | | 5350 | |
5351 | /* Get PTP if non-kernel mapping. */ | | 5351 | /* Get PTP if non-kernel mapping. */ |
5352 | ptp = pmap_find_ptp(pmap, va, 1); | | 5352 | ptp = pmap_find_ptp(pmap, va, 1); |
5353 | KASSERTMSG(ptp != NULL, | | 5353 | KASSERTMSG(ptp != NULL, |
5354 | "%s: unmanaged PTP detected", __func__); | | 5354 | "%s: unmanaged PTP detected", __func__); |
5355 | | | 5355 | |
5356 | if (op->status == GNTST_okay) { | | 5356 | if (op->status == GNTST_okay) { |
5357 | KASSERT(pmap_valid_entry(ptes[pl1_i(va)])); | | 5357 | KASSERT(pmap_valid_entry(ptes[pl1_i(va)])); |
5358 | unmap_op.handle = op->handle; | | 5358 | unmap_op.handle = op->handle; |
5359 | unmap_op.dev_bus_addr = 0; | | 5359 | unmap_op.dev_bus_addr = 0; |
5360 | unmap_op.host_addr = xpmap_ptetomach(&ptes[pl1_i(va)]); | | 5360 | unmap_op.host_addr = xpmap_ptetomach(&ptes[pl1_i(va)]); |
5361 | ret = HYPERVISOR_grant_table_op( | | 5361 | ret = HYPERVISOR_grant_table_op( |
5362 | GNTTABOP_unmap_grant_ref, &unmap_op, 1); | | 5362 | GNTTABOP_unmap_grant_ref, &unmap_op, 1); |
5363 | if (ret) { | | 5363 | if (ret) { |
5364 | printf("%s: GNTTABOP_unmap_grant_ref " | | 5364 | printf("%s: GNTTABOP_unmap_grant_ref " |
5365 | "failed: %d\n", __func__, ret); | | 5365 | "failed: %d\n", __func__, ret); |
5366 | } | | 5366 | } |
5367 | | | 5367 | |
5368 | ptp->wire_count--; | | 5368 | ptp->wire_count--; |
5369 | pgnt->pd_gnt_refs--; | | 5369 | pgnt->pd_gnt_refs--; |
5370 | if (pgnt->pd_gnt_refs == 0) { | | 5370 | if (pgnt->pd_gnt_refs == 0) { |
5371 | pmap_free_gnt(pmap, pgnt); | | 5371 | pmap_free_gnt(pmap, pgnt); |
5372 | } | | 5372 | } |
5373 | } | | 5373 | } |
5374 | /* | | 5374 | /* |
5375 | * if mapping removed and the PTP is no longer | | 5375 | * if mapping removed and the PTP is no longer |
5376 | * being used, free it! | | 5376 | * being used, free it! |
5377 | */ | | 5377 | */ |
5378 | | | 5378 | |
5379 | if (ptp && ptp->wire_count <= 1) | | 5379 | if (ptp && ptp->wire_count <= 1) |
5380 | pmap_free_ptp(pmap, ptp, va, ptes, pdes); | | 5380 | pmap_free_ptp(pmap, ptp, va, ptes, pdes); |
5381 | pmap_unmap_ptes(pmap, pmap2); | | 5381 | pmap_unmap_ptes(pmap, pmap2); |
5382 | } | | 5382 | } |
5383 | mutex_exit(&pmap->pm_lock); | | 5383 | mutex_exit(&pmap->pm_lock); |
5384 | } | | 5384 | } |
5385 | #endif /* XEN && DOM0OPS */ | | 5385 | #endif /* XEN && DOM0OPS */ |
5386 | | | 5386 | |
5387 | paddr_t | | 5387 | paddr_t |
5388 | pmap_get_physpage(void) | | 5388 | pmap_get_physpage(void) |
5389 | { | | 5389 | { |
5390 | struct vm_page *ptp; | | 5390 | struct vm_page *ptp; |
5391 | struct pmap *kpm = pmap_kernel(); | | 5391 | struct pmap *kpm = pmap_kernel(); |
5392 | paddr_t pa; | | 5392 | paddr_t pa; |
5393 | | | 5393 | |
5394 | if (!uvm.page_init_done) { | | 5394 | if (!uvm.page_init_done) { |
5395 | /* | | 5395 | /* |
5396 | * We're growing the kernel pmap early (from | | 5396 | * We're growing the kernel pmap early (from |
5397 | * uvm_pageboot_alloc()). This case must be | | 5397 | * uvm_pageboot_alloc()). This case must be |
5398 | * handled a little differently. | | 5398 | * handled a little differently. |
5399 | */ | | 5399 | */ |
5400 | | | 5400 | |
5401 | if (!uvm_page_physget(&pa)) | | 5401 | if (!uvm_page_physget(&pa)) |
5402 | panic("%s: out of memory", __func__); | | 5402 | panic("%s: out of memory", __func__); |
5403 | #if defined(__HAVE_DIRECT_MAP) | | 5403 | #if defined(__HAVE_DIRECT_MAP) |
5404 | memset((void *)PMAP_DIRECT_MAP(pa), 0, PAGE_SIZE); | | 5404 | memset((void *)PMAP_DIRECT_MAP(pa), 0, PAGE_SIZE); |
5405 | #else | | 5405 | #else |
5406 | #if defined(XENPV) | | 5406 | #if defined(XENPV) |
5407 | if (XEN_VERSION_SUPPORTED(3, 4)) { | | 5407 | if (XEN_VERSION_SUPPORTED(3, 4)) { |
5408 | xen_pagezero(pa); | | 5408 | xen_pagezero(pa); |
5409 | return pa; | | 5409 | return pa; |
5410 | } | | 5410 | } |
5411 | #endif | | 5411 | #endif |
5412 | kpreempt_disable(); | | 5412 | kpreempt_disable(); |
5413 | pmap_pte_set(early_zero_pte, pmap_pa2pte(pa) | PTE_P | | | 5413 | pmap_pte_set(early_zero_pte, pmap_pa2pte(pa) | PTE_P | |
5414 | PTE_W | pmap_pg_nx); | | 5414 | PTE_W | pmap_pg_nx); |
5415 | pmap_pte_flush(); | | 5415 | pmap_pte_flush(); |
5416 | pmap_update_pg((vaddr_t)early_zerop); | | 5416 | pmap_update_pg((vaddr_t)early_zerop); |
5417 | memset(early_zerop, 0, PAGE_SIZE); | | 5417 | memset(early_zerop, 0, PAGE_SIZE); |
5418 | #if defined(DIAGNOSTIC) || defined(XENPV) | | 5418 | #if defined(DIAGNOSTIC) || defined(XENPV) |
5419 | pmap_pte_set(early_zero_pte, 0); | | 5419 | pmap_pte_set(early_zero_pte, 0); |
5420 | pmap_pte_flush(); | | 5420 | pmap_pte_flush(); |
5421 | #endif /* defined(DIAGNOSTIC) || defined(XENPV) */ | | 5421 | #endif /* defined(DIAGNOSTIC) || defined(XENPV) */
5422 | kpreempt_enable(); | | 5422 | kpreempt_enable(); |
5423 | #endif /* defined(__HAVE_DIRECT_MAP) */ | | 5423 | #endif /* defined(__HAVE_DIRECT_MAP) */ |
5424 | } else { | | 5424 | } else { |
5425 | /* XXX */ | | 5425 | /* XXX */ |
5426 | ptp = uvm_pagealloc(NULL, 0, NULL, | | 5426 | ptp = uvm_pagealloc(NULL, 0, NULL, |
5427 | UVM_PGA_USERESERVE|UVM_PGA_ZERO); | | 5427 | UVM_PGA_USERESERVE|UVM_PGA_ZERO); |
5428 | if (ptp == NULL) | | 5428 | if (ptp == NULL) |
5429 | panic("%s: out of memory", __func__); | | 5429 | panic("%s: out of memory", __func__); |
5430 | ptp->flags &= ~PG_BUSY; | | 5430 | ptp->flags &= ~PG_BUSY; |
5431 | ptp->wire_count = 1; | | 5431 | ptp->wire_count = 1; |
5432 | pa = VM_PAGE_TO_PHYS(ptp); | | 5432 | pa = VM_PAGE_TO_PHYS(ptp); |
5433 | } | | 5433 | } |
5434 | pmap_stats_update(kpm, 1, 0); | | 5434 | pmap_stats_update(kpm, 1, 0); |
5435 | | | 5435 | |
5436 | return pa; | | 5436 | return pa; |
5437 | } | | 5437 | } |
5438 | | | 5438 | |
5439 | /* | | 5439 | /* |
5440 | * Expand the page tree with the specified number of PTPs, mapping virtual | | 5440 | * Expand the page tree with the specified number of PTPs, mapping virtual
5441 | * addresses starting at kva. We populate all the levels but the last one | | 5441 | * addresses starting at kva. We populate all the levels but the last one |
5442 | * (L1). The nodes of the tree are created as RW, but the pages covered | | 5442 | * (L1). The nodes of the tree are created as RW, but the pages covered |
5443 | * will be kentered in L1, with proper permissions. | | 5443 | * will be kentered in L1, with proper permissions. |
5444 | * | | 5444 | * |
5445 | * Used only by pmap_growkernel. | | 5445 | * Used only by pmap_growkernel. |
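     | * | |      | *
     | * Illustration: for each level from the top down to L2, the loop | |      | * Illustration: for each level from the top down to L2, the loop
     | * below installs needed_ptps[level - 1] new entries starting at | |      | * below installs needed_ptps[level - 1] new entries starting at
     | * index pl_i_roundup(kva, level), each backed by a fresh zeroed | |      | * index pl_i_roundup(kva, level), each backed by a fresh zeroed
     | * page from pmap_get_physpage(). | |      | * page from pmap_get_physpage().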
5446 | */ | | 5446 | */ |
5447 | static void | | 5447 | static void |
5448 | pmap_alloc_level(struct pmap *cpm, vaddr_t kva, long *needed_ptps) | | 5448 | pmap_alloc_level(struct pmap *cpm, vaddr_t kva, long *needed_ptps) |
5449 | { | | 5449 | { |
5450 | unsigned long i; | | 5450 | unsigned long i; |
5451 | paddr_t pa; | | 5451 | paddr_t pa; |
5452 | unsigned long index, endindex; | | 5452 | unsigned long index, endindex; |
5453 | int level; | | 5453 | int level; |
5454 | pd_entry_t *pdep; | | 5454 | pd_entry_t *pdep; |
5455 | #ifdef XENPV | | 5455 | #ifdef XENPV |
5456 | int s = splvm(); /* protect xpq_* */ | | 5456 | int s = splvm(); /* protect xpq_* */ |
5457 | #endif | | 5457 | #endif |
5458 | | | 5458 | |
5459 | for (level = PTP_LEVELS; level > 1; level--) { | | 5459 | for (level = PTP_LEVELS; level > 1; level--) { |
5460 | if (level == PTP_LEVELS) | | 5460 | if (level == PTP_LEVELS) |
5461 | pdep = cpm->pm_pdir; | | 5461 | pdep = cpm->pm_pdir; |
5462 | else | | 5462 | else |
5463 | pdep = normal_pdes[level - 2]; | | 5463 | pdep = normal_pdes[level - 2]; |
5464 | index = pl_i_roundup(kva, level); | | 5464 | index = pl_i_roundup(kva, level); |
5465 | endindex = index + needed_ptps[level - 1] - 1; | | 5465 | endindex = index + needed_ptps[level - 1] - 1; |
5466 | | | 5466 | |
5467 | for (i = index; i <= endindex; i++) { | | 5467 | for (i = index; i <= endindex; i++) { |
5468 | pt_entry_t pte; | | 5468 | pt_entry_t pte; |
5469 | | | 5469 | |
5470 | KASSERT(!pmap_valid_entry(pdep[i])); | | 5470 | KASSERT(!pmap_valid_entry(pdep[i])); |
5471 | pa = pmap_get_physpage(); | | 5471 | pa = pmap_get_physpage(); |
5472 | pte = pmap_pa2pte(pa) | PTE_P | PTE_W; | | 5472 | pte = pmap_pa2pte(pa) | PTE_P | PTE_W; |
5473 | #ifdef __x86_64__ | | 5473 | #ifdef __x86_64__ |
5474 | pte |= pmap_pg_nx; | | 5474 | pte |= pmap_pg_nx; |
5475 | #endif | | 5475 | #endif |
5476 | pmap_pte_set(&pdep[i], pte); | | 5476 | pmap_pte_set(&pdep[i], pte); |
5477 | | | 5477 | |
5478 | #ifdef XENPV | | 5478 | #ifdef XENPV |
5479 | if (level == PTP_LEVELS && i >= PDIR_SLOT_KERN) { | | 5479 | if (level == PTP_LEVELS && i >= PDIR_SLOT_KERN) { |
5480 | if (__predict_true( | | 5480 | if (__predict_true( |
5481 | cpu_info_primary.ci_flags & CPUF_PRESENT)) { | | 5481 | cpu_info_primary.ci_flags & CPUF_PRESENT)) { |
5482 | /* update per-cpu PMDs on all cpus */ | | 5482 | /* update per-cpu PMDs on all cpus */ |
5483 | xen_kpm_sync(pmap_kernel(), i); | | 5483 | xen_kpm_sync(pmap_kernel(), i); |
5484 | } else { | | 5484 | } else { |
5485 | /* | | 5485 | /* |
5486 | * too early; update primary CPU | | 5486 | * too early; update primary CPU |
5487 | * PMD only (without locks) | | 5487 | * PMD only (without locks) |
5488 | */ | | 5488 | */ |
5489 | #ifdef __x86_64__ | | 5489 | #ifdef __x86_64__ |
5490 | pd_entry_t *cpu_pdep = | | 5490 | pd_entry_t *cpu_pdep = |
5491 | &cpu_info_primary.ci_kpm_pdir[i]; | | 5491 | &cpu_info_primary.ci_kpm_pdir[i]; |
5492 | #else | | 5492 | #else |
5493 | pd_entry_t *cpu_pdep = | | 5493 | pd_entry_t *cpu_pdep = |
5494 | &cpu_info_primary.ci_kpm_pdir[l2tol2(i)]; | | 5494 | &cpu_info_primary.ci_kpm_pdir[l2tol2(i)]; |
5495 | #endif | | 5495 | #endif |
5496 | pmap_pte_set(cpu_pdep, pte); | | 5496 | pmap_pte_set(cpu_pdep, pte); |
5497 | } | | 5497 | } |
5498 | } | | 5498 | } |
5499 | #endif | | 5499 | #endif |
5500 | | | 5500 | |
5501 | KASSERT(level != PTP_LEVELS || nkptp[level - 1] + | | 5501 | KASSERT(level != PTP_LEVELS || nkptp[level - 1] + |
5502 | pl_i(VM_MIN_KERNEL_ADDRESS, level) == i); | | 5502 | pl_i(VM_MIN_KERNEL_ADDRESS, level) == i); |
5503 | nkptp[level - 1]++; | | 5503 | nkptp[level - 1]++; |
5504 | } | | 5504 | } |
5505 | pmap_pte_flush(); | | 5505 | pmap_pte_flush(); |
5506 | } | | 5506 | } |
5507 | #ifdef XENPV | | 5507 | #ifdef XENPV |
5508 | splx(s); | | 5508 | splx(s); |
5509 | #endif | | 5509 | #endif |
5510 | } | | 5510 | } |
5511 | | | 5511 | |
5512 | /* | | 5512 | /* |
5513 | * pmap_growkernel: increase usage of KVM space. | | 5513 | * pmap_growkernel: increase usage of KVM space. |
5514 | * | | 5514 | * |
5515 | * => we allocate new PTPs for the kernel and install them in all | | 5515 | * => we allocate new PTPs for the kernel and install them in all |
5516 | * the pmaps on the system. | | 5516 | * the pmaps on the system. |
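     | * | |      | *
     | * => sketch of a hypothetical caller (not taken from this file): | |      | * => sketch of a hypothetical caller (not taken from this file):
     | * | |      | *
     | *	if (new_end > pmap_maxkvaddr) | |      | *	if (new_end > pmap_maxkvaddr)
     | *		new_end = pmap_growkernel(new_end); | |      | *		new_end = pmap_growkernel(new_end);
     | * | |      | *
     | *    the return value is the new end of usable kernel VA. | |      | *    the return value is the new end of usable kernel VA.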
5517 | */ | | 5517 | */ |
5518 | vaddr_t | | 5518 | vaddr_t |
5519 | pmap_growkernel(vaddr_t maxkvaddr) | | 5519 | pmap_growkernel(vaddr_t maxkvaddr) |
5520 | { | | 5520 | { |
5521 | struct pmap *kpm = pmap_kernel(); | | 5521 | struct pmap *kpm = pmap_kernel(); |
5522 | struct pmap *cpm; | | 5522 | struct pmap *cpm; |
5523 | #if !defined(XENPV) || !defined(__x86_64__) | | 5523 | #if !defined(XENPV) || !defined(__x86_64__) |
5524 | struct pmap *pm; | | 5524 | struct pmap *pm; |
5525 | long old; | | 5525 | long old; |
5526 | #endif | | 5526 | #endif |
5527 | int s, i; | | 5527 | int s, i; |
5528 | long needed_kptp[PTP_LEVELS], target_nptp; | | 5528 | long needed_kptp[PTP_LEVELS], target_nptp; |
5529 | bool invalidate = false; | | 5529 | bool invalidate = false; |
5530 | | | 5530 | |
5531 | s = splvm(); /* to be safe */ | | 5531 | s = splvm(); /* to be safe */ |
5532 | mutex_enter(&kpm->pm_lock); | | 5532 | mutex_enter(&kpm->pm_lock); |
5533 | | | 5533 | |
5534 | if (maxkvaddr <= pmap_maxkvaddr) { | | 5534 | if (maxkvaddr <= pmap_maxkvaddr) { |
5535 | mutex_exit(&kpm->pm_lock); | | 5535 | mutex_exit(&kpm->pm_lock); |
5536 | splx(s); | | 5536 | splx(s); |
5537 | return pmap_maxkvaddr; | | 5537 | return pmap_maxkvaddr; |
5538 | } | | 5538 | } |
5539 | | | 5539 | |
5540 | maxkvaddr = x86_round_pdr(maxkvaddr); | | 5540 | maxkvaddr = x86_round_pdr(maxkvaddr); |
5541 | #if !defined(XENPV) || !defined(__x86_64__) | | 5541 | #if !defined(XENPV) || !defined(__x86_64__) |
5542 | old = nkptp[PTP_LEVELS - 1]; | | 5542 | old = nkptp[PTP_LEVELS - 1]; |
5543 | #endif | | 5543 | #endif |
5544 | | | 5544 | |
5545 | /* Initialize needed_kptp. */ | | 5545 | /* Initialize needed_kptp. */ |
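     | /* | |      | /*
     | * For each level i, needed_kptp[i] is the index span at level i + 1 | |      | * For each level i, needed_kptp[i] is the index span at level i + 1
     | * needed to cover [VM_MIN_KERNEL_ADDRESS, maxkvaddr), minus the | |      | * needed to cover [VM_MIN_KERNEL_ADDRESS, maxkvaddr), minus the
     | * nkptp[i] PTPs already allocated at that level. | |      | * nkptp[i] PTPs already allocated at that level.
     | */ | |      | */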
5546 | for (i = PTP_LEVELS - 1; i >= 1; i--) { | | 5546 | for (i = PTP_LEVELS - 1; i >= 1; i--) { |
5547 | target_nptp = pl_i_roundup(maxkvaddr, i + 1) - | | 5547 | target_nptp = pl_i_roundup(maxkvaddr, i + 1) - |
5548 | pl_i_roundup(VM_MIN_KERNEL_ADDRESS, i + 1); | | 5548 | pl_i_roundup(VM_MIN_KERNEL_ADDRESS, i + 1); |
5549 | | | 5549 | |
5550 | if (target_nptp > nkptpmax[i]) | | 5550 | if (target_nptp > nkptpmax[i]) |
5551 | panic("out of KVA space"); | | 5551 | panic("out of KVA space"); |
5552 | KASSERT(target_nptp >= nkptp[i]); | | 5552 | KASSERT(target_nptp >= nkptp[i]); |
5553 | needed_kptp[i] = target_nptp - nkptp[i]; | | 5553 | needed_kptp[i] = target_nptp - nkptp[i]; |
5554 | } | | 5554 | } |
5555 | | | 5555 | |
5556 | #ifdef XENPV | | 5556 | #ifdef XENPV |
5557 | /* only pmap_kernel(), or the per-cpu map, has kernel entries */ | | 5557 | /* only pmap_kernel(), or the per-cpu map, has kernel entries */ |
5558 | cpm = kpm; | | 5558 | cpm = kpm; |
5559 | #else | | 5559 | #else |
5560 | /* Get the current pmap */ | | 5560 | /* Get the current pmap */ |
5561 | if (__predict_true(cpu_info_primary.ci_flags & CPUF_PRESENT)) { | | 5561 | if (__predict_true(cpu_info_primary.ci_flags & CPUF_PRESENT)) { |
5562 | cpm = curcpu()->ci_pmap; | | 5562 | cpm = curcpu()->ci_pmap; |
5563 | } else { | | 5563 | } else { |
5564 | cpm = kpm; | | 5564 | cpm = kpm; |
5565 | } | | 5565 | } |
5566 | #endif | | 5566 | #endif |
5567 | | | 5567 | |
5568 | kasan_shadow_map((void *)pmap_maxkvaddr, | | 5568 | kasan_shadow_map((void *)pmap_maxkvaddr, |
5569 | (size_t)(maxkvaddr - pmap_maxkvaddr)); | | 5569 | (size_t)(maxkvaddr - pmap_maxkvaddr)); |
5570 | kmsan_shadow_map((void *)pmap_maxkvaddr, | | 5570 | kmsan_shadow_map((void *)pmap_maxkvaddr, |
5571 | (size_t)(maxkvaddr - pmap_maxkvaddr)); | | 5571 | (size_t)(maxkvaddr - pmap_maxkvaddr)); |
5572 | | | 5572 | |
5573 | pmap_alloc_level(cpm, pmap_maxkvaddr, needed_kptp); | | 5573 | pmap_alloc_level(cpm, pmap_maxkvaddr, needed_kptp); |
5574 | | | 5574 | |
5575 | /* | | 5575 | /* |
5576 | * If the number of top level entries changed, update all pmaps. | | 5576 | * If the number of top level entries changed, update all pmaps. |
5577 | */ | | 5577 | */ |
5578 | if (needed_kptp[PTP_LEVELS - 1] != 0) { | | 5578 | if (needed_kptp[PTP_LEVELS - 1] != 0) { |
5579 | #ifdef XENPV | | 5579 | #ifdef XENPV |
5580 | #ifdef __x86_64__ | | 5580 | #ifdef __x86_64__ |
5581 | /* nothing, kernel entries are never entered in user pmap */ | | 5581 | /* nothing, kernel entries are never entered in user pmap */ |
5582 | #else | | 5582 | #else |
5583 | int pdkidx; | | 5583 | int pdkidx; |
5584 | | | 5584 | |
5585 | mutex_enter(&pmaps_lock); | | 5585 | mutex_enter(&pmaps_lock); |
5586 | LIST_FOREACH(pm, &pmaps, pm_list) { | | 5586 | LIST_FOREACH(pm, &pmaps, pm_list) { |
5587 | for (pdkidx = PDIR_SLOT_KERN + old; | | 5587 | for (pdkidx = PDIR_SLOT_KERN + old; |
5588 | pdkidx < PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1]; | | 5588 | pdkidx < PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1]; |
5589 | pdkidx++) { | | 5589 | pdkidx++) { |
5590 | pmap_pte_set(&pm->pm_pdir[pdkidx], | | 5590 | pmap_pte_set(&pm->pm_pdir[pdkidx], |
5591 | kpm->pm_pdir[pdkidx]); | | 5591 | kpm->pm_pdir[pdkidx]); |
5592 | } | | 5592 | } |
5593 | pmap_pte_flush(); | | 5593 | pmap_pte_flush(); |
5594 | } | | 5594 | } |
5595 | mutex_exit(&pmaps_lock); | | 5595 | mutex_exit(&pmaps_lock); |
5596 | #endif /* __x86_64__ */ | | 5596 | #endif /* __x86_64__ */ |
5597 | #else /* XENPV */ | | 5597 | #else /* XENPV */ |
5598 | size_t newpdes; | | 5598 | size_t newpdes; |
5599 | newpdes = nkptp[PTP_LEVELS - 1] - old; | | 5599 | newpdes = nkptp[PTP_LEVELS - 1] - old; |
5600 | if (cpm != kpm) { | | 5600 | if (cpm != kpm) { |
5601 | memcpy(&kpm->pm_pdir[PDIR_SLOT_KERN + old], | | 5601 | memcpy(&kpm->pm_pdir[PDIR_SLOT_KERN + old], |
5602 | &cpm->pm_pdir[PDIR_SLOT_KERN + old], | | 5602 | &cpm->pm_pdir[PDIR_SLOT_KERN + old], |
5603 | newpdes * sizeof(pd_entry_t)); | | 5603 | newpdes * sizeof(pd_entry_t)); |
5604 | } | | 5604 | } |
5605 | | | 5605 | |
5606 | mutex_enter(&pmaps_lock); | | 5606 | mutex_enter(&pmaps_lock); |
5607 | LIST_FOREACH(pm, &pmaps, pm_list) { | | 5607 | LIST_FOREACH(pm, &pmaps, pm_list) { |
5608 | if (__predict_false(pm->pm_enter != NULL)) { | | 5608 | if (__predict_false(pm->pm_enter != NULL)) { |
5609 | /* | | 5609 | /* |
5610 | * Not a native pmap; the kernel is not mapped | | 5610 | * Not a native pmap; the kernel is not mapped
5611 | * there, so there is nothing to synchronize. | | 5611 | * there, so there is nothing to synchronize.
5612 | */ | | 5612 | */ |
5613 | continue; | | 5613 | continue; |
5614 | } | | 5614 | } |
5615 | memcpy(&pm->pm_pdir[PDIR_SLOT_KERN + old], | | 5615 | memcpy(&pm->pm_pdir[PDIR_SLOT_KERN + old], |
5616 | &kpm->pm_pdir[PDIR_SLOT_KERN + old], | | 5616 | &kpm->pm_pdir[PDIR_SLOT_KERN + old], |
5617 | newpdes * sizeof(pd_entry_t)); | | 5617 | newpdes * sizeof(pd_entry_t)); |
5618 | } | | 5618 | } |
5619 | mutex_exit(&pmaps_lock); | | 5619 | mutex_exit(&pmaps_lock); |
5620 | #endif | | 5620 | #endif |
5621 | invalidate = true; | | 5621 | invalidate = true; |
5622 | } | | 5622 | } |
5623 | pmap_maxkvaddr = maxkvaddr; | | 5623 | pmap_maxkvaddr = maxkvaddr; |
5624 | mutex_exit(&kpm->pm_lock); | | 5624 | mutex_exit(&kpm->pm_lock); |
5625 | splx(s); | | 5625 | splx(s); |
5626 | | | 5626 | |
5627 | if (invalidate && pmap_initialized) { | | 5627 | if (invalidate && pmap_initialized) { |
5628 | /* Invalidate the pmap cache. */ | | 5628 | /* Invalidate the pmap cache. */ |
5629 | pool_cache_invalidate(&pmap_cache); | | 5629 | pool_cache_invalidate(&pmap_cache); |
5630 | } | | 5630 | } |
5631 | | | 5631 | |
5632 | return maxkvaddr; | | 5632 | return maxkvaddr; |
5633 | } | | 5633 | } |
5634 | | | 5634 | |
5635 | #ifdef DEBUG | | 5635 | #ifdef DEBUG |
5636 | void pmap_dump(struct pmap *, vaddr_t, vaddr_t); | | 5636 | void pmap_dump(struct pmap *, vaddr_t, vaddr_t); |
5637 | | | 5637 | |
5638 | /* | | 5638 | /* |
5639 | * pmap_dump: dump all the mappings from a pmap | | 5639 | * pmap_dump: dump all the mappings from a pmap |
5640 | * | | 5640 | * |
5641 | * => caller should not be holding any pmap locks | | 5641 | * => caller should not be holding any pmap locks |
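     | * => illustrative (hypothetical) use from the kernel debugger: | |      | * => illustrative (hypothetical) use from the kernel debugger:
     | * | |      | *
     | *	call pmap_dump(pmap, sva, eva) | |      | *	call pmap_dump(pmap, sva, eva)
     | * | |      | *
     | *    prints one line per valid mapping in the range. | |      | *    prints one line per valid mapping in the range.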
5642 | */ | | 5642 | */ |
5643 | void | | 5643 | void |
5644 | pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva) | | 5644 | pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva) |
5645 | { | | 5645 | { |
5646 | pt_entry_t *ptes, *pte; | | 5646 | pt_entry_t *ptes, *pte; |
5647 | pd_entry_t * const *pdes; | | 5647 | pd_entry_t * const *pdes; |
5648 | struct pmap *pmap2; | | 5648 | struct pmap *pmap2; |
5649 | vaddr_t blkendva; | | 5649 | vaddr_t blkendva; |
5650 | int lvl; | | 5650 | int lvl; |
5651 | | | 5651 | |
5652 | /* | | 5652 | /* |
5653 | * If the end is out of range, truncate it. | | 5653 | * If the end is out of range, truncate it.
5654 | * If end <= start, dump up to the top of user space. | | 5654 | * If end <= start, dump up to the top of user space.
5655 | */ | | 5655 | */ |
5656 | | | 5656 | |
5657 | if (eva > VM_MAXUSER_ADDRESS || eva <= sva) | | 5657 | if (eva > VM_MAXUSER_ADDRESS || eva <= sva) |
5658 | eva = VM_MAXUSER_ADDRESS; | | 5658 | eva = VM_MAXUSER_ADDRESS; |
5659 | | | 5659 | |
5660 | mutex_enter(&pmap->pm_lock); | | 5660 | mutex_enter(&pmap->pm_lock); |
5661 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); | | 5661 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); |
5662 | | | 5662 | |
5663 | /* | | 5663 | /* |
5664 | * dumping a range of pages: we dump in PTP-sized blocks (4MB on i386, 2MB with PAE or on amd64) | | 5664 | * dumping a range of pages: we dump in PTP-sized blocks (4MB on i386, 2MB with PAE or on amd64)
5665 | */ | | 5665 | */ |
5666 | | | 5666 | |
5667 | for (/* null */ ; sva < eva ; sva = blkendva) { | | 5667 | for (/* null */ ; sva < eva ; sva = blkendva) { |
5668 | | | 5668 | |
5669 | /* determine range of block */ | | 5669 | /* determine range of block */ |
5670 | blkendva = x86_round_pdr(sva+1); | | 5670 | blkendva = x86_round_pdr(sva+1); |
5671 | if (blkendva > eva) | | 5671 | if (blkendva > eva) |
5672 | blkendva = eva; | | 5672 | blkendva = eva; |
5673 | | | 5673 | |
5674 | /* valid block? */ | | 5674 | /* valid block? */ |
5675 | if (!pmap_pdes_valid(sva, pdes, NULL, &lvl)) | | 5675 | if (!pmap_pdes_valid(sva, pdes, NULL, &lvl)) |
5676 | continue; | | 5676 | continue; |
5677 | KASSERT(lvl == 1); | | 5677 | KASSERT(lvl == 1); |
5678 | | | 5678 | |
5679 | pte = &ptes[pl1_i(sva)]; | | 5679 | pte = &ptes[pl1_i(sva)]; |
5680 | for (/* null */; sva < blkendva ; sva += PAGE_SIZE, pte++) { | | 5680 | for (/* null */; sva < blkendva ; sva += PAGE_SIZE, pte++) { |
5681 | if (!pmap_valid_entry(*pte)) | | 5681 | if (!pmap_valid_entry(*pte)) |
5682 | continue; | | 5682 | continue; |
5683 | printf("va %#" PRIxVADDR " -> pa %#" PRIxPADDR | | 5683 | printf("va %#" PRIxVADDR " -> pa %#" PRIxPADDR |
5684 | " (pte=%#" PRIxPADDR ")\n", | | 5684 | " (pte=%#" PRIxPADDR ")\n", |
5685 | sva, (paddr_t)pmap_pte2pa(*pte), (paddr_t)*pte); | | 5685 | sva, (paddr_t)pmap_pte2pa(*pte), (paddr_t)*pte); |
5686 | } | | 5686 | } |
5687 | } | | 5687 | } |
5688 | pmap_unmap_ptes(pmap, pmap2); | | 5688 | pmap_unmap_ptes(pmap, pmap2); |
5689 | mutex_exit(&pmap->pm_lock); | | 5689 | mutex_exit(&pmap->pm_lock); |
5690 | } | | 5690 | } |
5691 | #endif | | 5691 | #endif |
5692 | | | 5692 | |
5693 | /* | | 5693 | /* |
5694 | * pmap_update: process deferred invalidations and frees. | | 5694 | * pmap_update: process deferred invalidations and frees. |
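     | * | |      | *
     | * Illustrative (hypothetical) pattern: batch a run of pmap_kenter_pa() | |      | * Illustrative (hypothetical) pattern: batch a run of pmap_kenter_pa()
     | * and pmap_kremove() calls, then issue one pmap_update(pmap_kernel()), | |      | * and pmap_kremove() calls, then issue one pmap_update(pmap_kernel()),
     | * paying for TLB shootdowns and deferred PTP frees once per batch | |      | * paying for TLB shootdowns and deferred PTP frees once per batch
     | * rather than once per page. | |      | * rather than once per page.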
5695 | */ | | 5695 | */ |
5696 | void | | 5696 | void |
5697 | pmap_update(struct pmap *pmap) | | 5697 | pmap_update(struct pmap *pmap) |
5698 | { | | 5698 | { |
5699 | struct pmap_page *pp; | | 5699 | struct pmap_page *pp; |
5700 | struct vm_page *ptp; | | 5700 | struct vm_page *ptp; |
5701 | | | 5701 | |
5702 | /* | | 5702 | /* |
5703 | * Initiate any pending TLB shootdowns. Wait for them to | | 5703 | * Initiate any pending TLB shootdowns. Wait for them to |
5704 | * complete before returning control to the caller. | | 5704 | * complete before returning control to the caller. |
5705 | */ | | 5705 | */ |
5706 | kpreempt_disable(); | | 5706 | kpreempt_disable(); |
5707 | pmap_tlb_shootnow(); | | 5707 | pmap_tlb_shootnow(); |
5708 | kpreempt_enable(); | | 5708 | kpreempt_enable(); |
5709 | | | 5709 | |
5710 | /* | | 5710 | /* |
5711 | * Now that shootdowns are complete, process deferred frees. This | | 5711 | * Now that shootdowns are complete, process deferred frees. This |
5712 | * is an unlocked check, but is safe as we're only interested in | | 5712 | * is an unlocked check, but is safe as we're only interested in |
5713 | * work done in this LWP - we won't get a false negative. | | 5713 | * work done in this LWP - we won't get a false negative. |
5714 | */ | | 5714 | */ |
5715 | if (atomic_load_relaxed(&pmap->pm_gc_ptp.lh_first) == NULL) { | | 5715 | if (atomic_load_relaxed(&pmap->pm_gc_ptp.lh_first) == NULL) { |
5716 | return; | | 5716 | return; |
5717 | } | | 5717 | } |
5718 | | | 5718 | |
5719 | mutex_enter(&pmap->pm_lock); | | 5719 | mutex_enter(&pmap->pm_lock); |
5720 | while ((ptp = LIST_FIRST(&pmap->pm_gc_ptp)) != NULL) { | | 5720 | while ((ptp = LIST_FIRST(&pmap->pm_gc_ptp)) != NULL) { |
5721 | KASSERT(ptp->wire_count == 0); | | 5721 | KASSERT(ptp->wire_count == 0); |
5722 | KASSERT(ptp->uanon == NULL); | | 5722 | KASSERT(ptp->uanon == NULL); |
5723 | LIST_REMOVE(ptp, mdpage.mp_pp.pp_link); | | 5723 | LIST_REMOVE(ptp, mdpage.mp_pp.pp_link); |
5724 | pp = VM_PAGE_TO_PP(ptp); | | 5724 | pp = VM_PAGE_TO_PP(ptp); |
5725 | LIST_INIT(&pp->pp_pvlist); | | 5725 | LIST_INIT(&pp->pp_pvlist); |
5726 | pp->pp_attrs = 0; | | 5726 | pp->pp_attrs = 0; |
5727 | pp->pp_pte.pte_ptp = NULL; | | 5727 | pp->pp_pte.pte_ptp = NULL; |
5728 | pp->pp_pte.pte_va = 0; | | 5728 | pp->pp_pte.pte_va = 0; |
5729 | PMAP_CHECK_PP(VM_PAGE_TO_PP(ptp)); | | 5729 | PMAP_CHECK_PP(VM_PAGE_TO_PP(ptp)); |
5730 | | | 5730 | |
5731 | /* | | 5731 | /* |
5732 | * XXX Hack to avoid extra locking, and lock | | 5732 | * XXX Hack to avoid extra locking, and lock |
5733 | * assertions in uvm_pagefree(). Despite uobject | | 5733 | * assertions in uvm_pagefree(). Despite uobject |
5734 | * being set, this isn't a managed page. | | 5734 | * being set, this isn't a managed page. |
5735 | */ | | 5735 | */ |
5736 | PMAP_DUMMY_LOCK(pmap); | | 5736 | PMAP_DUMMY_LOCK(pmap); |
5737 | uvm_pagerealloc(ptp, NULL, 0); | | 5737 | uvm_pagerealloc(ptp, NULL, 0); |
5738 | PMAP_DUMMY_UNLOCK(pmap); | | 5738 | PMAP_DUMMY_UNLOCK(pmap); |
5739 | uvm_pagefree(ptp); | | 5739 | uvm_pagefree(ptp); |
5740 | } | | 5740 | } |
5741 | mutex_exit(&pmap->pm_lock); | | 5741 | mutex_exit(&pmap->pm_lock); |
5742 | } | | 5742 | } |
5743 | | | 5743 | |
5744 | #if PTP_LEVELS > 4 | | 5744 | #if PTP_LEVELS > 4 |
5745 | #error "Unsupported number of page table levels" | | 5745 | #error "Unsupported number of page table levels"
5746 | #endif | | 5746 | #endif |
5747 | | | 5747 | |
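     | /* | |      | /*
     | * pmap_init_tmp_pgtbl: build a minimal throwaway page table tree in | |      | * pmap_init_tmp_pgtbl: build a minimal throwaway page table tree in
     | * the fixed low-memory pages listed in x86_tmp_pml_paddr[].  The tree | |      | * the fixed low-memory pages listed in x86_tmp_pml_paddr[].  The tree
     | * identity-maps the single page 'pg' on top of a copy of the kernel's | |      | * identity-maps the single page 'pg' on top of a copy of the kernel's
     | * top-level entries, presumably for trampoline code that must briefly | |      | * top-level entries, presumably for trampoline code that must briefly
     | * run with a tiny self-contained tree.  Returns the physical address | |      | * run with a tiny self-contained tree.  Returns the physical address
     | * of the resulting top-level structure, suitable for loading into | |      | * of the resulting top-level structure, suitable for loading into
     | * %cr3. | |      | * %cr3.
     | */ | |      | */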
5748 | paddr_t | | 5748 | paddr_t |
5749 | pmap_init_tmp_pgtbl(paddr_t pg) | | 5749 | pmap_init_tmp_pgtbl(paddr_t pg) |
5750 | { | | 5750 | { |
5751 | static bool maps_loaded; | | 5751 | static bool maps_loaded; |
5752 | static const paddr_t x86_tmp_pml_paddr[] = { | | 5752 | static const paddr_t x86_tmp_pml_paddr[] = { |
5753 | 4 * PAGE_SIZE, /* L1 */ | | 5753 | 4 * PAGE_SIZE, /* L1 */ |
5754 | 5 * PAGE_SIZE, /* L2 */ | | 5754 | 5 * PAGE_SIZE, /* L2 */ |
5755 | 6 * PAGE_SIZE, /* L3 */ | | 5755 | 6 * PAGE_SIZE, /* L3 */ |
5756 | 7 * PAGE_SIZE /* L4 */ | | 5756 | 7 * PAGE_SIZE /* L4 */ |
5757 | }; | | 5757 | }; |
5758 | static vaddr_t x86_tmp_pml_vaddr[] = { 0, 0, 0, 0 }; | | 5758 | static vaddr_t x86_tmp_pml_vaddr[] = { 0, 0, 0, 0 }; |
5759 | | | 5759 | |
5760 | pd_entry_t *tmp_pml, *kernel_pml; | | 5760 | pd_entry_t *tmp_pml, *kernel_pml; |
5761 | | | 5761 | |
5762 | int level; | | 5762 | int level; |
5763 | | | 5763 | |
5764 | if (!maps_loaded) { | | 5764 | if (!maps_loaded) { |
5765 | for (level = 0; level < PTP_LEVELS; ++level) { | | 5765 | for (level = 0; level < PTP_LEVELS; ++level) { |
5766 | x86_tmp_pml_vaddr[level] = | | 5766 | x86_tmp_pml_vaddr[level] = |
5767 | uvm_km_alloc(kernel_map, PAGE_SIZE, 0, | | 5767 | uvm_km_alloc(kernel_map, PAGE_SIZE, 0, |
5768 | UVM_KMF_VAONLY); | | 5768 | UVM_KMF_VAONLY); |
5769 | | | 5769 | |
5770 | if (x86_tmp_pml_vaddr[level] == 0) | | 5770 | if (x86_tmp_pml_vaddr[level] == 0) |
5771 | panic("mapping of real mode PML failed"); | | 5771 | panic("mapping of real mode PML failed");
5772 | pmap_kenter_pa(x86_tmp_pml_vaddr[level], | | 5772 | pmap_kenter_pa(x86_tmp_pml_vaddr[level], |
5773 | x86_tmp_pml_paddr[level], | | 5773 | x86_tmp_pml_paddr[level], |
5774 | VM_PROT_READ | VM_PROT_WRITE, 0); | | 5774 | VM_PROT_READ | VM_PROT_WRITE, 0); |
5775 | } | | 5775 | } |
5776 | pmap_update(pmap_kernel()); | | 5776 | pmap_update(pmap_kernel()); |
5777 | maps_loaded = true; | | 5777 | maps_loaded = true; |
5778 | } | | 5778 | } |
5779 | | | 5779 | |
5780 | /* Zero levels 1-3 */ | | 5780 | /* Zero levels 1-3 */ |
5781 | for (level = 0; level < PTP_LEVELS - 1; ++level) { | | 5781 | for (level = 0; level < PTP_LEVELS - 1; ++level) { |
5782 | tmp_pml = (void *)x86_tmp_pml_vaddr[level]; | | 5782 | tmp_pml = (void *)x86_tmp_pml_vaddr[level]; |
5783 | memset(tmp_pml, 0, PAGE_SIZE); | | 5783 | memset(tmp_pml, 0, PAGE_SIZE); |
5784 | } | | 5784 | } |
5785 | | | 5785 | |
5786 | /* Copy PML4 */ | | 5786 | /* Copy PML4 */ |
5787 | kernel_pml = pmap_kernel()->pm_pdir; | | 5787 | kernel_pml = pmap_kernel()->pm_pdir; |
5788 | tmp_pml = (void *)x86_tmp_pml_vaddr[PTP_LEVELS - 1]; | | 5788 | tmp_pml = (void *)x86_tmp_pml_vaddr[PTP_LEVELS - 1]; |
5789 | memcpy(tmp_pml, kernel_pml, PAGE_SIZE); | | 5789 | memcpy(tmp_pml, kernel_pml, PAGE_SIZE); |
5790 | | | 5790 | |
5791 | #ifdef PAE | | 5791 | #ifdef PAE |
5792 | /* | | 5792 | /* |
5793 | * Use the last 4 entries of the L2 page as L3 PD entries. These | | 5793 | * Use the last 4 entries of the L2 page as L3 PD entries. These |
5794 | * last entries are unlikely to be used for temporary mappings. | | 5794 | * last entries are unlikely to be used for temporary mappings. |
5795 | * 508: maps 0->1GB (userland) | | 5795 | * 508: maps 0->1GB (userland) |
5796 | * 509: unused | | 5796 | * 509: unused |
5797 | * 510: unused | | 5797 | * 510: unused |
5798 | * 511: maps 3->4GB (kernel) | | 5798 | * 511: maps 3->4GB (kernel) |
5799 | */ | | 5799 | */ |
5800 | tmp_pml[508] = x86_tmp_pml_paddr[PTP_LEVELS - 1] | PTE_P; | | 5800 | tmp_pml[508] = x86_tmp_pml_paddr[PTP_LEVELS - 1] | PTE_P; |
5801 | tmp_pml[509] = 0; | | 5801 | tmp_pml[509] = 0; |
5802 | tmp_pml[510] = 0; | | 5802 | tmp_pml[510] = 0; |
5803 | tmp_pml[511] = pmap_pdirpa(pmap_kernel(), PDIR_SLOT_KERN) | PTE_P; | | 5803 | tmp_pml[511] = pmap_pdirpa(pmap_kernel(), PDIR_SLOT_KERN) | PTE_P; |
5804 | #endif | | 5804 | #endif |
5805 | | | 5805 | |
5806 | for (level = PTP_LEVELS - 1; level > 0; --level) { | | 5806 | for (level = PTP_LEVELS - 1; level > 0; --level) { |
5807 | tmp_pml = (void *)x86_tmp_pml_vaddr[level]; | | 5807 | tmp_pml = (void *)x86_tmp_pml_vaddr[level]; |
5808 | | | 5808 | |
5809 | tmp_pml[pl_i(pg, level + 1)] = | | 5809 | tmp_pml[pl_i(pg, level + 1)] = |
5810 | (x86_tmp_pml_paddr[level - 1] & PTE_FRAME) | PTE_W | PTE_P; | | 5810 | (x86_tmp_pml_paddr[level - 1] & PTE_FRAME) | PTE_W | PTE_P; |
5811 | } | | 5811 | } |
5812 | | | 5812 | |
5813 | tmp_pml = (void *)x86_tmp_pml_vaddr[0]; | | 5813 | tmp_pml = (void *)x86_tmp_pml_vaddr[0]; |
5814 | tmp_pml[pl_i(pg, 1)] = (pg & PTE_FRAME) | PTE_W | PTE_P; | | 5814 | tmp_pml[pl_i(pg, 1)] = (pg & PTE_FRAME) | PTE_W | PTE_P; |
5815 | | | 5815 | |
5816 | #ifdef PAE | | 5816 | #ifdef PAE |
5817 | /* Return the PA of the L3 page (entry 508 of the L2 page) */ | | 5817 | /* Return the PA of the L3 page (entry 508 of the L2 page) */ |
5818 | return x86_tmp_pml_paddr[PTP_LEVELS - 1] + 508 * sizeof(pd_entry_t); | | 5818 | return x86_tmp_pml_paddr[PTP_LEVELS - 1] + 508 * sizeof(pd_entry_t); |
5819 | #endif | | 5819 | #endif |
5820 | | | 5820 | |
5821 | return x86_tmp_pml_paddr[PTP_LEVELS - 1]; | | 5821 | return x86_tmp_pml_paddr[PTP_LEVELS - 1]; |
5822 | } | | 5822 | } |
5823 | | | 5823 | |
5824 | u_int | | 5824 | u_int |
5825 | x86_mmap_flags(paddr_t mdpgno) | | 5825 | x86_mmap_flags(paddr_t mdpgno) |
5826 | { | | 5826 | { |
5827 | u_int nflag = (mdpgno >> X86_MMAP_FLAG_SHIFT) & X86_MMAP_FLAG_MASK; | | 5827 | u_int nflag = (mdpgno >> X86_MMAP_FLAG_SHIFT) & X86_MMAP_FLAG_MASK; |
5828 | u_int pflag = 0; | | 5828 | u_int pflag = 0; |
5829 | | | 5829 | |
5830 | if (nflag & X86_MMAP_FLAG_PREFETCH) | | 5830 | if (nflag & X86_MMAP_FLAG_PREFETCH) |
5831 | pflag |= PMAP_WRITE_COMBINE; | | 5831 | pflag |= PMAP_WRITE_COMBINE; |
5832 | | | 5832 | |
5833 | return pflag; | | 5833 | return pflag; |
5834 | } | | 5834 | } |
5835 | | | 5835 | |
5836 | #if defined(__HAVE_DIRECT_MAP) && defined(__x86_64__) && !defined(XENPV) | | 5836 | #if defined(__HAVE_DIRECT_MAP) && defined(__x86_64__) && !defined(XENPV) |
5837 | | | 5837 | |
5838 | /* | | 5838 | /* |
5839 | * ----------------------------------------------------------------------------- | | 5839 | * ----------------------------------------------------------------------------- |
5840 | * ***************************************************************************** | | 5840 | * ***************************************************************************** |
5841 | * ***************************************************************************** | | 5841 | * ***************************************************************************** |
5842 | * ***************************************************************************** | | 5842 | * ***************************************************************************** |
5843 | * ***************************************************************************** | | 5843 | * ***************************************************************************** |
5844 | * **************** HERE BEGINS THE EPT CODE, USED BY INTEL-VMX **************** | | 5844 | * **************** HERE BEGINS THE EPT CODE, USED BY INTEL-VMX **************** |
5845 | * ***************************************************************************** | | 5845 | * ***************************************************************************** |
5846 | * ***************************************************************************** | | 5846 | * ***************************************************************************** |
5847 | * ***************************************************************************** | | 5847 | * ***************************************************************************** |
5848 | * ***************************************************************************** | | 5848 | * ***************************************************************************** |
5849 | * ----------------------------------------------------------------------------- | | 5849 | * ----------------------------------------------------------------------------- |
5850 | * | | 5850 | * |
5851 | * These functions are invoked as callbacks from the code above. Unlike the | | 5851 | * These functions are invoked as callbacks from the code above. Unlike the
5852 | * native pmap, EPT has no recursive slot, so pmap_map_ptes() cannot be | | 5852 | * native pmap, EPT has no recursive slot, so pmap_map_ptes() cannot be
5853 | * used. Instead, we use the direct map and walk down the | | 5853 | * used. Instead, we use the direct map and walk down the
5854 | * tree manually. | | 5854 | * tree manually. |
5855 | * | | 5855 | * |
5856 | * Apart from that, the logic is mostly the same as native. Once a pmap has | | 5856 | * Apart from that, the logic is mostly the same as native. Once a pmap has |
5857 | * been created, NVMM calls pmap_ept_transform() to make it an EPT pmap. | | 5857 | * been created, NVMM calls pmap_ept_transform() to make it an EPT pmap. |
5858 | * After that we're good, and the callbacks will handle the translations | | 5858 | * After that we're good, and the callbacks will handle the translations |
5859 | * for us. | | 5859 | * for us. |
5860 | * | | 5860 | * |
5861 | * ----------------------------------------------------------------------------- | | 5861 | * ----------------------------------------------------------------------------- |
5862 | */ | | 5862 | */ |
5863 | | | 5863 | |
5864 | /* Hardware bits. */ | | 5864 | /* Hardware bits. */ |
5865 | #define EPT_R __BIT(0) /* read */ | | 5865 | #define EPT_R __BIT(0) /* read */ |
5866 | #define EPT_W __BIT(1) /* write */ | | 5866 | #define EPT_W __BIT(1) /* write */ |
5867 | #define EPT_X __BIT(2) /* execute */ | | 5867 | #define EPT_X __BIT(2) /* execute */ |
5868 | #define EPT_T __BITS(5,3) /* type */ | | 5868 | #define EPT_T __BITS(5,3) /* type */ |
5869 | #define TYPE_UC 0 | | 5869 | #define TYPE_UC 0 |
5870 | #define TYPE_WC 1 | | 5870 | #define TYPE_WC 1 |
5871 | #define TYPE_WT 4 | | 5871 | #define TYPE_WT 4 |
5872 | #define TYPE_WP 5 | | 5872 | #define TYPE_WP 5 |
5873 | #define TYPE_WB 6 | | 5873 | #define TYPE_WB 6 |
5874 | #define EPT_NOPAT __BIT(6) | | 5874 | #define EPT_NOPAT __BIT(6) |
5875 | #define EPT_L __BIT(7) /* large */ | | 5875 | #define EPT_L __BIT(7) /* large */ |
5876 | #define EPT_A __BIT(8) /* accessed */ | | 5876 | #define EPT_A __BIT(8) /* accessed */ |
5877 | #define EPT_D __BIT(9) /* dirty */ | | 5877 | #define EPT_D __BIT(9) /* dirty */ |
5878 | /* Software bits. */ | | 5878 | /* Software bits. */ |
5879 | #define EPT_PVLIST __BIT(60) | | 5879 | #define EPT_PVLIST __BIT(60) |
5880 | #define EPT_WIRED __BIT(61) | | 5880 | #define EPT_WIRED __BIT(61) |
5881 | | | 5881 | |
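     | /* | |      | /*
     | * Illustration: a leaf EPT PTE for an ordinary read/write/execute, | |      | * Illustration: a leaf EPT PTE for an ordinary read/write/execute,
     | * write-back guest page would be composed roughly as | |      | * write-back guest page would be composed roughly as
     | * | |      | *
     | *	pa | EPT_R | EPT_W | EPT_X | __SHIFTIN(TYPE_WB, EPT_T) | EPT_NOPAT | |      | *	pa | EPT_R | EPT_W | EPT_X | __SHIFTIN(TYPE_WB, EPT_T) | EPT_NOPAT
     | * | |      | *
     | * with EPT_PVLIST and EPT_WIRED used purely as software bookkeeping. | |      | * with EPT_PVLIST and EPT_WIRED used purely as software bookkeeping.
     | */ | |      | */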
5882 | #define pmap_ept_valid_entry(pte) ((pte) & EPT_R) | | 5882 | #define pmap_ept_valid_entry(pte) ((pte) & EPT_R)
5883 | | | 5883 | |
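     | /* | |      | /*
     | * True if the CPU implements the EPT accessed/dirty flags.  When it | |      | * True if the CPU implements the EPT accessed/dirty flags.  When it
     | * does not, pmap_ept_to_pp_attrs() below conservatively reports every | |      | * does not, pmap_ept_to_pp_attrs() below conservatively reports every
     | * mapped page as both referenced and modified. | |      | * mapped page as both referenced and modified.
     | */ | |      | */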
5884 | bool pmap_ept_has_ad __read_mostly; | | 5884 | bool pmap_ept_has_ad __read_mostly; |
5885 | | | 5885 | |
5886 | static inline void | | 5886 | static inline void |
5887 | pmap_ept_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte) | | 5887 | pmap_ept_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte) |
5888 | { | | 5888 | { |
5889 | int resid_diff = ((npte & EPT_R) ? 1 : 0) - ((opte & EPT_R) ? 1 : 0); | | 5889 | int resid_diff = ((npte & EPT_R) ? 1 : 0) - ((opte & EPT_R) ? 1 : 0); |
5890 | int wired_diff = ((npte & EPT_WIRED) ? 1 : 0) - ((opte & EPT_WIRED) ? 1 : 0); | | 5890 | int wired_diff = ((npte & EPT_WIRED) ? 1 : 0) - ((opte & EPT_WIRED) ? 1 : 0); |
5891 | | | 5891 | |
5892 | KASSERT((npte & (EPT_R | EPT_WIRED)) != EPT_WIRED); | | 5892 | KASSERT((npte & (EPT_R | EPT_WIRED)) != EPT_WIRED); |
5893 | KASSERT((opte & (EPT_R | EPT_WIRED)) != EPT_WIRED); | | 5893 | KASSERT((opte & (EPT_R | EPT_WIRED)) != EPT_WIRED); |
5894 | | | 5894 | |
5895 | pmap_stats_update(pmap, resid_diff, wired_diff); | | 5895 | pmap_stats_update(pmap, resid_diff, wired_diff); |
5896 | } | | 5896 | } |
5897 | | | 5897 | |
5898 | static pt_entry_t | | 5898 | static pt_entry_t |
5899 | pmap_ept_type(u_int flags) | | 5899 | pmap_ept_type(u_int flags) |
5900 | { | | 5900 | { |
5901 | u_int cacheflags = (flags & PMAP_CACHE_MASK); | | 5901 | u_int cacheflags = (flags & PMAP_CACHE_MASK); |
5902 | pt_entry_t ret; | | 5902 | pt_entry_t ret; |
5903 | | | 5903 | |
5904 | switch (cacheflags) { | | 5904 | switch (cacheflags) { |
5905 | case PMAP_NOCACHE: | | 5905 | case PMAP_NOCACHE: |
5906 | case PMAP_NOCACHE_OVR: | | 5906 | case PMAP_NOCACHE_OVR: |
5907 | ret = __SHIFTIN(TYPE_UC, EPT_T); | | 5907 | ret = __SHIFTIN(TYPE_UC, EPT_T); |
5908 | break; | | 5908 | break; |
5909 | case PMAP_WRITE_COMBINE: | | 5909 | case PMAP_WRITE_COMBINE: |
5910 | ret = __SHIFTIN(TYPE_WC, EPT_T); | | 5910 | ret = __SHIFTIN(TYPE_WC, EPT_T); |
5911 | break; | | 5911 | break; |
5912 | case PMAP_WRITE_BACK: | | 5912 | case PMAP_WRITE_BACK: |
5913 | default: | | 5913 | default: |
5914 | ret = __SHIFTIN(TYPE_WB, EPT_T); | | 5914 | ret = __SHIFTIN(TYPE_WB, EPT_T); |
5915 | break; | | 5915 | break; |
5916 | } | | 5916 | } |
5917 | | | 5917 | |
5918 | ret |= EPT_NOPAT; | | 5918 | ret |= EPT_NOPAT; |
5919 | return ret; | | 5919 | return ret; |
5920 | } | | 5920 | } |
5921 | | | 5921 | |
5922 | static inline pt_entry_t | | 5922 | static inline pt_entry_t |
5923 | pmap_ept_prot(vm_prot_t prot) | | 5923 | pmap_ept_prot(vm_prot_t prot) |
5924 | { | | 5924 | { |
5925 | pt_entry_t res = 0; | | 5925 | pt_entry_t res = 0; |
5926 | | | 5926 | |
5927 | if (prot & VM_PROT_READ) | | 5927 | if (prot & VM_PROT_READ) |
5928 | res |= EPT_R; | | 5928 | res |= EPT_R; |
5929 | if (prot & VM_PROT_WRITE) | | 5929 | if (prot & VM_PROT_WRITE) |
5930 | res |= EPT_W; | | 5930 | res |= EPT_W; |
5931 | if (prot & VM_PROT_EXECUTE) | | 5931 | if (prot & VM_PROT_EXECUTE) |
5932 | res |= EPT_X; | | 5932 | res |= EPT_X; |
5933 | | | 5933 | |
5934 | return res; | | 5934 | return res; |
5935 | } | | 5935 | } |
5936 | | | 5936 | |
5937 | static inline uint8_t | | 5937 | static inline uint8_t |
5938 | pmap_ept_to_pp_attrs(pt_entry_t ept) | | 5938 | pmap_ept_to_pp_attrs(pt_entry_t ept) |
5939 | { | | 5939 | { |
5940 | uint8_t ret = 0; | | 5940 | uint8_t ret = 0; |
5941 | if (pmap_ept_has_ad) { | | 5941 | if (pmap_ept_has_ad) { |
5942 | if (ept & EPT_D) | | 5942 | if (ept & EPT_D) |
5943 | ret |= PP_ATTRS_D; | | 5943 | ret |= PP_ATTRS_D; |
5944 | if (ept & EPT_A) | | 5944 | if (ept & EPT_A) |
5945 | ret |= PP_ATTRS_A; | | 5945 | ret |= PP_ATTRS_A; |
5946 | } else { | | 5946 | } else { |
5947 | ret |= (PP_ATTRS_D|PP_ATTRS_A); | | 5947 | ret |= (PP_ATTRS_D|PP_ATTRS_A); |
5948 | } | | 5948 | } |
5949 | if (ept & EPT_W) | | 5949 | if (ept & EPT_W) |
5950 | ret |= PP_ATTRS_W; | | 5950 | ret |= PP_ATTRS_W; |
5951 | return ret; | | 5951 | return ret; |
5952 | } | | 5952 | } |
5953 | | | 5953 | |
5954 | static inline pt_entry_t | | 5954 | static inline pt_entry_t |
5955 | pmap_pp_attrs_to_ept(uint8_t attrs) | | 5955 | pmap_pp_attrs_to_ept(uint8_t attrs) |
5956 | { | | 5956 | { |
5957 | pt_entry_t ept = 0; | | 5957 | pt_entry_t ept = 0; |
5958 | if (attrs & PP_ATTRS_D) | | 5958 | if (attrs & PP_ATTRS_D) |
5959 | ept |= EPT_D; | | 5959 | ept |= EPT_D; |
5960 | if (attrs & PP_ATTRS_A) | | 5960 | if (attrs & PP_ATTRS_A) |
5961 | ept |= EPT_A; | | 5961 | ept |= EPT_A; |
5962 | if (attrs & PP_ATTRS_W) | | 5962 | if (attrs & PP_ATTRS_W) |
5963 | ept |= EPT_W; | | 5963 | ept |= EPT_W; |
5964 | return ept; | | 5964 | return ept; |
5965 | } | | 5965 | } |
5966 | | | 5966 | |
5967 | /* | | 5967 | /* |
5968 | * Helper for pmap_ept_free_ptp. | | 5968 | * Helper for pmap_ept_free_ptp. |
5969 | * tree[0] = &L2[L2idx] | | 5969 | * tree[0] = &L2[L2idx] |
5970 | * tree[1] = &L3[L3idx] | | 5970 | * tree[1] = &L3[L3idx] |
5971 | * tree[2] = &L4[L4idx] | | 5971 | * tree[2] = &L4[L4idx] |
5972 | */ | | 5972 | */ |
5973 | static void | | 5973 | static void |
5974 | pmap_ept_get_tree(struct pmap *pmap, vaddr_t va, pd_entry_t **tree) | | 5974 | pmap_ept_get_tree(struct pmap *pmap, vaddr_t va, pd_entry_t **tree) |
5975 | { | | 5975 | { |
5976 | pt_entry_t *pteva; | | 5976 | pt_entry_t *pteva; |
5977 | paddr_t ptepa; | | 5977 | paddr_t ptepa; |
5978 | int i, index; | | 5978 | int i, index; |
5979 | | | 5979 | |
5980 | ptepa = pmap->pm_pdirpa[0]; | | 5980 | ptepa = pmap->pm_pdirpa[0]; |
5981 | for (i = PTP_LEVELS; i > 1; i--) { | | 5981 | for (i = PTP_LEVELS; i > 1; i--) { |
5982 | index = pl_pi(va, i); | | 5982 | index = pl_pi(va, i); |
5983 | pteva = (pt_entry_t *)PMAP_DIRECT_MAP(ptepa); | | 5983 | pteva = (pt_entry_t *)PMAP_DIRECT_MAP(ptepa); |
5984 | KASSERT(pmap_ept_valid_entry(pteva[index])); | | 5984 | KASSERT(pmap_ept_valid_entry(pteva[index])); |
5985 | tree[i - 2] = &pteva[index]; | | 5985 | tree[i - 2] = &pteva[index]; |
5986 | ptepa = pmap_pte2pa(pteva[index]); | | 5986 | ptepa = pmap_pte2pa(pteva[index]); |
5987 | } | | 5987 | } |
5988 | } | | 5988 | } |
5989 | | | 5989 | |
5990 | static void | | 5990 | static void |
5991 | pmap_ept_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va) | | 5991 | pmap_ept_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va) |
5992 | { | | 5992 | { |
5993 | pd_entry_t *tree[3]; | | 5993 | pd_entry_t *tree[3]; |
5994 | int level; | | 5994 | int level; |
5995 | | | 5995 | |
5996 | KASSERT(pmap != pmap_kernel()); | | 5996 | KASSERT(pmap != pmap_kernel()); |
5997 | KASSERT(mutex_owned(&pmap->pm_lock)); | | 5997 | KASSERT(mutex_owned(&pmap->pm_lock)); |
5998 | KASSERT(kpreempt_disabled()); | | 5998 | KASSERT(kpreempt_disabled()); |
5999 | | | 5999 | |
6000 | pmap_ept_get_tree(pmap, va, tree); | | 6000 | pmap_ept_get_tree(pmap, va, tree); |
6001 | | | 6001 | |
6002 | level = 1; | | 6002 | level = 1; |
6003 | do { | | 6003 | do { |
6004 | (void)pmap_pte_testset(tree[level - 1], 0); | | 6004 | (void)pmap_pte_testset(tree[level - 1], 0); |
6005 | | | 6005 | |
6006 | pmap_freepage(pmap, ptp, level); | | 6006 | pmap_freepage(pmap, ptp, level); |
6007 | if (level < PTP_LEVELS - 1) { | | 6007 | if (level < PTP_LEVELS - 1) { |
6008 | ptp = pmap_find_ptp(pmap, va, level + 1); | | 6008 | ptp = pmap_find_ptp(pmap, va, level + 1); |
6009 | ptp->wire_count--; | | 6009 | ptp->wire_count--; |
6010 | if (ptp->wire_count > 1) | | 6010 | if (ptp->wire_count > 1) |
6011 | break; | | 6011 | break; |
6012 | } | | 6012 | } |
6013 | } while (++level < PTP_LEVELS); | | 6013 | } while (++level < PTP_LEVELS); |
6014 | pmap_pte_flush(); | | 6014 | pmap_pte_flush(); |
6015 | } | | 6015 | } |
6016 | | | 6016 | |
6017 | /* Allocate and install the L4->L3->L2 PTPs covering va, as needed. */ | | 6017 | /* Allocate and install the L4->L3->L2 PTPs covering va, as needed. */
6018 | static void | | 6018 | static void |
6019 | pmap_ept_install_ptp(struct pmap *pmap, struct pmap_ptparray *pt, vaddr_t va) | | 6019 | pmap_ept_install_ptp(struct pmap *pmap, struct pmap_ptparray *pt, vaddr_t va) |
6020 | { | | 6020 | { |
6021 | struct vm_page *ptp; | | 6021 | struct vm_page *ptp; |
6022 | unsigned long index; | | 6022 | unsigned long index; |
6023 | pd_entry_t *pteva; | | 6023 | pd_entry_t *pteva; |
6024 | paddr_t ptepa; | | 6024 | paddr_t ptepa; |
6025 | int i; | | 6025 | int i; |
6026 | | | 6026 | |
6027 | KASSERT(pmap != pmap_kernel()); | | 6027 | KASSERT(pmap != pmap_kernel()); |
6028 | KASSERT(mutex_owned(&pmap->pm_lock)); | | 6028 | KASSERT(mutex_owned(&pmap->pm_lock)); |
6029 | KASSERT(kpreempt_disabled()); | | 6029 | KASSERT(kpreempt_disabled()); |
6030 | | | 6030 | |
6031 | /* | | 6031 | /* |
6032 | * Now that we have all the pages looked up or allocated, | | 6032 | * Now that we have all the pages looked up or allocated, |
6033 | * loop through again installing any new ones into the tree. | | 6033 | * loop through again installing any new ones into the tree. |
6034 | */ | | 6034 | */ |
6035 | ptepa = pmap->pm_pdirpa[0]; | | 6035 | ptepa = pmap->pm_pdirpa[0]; |
6036 | for (i = PTP_LEVELS; i > 1; i--) { | | 6036 | for (i = PTP_LEVELS; i > 1; i--) { |
6037 | index = pl_pi(va, i); | | 6037 | index = pl_pi(va, i); |
6038 | pteva = (pt_entry_t *)PMAP_DIRECT_MAP(ptepa); | | 6038 | pteva = (pt_entry_t *)PMAP_DIRECT_MAP(ptepa); |
6039 | | | 6039 | |
6040 | if (pmap_ept_valid_entry(pteva[index])) { | | 6040 | if (pmap_ept_valid_entry(pteva[index])) { |
6041 | KASSERT(!pt->alloced[i]); | | 6041 | KASSERT(!pt->alloced[i]); |
6042 | ptepa = pmap_pte2pa(pteva[index]); | | 6042 | ptepa = pmap_pte2pa(pteva[index]); |
6043 | continue; | | 6043 | continue; |
6044 | } | | 6044 | } |
6045 | | | 6045 | |
6046 | ptp = pt->pg[i]; | | 6046 | ptp = pt->pg[i]; |
6047 | ptp->flags &= ~PG_BUSY; /* never busy */ | | 6047 | ptp->flags &= ~PG_BUSY; /* never busy */ |
6048 | ptp->wire_count = 1; | | 6048 | ptp->wire_count = 1; |
6049 | pmap->pm_ptphint[i - 2] = ptp; | | 6049 | pmap->pm_ptphint[i - 2] = ptp; |
6050 | ptepa = VM_PAGE_TO_PHYS(ptp); | | 6050 | ptepa = VM_PAGE_TO_PHYS(ptp); |
6051 | pmap_pte_set(&pteva[index], ptepa | EPT_R | EPT_W | EPT_X); | | 6051 | pmap_pte_set(&pteva[index], ptepa | EPT_R | EPT_W | EPT_X); |
6052 | | | 6052 | |
6053 | pmap_pte_flush(); | | 6053 | pmap_pte_flush(); |
6054 | pmap_stats_update(pmap, 1, 0); | | 6054 | pmap_stats_update(pmap, 1, 0); |
6055 | | | 6055 | |
6056 | /* | | 6056 | /* |
6057 | * If we're not in the top level, increase the | | 6057 | * If we're not in the top level, increase the |
6058 | * wire count of the parent page. | | 6058 | * wire count of the parent page. |
6059 | */ | | 6059 | */ |
6060 | if (i < PTP_LEVELS) { | | 6060 | if (i < PTP_LEVELS) { |
6061 | pt->pg[i + 1]->wire_count++; | | 6061 | pt->pg[i + 1]->wire_count++; |
6062 | } | | 6062 | } |
6063 | } | | 6063 | } |
6064 | } | | 6064 | } |
6065 | | | 6065 | |
6066 | static int | | 6066 | static int |
6067 | pmap_ept_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, | | 6067 | pmap_ept_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, |
6068 | u_int flags) | | 6068 | u_int flags) |
6069 | { | | 6069 | { |
6070 | pt_entry_t *ptes, opte, npte; | | 6070 | pt_entry_t *ptes, opte, npte; |
6071 | pt_entry_t *ptep; | | 6071 | pt_entry_t *ptep; |
6072 | struct vm_page *ptp; | | 6072 | struct vm_page *ptp; |
6073 | struct vm_page *new_pg, *old_pg; | | 6073 | struct vm_page *new_pg, *old_pg; |
6074 | struct pmap_page *new_pp, *old_pp; | | 6074 | struct pmap_page *new_pp, *old_pp; |
6075 | struct pv_entry *old_pve, *new_pve; | | 6075 | struct pv_entry *old_pve, *new_pve; |
6076 | bool wired = (flags & PMAP_WIRED) != 0; | | 6076 | bool wired = (flags & PMAP_WIRED) != 0; |
6077 | bool accessed; | | 6077 | bool accessed; |
6078 | struct pmap_ptparray pt; | | 6078 | struct pmap_ptparray pt; |
6079 | int error; | | 6079 | int error; |
6080 | bool getptp, samepage, new_embedded; | | 6080 | bool getptp, samepage, new_embedded; |
6081 | rb_tree_t *tree; | | 6081 | rb_tree_t *tree; |
6082 | | | 6082 | |
6083 | KASSERT(pmap_initialized); | | 6083 | KASSERT(pmap_initialized); |
6084 | KASSERT(va < VM_MAXUSER_ADDRESS); | | 6084 | KASSERT(va < VM_MAXUSER_ADDRESS); |