Mon Jul 6 10:12:04 2020 UTC (rin)

Include missing opt_multiprocessor.h and opt_pmap.h.
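The includes added below follow the usual NetBSD config(1) convention: kernel options are emitted into generated opt_*.h headers rather than passed on the compiler command line, so a file that tests them with #if defined(...) must pull in the matching header under the _KERNEL_OPT guard. Without opt_multiprocessor.h, the #if defined(MULTIPROCESSOR) blocks later in e500_tlb.c would silently compile the uniprocessor paths even in an MP kernel; opt_pmap.h presumably covers PMAP options used elsewhere in the file. A minimal sketch of the idiom, using a hypothetical option FOO:

/*
 * Hypothetical option FOO stands in for MULTIPROCESSOR or a PMAP option;
 * opt_foo.h would be generated by config(1) from the kernel configuration.
 */
#ifdef _KERNEL_OPT
#include "opt_foo.h"	/* defines FOO only if the kernel config enables it */
#endif

#if defined(FOO)
/* option-dependent code; silently skipped if opt_foo.h is never included */
#endif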
diff -r1.20 -r1.21 src/sys/arch/powerpc/booke/e500_tlb.c

cvs diff -r1.20 -r1.21 src/sys/arch/powerpc/booke/e500_tlb.c

--- src/sys/arch/powerpc/booke/e500_tlb.c 2020/07/06 09:34:16 1.20
+++ src/sys/arch/powerpc/booke/e500_tlb.c 2020/07/06 10:12:04 1.21
@@ -1,1041 +1,1043 @@
1/* $NetBSD: e500_tlb.c,v 1.20 2020/07/06 09:34:16 rin Exp $ */ 1/* $NetBSD: e500_tlb.c,v 1.21 2020/07/06 10:12:04 rin Exp $ */
2/*- 2/*-
3 * Copyright (c) 2010, 2011 The NetBSD Foundation, Inc. 3 * Copyright (c) 2010, 2011 The NetBSD Foundation, Inc.
4 * All rights reserved. 4 * All rights reserved.
5 * 5 *
6 * This code is derived from software contributed to The NetBSD Foundation 6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Raytheon BBN Technologies Corp and Defense Advanced Research Projects 7 * by Raytheon BBN Technologies Corp and Defense Advanced Research Projects
8 * Agency and which was developed by Matt Thomas of 3am Software Foundry. 8 * Agency and which was developed by Matt Thomas of 3am Software Foundry.
9 * 9 *
10 * This material is based upon work supported by the Defense Advanced Research 10 * This material is based upon work supported by the Defense Advanced Research
11 * Projects Agency and Space and Naval Warfare Systems Center, Pacific, under 11 * Projects Agency and Space and Naval Warfare Systems Center, Pacific, under
12 * Contract No. N66001-09-C-2073. 12 * Contract No. N66001-09-C-2073.
13 * Approved for Public Release, Distribution Unlimited 13 * Approved for Public Release, Distribution Unlimited
14 * 14 *
15 * Redistribution and use in source and binary forms, with or without 15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions 16 * modification, are permitted provided that the following conditions
17 * are met: 17 * are met:
18 * 1. Redistributions of source code must retain the above copyright 18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer. 19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright 20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the 21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution. 22 * documentation and/or other materials provided with the distribution.
23 * 23 *
24 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 24 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
26 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 26 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 27 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE. 34 * POSSIBILITY OF SUCH DAMAGE.
35 */ 35 */
36 36
37#define __PMAP_PRIVATE 37#define __PMAP_PRIVATE
38 38
39#include <sys/cdefs.h> 39#include <sys/cdefs.h>
40__KERNEL_RCSID(0, "$NetBSD: e500_tlb.c,v 1.20 2020/07/06 09:34:16 rin Exp $"); 40__KERNEL_RCSID(0, "$NetBSD: e500_tlb.c,v 1.21 2020/07/06 10:12:04 rin Exp $");
41 41
42#ifdef _KERNEL_OPT 42#ifdef _KERNEL_OPT
 43#include "opt_multiprocessor.h"
 44#include "opt_pmap.h"
43#include "opt_ppcparam.h" 45#include "opt_ppcparam.h"
44#endif 46#endif
45 47
46#include <sys/param.h> 48#include <sys/param.h>
47 49
48#include <uvm/uvm_extern.h> 50#include <uvm/uvm_extern.h>
49 51
50#include <powerpc/spr.h> 52#include <powerpc/spr.h>
51#include <powerpc/booke/spr.h> 53#include <powerpc/booke/spr.h>
52#include <powerpc/booke/cpuvar.h> 54#include <powerpc/booke/cpuvar.h>
53#include <powerpc/booke/e500reg.h> 55#include <powerpc/booke/e500reg.h>
54#include <powerpc/booke/e500var.h> 56#include <powerpc/booke/e500var.h>
55#include <powerpc/booke/pmap.h> 57#include <powerpc/booke/pmap.h>
56 58
57struct e500_tlb { 59struct e500_tlb {
58 vaddr_t tlb_va; 60 vaddr_t tlb_va;
59 uint32_t tlb_pte; 61 uint32_t tlb_pte;
60 uint32_t tlb_asid; 62 uint32_t tlb_asid;
61 vsize_t tlb_size; 63 vsize_t tlb_size;
62}; 64};
63 65
64struct e500_hwtlb { 66struct e500_hwtlb {
65 uint32_t hwtlb_mas0; 67 uint32_t hwtlb_mas0;
66 uint32_t hwtlb_mas1; 68 uint32_t hwtlb_mas1;
67 uint32_t hwtlb_mas2; 69 uint32_t hwtlb_mas2;
68 uint32_t hwtlb_mas3; 70 uint32_t hwtlb_mas3;
69}; 71};
70 72
71struct e500_xtlb { 73struct e500_xtlb {
72 struct e500_tlb e_tlb; 74 struct e500_tlb e_tlb;
73 struct e500_hwtlb e_hwtlb; 75 struct e500_hwtlb e_hwtlb;
74 u_long e_refcnt; 76 u_long e_refcnt;
75}; 77};
76 78
77static struct e500_tlb1 { 79static struct e500_tlb1 {
78 uint32_t tlb1_maxsize; 80 uint32_t tlb1_maxsize;
79 uint32_t tlb1_minsize; 81 uint32_t tlb1_minsize;
80 u_int tlb1_numentries; 82 u_int tlb1_numentries;
81 u_int tlb1_numfree; 83 u_int tlb1_numfree;
82 u_int tlb1_freelist[32]; 84 u_int tlb1_freelist[32];
83 struct e500_xtlb tlb1_entries[32]; 85 struct e500_xtlb tlb1_entries[32];
84} e500_tlb1; 86} e500_tlb1;
85 87
86static inline register_t mftlb0cfg(void) __pure; 88static inline register_t mftlb0cfg(void) __pure;
87static inline register_t mftlb1cfg(void) __pure; 89static inline register_t mftlb1cfg(void) __pure;
88 90
89static inline register_t 91static inline register_t
90mftlb0cfg(void) 92mftlb0cfg(void)
91{ 93{
92 register_t tlb0cfg; 94 register_t tlb0cfg;
93 __asm("mfspr %0, %1" : "=r"(tlb0cfg) : "n"(SPR_TLB0CFG)); 95 __asm("mfspr %0, %1" : "=r"(tlb0cfg) : "n"(SPR_TLB0CFG));
94 return tlb0cfg; 96 return tlb0cfg;
95} 97}
96 98
97static inline register_t 99static inline register_t
98mftlb1cfg(void) 100mftlb1cfg(void)
99{ 101{
100 register_t tlb1cfg; 102 register_t tlb1cfg;
101 __asm("mfspr %0, %1" : "=r"(tlb1cfg) : "n"(SPR_TLB1CFG)); 103 __asm("mfspr %0, %1" : "=r"(tlb1cfg) : "n"(SPR_TLB1CFG));
102 return tlb1cfg; 104 return tlb1cfg;
103} 105}
104 106
105static struct e500_tlb 107static struct e500_tlb
106hwtlb_to_tlb(const struct e500_hwtlb hwtlb) 108hwtlb_to_tlb(const struct e500_hwtlb hwtlb)
107{ 109{
108 struct e500_tlb tlb; 110 struct e500_tlb tlb;
109 register_t prot_mask; 111 register_t prot_mask;
110 u_int prot_shift; 112 u_int prot_shift;
111 113
112 tlb.tlb_va = MAS2_EPN & hwtlb.hwtlb_mas2; 114 tlb.tlb_va = MAS2_EPN & hwtlb.hwtlb_mas2;
113 tlb.tlb_size = 1024 << (2 * MASX_TSIZE_GET(hwtlb.hwtlb_mas1)); 115 tlb.tlb_size = 1024 << (2 * MASX_TSIZE_GET(hwtlb.hwtlb_mas1));
114 tlb.tlb_asid = MASX_TID_GET(hwtlb.hwtlb_mas1); 116 tlb.tlb_asid = MASX_TID_GET(hwtlb.hwtlb_mas1);
115 tlb.tlb_pte = (hwtlb.hwtlb_mas2 & MAS2_WIMGE) 117 tlb.tlb_pte = (hwtlb.hwtlb_mas2 & MAS2_WIMGE)
116 | (hwtlb.hwtlb_mas3 & MAS3_RPN); 118 | (hwtlb.hwtlb_mas3 & MAS3_RPN);
117 if (hwtlb.hwtlb_mas1 & MAS1_TS) { 119 if (hwtlb.hwtlb_mas1 & MAS1_TS) {
118 prot_mask = MAS3_UX|MAS3_UW|MAS3_UR; 120 prot_mask = MAS3_UX|MAS3_UW|MAS3_UR;
119 prot_shift = PTE_RWX_SHIFT - 1; 121 prot_shift = PTE_RWX_SHIFT - 1;
120 } else { 122 } else {
121 prot_mask = MAS3_SX|MAS3_SW|MAS3_SR; 123 prot_mask = MAS3_SX|MAS3_SW|MAS3_SR;
122 prot_shift = PTE_RWX_SHIFT; 124 prot_shift = PTE_RWX_SHIFT;
123 } 125 }
124 tlb.tlb_pte |= (prot_mask & hwtlb.hwtlb_mas3) << prot_shift; 126 tlb.tlb_pte |= (prot_mask & hwtlb.hwtlb_mas3) << prot_shift;
125 return tlb; 127 return tlb;
126} 128}
127 129
128static inline struct e500_hwtlb 130static inline struct e500_hwtlb
129hwtlb_read(uint32_t mas0, u_int slot) 131hwtlb_read(uint32_t mas0, u_int slot)
130{ 132{
131 struct e500_hwtlb hwtlb; 133 struct e500_hwtlb hwtlb;
132 register_t tlbcfg; 134 register_t tlbcfg;
133 135
134 if (__predict_true(mas0 == MAS0_TLBSEL_TLB0)) { 136 if (__predict_true(mas0 == MAS0_TLBSEL_TLB0)) {
135 tlbcfg = mftlb0cfg(); 137 tlbcfg = mftlb0cfg();
136 } else if (mas0 == MAS0_TLBSEL_TLB1) { 138 } else if (mas0 == MAS0_TLBSEL_TLB1) {
137 tlbcfg = mftlb1cfg(); 139 tlbcfg = mftlb1cfg();
138 } else { 140 } else {
139 panic("%s:%d: unexpected MAS0 %#" PRIx32, 141 panic("%s:%d: unexpected MAS0 %#" PRIx32,
140 __func__, __LINE__, mas0); 142 __func__, __LINE__, mas0);
141 } 143 }
142 144
143 /* 145 /*
144 * ESEL is the way we want to look up. 146 * ESEL is the way we want to look up.
145 * If tlbassoc is the same as tlbentries (like in TLB1) then the TLB is 147 * If tlbassoc is the same as tlbentries (like in TLB1) then the TLB is
146 * fully associative, the entire slot is placed into ESEL. If tlbassoc  148 * fully associative, the entire slot is placed into ESEL. If tlbassoc
147 * is less than the number of tlb entries, the slot is split in two 149 * is less than the number of tlb entries, the slot is split in two
 148 * fields. Since the TLB is M rows by N ways, the lower bits select the 150 * fields. Since the TLB is M rows by N ways, the lower bits select the
 149 * row (MAS2[EPN]) and the upper bits the way (MAS1[ESEL]). 151 * row (MAS2[EPN]) and the upper bits the way (MAS1[ESEL]).
150 */ 152 */
151 const u_int tlbassoc = TLBCFG_ASSOC(tlbcfg); 153 const u_int tlbassoc = TLBCFG_ASSOC(tlbcfg);
152 const u_int tlbentries = TLBCFG_NENTRY(tlbcfg); 154 const u_int tlbentries = TLBCFG_NENTRY(tlbcfg);
153 const u_int esel_shift = 155 const u_int esel_shift =
154 __builtin_clz(tlbassoc) - __builtin_clz(tlbentries); 156 __builtin_clz(tlbassoc) - __builtin_clz(tlbentries);
155 157
156 /* 158 /*
157 * Disable interrupts since we don't want anyone else mucking with 159 * Disable interrupts since we don't want anyone else mucking with
158 * the MMU Assist registers 160 * the MMU Assist registers
159 */ 161 */
160 const register_t msr = wrtee(0); 162 const register_t msr = wrtee(0);
161 const register_t saved_mas0 = mfspr(SPR_MAS0); 163 const register_t saved_mas0 = mfspr(SPR_MAS0);
162 mtspr(SPR_MAS0, mas0 | MAS0_ESEL_MAKE(slot >> esel_shift)); 164 mtspr(SPR_MAS0, mas0 | MAS0_ESEL_MAKE(slot >> esel_shift));
163 165
164 if (__predict_true(tlbassoc > tlbentries)) 166 if (__predict_true(tlbassoc > tlbentries))
165 mtspr(SPR_MAS2, slot << PAGE_SHIFT); 167 mtspr(SPR_MAS2, slot << PAGE_SHIFT);
166 168
167 /* 169 /*
168 * Now select the entry and grab its contents. 170 * Now select the entry and grab its contents.
169 */ 171 */
170 __asm volatile("tlbre"); 172 __asm volatile("tlbre");
171  173
172 hwtlb.hwtlb_mas0 = mfspr(SPR_MAS0); 174 hwtlb.hwtlb_mas0 = mfspr(SPR_MAS0);
173 hwtlb.hwtlb_mas1 = mfspr(SPR_MAS1); 175 hwtlb.hwtlb_mas1 = mfspr(SPR_MAS1);
174 hwtlb.hwtlb_mas2 = mfspr(SPR_MAS2); 176 hwtlb.hwtlb_mas2 = mfspr(SPR_MAS2);
175 hwtlb.hwtlb_mas3 = mfspr(SPR_MAS3); 177 hwtlb.hwtlb_mas3 = mfspr(SPR_MAS3);
176 178
177 mtspr(SPR_MAS0, saved_mas0); 179 mtspr(SPR_MAS0, saved_mas0);
178 wrtee(msr); /* restore interrupts */ 180 wrtee(msr); /* restore interrupts */
179 181
180 return hwtlb; 182 return hwtlb;
181} 183}
182 184
183static inline void 185static inline void
184hwtlb_write(const struct e500_hwtlb hwtlb, bool needs_sync) 186hwtlb_write(const struct e500_hwtlb hwtlb, bool needs_sync)
185{ 187{
186 const register_t msr = wrtee(0); 188 const register_t msr = wrtee(0);
187 const uint32_t saved_mas0 = mfspr(SPR_MAS0); 189 const uint32_t saved_mas0 = mfspr(SPR_MAS0);
188 190
189 /* 191 /*
190 * Need to always write MAS0 and MAS1 192 * Need to always write MAS0 and MAS1
191 */ 193 */
192 mtspr(SPR_MAS0, hwtlb.hwtlb_mas0); 194 mtspr(SPR_MAS0, hwtlb.hwtlb_mas0);
193 mtspr(SPR_MAS1, hwtlb.hwtlb_mas1); 195 mtspr(SPR_MAS1, hwtlb.hwtlb_mas1);
194 196
195 /* 197 /*
196 * Only write the VPN/WIMGE if this is in TLB0 or if a valid mapping. 198 * Only write the VPN/WIMGE if this is in TLB0 or if a valid mapping.
197 */ 199 */
198 if ((hwtlb.hwtlb_mas0 & MAS0_TLBSEL) == MAS0_TLBSEL_TLB0 200 if ((hwtlb.hwtlb_mas0 & MAS0_TLBSEL) == MAS0_TLBSEL_TLB0
199 || (hwtlb.hwtlb_mas1 & MAS1_V)) { 201 || (hwtlb.hwtlb_mas1 & MAS1_V)) {
200 mtspr(SPR_MAS2, hwtlb.hwtlb_mas2); 202 mtspr(SPR_MAS2, hwtlb.hwtlb_mas2);
201 } 203 }
202 /* 204 /*
203 * Only need to write the RPN/prot if we are dealing with a valid 205 * Only need to write the RPN/prot if we are dealing with a valid
204 * mapping. 206 * mapping.
205 */ 207 */
206 if (hwtlb.hwtlb_mas1 & MAS1_V) { 208 if (hwtlb.hwtlb_mas1 & MAS1_V) {
207 mtspr(SPR_MAS3, hwtlb.hwtlb_mas3); 209 mtspr(SPR_MAS3, hwtlb.hwtlb_mas3);
208 //mtspr(SPR_MAS7, 0); 210 //mtspr(SPR_MAS7, 0);
209 } 211 }
210  212
211#if 0 213#if 0
212 printf("%s->[%x,%x,%x,%x]\n", 214 printf("%s->[%x,%x,%x,%x]\n",
213 __func__,  215 __func__,
214 hwtlb.hwtlb_mas0, hwtlb.hwtlb_mas1, 216 hwtlb.hwtlb_mas0, hwtlb.hwtlb_mas1,
215 hwtlb.hwtlb_mas2, hwtlb.hwtlb_mas3); 217 hwtlb.hwtlb_mas2, hwtlb.hwtlb_mas3);
216#endif 218#endif
217 __asm volatile("tlbwe"); 219 __asm volatile("tlbwe");
218 if (needs_sync) { 220 if (needs_sync) {
219 __asm volatile("tlbsync\n\tisync\n\tsync"); 221 __asm volatile("tlbsync\n\tisync\n\tsync");
220 } 222 }
221 223
222 mtspr(SPR_MAS0, saved_mas0); 224 mtspr(SPR_MAS0, saved_mas0);
223 wrtee(msr); 225 wrtee(msr);
224} 226}
225 227
226static struct e500_hwtlb 228static struct e500_hwtlb
227tlb_to_hwtlb(const struct e500_tlb tlb) 229tlb_to_hwtlb(const struct e500_tlb tlb)
228{ 230{
229 struct e500_hwtlb hwtlb; 231 struct e500_hwtlb hwtlb;
230 232
231 KASSERT(trunc_page(tlb.tlb_va) == tlb.tlb_va); 233 KASSERT(trunc_page(tlb.tlb_va) == tlb.tlb_va);
232 KASSERT(tlb.tlb_size != 0); 234 KASSERT(tlb.tlb_size != 0);
233 KASSERT((tlb.tlb_size & (tlb.tlb_size - 1)) == 0); 235 KASSERT((tlb.tlb_size & (tlb.tlb_size - 1)) == 0);
234 const uint32_t prot_mask = tlb.tlb_pte & PTE_RWX_MASK; 236 const uint32_t prot_mask = tlb.tlb_pte & PTE_RWX_MASK;
235 if (__predict_true(tlb.tlb_size == PAGE_SIZE)) { 237 if (__predict_true(tlb.tlb_size == PAGE_SIZE)) {
236 hwtlb.hwtlb_mas0 = 0; 238 hwtlb.hwtlb_mas0 = 0;
237 hwtlb.hwtlb_mas1 = MAS1_V | MASX_TSIZE_MAKE(1); 239 hwtlb.hwtlb_mas1 = MAS1_V | MASX_TSIZE_MAKE(1);
238 /* 240 /*
239 * A non-zero ASID means this is a user page so mark it as 241 * A non-zero ASID means this is a user page so mark it as
240 * being in the user's address space. 242 * being in the user's address space.
241 */ 243 */
242 if (tlb.tlb_asid) { 244 if (tlb.tlb_asid) {
243 hwtlb.hwtlb_mas1 |= MAS1_TS 245 hwtlb.hwtlb_mas1 |= MAS1_TS
244 | MASX_TID_MAKE(tlb.tlb_asid); 246 | MASX_TID_MAKE(tlb.tlb_asid);
245 hwtlb.hwtlb_mas3 = (prot_mask >> (PTE_RWX_SHIFT - 1)) 247 hwtlb.hwtlb_mas3 = (prot_mask >> (PTE_RWX_SHIFT - 1))
246 | ((prot_mask & ~PTE_xX) >> PTE_RWX_SHIFT); 248 | ((prot_mask & ~PTE_xX) >> PTE_RWX_SHIFT);
247 KASSERT(prot_mask & PTE_xR); 249 KASSERT(prot_mask & PTE_xR);
248 KASSERT(hwtlb.hwtlb_mas3 & MAS3_UR); 250 KASSERT(hwtlb.hwtlb_mas3 & MAS3_UR);
249 CTASSERT(MAS3_UR == (PTE_xR >> (PTE_RWX_SHIFT - 1))); 251 CTASSERT(MAS3_UR == (PTE_xR >> (PTE_RWX_SHIFT - 1)));
250 CTASSERT(MAS3_SR == (PTE_xR >> PTE_RWX_SHIFT)); 252 CTASSERT(MAS3_SR == (PTE_xR >> PTE_RWX_SHIFT));
251 } else { 253 } else {
252 hwtlb.hwtlb_mas3 = prot_mask >> PTE_RWX_SHIFT; 254 hwtlb.hwtlb_mas3 = prot_mask >> PTE_RWX_SHIFT;
253 } 255 }
254 if (tlb.tlb_pte & PTE_UNMODIFIED) 256 if (tlb.tlb_pte & PTE_UNMODIFIED)
255 hwtlb.hwtlb_mas3 &= ~(MAS3_UW|MAS3_SW); 257 hwtlb.hwtlb_mas3 &= ~(MAS3_UW|MAS3_SW);
256 if (tlb.tlb_pte & PTE_UNSYNCED) 258 if (tlb.tlb_pte & PTE_UNSYNCED)
257 hwtlb.hwtlb_mas3 &= ~(MAS3_UX|MAS3_SX); 259 hwtlb.hwtlb_mas3 &= ~(MAS3_UX|MAS3_SX);
258 } else { 260 } else {
259 KASSERT(tlb.tlb_asid == 0); 261 KASSERT(tlb.tlb_asid == 0);
260 KASSERT((tlb.tlb_size & 0xaaaaa7ff) == 0); 262 KASSERT((tlb.tlb_size & 0xaaaaa7ff) == 0);
261 u_int cntlz = __builtin_clz(tlb.tlb_size); 263 u_int cntlz = __builtin_clz(tlb.tlb_size);
262 KASSERT(cntlz & 1); 264 KASSERT(cntlz & 1);
263 KASSERT(cntlz <= 19); 265 KASSERT(cntlz <= 19);
264 hwtlb.hwtlb_mas0 = MAS0_TLBSEL_TLB1; 266 hwtlb.hwtlb_mas0 = MAS0_TLBSEL_TLB1;
265 /* 267 /*
 266 * TSIZE is defined as (4^TSIZE) Kbytes, except a TSIZE of 0 is not 268 * TSIZE is defined as (4^TSIZE) Kbytes, except a TSIZE of 0 is not
267 * allowed. So 1K would be 0x00000400 giving 21 leading zero 269 * allowed. So 1K would be 0x00000400 giving 21 leading zero
268 * bits. Subtracting the leading number of zero bits from 21 270 * bits. Subtracting the leading number of zero bits from 21
269 * and dividing by 2 gives us the number that the MMU wants. 271 * and dividing by 2 gives us the number that the MMU wants.
270 */ 272 */
271 hwtlb.hwtlb_mas1 = MASX_TSIZE_MAKE(((31 - 10) - cntlz) / 2) 273 hwtlb.hwtlb_mas1 = MASX_TSIZE_MAKE(((31 - 10) - cntlz) / 2)
272 | MAS1_IPROT | MAS1_V; 274 | MAS1_IPROT | MAS1_V;
273 hwtlb.hwtlb_mas3 = prot_mask >> PTE_RWX_SHIFT; 275 hwtlb.hwtlb_mas3 = prot_mask >> PTE_RWX_SHIFT;
274 } 276 }
275 /* We are done with MAS1, on to MAS2 ... */ 277 /* We are done with MAS1, on to MAS2 ... */
276 hwtlb.hwtlb_mas2 = tlb.tlb_va | (tlb.tlb_pte & PTE_WIMGE_MASK); 278 hwtlb.hwtlb_mas2 = tlb.tlb_va | (tlb.tlb_pte & PTE_WIMGE_MASK);
277 hwtlb.hwtlb_mas3 |= tlb.tlb_pte & PTE_RPN_MASK; 279 hwtlb.hwtlb_mas3 |= tlb.tlb_pte & PTE_RPN_MASK;
278 280
279 return hwtlb; 281 return hwtlb;
280} 282}
281 283
282void * 284void *
283e500_tlb1_fetch(size_t slot) 285e500_tlb1_fetch(size_t slot)
284{ 286{
285 struct e500_tlb1 * const tlb1 = &e500_tlb1; 287 struct e500_tlb1 * const tlb1 = &e500_tlb1;
286 288
287 return &tlb1->tlb1_entries[slot].e_hwtlb; 289 return &tlb1->tlb1_entries[slot].e_hwtlb;
288} 290}
289 291
290void 292void
291e500_tlb1_sync(void) 293e500_tlb1_sync(void)
292{ 294{
293 struct e500_tlb1 * const tlb1 = &e500_tlb1; 295 struct e500_tlb1 * const tlb1 = &e500_tlb1;
294 for (u_int slot = 1; slot < tlb1->tlb1_numentries; slot++) { 296 for (u_int slot = 1; slot < tlb1->tlb1_numentries; slot++) {
295 const struct e500_hwtlb * const new_hwtlb = 297 const struct e500_hwtlb * const new_hwtlb =
296 &tlb1->tlb1_entries[slot].e_hwtlb; 298 &tlb1->tlb1_entries[slot].e_hwtlb;
297 const struct e500_hwtlb old_hwtlb = 299 const struct e500_hwtlb old_hwtlb =
298 hwtlb_read(MAS0_TLBSEL_TLB1, slot); 300 hwtlb_read(MAS0_TLBSEL_TLB1, slot);
299#define CHANGED(n,o,f) ((n)->f != (o).f) 301#define CHANGED(n,o,f) ((n)->f != (o).f)
300 bool mas1_changed_p = CHANGED(new_hwtlb, old_hwtlb, hwtlb_mas1); 302 bool mas1_changed_p = CHANGED(new_hwtlb, old_hwtlb, hwtlb_mas1);
301 bool mas2_changed_p = CHANGED(new_hwtlb, old_hwtlb, hwtlb_mas2); 303 bool mas2_changed_p = CHANGED(new_hwtlb, old_hwtlb, hwtlb_mas2);
302 bool mas3_changed_p = CHANGED(new_hwtlb, old_hwtlb, hwtlb_mas3); 304 bool mas3_changed_p = CHANGED(new_hwtlb, old_hwtlb, hwtlb_mas3);
303#undef CHANGED 305#undef CHANGED
304 bool new_valid_p = (new_hwtlb->hwtlb_mas1 & MAS1_V) != 0; 306 bool new_valid_p = (new_hwtlb->hwtlb_mas1 & MAS1_V) != 0;
305 bool old_valid_p = (old_hwtlb.hwtlb_mas1 & MAS1_V) != 0; 307 bool old_valid_p = (old_hwtlb.hwtlb_mas1 & MAS1_V) != 0;
306 if ((new_valid_p || old_valid_p) 308 if ((new_valid_p || old_valid_p)
307 && (mas1_changed_p 309 && (mas1_changed_p
308 || (new_valid_p 310 || (new_valid_p
309 && (mas2_changed_p || mas3_changed_p)))) 311 && (mas2_changed_p || mas3_changed_p))))
310 hwtlb_write(*new_hwtlb, true); 312 hwtlb_write(*new_hwtlb, true);
311 } 313 }
312} 314}
313 315
314static int 316static int
315e500_alloc_tlb1_entry(void) 317e500_alloc_tlb1_entry(void)
316{ 318{
317 struct e500_tlb1 * const tlb1 = &e500_tlb1; 319 struct e500_tlb1 * const tlb1 = &e500_tlb1;
318 320
319 if (tlb1->tlb1_numfree == 0) 321 if (tlb1->tlb1_numfree == 0)
320 return -1; 322 return -1;
321 const u_int slot = tlb1->tlb1_freelist[--tlb1->tlb1_numfree]; 323 const u_int slot = tlb1->tlb1_freelist[--tlb1->tlb1_numfree];
322 KASSERT((tlb1->tlb1_entries[slot].e_hwtlb.hwtlb_mas1 & MAS1_V) == 0); 324 KASSERT((tlb1->tlb1_entries[slot].e_hwtlb.hwtlb_mas1 & MAS1_V) == 0);
323 tlb1->tlb1_entries[slot].e_hwtlb.hwtlb_mas0 =  325 tlb1->tlb1_entries[slot].e_hwtlb.hwtlb_mas0 =
324 MAS0_TLBSEL_TLB1 | __SHIFTIN(slot, MAS0_ESEL); 326 MAS0_TLBSEL_TLB1 | __SHIFTIN(slot, MAS0_ESEL);
325 return (int)slot; 327 return (int)slot;
326} 328}
327 329
328static void 330static void
329e500_free_tlb1_entry(struct e500_xtlb *xtlb, u_int slot, bool needs_sync) 331e500_free_tlb1_entry(struct e500_xtlb *xtlb, u_int slot, bool needs_sync)
330{ 332{
331 struct e500_tlb1 * const tlb1 = &e500_tlb1; 333 struct e500_tlb1 * const tlb1 = &e500_tlb1;
332 KASSERT(slot < tlb1->tlb1_numentries); 334 KASSERT(slot < tlb1->tlb1_numentries);
333 KASSERT(&tlb1->tlb1_entries[slot] == xtlb); 335 KASSERT(&tlb1->tlb1_entries[slot] == xtlb);
334 336
335 KASSERT(xtlb->e_hwtlb.hwtlb_mas0 == (MAS0_TLBSEL_TLB1|__SHIFTIN(slot, MAS0_ESEL))); 337 KASSERT(xtlb->e_hwtlb.hwtlb_mas0 == (MAS0_TLBSEL_TLB1|__SHIFTIN(slot, MAS0_ESEL)));
336 xtlb->e_hwtlb.hwtlb_mas1 &= ~(MAS1_V|MAS1_IPROT); 338 xtlb->e_hwtlb.hwtlb_mas1 &= ~(MAS1_V|MAS1_IPROT);
337 hwtlb_write(xtlb->e_hwtlb, needs_sync); 339 hwtlb_write(xtlb->e_hwtlb, needs_sync);
338 340
339 const register_t msr = wrtee(0); 341 const register_t msr = wrtee(0);
340 tlb1->tlb1_freelist[tlb1->tlb1_numfree++] = slot; 342 tlb1->tlb1_freelist[tlb1->tlb1_numfree++] = slot;
341 wrtee(msr); 343 wrtee(msr);
342} 344}
343 345
344static tlb_asid_t 346static tlb_asid_t
345e500_tlb_get_asid(void) 347e500_tlb_get_asid(void)
346{ 348{
347 return mfspr(SPR_PID0); 349 return mfspr(SPR_PID0);
348} 350}
349 351
350static void 352static void
351e500_tlb_set_asid(tlb_asid_t asid) 353e500_tlb_set_asid(tlb_asid_t asid)
352{ 354{
353 mtspr(SPR_PID0, asid); 355 mtspr(SPR_PID0, asid);
354} 356}
355 357
356static void 358static void
357e500_tlb_invalidate_all(void) 359e500_tlb_invalidate_all(void)
358{ 360{
359 /* 361 /*
360 * This does a flash invalidate of all entries in TLB0. 362 * This does a flash invalidate of all entries in TLB0.
361 * We don't touch TLB1 since we don't expect those to be volatile. 363 * We don't touch TLB1 since we don't expect those to be volatile.
362 */ 364 */
363#if 1 365#if 1
364 __asm volatile("tlbivax\t0, %0" :: "b"(4)); /* INV_ALL */ 366 __asm volatile("tlbivax\t0, %0" :: "b"(4)); /* INV_ALL */
365 __asm volatile("tlbsync\n\tisync\n\tsync"); 367 __asm volatile("tlbsync\n\tisync\n\tsync");
366#else 368#else
367 mtspr(SPR_MMUCSR0, MMUCSR0_TLB0_FI); 369 mtspr(SPR_MMUCSR0, MMUCSR0_TLB0_FI);
368 while (mfspr(SPR_MMUCSR0) != 0) 370 while (mfspr(SPR_MMUCSR0) != 0)
369 ; 371 ;
370#endif 372#endif
371} 373}
372 374
373static void 375static void
374e500_tlb_invalidate_globals(void) 376e500_tlb_invalidate_globals(void)
375{ 377{
376#if defined(MULTIPROCESSOR) 378#if defined(MULTIPROCESSOR)
377 e500_tlb_invalidate_all(); 379 e500_tlb_invalidate_all();
378#else /* !MULTIPROCESSOR */ 380#else /* !MULTIPROCESSOR */
379 const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg()); 381 const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
380 const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg()); 382 const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
381 const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT; 383 const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;
382 const vaddr_t kstack_lo = (uintptr_t)curlwp->l_addr; 384 const vaddr_t kstack_lo = (uintptr_t)curlwp->l_addr;
383 const vaddr_t kstack_hi = kstack_lo + USPACE - 1; 385 const vaddr_t kstack_hi = kstack_lo + USPACE - 1;
384 const vaddr_t epn_kstack_lo = kstack_lo & (max_epn - 1); 386 const vaddr_t epn_kstack_lo = kstack_lo & (max_epn - 1);
385 const vaddr_t epn_kstack_hi = kstack_hi & (max_epn - 1); 387 const vaddr_t epn_kstack_hi = kstack_hi & (max_epn - 1);
386 388
387 const register_t msr = wrtee(0); 389 const register_t msr = wrtee(0);
388 for (size_t assoc = 0; assoc < tlbassoc; assoc++) { 390 for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
389 mtspr(SPR_MAS0, MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0); 391 mtspr(SPR_MAS0, MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0);
390 for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) { 392 for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
391 mtspr(SPR_MAS2, epn); 393 mtspr(SPR_MAS2, epn);
392 __asm volatile("tlbre"); 394 __asm volatile("tlbre");
393 uint32_t mas1 = mfspr(SPR_MAS1); 395 uint32_t mas1 = mfspr(SPR_MAS1);
394 396
395 /* 397 /*
396 * Make sure this is a valid kernel entry first. 398 * Make sure this is a valid kernel entry first.
397 */ 399 */
398 if ((mas1 & (MAS1_V|MAS1_TID|MAS1_TS)) != MAS1_V) 400 if ((mas1 & (MAS1_V|MAS1_TID|MAS1_TS)) != MAS1_V)
399 continue; 401 continue;
400 402
401 /* 403 /*
402 * We have a valid kernel TLB entry. But if it matches 404 * We have a valid kernel TLB entry. But if it matches
403 * the stack we are currently running on, it would 405 * the stack we are currently running on, it would
 404 * be unwise to invalidate it. First see if the epn 406 * be unwise to invalidate it. First see if the epn
405 * overlaps the stack. If it does then get the 407 * overlaps the stack. If it does then get the
406 * VA and see if it really is part of the stack. 408 * VA and see if it really is part of the stack.
407 */ 409 */
408 if (epn_kstack_lo < epn_kstack_hi 410 if (epn_kstack_lo < epn_kstack_hi
409 ? (epn_kstack_lo <= epn && epn <= epn_kstack_hi) 411 ? (epn_kstack_lo <= epn && epn <= epn_kstack_hi)
410 : (epn <= epn_kstack_hi || epn_kstack_lo <= epn)) { 412 : (epn <= epn_kstack_hi || epn_kstack_lo <= epn)) {
411 const uint32_t mas2_epn = 413 const uint32_t mas2_epn =
412 mfspr(SPR_MAS2) & MAS2_EPN; 414 mfspr(SPR_MAS2) & MAS2_EPN;
413 if (kstack_lo <= mas2_epn 415 if (kstack_lo <= mas2_epn
414 && mas2_epn <= kstack_hi) 416 && mas2_epn <= kstack_hi)
415 continue; 417 continue;
416 } 418 }
417 mtspr(SPR_MAS1, mas1 ^ MAS1_V); 419 mtspr(SPR_MAS1, mas1 ^ MAS1_V);
418 __asm volatile("tlbwe"); 420 __asm volatile("tlbwe");
419 } 421 }
420 } 422 }
421 __asm volatile("isync\n\tsync"); 423 __asm volatile("isync\n\tsync");
422 wrtee(msr); 424 wrtee(msr);
423#endif /* MULTIPROCESSOR */ 425#endif /* MULTIPROCESSOR */
424} 426}
425 427
426static void 428static void
427e500_tlb_invalidate_asids(tlb_asid_t asid_lo, tlb_asid_t asid_hi) 429e500_tlb_invalidate_asids(tlb_asid_t asid_lo, tlb_asid_t asid_hi)
428{ 430{
429#if defined(MULTIPROCESSOR) 431#if defined(MULTIPROCESSOR)
430 e500_tlb_invalidate_all(); 432 e500_tlb_invalidate_all();
431#else /* !MULTIPROCESSOR */ 433#else /* !MULTIPROCESSOR */
432 const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg()); 434 const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
433 const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg()); 435 const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
434 const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT; 436 const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;
435 437
436 asid_lo = __SHIFTIN(asid_lo, MAS1_TID); 438 asid_lo = __SHIFTIN(asid_lo, MAS1_TID);
437 asid_hi = __SHIFTIN(asid_hi, MAS1_TID); 439 asid_hi = __SHIFTIN(asid_hi, MAS1_TID);
438 440
439 const register_t msr = wrtee(0); 441 const register_t msr = wrtee(0);
440 for (size_t assoc = 0; assoc < tlbassoc; assoc++) { 442 for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
441 mtspr(SPR_MAS0, MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0); 443 mtspr(SPR_MAS0, MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0);
442 for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) { 444 for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
443 mtspr(SPR_MAS2, epn); 445 mtspr(SPR_MAS2, epn);
444 __asm volatile("tlbre"); 446 __asm volatile("tlbre");
445 const uint32_t mas1 = mfspr(SPR_MAS1); 447 const uint32_t mas1 = mfspr(SPR_MAS1);
446 /* 448 /*
447 * If this is a valid entry for AS space 1 and 449 * If this is a valid entry for AS space 1 and
448 * its asid matches the constraints of the caller, 450 * its asid matches the constraints of the caller,
449 * clear its valid bit. 451 * clear its valid bit.
450 */ 452 */
451 if ((mas1 & (MAS1_V|MAS1_TS)) == (MAS1_V|MAS1_TS) 453 if ((mas1 & (MAS1_V|MAS1_TS)) == (MAS1_V|MAS1_TS)
452 && asid_lo <= (mas1 & MAS1_TID) 454 && asid_lo <= (mas1 & MAS1_TID)
453 && (mas1 & MAS1_TID) <= asid_hi) { 455 && (mas1 & MAS1_TID) <= asid_hi) {
454 mtspr(SPR_MAS1, mas1 ^ MAS1_V); 456 mtspr(SPR_MAS1, mas1 ^ MAS1_V);
455#if 0 457#if 0
456 printf("%s[%zu,%zu]->[%x]\n", 458 printf("%s[%zu,%zu]->[%x]\n",
457 __func__, assoc, epn, mas1); 459 __func__, assoc, epn, mas1);
458#endif 460#endif
459 __asm volatile("tlbwe"); 461 __asm volatile("tlbwe");
460 } 462 }
461 } 463 }
462 } 464 }
463 __asm volatile("isync\n\tsync"); 465 __asm volatile("isync\n\tsync");
464 wrtee(msr); 466 wrtee(msr);
465#endif /* MULTIPROCESSOR */ 467#endif /* MULTIPROCESSOR */
466} 468}
467 469
468static u_int 470static u_int
469e500_tlb_record_asids(u_long *bitmap, tlb_asid_t asid_max) 471e500_tlb_record_asids(u_long *bitmap, tlb_asid_t asid_max)
470{ 472{
471 const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg()); 473 const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
472 const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg()); 474 const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
473 const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT; 475 const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;
474 const size_t nbits = 8 * sizeof(bitmap[0]); 476 const size_t nbits = 8 * sizeof(bitmap[0]);
475 u_int found = 0; 477 u_int found = 0;
476 478
477 const register_t msr = wrtee(0); 479 const register_t msr = wrtee(0);
478 for (size_t assoc = 0; assoc < tlbassoc; assoc++) { 480 for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
479 mtspr(SPR_MAS0, MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0); 481 mtspr(SPR_MAS0, MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0);
480 for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) { 482 for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
481 mtspr(SPR_MAS2, epn); 483 mtspr(SPR_MAS2, epn);
482 __asm volatile("tlbre"); 484 __asm volatile("tlbre");
483 const uint32_t mas1 = mfspr(SPR_MAS1); 485 const uint32_t mas1 = mfspr(SPR_MAS1);
484 /* 486 /*
 485 * If this is a valid entry for AS space 1, record 487 * If this is a valid entry for AS space 1, record
 486 * its asid in the caller's bitmap and count it 488 * its asid in the caller's bitmap and count it
 487 * the first time it is seen. 489 * the first time it is seen.
488 */ 490 */
489 if ((mas1 & (MAS1_V|MAS1_TS)) == (MAS1_V|MAS1_TS)) { 491 if ((mas1 & (MAS1_V|MAS1_TS)) == (MAS1_V|MAS1_TS)) {
490 const uint32_t asid = MASX_TID_GET(mas1); 492 const uint32_t asid = MASX_TID_GET(mas1);
491 const u_int i = asid / nbits; 493 const u_int i = asid / nbits;
492 const u_long mask = 1UL << (asid & (nbits - 1)); 494 const u_long mask = 1UL << (asid & (nbits - 1));
493 if ((bitmap[i] & mask) == 0) { 495 if ((bitmap[i] & mask) == 0) {
494 bitmap[i] |= mask; 496 bitmap[i] |= mask;
495 found++; 497 found++;
496 } 498 }
497 } 499 }
498 } 500 }
499 } 501 }
500 wrtee(msr); 502 wrtee(msr);
501 503
502 return found; 504 return found;
503} 505}
504 506
505static void 507static void
506e500_tlb_invalidate_addr(vaddr_t va, tlb_asid_t asid) 508e500_tlb_invalidate_addr(vaddr_t va, tlb_asid_t asid)
507{ 509{
508 KASSERT((va & PAGE_MASK) == 0); 510 KASSERT((va & PAGE_MASK) == 0);
509 /* 511 /*
510 * Bits 60 & 61 have meaning 512 * Bits 60 & 61 have meaning
511 */ 513 */
512 if (asid == KERNEL_PID) { 514 if (asid == KERNEL_PID) {
513 /* 515 /*
514 * For data accesses, the context-synchronizing instruction 516 * For data accesses, the context-synchronizing instruction
515 * before tlbwe or tlbivax ensures that all memory accesses 517 * before tlbwe or tlbivax ensures that all memory accesses
516 * due to preceding instructions have completed to a point 518 * due to preceding instructions have completed to a point
517 * at which they have reported all exceptions they will cause. 519 * at which they have reported all exceptions they will cause.
518 */ 520 */
519 __asm volatile("isync"); 521 __asm volatile("isync");
520 } 522 }
521 __asm volatile("tlbivax\t0, %0" :: "b"(va)); 523 __asm volatile("tlbivax\t0, %0" :: "b"(va));
522 __asm volatile("tlbsync"); 524 __asm volatile("tlbsync");
523 __asm volatile("tlbsync"); /* Why? */ 525 __asm volatile("tlbsync"); /* Why? */
524 if (asid == KERNEL_PID) { 526 if (asid == KERNEL_PID) {
525 /* 527 /*
526 * The context-synchronizing instruction after tlbwe or tlbivax 528 * The context-synchronizing instruction after tlbwe or tlbivax
527 * ensures that subsequent accesses (data and instruction) use 529 * ensures that subsequent accesses (data and instruction) use
528 * the updated value in any TLB entries affected. 530 * the updated value in any TLB entries affected.
529 */ 531 */
530 __asm volatile("isync\n\tsync"); 532 __asm volatile("isync\n\tsync");
531 } 533 }
532} 534}
533 535
534static bool 536static bool
535e500_tlb_update_addr(vaddr_t va, tlb_asid_t asid, pt_entry_t pte, bool insert) 537e500_tlb_update_addr(vaddr_t va, tlb_asid_t asid, pt_entry_t pte, bool insert)
536{ 538{
537#if defined(MULTIPROCESSOR) 539#if defined(MULTIPROCESSOR)
538 e500_tlb_invalidate_addr(va, asid); 540 e500_tlb_invalidate_addr(va, asid);
539 return true; 541 return true;
540#else /* !MULTIPROCESSOR */ 542#else /* !MULTIPROCESSOR */
541 struct e500_hwtlb hwtlb = tlb_to_hwtlb( 543 struct e500_hwtlb hwtlb = tlb_to_hwtlb(
542 (struct e500_tlb){ .tlb_va = va, .tlb_asid = asid, 544 (struct e500_tlb){ .tlb_va = va, .tlb_asid = asid,
543 .tlb_size = PAGE_SIZE, .tlb_pte = pte,}); 545 .tlb_size = PAGE_SIZE, .tlb_pte = pte,});
544 546
545 register_t msr = wrtee(0); 547 register_t msr = wrtee(0);
546 mtspr(SPR_MAS6, asid ? __SHIFTIN(asid, MAS6_SPID0) | MAS6_SAS : 0); 548 mtspr(SPR_MAS6, asid ? __SHIFTIN(asid, MAS6_SPID0) | MAS6_SAS : 0);
547 __asm volatile("tlbsx 0, %0" :: "b"(va)); 549 __asm volatile("tlbsx 0, %0" :: "b"(va));
548 register_t mas1 = mfspr(SPR_MAS1); 550 register_t mas1 = mfspr(SPR_MAS1);
549 if ((mas1 & MAS1_V) == 0) { 551 if ((mas1 & MAS1_V) == 0) {
550 if (!insert) { 552 if (!insert) {
551 wrtee(msr); 553 wrtee(msr);
552#if 0 554#if 0
553 printf("%s(%#lx,%#x,%#x,%x)<no update>\n", 555 printf("%s(%#lx,%#x,%#x,%x)<no update>\n",
554 __func__, va, asid, pte, insert); 556 __func__, va, asid, pte, insert);
555#endif 557#endif
556 return false; 558 return false;
557 } 559 }
558 mas1 = hwtlb.hwtlb_mas1 | MAS1_V; 560 mas1 = hwtlb.hwtlb_mas1 | MAS1_V;
559 mtspr(SPR_MAS1, mas1); 561 mtspr(SPR_MAS1, mas1);
560 } 562 }
561 mtspr(SPR_MAS2, hwtlb.hwtlb_mas2); 563 mtspr(SPR_MAS2, hwtlb.hwtlb_mas2);
562 mtspr(SPR_MAS3, hwtlb.hwtlb_mas3); 564 mtspr(SPR_MAS3, hwtlb.hwtlb_mas3);
563 //mtspr(SPR_MAS7, 0); 565 //mtspr(SPR_MAS7, 0);
564 __asm volatile("tlbwe"); 566 __asm volatile("tlbwe");
565 if (asid == KERNEL_PID) 567 if (asid == KERNEL_PID)
566 __asm volatile("isync\n\tsync"); 568 __asm volatile("isync\n\tsync");
567 wrtee(msr); 569 wrtee(msr);
568#if 0 570#if 0
569 if (asid) 571 if (asid)
570 printf("%s(%#lx,%#x,%#x,%x)->[%x,%x,%x]\n", 572 printf("%s(%#lx,%#x,%#x,%x)->[%x,%x,%x]\n",
571 __func__, va, asid, pte, insert, 573 __func__, va, asid, pte, insert,
572 hwtlb.hwtlb_mas1, hwtlb.hwtlb_mas2, hwtlb.hwtlb_mas3); 574 hwtlb.hwtlb_mas1, hwtlb.hwtlb_mas2, hwtlb.hwtlb_mas3);
573#endif 575#endif
574 return (mas1 & MAS1_V) != 0; 576 return (mas1 & MAS1_V) != 0;
575#endif /* MULTIPROCESSOR */ 577#endif /* MULTIPROCESSOR */
576} 578}
577 579
578static void 580static void
579e500_tlb_write_entry(size_t index, const struct tlbmask *tlb) 581e500_tlb_write_entry(size_t index, const struct tlbmask *tlb)
580{ 582{
581} 583}
582 584
583static void 585static void
584e500_tlb_read_entry(size_t index, struct tlbmask *tlb) 586e500_tlb_read_entry(size_t index, struct tlbmask *tlb)
585{ 587{
586} 588}
587 589
588static void 590static void
589e500_tlb_dump(void (*pr)(const char *, ...)) 591e500_tlb_dump(void (*pr)(const char *, ...))
590{ 592{
591 const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg()); 593 const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
592 const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg()); 594 const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
593 const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT; 595 const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;
594 const uint32_t saved_mas0 = mfspr(SPR_MAS0); 596 const uint32_t saved_mas0 = mfspr(SPR_MAS0);
595 size_t valid = 0; 597 size_t valid = 0;
596 598
597 if (pr == NULL) 599 if (pr == NULL)
598 pr = printf; 600 pr = printf;
599 601
600 const register_t msr = wrtee(0); 602 const register_t msr = wrtee(0);
601 for (size_t assoc = 0; assoc < tlbassoc; assoc++) { 603 for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
602 struct e500_hwtlb hwtlb; 604 struct e500_hwtlb hwtlb;
603 hwtlb.hwtlb_mas0 = MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0; 605 hwtlb.hwtlb_mas0 = MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0;
604 mtspr(SPR_MAS0, hwtlb.hwtlb_mas0); 606 mtspr(SPR_MAS0, hwtlb.hwtlb_mas0);
605 for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) { 607 for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
606 mtspr(SPR_MAS2, epn); 608 mtspr(SPR_MAS2, epn);
607 __asm volatile("tlbre"); 609 __asm volatile("tlbre");
608 hwtlb.hwtlb_mas1 = mfspr(SPR_MAS1); 610 hwtlb.hwtlb_mas1 = mfspr(SPR_MAS1);
609 /* 611 /*
 610 * If this is a valid entry, read the rest of 612 * If this is a valid entry, read the rest of
 611 * the MAS registers and pretty-print the 613 * the MAS registers and pretty-print the
 612 * mapping it describes. 614 * mapping it describes.
613 */ 615 */
614 if (hwtlb.hwtlb_mas1 & MAS1_V) { 616 if (hwtlb.hwtlb_mas1 & MAS1_V) {
615 hwtlb.hwtlb_mas2 = mfspr(SPR_MAS2); 617 hwtlb.hwtlb_mas2 = mfspr(SPR_MAS2);
616 hwtlb.hwtlb_mas3 = mfspr(SPR_MAS3); 618 hwtlb.hwtlb_mas3 = mfspr(SPR_MAS3);
617 struct e500_tlb tlb = hwtlb_to_tlb(hwtlb); 619 struct e500_tlb tlb = hwtlb_to_tlb(hwtlb);
618 (*pr)("[%zu,%zu]->[%x,%x,%x]", 620 (*pr)("[%zu,%zu]->[%x,%x,%x]",
619 assoc, atop(epn), 621 assoc, atop(epn),
620 hwtlb.hwtlb_mas1,  622 hwtlb.hwtlb_mas1,
621 hwtlb.hwtlb_mas2,  623 hwtlb.hwtlb_mas2,
622 hwtlb.hwtlb_mas3); 624 hwtlb.hwtlb_mas3);
623 (*pr)(": VA=%#lx size=4KB asid=%u pte=%x", 625 (*pr)(": VA=%#lx size=4KB asid=%u pte=%x",
624 tlb.tlb_va, tlb.tlb_asid, tlb.tlb_pte); 626 tlb.tlb_va, tlb.tlb_asid, tlb.tlb_pte);
625 (*pr)(" (RPN=%#x,%s%s%s%s%s,%s%s%s%s%s)\n", 627 (*pr)(" (RPN=%#x,%s%s%s%s%s,%s%s%s%s%s)\n",
626 tlb.tlb_pte & PTE_RPN_MASK, 628 tlb.tlb_pte & PTE_RPN_MASK,
627 tlb.tlb_pte & PTE_xR ? "R" : "", 629 tlb.tlb_pte & PTE_xR ? "R" : "",
628 tlb.tlb_pte & PTE_xW ? "W" : "", 630 tlb.tlb_pte & PTE_xW ? "W" : "",
629 tlb.tlb_pte & PTE_UNMODIFIED ? "*" : "", 631 tlb.tlb_pte & PTE_UNMODIFIED ? "*" : "",
630 tlb.tlb_pte & PTE_xX ? "X" : "", 632 tlb.tlb_pte & PTE_xX ? "X" : "",
631 tlb.tlb_pte & PTE_UNSYNCED ? "*" : "", 633 tlb.tlb_pte & PTE_UNSYNCED ? "*" : "",
632 tlb.tlb_pte & PTE_W ? "W" : "", 634 tlb.tlb_pte & PTE_W ? "W" : "",
633 tlb.tlb_pte & PTE_I ? "I" : "", 635 tlb.tlb_pte & PTE_I ? "I" : "",
634 tlb.tlb_pte & PTE_M ? "M" : "", 636 tlb.tlb_pte & PTE_M ? "M" : "",
635 tlb.tlb_pte & PTE_G ? "G" : "", 637 tlb.tlb_pte & PTE_G ? "G" : "",
636 tlb.tlb_pte & PTE_E ? "E" : ""); 638 tlb.tlb_pte & PTE_E ? "E" : "");
637 valid++; 639 valid++;
638 } 640 }
639 } 641 }
640 } 642 }
641 mtspr(SPR_MAS0, saved_mas0); 643 mtspr(SPR_MAS0, saved_mas0);
642 wrtee(msr); 644 wrtee(msr);
643 (*pr)("%s: %zu valid entries\n", __func__, valid); 645 (*pr)("%s: %zu valid entries\n", __func__, valid);
644} 646}
645 647
646static void 648static void
647e500_tlb_walk(void *ctx, bool (*func)(void *, vaddr_t, uint32_t, uint32_t)) 649e500_tlb_walk(void *ctx, bool (*func)(void *, vaddr_t, uint32_t, uint32_t))
648{ 650{
649 const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg()); 651 const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
650 const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg()); 652 const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
651 const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT; 653 const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;
652 const uint32_t saved_mas0 = mfspr(SPR_MAS0); 654 const uint32_t saved_mas0 = mfspr(SPR_MAS0);
653 655
654 const register_t msr = wrtee(0); 656 const register_t msr = wrtee(0);
655 for (size_t assoc = 0; assoc < tlbassoc; assoc++) { 657 for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
656 struct e500_hwtlb hwtlb; 658 struct e500_hwtlb hwtlb;
657 hwtlb.hwtlb_mas0 = MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0; 659 hwtlb.hwtlb_mas0 = MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0;
658 mtspr(SPR_MAS0, hwtlb.hwtlb_mas0); 660 mtspr(SPR_MAS0, hwtlb.hwtlb_mas0);
659 for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) { 661 for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
660 mtspr(SPR_MAS2, epn); 662 mtspr(SPR_MAS2, epn);
661 __asm volatile("tlbre"); 663 __asm volatile("tlbre");
662 hwtlb.hwtlb_mas1 = mfspr(SPR_MAS1); 664 hwtlb.hwtlb_mas1 = mfspr(SPR_MAS1);
663 if (hwtlb.hwtlb_mas1 & MAS1_V) { 665 if (hwtlb.hwtlb_mas1 & MAS1_V) {
664 hwtlb.hwtlb_mas2 = mfspr(SPR_MAS2); 666 hwtlb.hwtlb_mas2 = mfspr(SPR_MAS2);
665 hwtlb.hwtlb_mas3 = mfspr(SPR_MAS3); 667 hwtlb.hwtlb_mas3 = mfspr(SPR_MAS3);
666 struct e500_tlb tlb = hwtlb_to_tlb(hwtlb); 668 struct e500_tlb tlb = hwtlb_to_tlb(hwtlb);
667 if (!(*func)(ctx, tlb.tlb_va, tlb.tlb_asid, 669 if (!(*func)(ctx, tlb.tlb_va, tlb.tlb_asid,
668 tlb.tlb_pte)) 670 tlb.tlb_pte))
669 break; 671 break;
670 } 672 }
671 } 673 }
672 } 674 }
673 mtspr(SPR_MAS0, saved_mas0); 675 mtspr(SPR_MAS0, saved_mas0);
674 wrtee(msr); 676 wrtee(msr);
675} 677}
676 678
677static struct e500_xtlb * 679static struct e500_xtlb *
678e500_tlb_lookup_xtlb_pa(vaddr_t pa, u_int *slotp) 680e500_tlb_lookup_xtlb_pa(vaddr_t pa, u_int *slotp)
679{ 681{
680 struct e500_tlb1 * const tlb1 = &e500_tlb1; 682 struct e500_tlb1 * const tlb1 = &e500_tlb1;
681 struct e500_xtlb *xtlb = tlb1->tlb1_entries; 683 struct e500_xtlb *xtlb = tlb1->tlb1_entries;
682 684
683 /* 685 /*
684 * See if we have a TLB entry for the pa. 686 * See if we have a TLB entry for the pa.
685 */ 687 */
686 for (u_int i = 0; i < tlb1->tlb1_numentries; i++, xtlb++) { 688 for (u_int i = 0; i < tlb1->tlb1_numentries; i++, xtlb++) {
687 psize_t mask = ~(xtlb->e_tlb.tlb_size - 1); 689 psize_t mask = ~(xtlb->e_tlb.tlb_size - 1);
688 if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V) 690 if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V)
689 && ((pa ^ xtlb->e_tlb.tlb_pte) & mask) == 0) { 691 && ((pa ^ xtlb->e_tlb.tlb_pte) & mask) == 0) {
690 if (slotp != NULL) 692 if (slotp != NULL)
691 *slotp = i; 693 *slotp = i;
692 return xtlb; 694 return xtlb;
693 } 695 }
694 } 696 }
695 697
696 return NULL; 698 return NULL;
697} 699}
698 700
699struct e500_xtlb * 701struct e500_xtlb *
700e500_tlb_lookup_xtlb(vaddr_t va, u_int *slotp) 702e500_tlb_lookup_xtlb(vaddr_t va, u_int *slotp)
701{ 703{
702 struct e500_tlb1 * const tlb1 = &e500_tlb1; 704 struct e500_tlb1 * const tlb1 = &e500_tlb1;
703 struct e500_xtlb *xtlb = tlb1->tlb1_entries; 705 struct e500_xtlb *xtlb = tlb1->tlb1_entries;
704 706
705 /* 707 /*
706 * See if we have a TLB entry for the va. 708 * See if we have a TLB entry for the va.
707 */ 709 */
708 for (u_int i = 0; i < tlb1->tlb1_numentries; i++, xtlb++) { 710 for (u_int i = 0; i < tlb1->tlb1_numentries; i++, xtlb++) {
709 vsize_t mask = ~(xtlb->e_tlb.tlb_size - 1); 711 vsize_t mask = ~(xtlb->e_tlb.tlb_size - 1);
710 if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V) 712 if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V)
711 && ((va ^ xtlb->e_tlb.tlb_va) & mask) == 0) { 713 && ((va ^ xtlb->e_tlb.tlb_va) & mask) == 0) {
712 if (slotp != NULL) 714 if (slotp != NULL)
713 *slotp = i; 715 *slotp = i;
714 return xtlb; 716 return xtlb;
715 } 717 }
716 } 718 }
717 719
718 return NULL; 720 return NULL;
719} 721}
720 722
721static struct e500_xtlb * 723static struct e500_xtlb *
722e500_tlb_lookup_xtlb2(vaddr_t va, vsize_t len) 724e500_tlb_lookup_xtlb2(vaddr_t va, vsize_t len)
723{ 725{
724 struct e500_tlb1 * const tlb1 = &e500_tlb1; 726 struct e500_tlb1 * const tlb1 = &e500_tlb1;
725 struct e500_xtlb *xtlb = tlb1->tlb1_entries; 727 struct e500_xtlb *xtlb = tlb1->tlb1_entries;
726 728
727 /* 729 /*
728 * See if we have a TLB entry for the pa. 730 * See if we have a TLB entry for the pa.
729 */ 731 */
730 for (u_int i = 0; i < tlb1->tlb1_numentries; i++, xtlb++) { 732 for (u_int i = 0; i < tlb1->tlb1_numentries; i++, xtlb++) {
731 vsize_t mask = ~(xtlb->e_tlb.tlb_size - 1); 733 vsize_t mask = ~(xtlb->e_tlb.tlb_size - 1);
732 if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V) 734 if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V)
733 && ((va ^ xtlb->e_tlb.tlb_va) & mask) == 0 735 && ((va ^ xtlb->e_tlb.tlb_va) & mask) == 0
734 && (((va + len - 1) ^ va) & mask) == 0) { 736 && (((va + len - 1) ^ va) & mask) == 0) {
735 return xtlb; 737 return xtlb;
736 } 738 }
737 } 739 }
738 740
739 return NULL; 741 return NULL;
740} 742}
741 743
742static void * 744static void *
743e500_tlb_mapiodev(paddr_t pa, psize_t len, bool prefetchable) 745e500_tlb_mapiodev(paddr_t pa, psize_t len, bool prefetchable)
744{ 746{
745 struct e500_xtlb * const xtlb = e500_tlb_lookup_xtlb_pa(pa, NULL); 747 struct e500_xtlb * const xtlb = e500_tlb_lookup_xtlb_pa(pa, NULL);
746 748
747 /* 749 /*
 748 * See if we have a TLB entry for the pa. If the pa falls completely 750 * See if we have a TLB entry for the pa. If the pa falls completely
 749 * within it, bump the reference count and return the matching va. 751 * within it, bump the reference count and return the matching va.
 750 * But only if the tlb entry is not cacheable. 752 * But only if the tlb entry is not cacheable.
751 */ 753 */
752 if (xtlb 754 if (xtlb
753 && (prefetchable 755 && (prefetchable
754 || (xtlb->e_tlb.tlb_pte & PTE_WIG) == (PTE_I|PTE_G))) { 756 || (xtlb->e_tlb.tlb_pte & PTE_WIG) == (PTE_I|PTE_G))) {
755 xtlb->e_refcnt++; 757 xtlb->e_refcnt++;
756 return (void *) (xtlb->e_tlb.tlb_va 758 return (void *) (xtlb->e_tlb.tlb_va
757 + pa - (xtlb->e_tlb.tlb_pte & PTE_RPN_MASK)); 759 + pa - (xtlb->e_tlb.tlb_pte & PTE_RPN_MASK));
758 } 760 }
759 return NULL; 761 return NULL;
760} 762}
761 763
762static void 764static void
763e500_tlb_unmapiodev(vaddr_t va, vsize_t len) 765e500_tlb_unmapiodev(vaddr_t va, vsize_t len)
764{ 766{
765 if (va < VM_MIN_KERNEL_ADDRESS || VM_MAX_KERNEL_ADDRESS <= va) { 767 if (va < VM_MIN_KERNEL_ADDRESS || VM_MAX_KERNEL_ADDRESS <= va) {
766 struct e500_xtlb * const xtlb = e500_tlb_lookup_xtlb(va, NULL); 768 struct e500_xtlb * const xtlb = e500_tlb_lookup_xtlb(va, NULL);
767 if (xtlb) 769 if (xtlb)
768 xtlb->e_refcnt--; 770 xtlb->e_refcnt--;
769 } 771 }
770} 772}
771 773
772static int 774static int
773e500_tlb_ioreserve(vaddr_t va, vsize_t len, pt_entry_t pte) 775e500_tlb_ioreserve(vaddr_t va, vsize_t len, pt_entry_t pte)
774{ 776{
775 struct e500_tlb1 * const tlb1 = &e500_tlb1; 777 struct e500_tlb1 * const tlb1 = &e500_tlb1;
776 struct e500_xtlb *xtlb; 778 struct e500_xtlb *xtlb;
777 779
778 KASSERT(len & 0x55555000); 780 KASSERT(len & 0x55555000);
779 KASSERT((len & ~0x55555000) == 0); 781 KASSERT((len & ~0x55555000) == 0);
780 KASSERT(len >= PAGE_SIZE); 782 KASSERT(len >= PAGE_SIZE);
781 KASSERT((len & (len - 1)) == 0); 783 KASSERT((len & (len - 1)) == 0);
782 KASSERT((va & (len - 1)) == 0); 784 KASSERT((va & (len - 1)) == 0);
783 KASSERT(((pte & PTE_RPN_MASK) & (len - 1)) == 0); 785 KASSERT(((pte & PTE_RPN_MASK) & (len - 1)) == 0);
784 786
785 if ((xtlb = e500_tlb_lookup_xtlb2(va, len)) != NULL) { 787 if ((xtlb = e500_tlb_lookup_xtlb2(va, len)) != NULL) {
786 psize_t mask __diagused = ~(xtlb->e_tlb.tlb_size - 1); 788 psize_t mask __diagused = ~(xtlb->e_tlb.tlb_size - 1);
787 KASSERT(len <= xtlb->e_tlb.tlb_size); 789 KASSERT(len <= xtlb->e_tlb.tlb_size);
788 KASSERT((pte & mask) == (xtlb->e_tlb.tlb_pte & mask)); 790 KASSERT((pte & mask) == (xtlb->e_tlb.tlb_pte & mask));
789 xtlb->e_refcnt++; 791 xtlb->e_refcnt++;
790 return 0; 792 return 0;
791 } 793 }
792 794
793 const int slot = e500_alloc_tlb1_entry(); 795 const int slot = e500_alloc_tlb1_entry();
794 if (slot < 0) 796 if (slot < 0)
795 return ENOMEM; 797 return ENOMEM;
796 798
797 xtlb = &tlb1->tlb1_entries[slot];  799 xtlb = &tlb1->tlb1_entries[slot];
798 xtlb->e_tlb.tlb_va = va; 800 xtlb->e_tlb.tlb_va = va;
799 xtlb->e_tlb.tlb_size = len; 801 xtlb->e_tlb.tlb_size = len;
800 xtlb->e_tlb.tlb_pte = pte; 802 xtlb->e_tlb.tlb_pte = pte;
801 xtlb->e_tlb.tlb_asid = KERNEL_PID; 803 xtlb->e_tlb.tlb_asid = KERNEL_PID;
802 804
803 xtlb->e_hwtlb = tlb_to_hwtlb(xtlb->e_tlb); 805 xtlb->e_hwtlb = tlb_to_hwtlb(xtlb->e_tlb);
804 xtlb->e_hwtlb.hwtlb_mas0 |= __SHIFTIN(slot, MAS0_ESEL); 806 xtlb->e_hwtlb.hwtlb_mas0 |= __SHIFTIN(slot, MAS0_ESEL);
805 hwtlb_write(xtlb->e_hwtlb, true); 807 hwtlb_write(xtlb->e_hwtlb, true);
806 808
807#if defined(MULTIPROCESSOR) 809#if defined(MULTIPROCESSOR)
808 cpu_send_ipi(IPI_DST_NOTME, IPI_TLB1SYNC); 810 cpu_send_ipi(IPI_DST_NOTME, IPI_TLB1SYNC);
809#endif 811#endif
810 812
811 return 0; 813 return 0;
812} 814}
813 815
814static int 816static int
815e500_tlb_iorelease(vaddr_t va) 817e500_tlb_iorelease(vaddr_t va)
816{ 818{
817 u_int slot; 819 u_int slot;
818 struct e500_xtlb * const xtlb = e500_tlb_lookup_xtlb(va, &slot); 820 struct e500_xtlb * const xtlb = e500_tlb_lookup_xtlb(va, &slot);
819 821
820 if (xtlb == NULL) 822 if (xtlb == NULL)
821 return ENOENT; 823 return ENOENT;
822 824
823 if (xtlb->e_refcnt) 825 if (xtlb->e_refcnt)
824 return EBUSY; 826 return EBUSY;
825 827
826 e500_free_tlb1_entry(xtlb, slot, true); 828 e500_free_tlb1_entry(xtlb, slot, true);
827 829
828#if defined(MULTIPROCESSOR) 830#if defined(MULTIPROCESSOR)
829 cpu_send_ipi(IPI_DST_NOTME, IPI_TLB1SYNC); 831 cpu_send_ipi(IPI_DST_NOTME, IPI_TLB1SYNC);
830#endif 832#endif
831 833
832 return 0; 834 return 0;
833} 835}
834 836
835static u_int 837static u_int
836e500_tlbmemmap(paddr_t memstart, psize_t memsize, struct e500_tlb1 *tlb1) 838e500_tlbmemmap(paddr_t memstart, psize_t memsize, struct e500_tlb1 *tlb1)
837{ 839{
838 u_int slotmask = 0; 840 u_int slotmask = 0;
839 u_int slots = 0, nextslot = 0; 841 u_int slots = 0, nextslot = 0;
840 KASSERT(tlb1->tlb1_numfree > 1); 842 KASSERT(tlb1->tlb1_numfree > 1);
841 KASSERT(((memstart + memsize - 1) & -memsize) == memstart); 843 KASSERT(((memstart + memsize - 1) & -memsize) == memstart);
842 for (paddr_t lastaddr = memstart; 0 < memsize; ) { 844 for (paddr_t lastaddr = memstart; 0 < memsize; ) {
843 u_int cnt = __builtin_clz(memsize); 845 u_int cnt = __builtin_clz(memsize);
844 psize_t size = uimin(1UL << (31 - (cnt | 1)), tlb1->tlb1_maxsize); 846 psize_t size = uimin(1UL << (31 - (cnt | 1)), tlb1->tlb1_maxsize);
845 slots += memsize / size; 847 slots += memsize / size;
846 if (slots > 4) 848 if (slots > 4)
847 panic("%s: %d: can't map memory (%#lx) into TLB1: %s", 849 panic("%s: %d: can't map memory (%#lx) into TLB1: %s",
848 __func__, __LINE__, memsize, "too fragmented"); 850 __func__, __LINE__, memsize, "too fragmented");
849 if (slots > tlb1->tlb1_numfree - 1) 851 if (slots > tlb1->tlb1_numfree - 1)
850 panic("%s: %d: can't map memory (%#lx) into TLB1: %s", 852 panic("%s: %d: can't map memory (%#lx) into TLB1: %s",
851 __func__, __LINE__, memsize, 853 __func__, __LINE__, memsize,
852 "insufficent TLB entries"); 854 "insufficent TLB entries");
853 for (; nextslot < slots; nextslot++) { 855 for (; nextslot < slots; nextslot++) {
854 const u_int freeslot = e500_alloc_tlb1_entry(); 856 const u_int freeslot = e500_alloc_tlb1_entry();
855 struct e500_xtlb * const xtlb = 857 struct e500_xtlb * const xtlb =
856 &tlb1->tlb1_entries[freeslot]; 858 &tlb1->tlb1_entries[freeslot];
857 xtlb->e_tlb.tlb_asid = KERNEL_PID; 859 xtlb->e_tlb.tlb_asid = KERNEL_PID;
858 xtlb->e_tlb.tlb_size = size; 860 xtlb->e_tlb.tlb_size = size;
859 xtlb->e_tlb.tlb_va = lastaddr; 861 xtlb->e_tlb.tlb_va = lastaddr;
860 xtlb->e_tlb.tlb_pte = lastaddr 862 xtlb->e_tlb.tlb_pte = lastaddr
861 | PTE_M | PTE_xX | PTE_xW | PTE_xR; 863 | PTE_M | PTE_xX | PTE_xW | PTE_xR;
862 lastaddr += size; 864 lastaddr += size;
863 memsize -= size; 865 memsize -= size;
864 slotmask |= 1 << (31 - freeslot); /* clz friendly */ 866 slotmask |= 1 << (31 - freeslot); /* clz friendly */
865 } 867 }
866 } 868 }
867 869
868#if defined(MULTIPROCESSOR) 870#if defined(MULTIPROCESSOR)
869 cpu_send_ipi(IPI_DST_NOTME, IPI_TLB1SYNC); 871 cpu_send_ipi(IPI_DST_NOTME, IPI_TLB1SYNC);
870#endif 872#endif
871 873
872 return nextslot; 874 return nextslot;
873} 875}
874 876
875static const struct tlb_md_ops e500_tlb_ops = { 877static const struct tlb_md_ops e500_tlb_ops = {
876 .md_tlb_get_asid = e500_tlb_get_asid, 878 .md_tlb_get_asid = e500_tlb_get_asid,
877 .md_tlb_set_asid = e500_tlb_set_asid, 879 .md_tlb_set_asid = e500_tlb_set_asid,
878 .md_tlb_invalidate_all = e500_tlb_invalidate_all, 880 .md_tlb_invalidate_all = e500_tlb_invalidate_all,
879 .md_tlb_invalidate_globals = e500_tlb_invalidate_globals, 881 .md_tlb_invalidate_globals = e500_tlb_invalidate_globals,
880 .md_tlb_invalidate_asids = e500_tlb_invalidate_asids, 882 .md_tlb_invalidate_asids = e500_tlb_invalidate_asids,
881 .md_tlb_invalidate_addr = e500_tlb_invalidate_addr, 883 .md_tlb_invalidate_addr = e500_tlb_invalidate_addr,
882 .md_tlb_update_addr = e500_tlb_update_addr, 884 .md_tlb_update_addr = e500_tlb_update_addr,
883 .md_tlb_record_asids = e500_tlb_record_asids, 885 .md_tlb_record_asids = e500_tlb_record_asids,
884 .md_tlb_write_entry = e500_tlb_write_entry, 886 .md_tlb_write_entry = e500_tlb_write_entry,
885 .md_tlb_read_entry = e500_tlb_read_entry, 887 .md_tlb_read_entry = e500_tlb_read_entry,
886 .md_tlb_dump = e500_tlb_dump, 888 .md_tlb_dump = e500_tlb_dump,
887 .md_tlb_walk = e500_tlb_walk, 889 .md_tlb_walk = e500_tlb_walk,
888}; 890};
889 891
890static const struct tlb_md_io_ops e500_tlb_io_ops = { 892static const struct tlb_md_io_ops e500_tlb_io_ops = {
891 .md_tlb_mapiodev = e500_tlb_mapiodev, 893 .md_tlb_mapiodev = e500_tlb_mapiodev,
892 .md_tlb_unmapiodev = e500_tlb_unmapiodev, 894 .md_tlb_unmapiodev = e500_tlb_unmapiodev,
893 .md_tlb_ioreserve = e500_tlb_ioreserve, 895 .md_tlb_ioreserve = e500_tlb_ioreserve,
894 .md_tlb_iorelease = e500_tlb_iorelease, 896 .md_tlb_iorelease = e500_tlb_iorelease,
895}; 897};
896 898
897void 899void
898e500_tlb_init(vaddr_t endkernel, psize_t memsize) 900e500_tlb_init(vaddr_t endkernel, psize_t memsize)
899{ 901{
900 struct e500_tlb1 * const tlb1 = &e500_tlb1; 902 struct e500_tlb1 * const tlb1 = &e500_tlb1;
901 903
902#if 0 904#if 0
903 register_t mmucfg = mfspr(SPR_MMUCFG); 905 register_t mmucfg = mfspr(SPR_MMUCFG);
904 register_t mas4 = mfspr(SPR_MAS4); 906 register_t mas4 = mfspr(SPR_MAS4);
905#endif 907#endif
906 908
907 const uint32_t tlb1cfg = mftlb1cfg(); 909 const uint32_t tlb1cfg = mftlb1cfg();
908 tlb1->tlb1_numentries = TLBCFG_NENTRY(tlb1cfg); 910 tlb1->tlb1_numentries = TLBCFG_NENTRY(tlb1cfg);
909 KASSERT(tlb1->tlb1_numentries <= __arraycount(tlb1->tlb1_entries)); 911 KASSERT(tlb1->tlb1_numentries <= __arraycount(tlb1->tlb1_entries));
910 /* 912 /*
911 * Limit maxsize to 1G since 4G isn't really useful to us. 913 * Limit maxsize to 1G since 4G isn't really useful to us.
912 */ 914 */
913 tlb1->tlb1_minsize = 1024 << (2 * TLBCFG_MINSIZE(tlb1cfg)); 915 tlb1->tlb1_minsize = 1024 << (2 * TLBCFG_MINSIZE(tlb1cfg));
914 tlb1->tlb1_maxsize = 1024 << (2 * uimin(10, TLBCFG_MAXSIZE(tlb1cfg))); 916 tlb1->tlb1_maxsize = 1024 << (2 * uimin(10, TLBCFG_MAXSIZE(tlb1cfg)));
915 917
916#ifdef VERBOSE_INITPPC 918#ifdef VERBOSE_INITPPC
917 printf(" tlb1cfg=%#x numentries=%u minsize=%#xKB maxsize=%#xKB", 919 printf(" tlb1cfg=%#x numentries=%u minsize=%#xKB maxsize=%#xKB",
918 tlb1cfg, tlb1->tlb1_numentries, tlb1->tlb1_minsize >> 10, 920 tlb1cfg, tlb1->tlb1_numentries, tlb1->tlb1_minsize >> 10,
919 tlb1->tlb1_maxsize >> 10); 921 tlb1->tlb1_maxsize >> 10);
920#endif 922#endif
921 923
922 /* 924 /*
 923 * Let's see what's in TLB1: we need to invalidate any entry that 925 * Let's see what's in TLB1: we need to invalidate any entry that
 924 * overlaps the kernel's mapped address space. 926 * overlaps the kernel's mapped address space.
925 */ 927 */
926 psize_t memmapped = 0; 928 psize_t memmapped = 0;
927 for (u_int i = 0; i < tlb1->tlb1_numentries; i++) { 929 for (u_int i = 0; i < tlb1->tlb1_numentries; i++) {
928 struct e500_xtlb * const xtlb = &tlb1->tlb1_entries[i]; 930 struct e500_xtlb * const xtlb = &tlb1->tlb1_entries[i];
929 931
930 xtlb->e_hwtlb = hwtlb_read(MAS0_TLBSEL_TLB1, i); 932 xtlb->e_hwtlb = hwtlb_read(MAS0_TLBSEL_TLB1, i);
931 933
932 if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V) == 0) { 934 if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V) == 0) {
933 tlb1->tlb1_freelist[tlb1->tlb1_numfree++] = i; 935 tlb1->tlb1_freelist[tlb1->tlb1_numfree++] = i;
934#ifdef VERBOSE_INITPPC 936#ifdef VERBOSE_INITPPC
935 printf(" TLB1[%u]=<unused>", i); 937 printf(" TLB1[%u]=<unused>", i);
936#endif 938#endif
937 continue; 939 continue;
938 } 940 }
939 941
940 xtlb->e_tlb = hwtlb_to_tlb(xtlb->e_hwtlb); 942 xtlb->e_tlb = hwtlb_to_tlb(xtlb->e_hwtlb);
941#ifdef VERBOSE_INITPPC 943#ifdef VERBOSE_INITPPC
942 printf(" TLB1[%u]=<%#lx,%#lx,%#x,%#x>", 944 printf(" TLB1[%u]=<%#lx,%#lx,%#x,%#x>",
943 i, xtlb->e_tlb.tlb_va, xtlb->e_tlb.tlb_size, 945 i, xtlb->e_tlb.tlb_va, xtlb->e_tlb.tlb_size,
944 xtlb->e_tlb.tlb_asid, xtlb->e_tlb.tlb_pte); 946 xtlb->e_tlb.tlb_asid, xtlb->e_tlb.tlb_pte);
945#endif 947#endif
946 if ((VM_MIN_KERNEL_ADDRESS <= xtlb->e_tlb.tlb_va 948 if ((VM_MIN_KERNEL_ADDRESS <= xtlb->e_tlb.tlb_va
947 && xtlb->e_tlb.tlb_va < VM_MAX_KERNEL_ADDRESS) 949 && xtlb->e_tlb.tlb_va < VM_MAX_KERNEL_ADDRESS)
948 || (xtlb->e_tlb.tlb_va < VM_MIN_KERNEL_ADDRESS 950 || (xtlb->e_tlb.tlb_va < VM_MIN_KERNEL_ADDRESS
949 && VM_MIN_KERNEL_ADDRESS < 951 && VM_MIN_KERNEL_ADDRESS <
950 xtlb->e_tlb.tlb_va + xtlb->e_tlb.tlb_size)) { 952 xtlb->e_tlb.tlb_va + xtlb->e_tlb.tlb_size)) {
951#ifdef VERBOSE_INITPPC 953#ifdef VERBOSE_INITPPC
952 printf("free"); 954 printf("free");
953#endif 955#endif
954 e500_free_tlb1_entry(xtlb, i, false); 956 e500_free_tlb1_entry(xtlb, i, false);
955#ifdef VERBOSE_INITPPC 957#ifdef VERBOSE_INITPPC
956 printf("d"); 958 printf("d");
957#endif 959#endif
958 continue; 960 continue;
959 } 961 }
960 if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_IPROT) == 0) { 962 if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_IPROT) == 0) {
961 xtlb->e_hwtlb.hwtlb_mas1 |= MAS1_IPROT; 963 xtlb->e_hwtlb.hwtlb_mas1 |= MAS1_IPROT;
962 hwtlb_write(xtlb->e_hwtlb, false); 964 hwtlb_write(xtlb->e_hwtlb, false);
963#ifdef VERBOSE_INITPPC 965#ifdef VERBOSE_INITPPC
964 printf("+iprot"); 966 printf("+iprot");
965#endif 967#endif
966 } 968 }
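                /*
                 * Cache-inhibited entries presumably map device space
                 * rather than main memory, so skip them; the RAM
                 * accounting below doesn't apply to them.
                 */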
                if (xtlb->e_tlb.tlb_pte & PTE_I)
                        continue;

                if (xtlb->e_tlb.tlb_va == 0
                    || xtlb->e_tlb.tlb_va + xtlb->e_tlb.tlb_size <= memsize) {
                        memmapped += xtlb->e_tlb.tlb_size;
                        /*
                         * Make sure main memory is set up to be memory
                         * coherent.  For some reason u-boot doesn't set
                         * it up that way.
                         */
                        if ((xtlb->e_hwtlb.hwtlb_mas2 & MAS2_M) == 0) {
                                xtlb->e_hwtlb.hwtlb_mas2 |= MAS2_M;
                                hwtlb_write(xtlb->e_hwtlb, true);
                        }
                }
        }

        cpu_md_ops.md_tlb_ops = &e500_tlb_ops;
        cpu_md_ops.md_tlb_io_ops = &e500_tlb_io_ops;

        if (__predict_false(memmapped < memsize)) {
                /*
                 * Let's see how many TLB entries are needed to map memory.
                 */
                u_int slotmask = e500_tlbmemmap(0, memsize, tlb1);

                /*
                 * To map main memory into the TLB, we need to flush any
                 * existing entries from the TLB that overlap the virtual
                 * address space needed to map physical memory.  That may
                 * include the entries for the pages currently used by the
                 * stack or that we are executing.  To avoid problems, we
                 * temporarily map the kernel and stack into AS 1, switch
                 * to it, clear out the TLB entries from AS 0, install the
                 * new TLB entries to map memory, and then switch back to
                 * AS 0 and free the temporary entry used for AS 1.
                 */
                u_int b = __builtin_clz(endkernel);

                /*
                 * If the kernel doesn't end on a clean power of 2, we need
                 * to round the size up (by decrementing the number of
                 * leading zero bits).  If the resulting size isn't a power
                 * of 4 (TLB1 entries only come in power-of-4 sizes),
                 * decrement again to make it one.
                 */
                if (endkernel & (endkernel - 1))
                        b--;
                if ((b & 1) == 0)
                        b--;
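                /*
                 * Illustrative example: with endkernel = 0x00500000 (5MB),
                 * __builtin_clz() returns 9; 5MB isn't a power of 2, so b
                 * drops to 8, and since 8 is even it drops again to 7,
                 * making the mapping size below 1UL << (31 - 7) = 16MB,
                 * the smallest power-of-4 size that covers the kernel.
                 */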

                /*
                 * Create a TLB1 mapping for the kernel in AS1.
                 */
                const u_int kslot = e500_alloc_tlb1_entry();
                struct e500_xtlb * const kxtlb = &tlb1->tlb1_entries[kslot];
                kxtlb->e_tlb.tlb_va = 0;
                kxtlb->e_tlb.tlb_size = 1UL << (31 - b);
                kxtlb->e_tlb.tlb_pte = PTE_M|PTE_xR|PTE_xW|PTE_xX;
                kxtlb->e_tlb.tlb_asid = KERNEL_PID;

                kxtlb->e_hwtlb = tlb_to_hwtlb(kxtlb->e_tlb);
                kxtlb->e_hwtlb.hwtlb_mas0 |= __SHIFTIN(kslot, MAS0_ESEL);
                kxtlb->e_hwtlb.hwtlb_mas1 |= MAS1_TS;
                hwtlb_write(kxtlb->e_hwtlb, true);

                /*
                 * Now that we have a TLB mapping in AS1 for the kernel and
                 * its stack, we switch to AS1 to clean up the TLB mappings
                 * in TLB0.
                 */
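                /*
                 * On Book E, MSR[IS] and MSR[DS] select whether instruction
                 * and data accesses use AS 0 or AS 1, matching the TS bit
                 * set in the entry above; the isync provides the context
                 * synchronization needed for the new MSR value to take
                 * effect on subsequent fetches.
                 */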
                const register_t saved_msr = mfmsr();
                mtmsr(saved_msr | PSL_DS | PSL_IS);
                __asm volatile("isync");

                /*