Tue Jun 7 01:01:43 2011 UTC
Zero the PCU save area when loading it for an lwp that hasn't used the unit yet.


(matt)
diff -r1.4 -r1.5 src/sys/arch/powerpc/booke/spe.c
diff -r1.24 -r1.25 src/sys/arch/powerpc/oea/altivec.c
diff -r1.30 -r1.31 src/sys/arch/powerpc/powerpc/fpu.c
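
All three files gain the same guard in their pcu_state_load hook: if the lwp has not touched the unit before, the save area in the PCB is zeroed before being loaded into the registers, and the lwp is marked as a user of the unit. Below is a minimal, self-contained sketch of that pattern only; the example_* names are placeholders, not part of the commit (the real hooks are vec_state_load()/fpu_state_load() in the diffs that follow).

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Stand-ins for the per-unit save area and the MDLWP_USEDVEC/USEDFPU flag. */
struct example_pcb_area {
	uint64_t regs[32];
};

static bool example_used;
static struct example_pcb_area example_area;

static void
example_state_load(void)
{
	/* First use: start from a clean slate instead of stale garbage. */
	if (!example_used) {
		memset(&example_area, 0, sizeof(example_area));
		example_used = true;
	}
	/* ...enable the unit and load example_area into its registers... */
}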

cvs diff -r1.4 -r1.5 src/sys/arch/powerpc/booke/spe.c

--- src/sys/arch/powerpc/booke/spe.c 2011/05/02 06:43:16 1.4
+++ src/sys/arch/powerpc/booke/spe.c 2011/06/07 01:01:42 1.5
@@ -1,237 +1,242 @@
-/*	$NetBSD: spe.c,v 1.4 2011/05/02 06:43:16 matt Exp $	*/
+/*	$NetBSD: spe.c,v 1.5 2011/06/07 01:01:42 matt Exp $	*/
 
 /*-
  * Copyright (c) 2011 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Matt Thomas of 3am Software Foundry.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: spe.c,v 1.4 2011/05/02 06:43:16 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: spe.c,v 1.5 2011/06/07 01:01:42 matt Exp $");
 
 #include "opt_altivec.h"
 
 #ifdef PPC_HAVE_SPE
 
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/systm.h>
 #include <sys/atomic.h>
 #include <sys/siginfo.h>
 #include <sys/pcu.h>
 
 #include <powerpc/altivec.h>
 #include <powerpc/spr.h>
 #include <powerpc/booke/spr.h>
 #include <powerpc/psl.h>
 #include <powerpc/pcb.h>
 
 static void vec_state_load(lwp_t *, bool);
 static void vec_state_save(lwp_t *);
 static void vec_state_release(lwp_t *);
 
 const pcu_ops_t vec_ops = {
 	.pcu_id = PCU_VEC,
 	.pcu_state_load = vec_state_load,
 	.pcu_state_save = vec_state_save,
 	.pcu_state_release = vec_state_release,
 };
 
 bool
 vec_used_p(lwp_t *l)
 {
 	return (l->l_md.md_flags & MDLWP_USEDVEC) != 0;
 }
 
 void
 vec_mark_used(lwp_t *l)
 {
 	l->l_md.md_flags |= MDLWP_USEDVEC;
 }
 
 void
 vec_state_load(lwp_t *l, bool used)
 {
 	struct pcb * const pcb = lwp_getpcb(l);
 
+	if (__predict_false(!vec_used_p(l))) {
+		memset(&pcb->pcb_vr, 0, sizeof(pcb->pcb_vr));
+		vec_mark_used(l);
+	}
+
 	/*
 	 * Enable SPE temporarily (and disable interrupts).
 	 */
 	const register_t msr = mfmsr();
 	mtmsr((msr & ~PSL_EE) | PSL_SPV);
 	__asm volatile ("isync");
 
 	/*
 	 * Call an assembly routine to do load everything.
 	 */
 	vec_load_from_vreg(&pcb->pcb_vr);
 	__asm volatile ("sync");
 
 
 	/*
 	 * Restore MSR (turn off SPE)
 	 */
 	mtmsr(msr);
 	__asm volatile ("isync");
 
 	/*
 	 * Note that vector has now been used.
 	 */
 	l->l_md.md_flags |= MDLWP_USEDVEC;
 	l->l_md.md_utf->tf_srr1 |= PSL_SPV;
 }
 
 void
 vec_state_save(lwp_t *l)
 {
 	struct pcb * const pcb = lwp_getpcb(l);
 
 	/*
 	 * Turn on SPE, turn off interrupts.
 	 */
 	const register_t msr = mfmsr();
 	mtmsr((msr & ~PSL_EE) | PSL_SPV);
 	__asm volatile ("isync");
 
 	/*
 	 * Save the vector state which is best done in assembly.
 	 */
 	vec_unload_to_vreg(&pcb->pcb_vr);
 	__asm volatile ("sync");
 
 	/*
 	 * Restore MSR (turn off SPE)
 	 */
 	mtmsr(msr);
 	__asm volatile ("isync");
 }
 
 void
 vec_state_release(lwp_t *l)
 {
 	/*
 	 * Turn off SPV so the next SPE instruction will cause a
 	 * SPE unavailable exception
 	 */
 	l->l_md.md_utf->tf_srr1 &= ~PSL_SPV;
 }
 
 void
 vec_restore_from_mcontext(lwp_t *l, const mcontext_t *mcp)
 {
 	struct pcb * const pcb = lwp_getpcb(l);
 	const union __vr *vr = mcp->__vrf.__vrs;
 
 	KASSERT(l == curlwp);
 
 	vec_save();
 
 	/* grab the accumulator */
 	pcb->pcb_vr.vreg[8][0] = vr->__vr32[2];
 	pcb->pcb_vr.vreg[8][1] = vr->__vr32[3];
 
 	/*
 	 * We store the high parts of each register in the first 8 vectors.
 	 */
 	for (u_int i = 0; i < 8; i++, vr += 4) {
 		pcb->pcb_vr.vreg[i][0] = vr[0].__vr32[0];
 		pcb->pcb_vr.vreg[i][1] = vr[1].__vr32[0];
 		pcb->pcb_vr.vreg[i][2] = vr[2].__vr32[0];
 		pcb->pcb_vr.vreg[i][3] = vr[3].__vr32[0];
 	}
 	l->l_md.md_utf->tf_spefscr = pcb->pcb_vr.vscr = mcp->__vrf.__vscr;
 	pcb->pcb_vr.vrsave = mcp->__vrf.__vrsave;
 }
 
 bool
 vec_save_to_mcontext(lwp_t *l, mcontext_t *mcp, unsigned int *flagp)
 {
 	struct pcb * const pcb = lwp_getpcb(l);
 
 	KASSERT(l == curlwp);
 
 	if (!vec_used_p(l))
 		return false;
 
 	vec_save();
 
 	mcp->__gregs[_REG_MSR] |= PSL_SPV;
 
 	union __vr *vr = mcp->__vrf.__vrs;
 	const register_t *fixreg = l->l_md.md_utf->tf_fixreg;
 	for (u_int i = 0; i < 32; i++, vr += 4, fixreg += 4) {
 		vr[0].__vr32[0] = pcb->pcb_vr.vreg[i][0];
 		vr[0].__vr32[1] = fixreg[0];
 		vr[0].__vr32[2] = 0;
 		vr[0].__vr32[3] = 0;
 		vr[1].__vr32[0] = pcb->pcb_vr.vreg[i][1];
 		vr[1].__vr32[1] = fixreg[1];
 		vr[1].__vr32[2] = 0;
 		vr[1].__vr32[3] = 0;
 		vr[2].__vr32[0] = pcb->pcb_vr.vreg[i][2];
 		vr[2].__vr32[1] = fixreg[2];
 		vr[2].__vr32[2] = 0;
 		vr[2].__vr32[3] = 0;
 		vr[3].__vr32[0] = pcb->pcb_vr.vreg[i][3];
 		vr[3].__vr32[1] = fixreg[3];
 		vr[3].__vr32[2] = 0;
 		vr[3].__vr32[3] = 0;
 	}
 
 	mcp->__vrf.__vrs[0].__vr32[2] = pcb->pcb_vr.vreg[8][0];
 	mcp->__vrf.__vrs[0].__vr32[3] = pcb->pcb_vr.vreg[8][1];
 
 	mcp->__vrf.__vrsave = pcb->pcb_vr.vrsave;
 	mcp->__vrf.__vscr = l->l_md.md_utf->tf_spefscr;
 
 	*flagp |= _UC_POWERPC_SPE;
 
 	return true;
 }
 
 static const struct {
 	uint32_t mask;
 	int code;
 } spefscr_siginfo_map[] = {
 	{ SPEFSCR_FINV|SPEFSCR_FINVH, FPE_FLTINV },
 	{ SPEFSCR_FOVF|SPEFSCR_FOVFH, FPE_FLTOVF },
 	{ SPEFSCR_FUNF|SPEFSCR_FUNFH, FPE_FLTUND },
 	{ SPEFSCR_FX  |SPEFSCR_FXH,   FPE_FLTRES },
 	{ SPEFSCR_FDBZ|SPEFSCR_FDBZH, FPE_FLTDIV },
 	{ SPEFSCR_OV  |SPEFSCR_OVH,   FPE_INTOVF },
 };
 
 int
 vec_siginfo_code(const struct trapframe *tf)
 {
 	for (u_int i = 0; i < __arraycount(spefscr_siginfo_map); i++) {
 		if (tf->tf_spefscr & spefscr_siginfo_map[i].mask)
 			return spefscr_siginfo_map[i].code;
 	}
 	return 0;
 }
 
 #endif /* PPC_HAVE_SPE */
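
Both the load and save hooks above bracket the register transfer with the same MSR dance: mask external interrupts, set the unit's enable bit, isync, move the registers, then restore the old MSR. The sketch below distills only that bracket; with_spe_enabled() and do_transfer are placeholders (assuming the save area is the struct vreg the file's assembly helpers take), not functions from the commit.

/*
 * Sketch of the MSR bracket used by vec_state_load()/vec_state_save();
 * do_transfer stands in for vec_load_from_vreg()/vec_unload_to_vreg().
 */
static void
with_spe_enabled(void (*do_transfer)(struct vreg *), struct vreg *vr)
{
	const register_t msr = mfmsr();

	/* Disable external interrupts and turn the SPE unit on. */
	mtmsr((msr & ~PSL_EE) | PSL_SPV);
	__asm volatile ("isync");

	(*do_transfer)(vr);
	__asm volatile ("sync");

	/* Restore the previous MSR (SPE off again, interrupts back on). */
	mtmsr(msr);
	__asm volatile ("isync");
}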

cvs diff -r1.24 -r1.25 src/sys/arch/powerpc/oea/altivec.c

--- src/sys/arch/powerpc/oea/altivec.c 2011/05/25 05:42:37 1.24
+++ src/sys/arch/powerpc/oea/altivec.c 2011/06/07 01:01:43 1.25
@@ -1,314 +1,319 @@
-/*	$NetBSD: altivec.c,v 1.24 2011/05/25 05:42:37 matt Exp $	*/
+/*	$NetBSD: altivec.c,v 1.25 2011/06/07 01:01:43 matt Exp $	*/
 
 /*
  * Copyright (C) 1996 Wolfgang Solfrank.
  * Copyright (C) 1996 TooLs GmbH.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by TooLs GmbH.
  * 4. The name of TooLs GmbH may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: altivec.c,v 1.24 2011/05/25 05:42:37 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: altivec.c,v 1.25 2011/06/07 01:01:43 matt Exp $");
 
 #include "opt_multiprocessor.h"
 
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/systm.h>
 #include <sys/atomic.h>
 
 #include <uvm/uvm_extern.h>		/*  for vcopypage/vzeropage */
 
 #include <powerpc/pcb.h>
 #include <powerpc/altivec.h>
 #include <powerpc/spr.h>
 #include <powerpc/oea/spr.h>
 #include <powerpc/psl.h>
 
 static void vec_state_load(lwp_t *, bool);
 static void vec_state_save(lwp_t *);
 static void vec_state_release(lwp_t *);
 
 const pcu_ops_t vec_ops = {
 	.pcu_id = PCU_VEC,
 	.pcu_state_load = vec_state_load,
 	.pcu_state_save = vec_state_save,
 	.pcu_state_release = vec_state_release,
 };
 
 bool
 vec_used_p(lwp_t *l)
 {
 	return (l->l_md.md_flags & MDLWP_USEDVEC) != 0;
 }
 
 void
 vec_mark_used(lwp_t *l)
 {
 	l->l_md.md_flags |= MDLWP_USEDVEC;
 }
 
 void
 vec_state_load(lwp_t *l, bool used)
 {
 	struct pcb * const pcb = lwp_getpcb(l);
 
+	if (__predict_false(!vec_used_p(l))) {
+		memset(&pcb->pcb_vr, 0, sizeof(pcb->pcb_vr));
+		vec_mark_used(l);
+	}
+
 	/*
 	 * Enable AltiVec temporarily (and disable interrupts).
 	 */
 	const register_t msr = mfmsr();
 	mtmsr((msr & ~PSL_EE) | PSL_VEC);
 	__asm volatile ("isync");
 
 	/*
 	 * Load the vector unit from vreg which is best done in
 	 * assembly.
 	 */
 	vec_load_from_vreg(&pcb->pcb_vr);
 
 	/*
 	 * VRSAVE will be restored when trap frame returns
 	 */
 	l->l_md.md_utf->tf_vrsave = pcb->pcb_vr.vrsave;
 
 	/*
 	 * Restore MSR (turn off AltiVec)
 	 */
 	mtmsr(msr);
 	__asm volatile ("isync");
 
 	/*
 	 * Mark vector registers as modified.
 	 */
 	l->l_md.md_flags |= MDLWP_USEDVEC|PSL_VEC;
 	l->l_md.md_utf->tf_srr1 |= PSL_VEC;
 }
 
 void
 vec_state_save(lwp_t *l)
 {
 	struct pcb * const pcb = lwp_getpcb(l);
 
 	/*
 	 * Turn on AltiVEC, turn off interrupts.
 	 */
 	const register_t msr = mfmsr();
 	mtmsr((msr & ~PSL_EE) | PSL_VEC);
 	__asm volatile ("isync");
 
 	/*
 	 * Grab contents of vector unit.
 	 */
 	vec_unload_to_vreg(&pcb->pcb_vr);
 
 	/*
 	 * Save VRSAVE
 	 */
 	pcb->pcb_vr.vrsave = l->l_md.md_utf->tf_vrsave;
 
 	/*
 	 * Note that we aren't using any CPU resources and stop any
 	 * data streams.
 	 */
 	__asm volatile ("dssall; sync");
 
 	/*
 	 * Restore MSR (turn off AltiVec)
 	 */
 	mtmsr(msr);
 	__asm volatile ("isync");
 }
 
 void
 vec_state_release(lwp_t *l)
 {
 	__asm volatile("dssall;sync");
 	l->l_md.md_utf->tf_srr1 &= ~PSL_VEC;
 	l->l_md.md_flags &= ~PSL_VEC;
 }
 
 void
 vec_restore_from_mcontext(struct lwp *l, const mcontext_t *mcp)
 {
 	struct pcb * const pcb = lwp_getpcb(l);
 
 	KASSERT(l == curlwp);
 
 	/* we don't need to save the state, just drop it */
 	pcu_discard(&vec_ops);
 	memcpy(pcb->pcb_vr.vreg, &mcp->__vrf.__vrs, sizeof (pcb->pcb_vr.vreg));
 	pcb->pcb_vr.vscr = mcp->__vrf.__vscr;
 	pcb->pcb_vr.vrsave = mcp->__vrf.__vrsave;
 	l->l_md.md_utf->tf_vrsave = pcb->pcb_vr.vrsave;
 }
 
 bool
 vec_save_to_mcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flagp)
 {
 	struct pcb * const pcb = lwp_getpcb(l);
 
 	KASSERT(l == curlwp);
 
 	/* Save AltiVec context, if any. */
 	if (!vec_used_p(l))
 		return false;
 
 	/*
 	 * If we're the AltiVec owner, dump its context to the PCB first.
 	 */
 	pcu_save(&vec_ops);
 
 	mcp->__gregs[_REG_MSR] |= PSL_VEC;
 	mcp->__vrf.__vscr = pcb->pcb_vr.vscr;
 	mcp->__vrf.__vrsave = l->l_md.md_utf->tf_vrsave;
 	memcpy(mcp->__vrf.__vrs, pcb->pcb_vr.vreg, sizeof (mcp->__vrf.__vrs));
 	*flagp |= _UC_POWERPC_VEC;
 	return true;
 }
 
 #define ZERO_VEC	19
 
 void
 vzeropage(paddr_t pa)
 {
 	const paddr_t ea = pa + PAGE_SIZE;
 	uint32_t vec[7], *vp = (void *) roundup((uintptr_t) vec, 16);
 	register_t omsr, msr;
 
 	__asm volatile("mfmsr %0" : "=r"(omsr) :);
 
 	/*
 	 * Turn on AltiVec, turn off interrupts.
 	 */
 	msr = (omsr & ~PSL_EE) | PSL_VEC;
 	__asm volatile("sync; mtmsr %0; isync" :: "r"(msr));
 
 	/*
 	 * Save the VEC register we are going to use before we disable
 	 * relocation.
 	 */
 	__asm("stvx %1,0,%0" :: "r"(vp), "n"(ZERO_VEC));
 	__asm("vxor %0,%0,%0" :: "n"(ZERO_VEC));
 
 	/*
 	 * Zero the page using a single cache line.
 	 */
 	__asm volatile(
 	    "	sync ;"
 	    "	mfmsr %[msr];"
 	    "	rlwinm %[msr],%[msr],0,28,26;"	/* Clear PSL_DR */
 	    "	mtmsr %[msr];"			/* Turn off DMMU */
 	    "	isync;"
 	    "1:	stvx %[zv], %[pa], %[off0];"
 	    "	stvxl %[zv], %[pa], %[off16];"
 	    "	stvx %[zv], %[pa], %[off32];"
 	    "	stvxl %[zv], %[pa], %[off48];"
 	    "	addi %[pa], %[pa], 64;"
 	    "	cmplw %[pa], %[ea];"
 	    "	blt+ 1b;"
 	    "	ori %[msr], %[msr], 0x10;"	/* Set PSL_DR */
 	    "	sync;"
 	    "	mtmsr %[msr];"			/* Turn on DMMU */
 	    "	isync;"
 	    :: [msr] "r"(msr), [pa] "b"(pa), [ea] "b"(ea),
 	    [off0] "r"(0), [off16] "r"(16), [off32] "r"(32), [off48] "r"(48),
 	    [zv] "n"(ZERO_VEC));
 
 	/*
 	 * Restore VEC register (now that we can access the stack again).
 	 */
 	__asm("lvx %1,0,%0" :: "r"(vp), "n"(ZERO_VEC));
 
 	/*
 	 * Restore old MSR (AltiVec OFF).
 	 */
 	__asm volatile("sync; mtmsr %0; isync" :: "r"(omsr));
 }
 
 #define LO_VEC	16
 #define HI_VEC	17
 
 void
 vcopypage(paddr_t dst, paddr_t src)
 {
 	const paddr_t edst = dst + PAGE_SIZE;
 	uint32_t vec[11], *vp = (void *) roundup((uintptr_t) vec, 16);
 	register_t omsr, msr;
 
 	__asm volatile("mfmsr %0" : "=r"(omsr) :);
 
 	/*
 	 * Turn on AltiVec, turn off interrupts.
 	 */
 	msr = (omsr & ~PSL_EE) | PSL_VEC;
 	__asm volatile("sync; mtmsr %0; isync" :: "r"(msr));
 
 	/*
 	 * Save the VEC registers we will be using before we disable
 	 * relocation.
 	 */
 	__asm("stvx %2,%1,%0" :: "b"(vp), "r"( 0), "n"(LO_VEC));
 	__asm("stvx %2,%1,%0" :: "b"(vp), "r"(16), "n"(HI_VEC));
 
 	/*
 	 * Copy the page using a single cache line, with DMMU
 	 * disabled.  On most PPCs, two vector registers occupy one
 	 * cache line.
 	 */
 	__asm volatile(
 	    "	sync ;"
 	    "	mfmsr %[msr];"
 	    "	rlwinm %[msr],%[msr],0,28,26;"	/* Clear PSL_DR */
 	    "	mtmsr %[msr];"			/* Turn off DMMU */
 	    "	isync;"
 	    "1:	lvx %[lv], %[src], %[off0];"
 	    "	stvx %[lv], %[dst], %[off0];"
 	    "	lvxl %[hv], %[src], %[off16];"
 	    "	stvxl %[hv], %[dst], %[off16];"
 	    "	addi %[src], %[src], 32;"
 	    "	addi %[dst], %[dst], 32;"
 	    "	cmplw %[dst], %[edst];"
 	    "	blt+ 1b;"
 	    "	ori %[msr], %[msr], 0x10;"	/* Set PSL_DR */
 	    "	sync;"
 	    "	mtmsr %[msr];"			/* Turn on DMMU */
 	    "	isync;"
 	    :: [msr] "r"(msr), [src] "b"(src), [dst] "b"(dst),
 	    [edst] "b"(edst), [off0] "r"(0), [off16] "r"(16),
 	    [lv] "n"(LO_VEC), [hv] "n"(HI_VEC));
 
 	/*
 	 * Restore VEC registers (now that we can access the stack again).
 	 */
 	__asm("lvx %2,%1,%0" :: "b"(vp), "r"( 0), "n"(LO_VEC));
 	__asm("lvx %2,%1,%0" :: "b"(vp), "r"(16), "n"(HI_VEC));
 
 	/*
 	 * Restore old MSR (AltiVec OFF).
 	 */
 	__asm volatile("sync; mtmsr %0; isync" :: "r"(omsr));
 }
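
The two mcontext hooks in this file are consumed by the machine-dependent signal and setcontext paths. The following is a condensed sketch of how the pair is meant to be used, based only on the signatures and flags visible above; example_signal_roundtrip() is a placeholder, not the actual sig_machdep code.

/*
 * Sketch only: how vec_save_to_mcontext()/vec_restore_from_mcontext()
 * pair up around signal delivery and sigreturn.
 */
static void
example_signal_roundtrip(lwp_t *l, mcontext_t *mcp)
{
	unsigned int flags = 0;

	/* Delivery: snapshot AltiVec state, if this lwp ever used it. */
	if (vec_save_to_mcontext(l, mcp, &flags))
		KASSERT(flags & _UC_POWERPC_VEC);

	/* Return: push the (possibly modified) state back into the PCB. */
	if (flags & _UC_POWERPC_VEC)
		vec_restore_from_mcontext(l, mcp);
}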

cvs diff -r1.30 -r1.31 src/sys/arch/powerpc/powerpc/fpu.c

--- src/sys/arch/powerpc/powerpc/fpu.c 2011/06/07 00:48:32 1.30
+++ src/sys/arch/powerpc/powerpc/fpu.c 2011/06/07 01:01:43 1.31
@@ -1,249 +1,250 @@
-/*	$NetBSD: fpu.c,v 1.30 2011/06/07 00:48:32 matt Exp $	*/
+/*	$NetBSD: fpu.c,v 1.31 2011/06/07 01:01:43 matt Exp $	*/
 
 /*
  * Copyright (C) 1996 Wolfgang Solfrank.
  * Copyright (C) 1996 TooLs GmbH.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by TooLs GmbH.
  * 4. The name of TooLs GmbH may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.30 2011/06/07 00:48:32 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.31 2011/06/07 01:01:43 matt Exp $");
 
 #include "opt_multiprocessor.h"
 
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/systm.h>
 #include <sys/atomic.h>
 #include <sys/siginfo.h>
 #include <sys/pcu.h>
 
 #include <machine/pcb.h>
 #include <machine/fpu.h>
 #include <machine/psl.h>
 
 #ifdef PPC_HAVE_FPU
 static void fpu_state_load(lwp_t *, bool);
 static void fpu_state_save(lwp_t *);
 static void fpu_state_release(lwp_t *);
 #endif
 
 const pcu_ops_t fpu_ops = {
 	.pcu_id = PCU_FPU,
 #ifdef PPC_HAVE_FPU
 	.pcu_state_load = fpu_state_load,
 	.pcu_state_save = fpu_state_save,
 	.pcu_state_release = fpu_state_release,
 #endif
 };
 
 bool
 fpu_used_p(lwp_t *l)
 {
 	return (l->l_md.md_flags & MDLWP_USEDFPU) != 0;
 }
 
 void
 fpu_mark_used(lwp_t *l)
 {
 	l->l_md.md_flags |= MDLWP_USEDFPU;
 }
 
 #ifdef PPC_HAVE_FPU
 void
 fpu_state_load(lwp_t *l, bool used)
 {
 	struct pcb * const pcb = lwp_getpcb(l);
 
-	if (__predict_false(!used)) {
+	if (__predict_false(!fpu_used_p(l))) {
 		memset(&pcb->pcb_fpu, 0, sizeof(pcb->pcb_fpu));
+		fpu_mark_used(l);
 	}
 
 	const register_t msr = mfmsr();
 	mtmsr((msr & ~PSL_EE) | PSL_FP);
 	__asm volatile ("isync");
 
 	fpu_load_from_fpreg(&pcb->pcb_fpu);
 	__asm volatile ("sync");
 
 	mtmsr(msr);
 	__asm volatile ("isync");
 
 	curcpu()->ci_ev_fpusw.ev_count++;
 	l->l_md.md_utf->tf_srr1 |= PSL_FP|(pcb->pcb_flags & (PCB_FE0|PCB_FE1));
 	l->l_md.md_flags |= MDLWP_USEDFPU;
 }
 
 /*
  * Save the contents of the current CPU's FPU to its PCB.
  */
 void
 fpu_state_save(lwp_t *l)
 {
 	struct pcb * const pcb = lwp_getpcb(l);
 
 	const register_t msr = mfmsr();
 	mtmsr((msr & ~PSL_EE) | PSL_FP);
 	__asm volatile ("isync");
 
 	fpu_unload_to_fpreg(&pcb->pcb_fpu);
 	__asm volatile ("sync");
 
 	mtmsr(msr);
 	__asm volatile ("isync");
 }
 
 void
 fpu_state_release(lwp_t *l)
 {
 	l->l_md.md_utf->tf_srr1 &= ~PSL_FP;
 }
 
 #define	STICKYBITS	(FPSCR_VX|FPSCR_OX|FPSCR_UX|FPSCR_ZX|FPSCR_XX)
 #define	STICKYSHIFT	25
 #define	MASKBITS	(FPSCR_VE|FPSCR_OE|FPSCR_UE|FPSCR_ZE|FPSCR_XE)
 #define	MASKSHIFT	3
 
 int
 fpu_get_fault_code(void)
 {
 	lwp_t * const l = curlwp;
 	struct pcb * const pcb = lwp_getpcb(l);
 	uint64_t fpscr64;
 	uint32_t fpscr, ofpscr;
 	int code;
 
 	int s = splsoftclock();		/* disable preemption */
 
 	struct cpu_info * const ci = curcpu();
 	/*
 	 * If we got preempted, we may be running on a different CPU.  So we
 	 * need to check for that.
 	 */
 	KASSERT(fpu_used_p(l));
 	if (__predict_true(l->l_pcu_cpu[PCU_FPU] == ci)) {
 		uint64_t tmp;
 		const register_t msr = mfmsr();
 		mtmsr((msr & ~PSL_EE) | PSL_FP);
 		__asm volatile ("isync");
 		__asm volatile (
 			"stfd	0,0(%[tmp])\n"		/* save f0 */
 			"mffs	0\n"			/* get FPSCR */
 			"stfd	0,0(%[fpscr64])\n"	/* store a temp copy */
 			"mtfsb0	0\n"			/* clear FPSCR_FX */
 			"mtfsb0	24\n"			/* clear FPSCR_VE */
 			"mtfsb0	25\n"			/* clear FPSCR_OE */
 			"mtfsb0	26\n"			/* clear FPSCR_UE */
 			"mtfsb0	27\n"			/* clear FPSCR_ZE */
 			"mtfsb0	28\n"			/* clear FPSCR_XE */
 			"mffs	0\n"			/* get FPSCR */
 			"stfd	0,0(%[fpscr])\n"	/* store it */
 			"lfd	0,0(%[tmp])\n"		/* restore f0 */
 		    ::	[tmp] "b"(&tmp),
 			[fpscr] "b"(&pcb->pcb_fpu.fpscr),
 			[fpscr64] "b"(&fpscr64));
 		mtmsr(msr);
 		__asm volatile ("isync");
 	} else {
 		/*
 		 * We got preempted to a different CPU so we need to save
 		 * our FPU state.
 		 */
 		fpu_save();
 		fpscr64 = *(uint64_t *)&pcb->pcb_fpu.fpscr;
 		((uint32_t *)&pcb->pcb_fpu.fpscr)[_QUAD_LOWWORD] &= ~MASKBITS;
 	}
 
 	splx(s);	/* allow preemption */
 
 	/*
 	 * Now determine the fault type.  First we test to see if any of sticky
 	 * bits correspond to the enabled exceptions.  If so, we only test
 	 * those bits.  If not, we look at all the bits.  (In reality, we only
 	 * could get an exception if FPSCR_FEX changed state.  So we should
 	 * have at least one bit that corresponds).
 	 */
 	ofpscr = (uint32_t)fpscr64;
 	ofpscr &= ofpscr << (STICKYSHIFT - MASKSHIFT);
 	fpscr = ((uint32_t *)&pcb->pcb_fpu.fpscr)[_QUAD_LOWWORD];
 	if (fpscr & ofpscr & STICKYBITS)
 		fpscr &= ofpscr;
 
 	/*
 	 * Let's determine what the appropriate code is.
 	 */
 	if (fpscr & FPSCR_VX)		code = FPE_FLTINV;
 	else if (fpscr & FPSCR_OX)	code = FPE_FLTOVF;
 	else if (fpscr & FPSCR_UX)	code = FPE_FLTUND;
 	else if (fpscr & FPSCR_ZX)	code = FPE_FLTDIV;
 	else if (fpscr & FPSCR_XX)	code = FPE_FLTRES;
 	else				code = 0;
 	return code;
 }
 #endif /* PPC_HAVE_FPU */
 
 bool
 fpu_save_to_mcontext(lwp_t *l, mcontext_t *mcp, unsigned int *flagp)
 {
 	KASSERT(l == curlwp);
 
 	if (!pcu_used_p(&fpu_ops))
 		return false;
 
 	struct pcb * const pcb = lwp_getpcb(l);
 
 #ifdef PPC_HAVE_FPU
 	/* If we're the FPU owner, dump its context to the PCB first. */
 	pcu_save(&fpu_ops);
 #endif
 	(void)memcpy(mcp->__fpregs.__fpu_regs, pcb->pcb_fpu.fpreg,
 	    sizeof (mcp->__fpregs.__fpu_regs));
 	mcp->__fpregs.__fpu_fpscr =
 	    ((int *)&pcb->pcb_fpu.fpscr)[_QUAD_LOWWORD];
 	mcp->__fpregs.__fpu_valid = 1;
 	*flagp |= _UC_FPU;
 	return true;
 }
 
 void
 fpu_restore_from_mcontext(lwp_t *l, const mcontext_t *mcp)
 {
 	if (!mcp->__fpregs.__fpu_valid)
 		return;
 
 	struct pcb * const pcb = lwp_getpcb(l);
 
 #ifdef PPC_HAVE_FPU
 	/* we don't need to save the state, just drop it */
 	if (l == curlwp)
 		pcu_discard(&fpu_ops);
 #endif
 	(void)memcpy(&pcb->pcb_fpu.fpreg, &mcp->__fpregs.__fpu_regs,
 	    sizeof (pcb->pcb_fpu.fpreg));
 	((int *)&pcb->pcb_fpu.fpscr)[_QUAD_LOWWORD] = mcp->__fpregs.__fpu_fpscr;
 }
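
fpu_get_fault_code() above exists to turn the sticky FPSCR exception bits into an si_code for SIGFPE. The sketch below is a condensed illustration of how a floating-point trap might consume it; it is an assumption for illustration, not the actual powerpc trap.c code, and example_fp_exception() is a hypothetical name.

/*
 * Sketch (assumption): condensed SIGFPE delivery using the helper above.
 * fpu_get_fault_code() maps the sticky FPSCR bits to an FPE_* si_code.
 */
static void
example_fp_exception(struct trapframe *tf)
{
	ksiginfo_t ksi;

	KSI_INIT_TRAP(&ksi);
	ksi.ksi_signo = SIGFPE;
	ksi.ksi_addr = (void *)tf->tf_srr0;	/* faulting instruction */
	ksi.ksi_code = fpu_get_fault_code();
	trapsignal(curlwp, &ksi);
}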