| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | /* $NetBSD: fpu.c,v 1.46 2018/07/01 08:32:41 maxv Exp $ */ | | 1 | /* $NetBSD: fpu.c,v 1.47 2018/09/17 15:53:06 maxv Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Copyright (c) 2008 The NetBSD Foundation, Inc. All | | 4 | * Copyright (c) 2008 The NetBSD Foundation, Inc. All |
5 | * rights reserved. | | 5 | * rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software developed for The NetBSD Foundation | | 7 | * This code is derived from software developed for The NetBSD Foundation |
8 | * by Andrew Doran. | | 8 | * by Andrew Doran. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
| @@ -86,166 +86,80 @@ | | | @@ -86,166 +86,80 @@ |
86 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 86 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
87 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 87 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
88 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 88 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
89 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 89 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
90 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 90 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
91 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 91 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
92 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 92 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
93 | * SUCH DAMAGE. | | 93 | * SUCH DAMAGE. |
94 | * | | 94 | * |
95 | * @(#)npx.c 7.2 (Berkeley) 5/12/91 | | 95 | * @(#)npx.c 7.2 (Berkeley) 5/12/91 |
96 | */ | | 96 | */ |
97 | | | 97 | |
98 | #include <sys/cdefs.h> | | 98 | #include <sys/cdefs.h> |
99 | __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.46 2018/07/01 08:32:41 maxv Exp $"); | | 99 | __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.47 2018/09/17 15:53:06 maxv Exp $"); |
100 | | | 100 | |
101 | #include "opt_multiprocessor.h" | | 101 | #include "opt_multiprocessor.h" |
102 | | | 102 | |
103 | #include <sys/param.h> | | 103 | #include <sys/param.h> |
104 | #include <sys/systm.h> | | 104 | #include <sys/systm.h> |
105 | #include <sys/conf.h> | | 105 | #include <sys/conf.h> |
106 | #include <sys/cpu.h> | | 106 | #include <sys/cpu.h> |
107 | #include <sys/file.h> | | 107 | #include <sys/file.h> |
108 | #include <sys/proc.h> | | 108 | #include <sys/proc.h> |
109 | #include <sys/kernel.h> | | 109 | #include <sys/kernel.h> |
110 | #include <sys/sysctl.h> | | 110 | #include <sys/sysctl.h> |
111 | #include <sys/xcall.h> | | 111 | #include <sys/xcall.h> |
112 | | | 112 | |
113 | #include <machine/cpu.h> | | 113 | #include <machine/cpu.h> |
114 | #include <machine/cpuvar.h> | | 114 | #include <machine/cpuvar.h> |
115 | #include <machine/cputypes.h> | | 115 | #include <machine/cputypes.h> |
116 | #include <machine/intr.h> | | 116 | #include <machine/intr.h> |
117 | #include <machine/cpufunc.h> | | 117 | #include <machine/cpufunc.h> |
118 | #include <machine/pcb.h> | | 118 | #include <machine/pcb.h> |
119 | #include <machine/trap.h> | | 119 | #include <machine/trap.h> |
120 | #include <machine/specialreg.h> | | 120 | #include <machine/specialreg.h> |
121 | #include <x86/cpu.h> | | 121 | #include <x86/cpu.h> |
122 | #include <x86/fpu.h> | | 122 | #include <x86/fpu.h> |
123 | | | 123 | |
124 | /* Check some duplicate definitions match */ | | | |
125 | #include <machine/fenv.h> | | | |
126 | | | | |
127 | #ifdef XEN | | 124 | #ifdef XEN |
128 | #define clts() HYPERVISOR_fpu_taskswitch(0) | | 125 | #define clts() HYPERVISOR_fpu_taskswitch(0) |
129 | #define stts() HYPERVISOR_fpu_taskswitch(1) | | 126 | #define stts() HYPERVISOR_fpu_taskswitch(1) |
130 | #endif | | 127 | #endif |
131 | | | 128 | |
132 | bool x86_fpu_eager __read_mostly = false; | | 129 | bool x86_fpu_eager __read_mostly = false; |
133 | | | 130 | |
134 | static uint32_t x86_fpu_mxcsr_mask __read_mostly = 0; | | 131 | static uint32_t x86_fpu_mxcsr_mask __read_mostly = 0; |
135 | | | 132 | |
136 | static inline union savefpu * | | 133 | static inline union savefpu * |
137 | process_fpframe(struct lwp *lwp) | | 134 | lwp_fpuarea(struct lwp *l) |
138 | { | | 135 | { |
139 | struct pcb *pcb = lwp_getpcb(lwp); | | 136 | struct pcb *pcb = lwp_getpcb(l); |
140 | | | 137 | |
141 | return &pcb->pcb_savefpu; | | 138 | return &pcb->pcb_savefpu; |
142 | } | | 139 | } |
143 | | | 140 | |
144 | /* | | | |
145 | * The following table is used to ensure that the FPE_... value | | | |
146 | * that is passed as a trapcode to the signal handler of the user | | | |
147 | * process does not have more than one bit set. | | | |
148 | * | | | |
149 | * Multiple bits may be set if SSE simd instructions generate errors | | | |
150 | * on more than one value or if the user process modifies the control | | | |
151 | * word while a status word bit is already set (which is a sign | | | |
152 | * of bad coding). | | | |
153 | * We have no choice but to narrow them down to one bit, since we must | | | |
154 | * not send a trapcode that is not exactly one of the FPE_ macros. | | | |
155 | * | | | |
156 | * The mechanism has a static table with 128 entries. Each combination | | | |
157 | * of the 7 FPU status word exception bits directly translates to a | | | |
158 | * position in this table, where a single FPE_... value is stored. | | | |
159 | * This FPE_... value stored there is considered the "most important" | | | |
160 | * of the exception bits and will be sent as the signal code. The | | | |
161 | * precedence of the bits is based upon Intel Document "Numerical | | | |
162 | * Applications", Chapter "Special Computational Situations". | | | |
163 | * | | | |
164 | * The code to choose one of these values does these steps: | | | |
165 | * 1) Throw away status word bits that cannot be masked. | | | |
166 | * 2) Throw away the bits currently masked in the control word, | | | |
167 | * assuming the user isn't interested in them anymore. | | | |
168 | * 3) Reinsert status word bit 7 (stack fault) if it is set, which | | | |
169 | * cannot be masked but must be preserved. | | | |
170 | * 'Stack fault' is a sub-class of 'invalid operation'. | | | |
171 | * 4) Use the remaining bits to point into the trapcode table. | | | |
172 | * | | | |
173 | * The 6 maskable bits in order of their preference, as stated in the | | | |
174 | * above referenced Intel manual: | | | |
175 | * 1 Invalid operation (FP_X_INV) | | | |
176 | * 1a Stack underflow | | | |
177 | * 1b Stack overflow | | | |
178 | * 1c Operand of unsupported format | | | |
179 | * 1d SNaN operand. | | | |
180 | * 2 QNaN operand (not an exception, irrelevant here) | | | |
181 | * 3 Any other invalid-operation not mentioned above or zero divide | | | |
182 | * (FP_X_INV, FP_X_DZ) | | | |
183 | * 4 Denormal operand (FP_X_DNML) | | | |
184 | * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) | | | |
185 | * 6 Inexact result (FP_X_IMP) | | | |
186 | * | | | |
187 | * NB: the above seems to mix up the mxcsr error bits and the x87 ones. | | | |
188 | * They are in the same order, but there is no EN_SW_STACK_FAULT in the mmx | | | |
189 | * status. | | | |
190 | * | | | |
191 | * The table is nearly, but not quite, in bit order (ZERODIV and DENORM | | | |
192 | * are swapped). | | | |
193 | * | | | |
194 | * This table assumes that any stack fault is cleared - so that an INVOP | | | |
195 | * fault will only be reported as FLTSUB once. | | | |
196 | * This might not happen if the mask is being changed. | | | |
197 | */ | | | |
198 | #define FPE_xxx1(f) (f & EN_SW_INVOP \ | | | |
199 | ? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \ | | | |
200 | : f & EN_SW_ZERODIV ? FPE_FLTDIV \ | | | |
201 | : f & EN_SW_DENORM ? FPE_FLTUND \ | | | |
202 | : f & EN_SW_OVERFLOW ? FPE_FLTOVF \ | | | |
203 | : f & EN_SW_UNDERFLOW ? FPE_FLTUND \ | | | |
204 | : f & EN_SW_PRECLOSS ? FPE_FLTRES \ | | | |
205 | : f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0) | | | |
206 | #define FPE_xxx2(f) FPE_xxx1(f), FPE_xxx1((f + 1)) | | | |
207 | #define FPE_xxx4(f) FPE_xxx2(f), FPE_xxx2((f + 2)) | | | |
208 | #define FPE_xxx8(f) FPE_xxx4(f), FPE_xxx4((f + 4)) | | | |
209 | #define FPE_xxx16(f) FPE_xxx8(f), FPE_xxx8((f + 8)) | | | |
210 | #define FPE_xxx32(f) FPE_xxx16(f), FPE_xxx16((f + 16)) | | | |
211 | static const uint8_t fpetable[128] = { | | | |
212 | FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96) | | | |
213 | }; | | | |
214 | #undef FPE_xxx1 | | | |
215 | #undef FPE_xxx2 | | | |
216 | #undef FPE_xxx4 | | | |
217 | #undef FPE_xxx8 | | | |
218 | #undef FPE_xxx16 | | | |
219 | #undef FPE_xxx32 | | | |
220 | | | | |
221 | /* | | | |
222 | * Init the FPU. | | | |
223 | * | | | |
224 | * This might not be strictly necessary since it will be initialised | | | |
225 | * for each process. However it does no harm. | | | |
226 | */ | | | |
227 | void | | 141 | void |
228 | fpuinit(struct cpu_info *ci) | | 142 | fpuinit(struct cpu_info *ci) |
229 | { | | 143 | { |
230 | | | 144 | /* |
| | | 145 | * This might not be strictly necessary since it will be initialized |
| | | 146 | * for each process. However it does no harm. |
| | | 147 | */ |
231 | clts(); | | 148 | clts(); |
232 | fninit(); | | 149 | fninit(); |
233 | stts(); | | 150 | stts(); |
234 | } | | 151 | } |
235 | | | 152 | |
236 | /* | | | |
237 | * Get the value of MXCSR_MASK supported by the CPU. | | | |
238 | */ | | | |
239 | void | | 153 | void |
240 | fpuinit_mxcsr_mask(void) | | 154 | fpuinit_mxcsr_mask(void) |
241 | { | | 155 | { |
242 | #ifndef XEN | | 156 | #ifndef XEN |
243 | union savefpu fpusave __aligned(16); | | 157 | union savefpu fpusave __aligned(16); |
244 | u_long psl; | | 158 | u_long psl; |
245 | | | 159 | |
246 | memset(&fpusave, 0, sizeof(fpusave)); | | 160 | memset(&fpusave, 0, sizeof(fpusave)); |
247 | | | 161 | |
248 | /* Disable interrupts, and enable FPU */ | | 162 | /* Disable interrupts, and enable FPU */ |
249 | psl = x86_read_psl(); | | 163 | psl = x86_read_psl(); |
250 | x86_disable_intr(); | | 164 | x86_disable_intr(); |
251 | clts(); | | 165 | clts(); |
| @@ -286,145 +200,211 @@ fpu_clear_amd(void) | | | @@ -286,145 +200,211 @@ fpu_clear_amd(void) |
286 | * currently set, in order to avoid causing a fault in the | | 200 | * currently set, in order to avoid causing a fault in the |
287 | * upcoming load. | | 201 | * upcoming load. |
288 | * | | 202 | * |
289 | * Newer generations of AMD CPUs have CPUID_Fn80000008_EBX[2], | | 203 | * Newer generations of AMD CPUs have CPUID_Fn80000008_EBX[2], |
290 | * which indicates that FIP/FDP/FOP are restored (same behavior | | 204 | * which indicates that FIP/FDP/FOP are restored (same behavior |
291 | * as Intel). We're not using it though. | | 205 | * as Intel). We're not using it though. |
292 | */ | | 206 | */ |
293 | if (fngetsw() & 0x80) | | 207 | if (fngetsw() & 0x80) |
294 | fnclex(); | | 208 | fnclex(); |
295 | fldummy(); | | 209 | fldummy(); |
296 | } | | 210 | } |
297 | | | 211 | |
298 | static void | | 212 | static void |
299 | fpu_save(struct lwp *l) | | 213 | fpu_area_save(void *area) |
300 | { | | 214 | { |
301 | struct pcb *pcb = lwp_getpcb(l); | | 215 | clts(); |
302 | | | 216 | |
303 | switch (x86_fpu_save) { | | 217 | switch (x86_fpu_save) { |
304 | case FPU_SAVE_FSAVE: | | 218 | case FPU_SAVE_FSAVE: |
305 | fnsave(&pcb->pcb_savefpu); | | 219 | fnsave(area); |
306 | break; | | 220 | break; |
307 | case FPU_SAVE_FXSAVE: | | 221 | case FPU_SAVE_FXSAVE: |
308 | fxsave(&pcb->pcb_savefpu); | | 222 | fxsave(area); |
309 | break; | | 223 | break; |
310 | case FPU_SAVE_XSAVE: | | 224 | case FPU_SAVE_XSAVE: |
311 | xsave(&pcb->pcb_savefpu, x86_xsave_features); | | 225 | xsave(area, x86_xsave_features); |
312 | break; | | 226 | break; |
313 | case FPU_SAVE_XSAVEOPT: | | 227 | case FPU_SAVE_XSAVEOPT: |
314 | xsaveopt(&pcb->pcb_savefpu, x86_xsave_features); | | 228 | xsaveopt(area, x86_xsave_features); |
315 | break; | | 229 | break; |
316 | } | | 230 | } |
317 | } | | 231 | } |
318 | | | 232 | |
319 | static void | | 233 | static void |
320 | fpu_restore(struct lwp *l) | | 234 | fpu_area_restore(void *area) |
321 | { | | 235 | { |
322 | struct pcb *pcb = lwp_getpcb(l); | | 236 | clts(); |
323 | | | 237 | |
324 | switch (x86_fpu_save) { | | 238 | switch (x86_fpu_save) { |
325 | case FPU_SAVE_FSAVE: | | 239 | case FPU_SAVE_FSAVE: |
326 | frstor(&pcb->pcb_savefpu); | | 240 | frstor(area); |
327 | break; | | 241 | break; |
328 | case FPU_SAVE_FXSAVE: | | 242 | case FPU_SAVE_FXSAVE: |
329 | if (cpu_vendor == CPUVENDOR_AMD) | | 243 | if (cpu_vendor == CPUVENDOR_AMD) |
330 | fpu_clear_amd(); | | 244 | fpu_clear_amd(); |
331 | fxrstor(&pcb->pcb_savefpu); | | 245 | fxrstor(area); |
332 | break; | | 246 | break; |
333 | case FPU_SAVE_XSAVE: | | 247 | case FPU_SAVE_XSAVE: |
334 | case FPU_SAVE_XSAVEOPT: | | 248 | case FPU_SAVE_XSAVEOPT: |
335 | if (cpu_vendor == CPUVENDOR_AMD) | | 249 | if (cpu_vendor == CPUVENDOR_AMD) |
336 | fpu_clear_amd(); | | 250 | fpu_clear_amd(); |
337 | xrstor(&pcb->pcb_savefpu, x86_xsave_features); | | 251 | xrstor(area, x86_xsave_features); |
338 | break; | | 252 | break; |
339 | } | | 253 | } |
340 | } | | 254 | } |
341 | | | 255 | |
342 | static void | | 256 | static void |
343 | fpu_eagerrestore(struct lwp *l) | | 257 | fpu_lwp_install(struct lwp *l) |
344 | { | | 258 | { |
345 | struct pcb *pcb = lwp_getpcb(l); | | 259 | struct pcb *pcb = lwp_getpcb(l); |
346 | struct cpu_info *ci = curcpu(); | | 260 | struct cpu_info *ci = curcpu(); |
347 | | | 261 | |
348 | clts(); | | | |
349 | KASSERT(ci->ci_fpcurlwp == NULL); | | 262 | KASSERT(ci->ci_fpcurlwp == NULL); |
350 | KASSERT(pcb->pcb_fpcpu == NULL); | | 263 | KASSERT(pcb->pcb_fpcpu == NULL); |
351 | ci->ci_fpcurlwp = l; | | 264 | ci->ci_fpcurlwp = l; |
352 | pcb->pcb_fpcpu = ci; | | 265 | pcb->pcb_fpcpu = ci; |
353 | fpu_restore(l); | | 266 | fpu_area_restore(&pcb->pcb_savefpu); |
354 | } | | 267 | } |
355 | | | 268 | |
356 | void | | 269 | void |
357 | fpu_eagerswitch(struct lwp *oldlwp, struct lwp *newlwp) | | 270 | fpu_eagerswitch(struct lwp *oldlwp, struct lwp *newlwp) |
358 | { | | 271 | { |
359 | int s; | | 272 | int s; |
360 | | | 273 | |
361 | s = splhigh(); | | 274 | s = splhigh(); |
362 | #ifdef DIAGNOSTIC | | 275 | #ifdef DIAGNOSTIC |
363 | if (oldlwp != NULL) { | | 276 | if (oldlwp != NULL) { |
364 | struct pcb *pcb = lwp_getpcb(oldlwp); | | 277 | struct pcb *pcb = lwp_getpcb(oldlwp); |
365 | struct cpu_info *ci = curcpu(); | | 278 | struct cpu_info *ci = curcpu(); |
366 | if (pcb->pcb_fpcpu == NULL) { | | 279 | if (pcb->pcb_fpcpu == NULL) { |
367 | KASSERT(ci->ci_fpcurlwp != oldlwp); | | 280 | KASSERT(ci->ci_fpcurlwp != oldlwp); |
368 | } else if (pcb->pcb_fpcpu == ci) { | | 281 | } else if (pcb->pcb_fpcpu == ci) { |
369 | KASSERT(ci->ci_fpcurlwp == oldlwp); | | 282 | KASSERT(ci->ci_fpcurlwp == oldlwp); |
370 | } else { | | 283 | } else { |
371 | panic("%s: oldlwp's state installed elsewhere", | | 284 | panic("%s: oldlwp's state installed elsewhere", |
372 | __func__); | | 285 | __func__); |
373 | } | | 286 | } |
374 | } | | 287 | } |
375 | #endif | | 288 | #endif |
376 | fpusave_cpu(true); | | 289 | fpusave_cpu(true); |
377 | if (!(newlwp->l_flag & LW_SYSTEM)) | | 290 | if (!(newlwp->l_flag & LW_SYSTEM)) |
378 | fpu_eagerrestore(newlwp); | | 291 | fpu_lwp_install(newlwp); |
379 | splx(s); | | 292 | splx(s); |
380 | } | | 293 | } |
381 | | | 294 | |
382 | /* -------------------------------------------------------------------------- */ | | 295 | /* -------------------------------------------------------------------------- */ |
383 | | | 296 | |
384 | /* | | 297 | /* |
385 | * This is a synchronous trap on either an x87 instruction (due to an | | 298 | * The following table is used to ensure that the FPE_... value |
386 | * unmasked error on the previous x87 instruction) or on an SSE/SSE2 etc | | 299 | * that is passed as a trapcode to the signal handler of the user |
387 | * instruction due to an error on the instruction itself. | | 300 | * process does not have more than one bit set. |
388 | * | | 301 | * |
389 | * If trap actually generates a signal, then the fpu state is saved | | 302 | * Multiple bits may be set if SSE simd instructions generate errors |
390 | * and then copied onto the process's user-stack, and then recovered | | 303 | * on more than one value or if the user process modifies the control |
391 | * from there when the signal returns (or from the jmp_buf if the | | 304 | * word while a status word bit is already set (which is a sign |
392 | * signal handler exits with a longjmp()). | | 305 | * of bad coding). |
393 | * | | 306 | * We have no choice but to narrow them down to one bit, since we must |
394 | * All this code need to do is save the reason for the trap. | | 307 | * not send a trapcode that is not exactly one of the FPE_ macros. |
395 | * For x87 interrupts the status word bits need clearing to stop the | | 308 | * |
396 | * trap re-occurring. | | 309 | * The mechanism has a static table with 128 entries. Each combination |
397 | * | | 310 | * of the 7 FPU status word exception bits directly translates to a |
398 | * The mxcsr bits are 'sticky' and need clearing to not confuse a later trap. | | 311 | * position in this table, where a single FPE_... value is stored. |
399 | * | | 312 | * This FPE_... value stored there is considered the "most important" |
400 | * Since this is a synchronous trap, the fpu registers must still belong | | 313 | * of the exception bits and will be sent as the signal code. The |
401 | * to the correct process (we trap through an interrupt gate so that | | 314 | * precedence of the bits is based upon Intel Document "Numerical |
402 | * interrupts are disabled on entry). | | 315 | * Applications", Chapter "Special Computational Situations". |
403 | * Interrupts (these better include IPIs) are left disabled until we've | | 316 | * |
404 | * finished looking at fpu registers. | | 317 | * The code to choose one of these values does these steps: |
| | | 318 | * 1) Throw away status word bits that cannot be masked. |
| | | 319 | * 2) Throw away the bits currently masked in the control word, |
| | | 320 | * assuming the user isn't interested in them anymore. |
| | | 321 | * 3) Reinsert status word bit 7 (stack fault) if it is set, which |
| | | 322 | * cannot be masked but must be preserved. |
| | | 323 | * 'Stack fault' is a sub-class of 'invalid operation'. |
| | | 324 | * 4) Use the remaining bits to point into the trapcode table. |
| | | 325 | * |
| | | 326 | * The 6 maskable bits in order of their preference, as stated in the |
| | | 327 | * above referenced Intel manual: |
| | | 328 | * 1 Invalid operation (FP_X_INV) |
| | | 329 | * 1a Stack underflow |
| | | 330 | * 1b Stack overflow |
| | | 331 | * 1c Operand of unsupported format |
| | | 332 | * 1d SNaN operand. |
| | | 333 | * 2 QNaN operand (not an exception, irrelevant here) |
| | | 334 | * 3 Any other invalid-operation not mentioned above or zero divide |
| | | 335 | * (FP_X_INV, FP_X_DZ) |
| | | 336 | * 4 Denormal operand (FP_X_DNML) |
| | | 337 | * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) |
| | | 338 | * 6 Inexact result (FP_X_IMP) |
| | | 339 | * |
| | | 340 | * NB: the above seems to mix up the mxcsr error bits and the x87 ones. |
| | | 341 | * They are in the same order, but there is no EN_SW_STACK_FAULT in the mmx |
| | | 342 | * status. |
405 | * | | 343 | * |
406 | * For amd64 the calling code (in amd64_trap.S) has already checked | | 344 | * The table is nearly, but not quite, in bit order (ZERODIV and DENORM |
407 | * that we trapped from usermode. | | 345 | * are swapped). |
| | | 346 | * |
| | | 347 | * This table assumes that any stack fault is cleared - so that an INVOP |
| | | 348 | * fault will only be reported as FLTSUB once. |
| | | 349 | * This might not happen if the mask is being changed. |
408 | */ | | 350 | */ |
| | | 351 | #define FPE_xxx1(f) (f & EN_SW_INVOP \ |
| | | 352 | ? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \ |
| | | 353 | : f & EN_SW_ZERODIV ? FPE_FLTDIV \ |
| | | 354 | : f & EN_SW_DENORM ? FPE_FLTUND \ |
| | | 355 | : f & EN_SW_OVERFLOW ? FPE_FLTOVF \ |
| | | 356 | : f & EN_SW_UNDERFLOW ? FPE_FLTUND \ |
| | | 357 | : f & EN_SW_PRECLOSS ? FPE_FLTRES \ |
| | | 358 | : f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0) |
| | | 359 | #define FPE_xxx2(f) FPE_xxx1(f), FPE_xxx1((f + 1)) |
| | | 360 | #define FPE_xxx4(f) FPE_xxx2(f), FPE_xxx2((f + 2)) |
| | | 361 | #define FPE_xxx8(f) FPE_xxx4(f), FPE_xxx4((f + 4)) |
| | | 362 | #define FPE_xxx16(f) FPE_xxx8(f), FPE_xxx8((f + 8)) |
| | | 363 | #define FPE_xxx32(f) FPE_xxx16(f), FPE_xxx16((f + 16)) |
| | | 364 | static const uint8_t fpetable[128] = { |
| | | 365 | FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96) |
| | | 366 | }; |
| | | 367 | #undef FPE_xxx1 |
| | | 368 | #undef FPE_xxx2 |
| | | 369 | #undef FPE_xxx4 |
| | | 370 | #undef FPE_xxx8 |
| | | 371 | #undef FPE_xxx16 |
| | | 372 | #undef FPE_xxx32 |
409 | | | 373 | |
| | | 374 | /* |
| | | 375 | * This is a synchronous trap on either an x87 instruction (due to an unmasked |
| | | 376 | * error on the previous x87 instruction) or on an SSE/SSE2/etc instruction due |
| | | 377 | * to an error on the instruction itself. |
| | | 378 | * |
| | | 379 | * If trap actually generates a signal, then the fpu state is saved and then |
| | | 380 | * copied onto the lwp's user-stack, and then recovered from there when the |
| | | 381 | * signal returns. |
| | | 382 | * |
| | | 383 | * All this code needs to do is save the reason for the trap. For x87 traps the |
| | | 384 | * status word bits need clearing to stop the trap re-occurring. For SSE traps |
| | | 385 | * the mxcsr bits are 'sticky' and need clearing to not confuse a later trap. |
| | | 386 | * |
| | | 387 | * We come here with interrupts disabled. |
| | | 388 | */ |
410 | void | | 389 | void |
411 | fputrap(struct trapframe *frame) | | 390 | fputrap(struct trapframe *frame) |
412 | { | | 391 | { |
413 | uint32_t statbits; | | 392 | uint32_t statbits; |
414 | ksiginfo_t ksi; | | 393 | ksiginfo_t ksi; |
415 | | | 394 | |
416 | if (!USERMODE(frame->tf_cs)) | | 395 | if (__predict_false(!USERMODE(frame->tf_cs))) { |
417 | panic("fpu trap from kernel, trapframe %p\n", frame); | | 396 | panic("fpu trap from kernel, trapframe %p\n", frame); |
| | | 397 | } |
418 | | | 398 | |
419 | /* | | 399 | /* |
420 | * At this point, fpcurlwp should be curlwp. If it wasn't, the TS bit | | 400 | * At this point, fpcurlwp should be curlwp. If it wasn't, the TS bit |
421 | * should be set, and we should have gotten a DNA exception. | | 401 | * should be set, and we should have gotten a DNA exception. |
422 | */ | | 402 | */ |
423 | KASSERT(curcpu()->ci_fpcurlwp == curlwp); | | 403 | KASSERT(curcpu()->ci_fpcurlwp == curlwp); |
424 | | | 404 | |
425 | if (frame->tf_trapno == T_XMM) { | | 405 | if (frame->tf_trapno == T_XMM) { |
426 | uint32_t mxcsr; | | 406 | uint32_t mxcsr; |
427 | x86_stmxcsr(&mxcsr); | | 407 | x86_stmxcsr(&mxcsr); |
428 | statbits = mxcsr; | | 408 | statbits = mxcsr; |
429 | /* Clear the sticky status bits */ | | 409 | /* Clear the sticky status bits */ |
430 | mxcsr &= ~0x3f; | | 410 | mxcsr &= ~0x3f; |
| @@ -432,65 +412,63 @@ fputrap(struct trapframe *frame) | | | @@ -432,65 +412,63 @@ fputrap(struct trapframe *frame) |
432 | | | 412 | |
433 | /* Remove masked interrupts and non-status bits */ | | 413 | /* Remove masked interrupts and non-status bits */ |
434 | statbits &= ~(statbits >> 7) & 0x3f; | | 414 | statbits &= ~(statbits >> 7) & 0x3f; |
435 | /* Mark this as an XMM status */ | | 415 | /* Mark this as an XMM status */ |
436 | statbits |= 0x10000; | | 416 | statbits |= 0x10000; |
437 | } else { | | 417 | } else { |
438 | uint16_t cw, sw; | | 418 | uint16_t cw, sw; |
439 | /* Get current control and status words */ | | 419 | /* Get current control and status words */ |
440 | fnstcw(&cw); | | 420 | fnstcw(&cw); |
441 | fnstsw(&sw); | | 421 | fnstsw(&sw); |
442 | /* Clear any pending exceptions from status word */ | | 422 | /* Clear any pending exceptions from status word */ |
443 | fnclex(); | | 423 | fnclex(); |
444 | | | 424 | |
445 | /* Removed masked interrupts */ | | 425 | /* Remove masked interrupts */ |
446 | statbits = sw & ~(cw & 0x3f); | | 426 | statbits = sw & ~(cw & 0x3f); |
447 | } | | 427 | } |
448 | | | 428 | |
449 | /* Doesn't matter now if we get pre-empted */ | | 429 | /* Doesn't matter now if we get pre-empted */ |
450 | x86_enable_intr(); | | 430 | x86_enable_intr(); |
451 | | | 431 | |
452 | KSI_INIT_TRAP(&ksi); | | 432 | KSI_INIT_TRAP(&ksi); |
453 | ksi.ksi_signo = SIGFPE; | | 433 | ksi.ksi_signo = SIGFPE; |
454 | ksi.ksi_addr = (void *)X86_TF_RIP(frame); | | 434 | ksi.ksi_addr = (void *)X86_TF_RIP(frame); |
455 | ksi.ksi_code = fpetable[statbits & 0x7f]; | | 435 | ksi.ksi_code = fpetable[statbits & 0x7f]; |
456 | ksi.ksi_trap = statbits; | | 436 | ksi.ksi_trap = statbits; |
457 | (*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi); | | 437 | (*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi); |
458 | } | | 438 | } |
459 | | | 439 | |
460 | /* | | 440 | /* |
461 | * Implement device not available (DNA) exception | | 441 | * Implement device not available (DNA) exception. |
462 | * | | 442 | * |
463 | * If we were the last lwp to use the FPU, we can simply return. | | 443 | * If we were the last lwp to use the FPU, we can simply return. |
464 | * Otherwise, we save the previous state, if necessary, and restore | | 444 | * Otherwise, we save the previous state, if necessary, and restore |
465 | * our last saved state. | | 445 | * our last saved state. |
466 | * | | 446 | * |
467 | * Called directly from the trap 0x13 entry with interrupts still disabled. | | 447 | * Called directly from the trap 0x13 entry with interrupts still disabled. |
468 | */ | | 448 | */ |
469 | void | | 449 | void |
470 | fpudna(struct trapframe *frame) | | 450 | fpudna(struct trapframe *frame) |
471 | { | | 451 | { |
472 | struct cpu_info *ci; | | 452 | struct cpu_info *ci = curcpu(); |
473 | struct lwp *l, *fl; | | 453 | struct lwp *l, *fl; |
474 | struct pcb *pcb; | | 454 | struct pcb *pcb; |
475 | int s; | | 455 | int s; |
476 | | | 456 | |
477 | if (!USERMODE(frame->tf_cs)) | | 457 | if (!USERMODE(frame->tf_cs)) { |
478 | panic("fpudna from kernel, ip %p, trapframe %p\n", | | 458 | panic("fpudna from kernel, ip %p, trapframe %p\n", |
479 | (void *)X86_TF_RIP(frame), frame); | | 459 | (void *)X86_TF_RIP(frame), frame); |
| | | 460 | } |
480 | | | 461 | |
481 | ci = curcpu(); | | | |
482 | | | | |
483 | /* Save soft spl level - interrupts are hard disabled */ | | | |
484 | s = splhigh(); | | 462 | s = splhigh(); |
485 | | | 463 | |
486 | /* Save state on current CPU. */ | | 464 | /* Save state on current CPU. */ |
487 | l = ci->ci_curlwp; | | 465 | l = ci->ci_curlwp; |
488 | pcb = lwp_getpcb(l); | | 466 | pcb = lwp_getpcb(l); |
489 | fl = ci->ci_fpcurlwp; | | 467 | fl = ci->ci_fpcurlwp; |
490 | if (fl != NULL) { | | 468 | if (fl != NULL) { |
491 | if (__predict_false(x86_fpu_eager)) { | | 469 | if (__predict_false(x86_fpu_eager)) { |
492 | panic("%s: FPU busy with EagerFPU enabled", | | 470 | panic("%s: FPU busy with EagerFPU enabled", |
493 | __func__); | | 471 | __func__); |
494 | } | | 472 | } |
495 | | | 473 | |
496 | /* | | 474 | /* |
| @@ -516,62 +494,56 @@ fpudna(struct trapframe *frame) | | | @@ -516,62 +494,56 @@ fpudna(struct trapframe *frame) |
516 | /* Explicitly disable preemption before dropping spl. */ | | 494 | /* Explicitly disable preemption before dropping spl. */ |
517 | kpreempt_disable(); | | 495 | kpreempt_disable(); |
518 | splx(s); | | 496 | splx(s); |
519 | | | 497 | |
520 | /* Actually enable interrupts */ | | 498 | /* Actually enable interrupts */ |
521 | x86_enable_intr(); | | 499 | x86_enable_intr(); |
522 | | | 500 | |
523 | fpusave_lwp(l, true); | | 501 | fpusave_lwp(l, true); |
524 | KASSERT(pcb->pcb_fpcpu == NULL); | | 502 | KASSERT(pcb->pcb_fpcpu == NULL); |
525 | s = splhigh(); | | 503 | s = splhigh(); |
526 | kpreempt_enable(); | | 504 | kpreempt_enable(); |
527 | } | | 505 | } |
528 | | | 506 | |
529 | /* | | 507 | /* Install the LWP's FPU state. */ |
530 | * Restore state on this CPU, or initialize. Ensure that | | 508 | fpu_lwp_install(l); |
531 | * the entire update is atomic with respect to FPU-sync IPIs. | | | |
532 | */ | | | |
533 | clts(); | | | |
534 | ci->ci_fpcurlwp = l; | | | |
535 | pcb->pcb_fpcpu = ci; | | | |
536 | | | | |
537 | fpu_restore(l); | | | |
538 | | | 509 | |
539 | KASSERT(ci == curcpu()); | | 510 | KASSERT(ci == curcpu()); |
540 | splx(s); | | 511 | splx(s); |
541 | } | | 512 | } |
542 | | | 513 | |
| | | 514 | /* -------------------------------------------------------------------------- */ |
| | | 515 | |
543 | /* | | 516 | /* |
544 | * Save current CPU's FPU state. Must be called at IPL_HIGH. | | 517 | * Save current CPU's FPU state. Must be called at IPL_HIGH. |
545 | */ | | 518 | */ |
546 | void | | 519 | void |
547 | fpusave_cpu(bool save) | | 520 | fpusave_cpu(bool save) |
548 | { | | 521 | { |
549 | struct cpu_info *ci; | | 522 | struct cpu_info *ci; |
550 | struct pcb *pcb; | | 523 | struct pcb *pcb; |
551 | struct lwp *l; | | 524 | struct lwp *l; |
552 | | | 525 | |
553 | KASSERT(curcpu()->ci_ilevel == IPL_HIGH); | | 526 | KASSERT(curcpu()->ci_ilevel == IPL_HIGH); |
554 | | | 527 | |
555 | ci = curcpu(); | | 528 | ci = curcpu(); |
556 | l = ci->ci_fpcurlwp; | | 529 | l = ci->ci_fpcurlwp; |
557 | if (l == NULL) { | | 530 | if (l == NULL) { |
558 | return; | | 531 | return; |
559 | } | | 532 | } |
560 | pcb = lwp_getpcb(l); | | 533 | pcb = lwp_getpcb(l); |
561 | | | 534 | |
562 | if (save) { | | 535 | if (save) { |
563 | clts(); | | 536 | fpu_area_save(&pcb->pcb_savefpu); |
564 | fpu_save(l); | | | |
565 | } | | 537 | } |
566 | | | 538 | |
567 | stts(); | | 539 | stts(); |
568 | pcb->pcb_fpcpu = NULL; | | 540 | pcb->pcb_fpcpu = NULL; |
569 | ci->ci_fpcurlwp = NULL; | | 541 | ci->ci_fpcurlwp = NULL; |
570 | } | | 542 | } |
571 | | | 543 | |
572 | /* | | 544 | /* |
573 | * Save l's FPU state, which may be on this processor or another processor. | | 545 | * Save l's FPU state, which may be on this processor or another processor. |
574 | * It may take some time, so we avoid disabling preemption where possible. | | 546 | * It may take some time, so we avoid disabling preemption where possible. |
575 | * Caller must know that the target LWP is stopped, otherwise this routine | | 547 | * Caller must know that the target LWP is stopped, otherwise this routine |
576 | * may race against it. | | 548 | * may race against it. |
577 | */ | | 549 | */ |
| @@ -609,27 +581,27 @@ fpusave_lwp(struct lwp *l, bool save) | | | @@ -609,27 +581,27 @@ fpusave_lwp(struct lwp *l, bool save) |
609 | while (pcb->pcb_fpcpu == oci && ticks == hardclock_ticks) { | | 581 | while (pcb->pcb_fpcpu == oci && ticks == hardclock_ticks) { |
610 | x86_pause(); | | 582 | x86_pause(); |
611 | spins++; | | 583 | spins++; |
612 | } | | 584 | } |
613 | if (spins > 100000000) { | | 585 | if (spins > 100000000) { |
614 | panic("fpusave_lwp: did not"); | | 586 | panic("fpusave_lwp: did not"); |
615 | } | | 587 | } |
616 | } | | 588 | } |
617 | } | | 589 | } |
618 | | | 590 | |
619 | void | | 591 | void |
620 | fpu_set_default_cw(struct lwp *l, unsigned int x87_cw) | | 592 | fpu_set_default_cw(struct lwp *l, unsigned int x87_cw) |
621 | { | | 593 | { |
622 | union savefpu *fpu_save = process_fpframe(l); | | 594 | union savefpu *fpu_save = lwp_fpuarea(l); |
623 | struct pcb *pcb = lwp_getpcb(l); | | 595 | struct pcb *pcb = lwp_getpcb(l); |
624 | | | 596 | |
625 | if (i386_use_fxsave) { | | 597 | if (i386_use_fxsave) { |
626 | fpu_save->sv_xmm.fx_cw = x87_cw; | | 598 | fpu_save->sv_xmm.fx_cw = x87_cw; |
627 | | | 599 | |
628 | /* Force a reload of CW */ | | 600 | /* Force a reload of CW */ |
629 | if ((x87_cw != __INITIAL_NPXCW__) && | | 601 | if ((x87_cw != __INITIAL_NPXCW__) && |
630 | (x86_fpu_save == FPU_SAVE_XSAVE || | | 602 | (x86_fpu_save == FPU_SAVE_XSAVE || |
631 | x86_fpu_save == FPU_SAVE_XSAVEOPT)) { | | 603 | x86_fpu_save == FPU_SAVE_XSAVEOPT)) { |
632 | fpu_save->sv_xsave_hdr.xsh_xstate_bv |= | | 604 | fpu_save->sv_xsave_hdr.xsh_xstate_bv |= |
633 | XCR0_X87; | | 605 | XCR0_X87; |
634 | } | | 606 | } |
635 | } else { | | 607 | } else { |
| @@ -637,27 +609,27 @@ fpu_set_default_cw(struct lwp *l, unsign | | | @@ -637,27 +609,27 @@ fpu_set_default_cw(struct lwp *l, unsign |
637 | } | | 609 | } |
638 | pcb->pcb_fpu_dflt_cw = x87_cw; | | 610 | pcb->pcb_fpu_dflt_cw = x87_cw; |
639 | } | | 611 | } |
640 | | | 612 | |
641 | void | | 613 | void |
642 | fpu_save_area_clear(struct lwp *l, unsigned int x87_cw) | | 614 | fpu_save_area_clear(struct lwp *l, unsigned int x87_cw) |
643 | { | | 615 | { |
644 | union savefpu *fpu_save; | | 616 | union savefpu *fpu_save; |
645 | struct pcb *pcb; | | 617 | struct pcb *pcb; |
646 | int s; | | 618 | int s; |
647 | | | 619 | |
648 | KASSERT(l == curlwp); | | 620 | KASSERT(l == curlwp); |
649 | KASSERT((l->l_flag & LW_SYSTEM) == 0); | | 621 | KASSERT((l->l_flag & LW_SYSTEM) == 0); |
650 | fpu_save = process_fpframe(l); | | 622 | fpu_save = lwp_fpuarea(l); |
651 | pcb = lwp_getpcb(l); | | 623 | pcb = lwp_getpcb(l); |
652 | | | 624 | |
653 | s = splhigh(); | | 625 | s = splhigh(); |
654 | if (x86_fpu_eager) { | | 626 | if (x86_fpu_eager) { |
655 | KASSERT(pcb->pcb_fpcpu == NULL || | | 627 | KASSERT(pcb->pcb_fpcpu == NULL || |
656 | pcb->pcb_fpcpu == curcpu()); | | 628 | pcb->pcb_fpcpu == curcpu()); |
657 | fpusave_cpu(false); | | 629 | fpusave_cpu(false); |
658 | } else { | | 630 | } else { |
659 | splx(s); | | 631 | splx(s); |
660 | fpusave_lwp(l, false); | | 632 | fpusave_lwp(l, false); |
661 | } | | 633 | } |
662 | KASSERT(pcb->pcb_fpcpu == NULL); | | 634 | KASSERT(pcb->pcb_fpcpu == NULL); |
663 | | | 635 | |
| @@ -684,35 +656,35 @@ fpu_save_area_clear(struct lwp *l, unsig | | | @@ -684,35 +656,35 @@ fpu_save_area_clear(struct lwp *l, unsig |
684 | * Force a reload of CW if we're using the non-default | | 656 | * Force a reload of CW if we're using the non-default |
685 | * value. | | 657 | * value. |
686 | */ | | 658 | */ |
687 | if (__predict_false(x87_cw != __INITIAL_NPXCW__)) { | | 659 | if (__predict_false(x87_cw != __INITIAL_NPXCW__)) { |
688 | fpu_save->sv_xsave_hdr.xsh_xstate_bv |= | | 660 | fpu_save->sv_xsave_hdr.xsh_xstate_bv |= |
689 | XCR0_X87; | | 661 | XCR0_X87; |
690 | } | | 662 | } |
691 | break; | | 663 | break; |
692 | } | | 664 | } |
693 | | | 665 | |
694 | pcb->pcb_fpu_dflt_cw = x87_cw; | | 666 | pcb->pcb_fpu_dflt_cw = x87_cw; |
695 | | | 667 | |
696 | if (x86_fpu_eager) { | | 668 | if (x86_fpu_eager) { |
697 | fpu_eagerrestore(l); | | 669 | fpu_lwp_install(l); |
698 | splx(s); | | 670 | splx(s); |
699 | } | | 671 | } |
700 | } | | 672 | } |
701 | | | 673 | |
702 | void | | 674 | void |
703 | fpu_save_area_reset(struct lwp *l) | | 675 | fpu_save_area_reset(struct lwp *l) |
704 | { | | 676 | { |
705 | union savefpu *fpu_save = process_fpframe(l); | | 677 | union savefpu *fpu_save = lwp_fpuarea(l); |
706 | struct pcb *pcb = lwp_getpcb(l); | | 678 | struct pcb *pcb = lwp_getpcb(l); |
707 | | | 679 | |
708 | /* | | 680 | /* |
709 | * For signal handlers the register values don't matter. Just reset | | 681 | * For signal handlers the register values don't matter. Just reset |
710 | * a few fields. | | 682 | * a few fields. |
711 | */ | | 683 | */ |
712 | if (i386_use_fxsave) { | | 684 | if (i386_use_fxsave) { |
713 | fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__; | | 685 | fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__; |
714 | fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask; | | 686 | fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask; |
715 | fpu_save->sv_xmm.fx_tw = 0; | | 687 | fpu_save->sv_xmm.fx_tw = 0; |
716 | fpu_save->sv_xmm.fx_cw = pcb->pcb_fpu_dflt_cw; | | 688 | fpu_save->sv_xmm.fx_cw = pcb->pcb_fpu_dflt_cw; |
717 | } else { | | 689 | } else { |
718 | fpu_save->sv_87.s87_tw = 0xffff; | | 690 | fpu_save->sv_87.s87_tw = 0xffff; |
| @@ -847,27 +819,27 @@ process_s87_to_xmm(const struct save87 * | | | @@ -847,27 +819,27 @@ process_s87_to_xmm(const struct save87 * |
847 | /* FP registers (in stack order) */ | | 819 | /* FP registers (in stack order) */ |
848 | fx_reg = sxmm->fx_87_ac; | | 820 | fx_reg = sxmm->fx_87_ac; |
849 | s87_reg = s87->s87_ac; | | 821 | s87_reg = s87->s87_ac; |
850 | for (i = 0; i < 8; fx_reg++, s87_reg++, i++) | | 822 | for (i = 0; i < 8; fx_reg++, s87_reg++, i++) |
851 | fx_reg->r = *s87_reg; | | 823 | fx_reg->r = *s87_reg; |
852 | } | | 824 | } |
853 | | | 825 | |
854 | void | | 826 | void |
855 | process_write_fpregs_xmm(struct lwp *l, const struct fxsave *fpregs) | | 827 | process_write_fpregs_xmm(struct lwp *l, const struct fxsave *fpregs) |
856 | { | | 828 | { |
857 | union savefpu *fpu_save; | | 829 | union savefpu *fpu_save; |
858 | | | 830 | |
859 | fpusave_lwp(l, false); | | 831 | fpusave_lwp(l, false); |
860 | fpu_save = process_fpframe(l); | | 832 | fpu_save = lwp_fpuarea(l); |
861 | | | 833 | |
862 | if (i386_use_fxsave) { | | 834 | if (i386_use_fxsave) { |
863 | memcpy(&fpu_save->sv_xmm, fpregs, sizeof(fpu_save->sv_xmm)); | | 835 | memcpy(&fpu_save->sv_xmm, fpregs, sizeof(fpu_save->sv_xmm)); |
864 | | | 836 | |
865 | /* | | 837 | /* |
866 | * Invalid bits in mxcsr or mxcsr_mask will cause faults. | | 838 | * Invalid bits in mxcsr or mxcsr_mask will cause faults. |
867 | */ | | 839 | */ |
868 | fpu_save->sv_xmm.fx_mxcsr_mask &= x86_fpu_mxcsr_mask; | | 840 | fpu_save->sv_xmm.fx_mxcsr_mask &= x86_fpu_mxcsr_mask; |
869 | fpu_save->sv_xmm.fx_mxcsr &= fpu_save->sv_xmm.fx_mxcsr_mask; | | 841 | fpu_save->sv_xmm.fx_mxcsr &= fpu_save->sv_xmm.fx_mxcsr_mask; |
870 | | | 842 | |
871 | /* | | 843 | /* |
872 | * Make sure the x87 and SSE bits are set in xstate_bv. | | 844 | * Make sure the x87 and SSE bits are set in xstate_bv. |
873 | * Otherwise xrstor will not restore them. | | 845 | * Otherwise xrstor will not restore them. |
| @@ -880,68 +852,68 @@ process_write_fpregs_xmm(struct lwp *l, | | | @@ -880,68 +852,68 @@ process_write_fpregs_xmm(struct lwp *l, |
880 | } else { | | 852 | } else { |
881 | process_xmm_to_s87(fpregs, &fpu_save->sv_87); | | 853 | process_xmm_to_s87(fpregs, &fpu_save->sv_87); |
882 | } | | 854 | } |
883 | } | | 855 | } |
884 | | | 856 | |
885 | void | | 857 | void |
886 | process_write_fpregs_s87(struct lwp *l, const struct save87 *fpregs) | | 858 | process_write_fpregs_s87(struct lwp *l, const struct save87 *fpregs) |
887 | { | | 859 | { |
888 | union savefpu *fpu_save; | | 860 | union savefpu *fpu_save; |
889 | | | 861 | |
890 | if (i386_use_fxsave) { | | 862 | if (i386_use_fxsave) { |
891 | /* Save so we don't lose the xmm registers */ | | 863 | /* Save so we don't lose the xmm registers */ |
892 | fpusave_lwp(l, true); | | 864 | fpusave_lwp(l, true); |
893 | fpu_save = process_fpframe(l); | | 865 | fpu_save = lwp_fpuarea(l); |
894 | process_s87_to_xmm(fpregs, &fpu_save->sv_xmm); | | 866 | process_s87_to_xmm(fpregs, &fpu_save->sv_xmm); |
895 | | | 867 | |
896 | /* | | 868 | /* |
897 | * Make sure the x87 and SSE bits are set in xstate_bv. | | 869 | * Make sure the x87 and SSE bits are set in xstate_bv. |
898 | * Otherwise xrstor will not restore them. | | 870 | * Otherwise xrstor will not restore them. |
899 | */ | | 871 | */ |
900 | if (x86_fpu_save == FPU_SAVE_XSAVE || | | 872 | if (x86_fpu_save == FPU_SAVE_XSAVE || |
901 | x86_fpu_save == FPU_SAVE_XSAVEOPT) { | | 873 | x86_fpu_save == FPU_SAVE_XSAVEOPT) { |
902 | fpu_save->sv_xsave_hdr.xsh_xstate_bv |= | | 874 | fpu_save->sv_xsave_hdr.xsh_xstate_bv |= |
903 | (XCR0_X87 | XCR0_SSE); | | 875 | (XCR0_X87 | XCR0_SSE); |
904 | } | | 876 | } |
905 | } else { | | 877 | } else { |
906 | fpusave_lwp(l, false); | | 878 | fpusave_lwp(l, false); |
907 | fpu_save = process_fpframe(l); | | 879 | fpu_save = lwp_fpuarea(l); |
908 | memcpy(&fpu_save->sv_87, fpregs, sizeof(fpu_save->sv_87)); | | 880 | memcpy(&fpu_save->sv_87, fpregs, sizeof(fpu_save->sv_87)); |
909 | } | | 881 | } |
910 | } | | 882 | } |
911 | | | 883 | |
912 | void | | 884 | void |
913 | process_read_fpregs_xmm(struct lwp *l, struct fxsave *fpregs) | | 885 | process_read_fpregs_xmm(struct lwp *l, struct fxsave *fpregs) |
914 | { | | 886 | { |
915 | union savefpu *fpu_save; | | 887 | union savefpu *fpu_save; |
916 | | | 888 | |
917 | fpusave_lwp(l, true); | | 889 | fpusave_lwp(l, true); |
918 | fpu_save = process_fpframe(l); | | 890 | fpu_save = lwp_fpuarea(l); |
919 | | | 891 | |
920 | if (i386_use_fxsave) { | | 892 | if (i386_use_fxsave) { |
921 | memcpy(fpregs, &fpu_save->sv_xmm, sizeof(fpu_save->sv_xmm)); | | 893 | memcpy(fpregs, &fpu_save->sv_xmm, sizeof(fpu_save->sv_xmm)); |
922 | } else { | | 894 | } else { |
923 | memset(fpregs, 0, sizeof(*fpregs)); | | 895 | memset(fpregs, 0, sizeof(*fpregs)); |
924 | process_s87_to_xmm(&fpu_save->sv_87, fpregs); | | 896 | process_s87_to_xmm(&fpu_save->sv_87, fpregs); |
925 | } | | 897 | } |
926 | } | | 898 | } |
927 | | | 899 | |
928 | void | | 900 | void |
929 | process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs) | | 901 | process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs) |
930 | { | | 902 | { |
931 | union savefpu *fpu_save; | | 903 | union savefpu *fpu_save; |
932 | | | 904 | |
933 | fpusave_lwp(l, true); | | 905 | fpusave_lwp(l, true); |
934 | fpu_save = process_fpframe(l); | | 906 | fpu_save = lwp_fpuarea(l); |
935 | | | 907 | |
936 | if (i386_use_fxsave) { | | 908 | if (i386_use_fxsave) { |
937 | memset(fpregs, 0, sizeof(*fpregs)); | | 909 | memset(fpregs, 0, sizeof(*fpregs)); |
938 | process_xmm_to_s87(&fpu_save->sv_xmm, fpregs); | | 910 | process_xmm_to_s87(&fpu_save->sv_xmm, fpregs); |
939 | } else { | | 911 | } else { |
940 | memcpy(fpregs, &fpu_save->sv_87, sizeof(fpu_save->sv_87)); | | 912 | memcpy(fpregs, &fpu_save->sv_87, sizeof(fpu_save->sv_87)); |
941 | } | | 913 | } |
942 | } | | 914 | } |
943 | | | 915 | |
944 | /* -------------------------------------------------------------------------- */ | | 916 | /* -------------------------------------------------------------------------- */ |
945 | | | 917 | |
946 | static volatile unsigned long eagerfpu_cpu_barrier1 __cacheline_aligned; | | 918 | static volatile unsigned long eagerfpu_cpu_barrier1 __cacheline_aligned; |
947 | static volatile unsigned long eagerfpu_cpu_barrier2 __cacheline_aligned; | | 919 | static volatile unsigned long eagerfpu_cpu_barrier2 __cacheline_aligned; |