| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | /* $NetBSD: fpu.c,v 1.83 2023/02/25 18:28:57 riastradh Exp $ */ | | 1 | /* $NetBSD: fpu.c,v 1.84 2023/03/03 14:32:38 riastradh Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc. All | | 4 | * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc. All |
5 | * rights reserved. | | 5 | * rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software developed for The NetBSD Foundation | | 7 | * This code is derived from software developed for The NetBSD Foundation |
8 | * by Andrew Doran and Maxime Villard. | | 8 | * by Andrew Doran and Maxime Villard. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
| @@ -86,58 +86,55 @@ | | | @@ -86,58 +86,55 @@ |
86 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | | 86 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
87 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 87 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
88 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 88 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
89 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 89 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
90 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 90 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
91 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 91 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
92 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 92 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
93 | * SUCH DAMAGE. | | 93 | * SUCH DAMAGE. |
94 | * | | 94 | * |
95 | * @(#)npx.c 7.2 (Berkeley) 5/12/91 | | 95 | * @(#)npx.c 7.2 (Berkeley) 5/12/91 |
96 | */ | | 96 | */ |
97 | | | 97 | |
98 | #include <sys/cdefs.h> | | 98 | #include <sys/cdefs.h> |
99 | __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.83 2023/02/25 18:28:57 riastradh Exp $"); | | 99 | __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.84 2023/03/03 14:32:38 riastradh Exp $"); |
100 | | | 100 | |
101 | #include "opt_multiprocessor.h" | | 101 | #include "opt_multiprocessor.h" |
102 | | | 102 | |
103 | #include <sys/param.h> | | 103 | #include <sys/param.h> |
104 | #include <sys/types.h> | | 104 | #include <sys/systm.h> |
105 | | | | |
106 | #include <sys/conf.h> | | 105 | #include <sys/conf.h> |
107 | #include <sys/cpu.h> | | 106 | #include <sys/cpu.h> |
108 | #include <sys/file.h> | | 107 | #include <sys/file.h> |
| | | 108 | #include <sys/proc.h> |
109 | #include <sys/kernel.h> | | 109 | #include <sys/kernel.h> |
110 | #include <sys/kthread.h> | | 110 | #include <sys/kthread.h> |
111 | #include <sys/proc.h> | | | |
112 | #include <sys/sysctl.h> | | 111 | #include <sys/sysctl.h> |
113 | #include <sys/systm.h> | | | |
114 | #include <sys/xcall.h> | | 112 | #include <sys/xcall.h> |
115 | | | 113 | |
116 | #include <machine/cpu.h> | | 114 | #include <machine/cpu.h> |
117 | #include <machine/cpufunc.h> | | | |
118 | #include <machine/cputypes.h> | | | |
119 | #include <machine/cpuvar.h> | | 115 | #include <machine/cpuvar.h> |
| | | 116 | #include <machine/cputypes.h> |
120 | #include <machine/intr.h> | | 117 | #include <machine/intr.h> |
| | | 118 | #include <machine/cpufunc.h> |
121 | #include <machine/pcb.h> | | 119 | #include <machine/pcb.h> |
122 | #include <machine/specialreg.h> | | | |
123 | #include <machine/trap.h> | | 120 | #include <machine/trap.h> |
124 | | | 121 | #include <machine/specialreg.h> |
125 | #include <x86/cpu.h> | | 122 | #include <x86/cpu.h> |
126 | #include <x86/fpu.h> | | 123 | #include <x86/fpu.h> |
127 | | | 124 | |
128 | #ifdef XENPV | | 125 | #ifdef XENPV |
129 | #define clts() HYPERVISOR_fpu_taskswitch(0) | | 126 | #define clts() HYPERVISOR_fpu_taskswitch(0) |
130 | #define stts() HYPERVISOR_fpu_taskswitch(1) | | 127 | #define stts() HYPERVISOR_fpu_taskswitch(1) |
131 | #endif | | 128 | #endif |
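
Note on the XENPV block above: a paravirtualized guest cannot toggle CR0_TS itself, so both operations become HYPERVISOR_fpu_taskswitch hypercalls. Natively they reduce to CR0 manipulation; a minimal sketch of the usual non-Xen equivalents, assuming the rcr0()/lcr0() accessors from <machine/cpufunc.h>:

        static inline void
        native_clts(void)
        {
                /* CLTS clears CR0_TS; FPU instructions stop trapping */
                __asm volatile("clts");
        }

        static inline void
        native_stts(void)
        {
                /* set CR0_TS so the next FPU instruction raises #NM */
                lcr0(rcr0() | CR0_TS);
        }
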
132 | | | 129 | |
133 | void fpu_handle_deferred(void); | | 130 | void fpu_handle_deferred(void); |
134 | void fpu_switch(struct lwp *, struct lwp *); | | 131 | void fpu_switch(struct lwp *, struct lwp *); |
135 | | | 132 | |
136 | uint32_t x86_fpu_mxcsr_mask __read_mostly = 0; | | 133 | uint32_t x86_fpu_mxcsr_mask __read_mostly = 0; |
137 | | | 134 | |
138 | /* | | 135 | /* |
139 | * True if this is a thread that is allowed to use the FPU -- either a | | 136 | * True if this is a thread that is allowed to use the FPU -- either a |
140 | * user thread, or a system thread with LW_SYSTEM_FPU enabled. | | 137 | * user thread, or a system thread with LW_SYSTEM_FPU enabled. |
141 | */ | | 138 | */ |
142 | static inline bool | | 139 | static inline bool |
143 | lwp_can_haz_fpu(struct lwp *l) | | 140 | lwp_can_haz_fpu(struct lwp *l) |
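
The hunk ends before the body of lwp_can_haz_fpu(), but the comment above pins down the predicate: user threads always qualify, system threads only when LW_SYSTEM_FPU is set. A sketch of a body matching that description, assuming the conventional LW_SYSTEM/LW_SYSTEM_FPU flags in l->l_flag:

        static inline bool
        lwp_can_haz_fpu(struct lwp *l)
        {
                /* user thread (no LW_SYSTEM), or a system thread that
                 * has explicitly opted in with LW_SYSTEM_FPU */
                return (l->l_flag & (LW_SYSTEM | LW_SYSTEM_FPU)) != LW_SYSTEM;
        }
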
| @@ -172,48 +169,45 @@ fpu_lwp_area(struct lwp *l) | | | @@ -172,48 +169,45 @@ fpu_lwp_area(struct lwp *l) |
172 | return area; | | 169 | return area; |
173 | } | | 170 | } |
174 | | | 171 | |
175 | static inline void | | 172 | static inline void |
176 | fpu_save_lwp(struct lwp *l) | | 173 | fpu_save_lwp(struct lwp *l) |
177 | { | | 174 | { |
178 | struct pcb *pcb = lwp_getpcb(l); | | 175 | struct pcb *pcb = lwp_getpcb(l); |
179 | union savefpu *area = &pcb->pcb_savefpu; | | 176 | union savefpu *area = &pcb->pcb_savefpu; |
180 | int s; | | 177 | int s; |
181 | | | 178 | |
182 | s = splvm(); | | 179 | s = splvm(); |
183 | if (l->l_md.md_flags & MDL_FPU_IN_CPU) { | | 180 | if (l->l_md.md_flags & MDL_FPU_IN_CPU) { |
184 | KASSERT(lwp_can_haz_fpu(l)); | | 181 | KASSERT(lwp_can_haz_fpu(l)); |
185 | fpu_area_save(area, x86_xsave_features, | | 182 | fpu_area_save(area, x86_xsave_features, !(l->l_proc->p_flag & PK_32)); |
186 | !(l->l_proc->p_flag & PK_32)); | | | |
187 | l->l_md.md_flags &= ~MDL_FPU_IN_CPU; | | 183 | l->l_md.md_flags &= ~MDL_FPU_IN_CPU; |
188 | } | | 184 | } |
189 | splx(s); | | 185 | splx(s); |
190 | } | | 186 | } |
191 | | | 187 | |
192 | /* | | 188 | /* |
193 | * Bring curlwp's FPU state into memory. It will get installed back in the CPU | | 189 | * Bring curlwp's FPU state into memory. It will get installed back in the CPU |
194 | * when returning to userland. | | 190 | * when returning to userland. |
195 | */ | | 191 | */ |
196 | void | | 192 | void |
197 | fpu_save(void) | | 193 | fpu_save(void) |
198 | { | | 194 | { |
199 | | | | |
200 | fpu_save_lwp(curlwp); | | 195 | fpu_save_lwp(curlwp); |
201 | } | | 196 | } |
202 | | | 197 | |
203 | void | | 198 | void |
204 | fpuinit(struct cpu_info *ci) | | 199 | fpuinit(struct cpu_info *ci) |
205 | { | | 200 | { |
206 | | | | |
207 | /* | | 201 | /* |
208 | * This might not be strictly necessary since it will be initialized | | 202 | * This might not be strictly necessary since it will be initialized |
209 | * for each process. However, it does no harm. | | 203 | * for each process. However, it does no harm. |
210 | */ | | 204 | */ |
211 | clts(); | | 205 | clts(); |
212 | fninit(); | | 206 | fninit(); |
213 | stts(); | | 207 | stts(); |
214 | } | | 208 | } |
215 | | | 209 | |
216 | void | | 210 | void |
217 | fpuinit_mxcsr_mask(void) | | 211 | fpuinit_mxcsr_mask(void) |
218 | { | | 212 | { |
219 | #ifndef XENPV | | 213 | #ifndef XENPV |
| @@ -274,74 +268,71 @@ fpu_errata_amd(void) | | | @@ -274,74 +268,71 @@ fpu_errata_amd(void) |
274 | fnclex(); | | 268 | fnclex(); |
275 | fldummy(); | | 269 | fldummy(); |
276 | } | | 270 | } |
277 | | | 271 | |
278 | #ifdef __x86_64__ | | 272 | #ifdef __x86_64__ |
279 | #define XS64(x) (is_64bit ? x##64 : x) | | 273 | #define XS64(x) (is_64bit ? x##64 : x) |
280 | #else | | 274 | #else |
281 | #define XS64(x) x | | 275 | #define XS64(x) x |
282 | #endif | | 276 | #endif |
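
A worked expansion of the XS64 selector may help: on __x86_64__, XS64(fxsave)(area) becomes

        (is_64bit ? fxsave64 : fxsave)(area);

so a native 64-bit lwp gets FXSAVE64 (which records full 64-bit x87 instruction/operand pointers) while a PK_32 lwp gets plain FXSAVE, matching the !(l->l_proc->p_flag & PK_32) argument the callers pass for is_64bit.
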
283 | | | 277 | |
284 | void | | 278 | void |
285 | fpu_area_save(void *area, uint64_t xsave_features, bool is_64bit) | | 279 | fpu_area_save(void *area, uint64_t xsave_features, bool is_64bit) |
286 | { | | 280 | { |
287 | | | | |
288 | switch (x86_fpu_save) { | | 281 | switch (x86_fpu_save) { |
289 | case FPU_SAVE_FSAVE: | | 282 | case FPU_SAVE_FSAVE: |
290 | fnsave(area); | | 283 | fnsave(area); |
291 | break; | | 284 | break; |
292 | case FPU_SAVE_FXSAVE: | | 285 | case FPU_SAVE_FXSAVE: |
293 | XS64(fxsave)(area); | | 286 | XS64(fxsave)(area); |
294 | break; | | 287 | break; |
295 | case FPU_SAVE_XSAVE: | | 288 | case FPU_SAVE_XSAVE: |
296 | XS64(xsave)(area, xsave_features); | | 289 | XS64(xsave)(area, xsave_features); |
297 | break; | | 290 | break; |
298 | case FPU_SAVE_XSAVEOPT: | | 291 | case FPU_SAVE_XSAVEOPT: |
299 | XS64(xsaveopt)(area, xsave_features); | | 292 | XS64(xsaveopt)(area, xsave_features); |
300 | break; | | 293 | break; |
301 | } | | 294 | } |
302 | | | 295 | |
303 | stts(); | | 296 | stts(); |
304 | } | | 297 | } |
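
The XSAVE-family instructions used in this switch take the requested-feature bitmap in EDX:EAX. The kernel's real wrappers live elsewhere; this is only an illustrative sketch of their shape, not the actual NetBSD implementation:

        static inline void
        xsave_sketch(void *addr, uint64_t mask)
        {
                uint32_t lo = mask;             /* EAX <- mask[31:0] */
                uint32_t hi = mask >> 32;       /* EDX <- mask[63:32] */

                __asm volatile("xsave %0"
                    : "=m" (*(uint8_t *)addr)
                    : "a" (lo), "d" (hi)
                    : "memory");
        }

Note also the unconditional stts() at the end of fpu_area_save: once the state is saved, CR0_TS is set again so any stray FPU use traps instead of silently diverging from the saved copy.
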
305 | | | 298 | |
306 | void | | 299 | void |
307 | fpu_area_restore(const void *area, uint64_t xsave_features, bool is_64bit) | | 300 | fpu_area_restore(const void *area, uint64_t xsave_features, bool is_64bit) |
308 | { | | 301 | { |
309 | | | | |
310 | clts(); | | 302 | clts(); |
311 | | | 303 | |
312 | switch (x86_fpu_save) { | | 304 | switch (x86_fpu_save) { |
313 | case FPU_SAVE_FSAVE: | | 305 | case FPU_SAVE_FSAVE: |
314 | frstor(area); | | 306 | frstor(area); |
315 | break; | | 307 | break; |
316 | case FPU_SAVE_FXSAVE: | | 308 | case FPU_SAVE_FXSAVE: |
317 | if (cpu_vendor == CPUVENDOR_AMD) | | 309 | if (cpu_vendor == CPUVENDOR_AMD) |
318 | fpu_errata_amd(); | | 310 | fpu_errata_amd(); |
319 | XS64(fxrstor)(area); | | 311 | XS64(fxrstor)(area); |
320 | break; | | 312 | break; |
321 | case FPU_SAVE_XSAVE: | | 313 | case FPU_SAVE_XSAVE: |
322 | case FPU_SAVE_XSAVEOPT: | | 314 | case FPU_SAVE_XSAVEOPT: |
323 | if (cpu_vendor == CPUVENDOR_AMD) | | 315 | if (cpu_vendor == CPUVENDOR_AMD) |
324 | fpu_errata_amd(); | | 316 | fpu_errata_amd(); |
325 | XS64(xrstor)(area, xsave_features); | | 317 | XS64(xrstor)(area, xsave_features); |
326 | break; | | 318 | break; |
327 | } | | 319 | } |
328 | } | | 320 | } |
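
The fpu_errata_amd() calls guard a well-known AMD quirk: FXSAVE/FXRSTOR (and the x87 portion of XSAVE/XRSTOR) do not save or restore the x87 last-instruction/operand pointers unless an unmasked exception is pending, so stale pointers could leak between processes. The workaround at the top of this hunk follows the usual pattern:

        fnclex();       /* discard any pending x87 exceptions ... */
        fldummy();      /* ... then clobber FIP/FDP with a harmless
                         * dummy load before restoring */
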
329 | | | 321 | |
330 | void | | 322 | void |
331 | fpu_handle_deferred(void) | | 323 | fpu_handle_deferred(void) |
332 | { | | 324 | { |
333 | struct pcb *pcb = lwp_getpcb(curlwp); | | 325 | struct pcb *pcb = lwp_getpcb(curlwp); |
334 | | | | |
335 | fpu_area_restore(&pcb->pcb_savefpu, x86_xsave_features, | | 326 | fpu_area_restore(&pcb->pcb_savefpu, x86_xsave_features, |
336 | !(curlwp->l_proc->p_flag & PK_32)); | | 327 | !(curlwp->l_proc->p_flag & PK_32)); |
337 | } | | 328 | } |
338 | | | 329 | |
339 | void | | 330 | void |
340 | fpu_switch(struct lwp *oldlwp, struct lwp *newlwp) | | 331 | fpu_switch(struct lwp *oldlwp, struct lwp *newlwp) |
341 | { | | 332 | { |
342 | struct cpu_info *ci __diagused = curcpu(); | | 333 | struct cpu_info *ci __diagused = curcpu(); |
343 | struct pcb *pcb; | | 334 | struct pcb *pcb; |
344 | | | 335 | |
345 | KASSERTMSG(ci->ci_ilevel >= IPL_SCHED, "cpu%d ilevel=%d", | | 336 | KASSERTMSG(ci->ci_ilevel >= IPL_SCHED, "cpu%d ilevel=%d", |
346 | cpu_index(ci), ci->ci_ilevel); | | 337 | cpu_index(ci), ci->ci_ilevel); |
347 | | | 338 | |
| @@ -378,27 +369,27 @@ fpu_lwp_fork(struct lwp *l1, struct lwp | | | @@ -378,27 +369,27 @@ fpu_lwp_fork(struct lwp *l1, struct lwp |
378 | | | 369 | |
379 | void | | 370 | void |
380 | fpu_lwp_abandon(struct lwp *l) | | 371 | fpu_lwp_abandon(struct lwp *l) |
381 | { | | 372 | { |
382 | int s; | | 373 | int s; |
383 | | | 374 | |
384 | KASSERT(l == curlwp); | | 375 | KASSERT(l == curlwp); |
385 | s = splvm(); | | 376 | s = splvm(); |
386 | l->l_md.md_flags &= ~MDL_FPU_IN_CPU; | | 377 | l->l_md.md_flags &= ~MDL_FPU_IN_CPU; |
387 | stts(); | | 378 | stts(); |
388 | splx(s); | | 379 | splx(s); |
389 | } | | 380 | } |
390 | | | 381 | |
391 | /* ------------------------------------------------------------------------- */ | | 382 | /* -------------------------------------------------------------------------- */ |
392 | | | 383 | |
393 | static const union savefpu safe_fpu __aligned(64) = { | | 384 | static const union savefpu safe_fpu __aligned(64) = { |
394 | .sv_xmm = { | | 385 | .sv_xmm = { |
395 | .fx_mxcsr = __SAFE_MXCSR__, | | 386 | .fx_mxcsr = __SAFE_MXCSR__, |
396 | }, | | 387 | }, |
397 | }; | | 388 | }; |
398 | static const union savefpu zero_fpu __aligned(64); | | 389 | static const union savefpu zero_fpu __aligned(64); |
399 | | | 390 | |
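
safe_fpu is all zeroes except for MXCSR: an MXCSR of 0 would unmask every SSE exception, so kernel FPU users could fault on the first inexact result. Presumably __SAFE_MXCSR__ is the power-on default 0x1f80 (all six exception-mask bits set, round-to-nearest); a hypothetical compile-time check of that assumption:

        /* hypothetical: all SSE exception mask bits (MXCSR[12:7]) set */
        CTASSERT((__SAFE_MXCSR__ & 0x1f80) == 0x1f80);
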
400 | /* | | 391 | /* |
401 | * fpu_kern_enter() | | 392 | * fpu_kern_enter() |
402 | * | | 393 | * |
403 | * Begin using the FPU. Raises to splvm, disabling most | | 394 | * Begin using the FPU. Raises to splvm, disabling most |
404 | * interrupts and rendering the thread non-preemptible; caller | | 395 | * interrupts and rendering the thread non-preemptible; caller |
| @@ -427,28 +418,28 @@ fpu_kern_enter(void) | | | @@ -427,28 +418,28 @@ fpu_kern_enter(void) |
427 | #if 0 | | 418 | #if 0 |
428 | /* | | 419 | /* |
429 | * Can't assert this because if the caller holds a spin lock at | | 420 | * Can't assert this because if the caller holds a spin lock at |
430 | * IPL_VM, and previously held and released a spin lock at | | 421 | * IPL_VM, and previously held and released a spin lock at |
431 | * higher IPL, the IPL remains raised above IPL_VM. | | 422 | * higher IPL, the IPL remains raised above IPL_VM. |
432 | */ | | 423 | */ |
433 | KASSERTMSG(ci->ci_ilevel <= IPL_VM || cold, "ilevel=%d", | | 424 | KASSERTMSG(ci->ci_ilevel <= IPL_VM || cold, "ilevel=%d", |
434 | ci->ci_ilevel); | | 425 | ci->ci_ilevel); |
435 | #endif | | 426 | #endif |
436 | KASSERT(ci->ci_kfpu_spl == -1); | | 427 | KASSERT(ci->ci_kfpu_spl == -1); |
437 | ci->ci_kfpu_spl = s; | | 428 | ci->ci_kfpu_spl = s; |
438 | | | 429 | |
439 | /* | | 430 | /* |
440 | * If we are in a softint and have a pinned lwp, the fpu state | | 431 | * If we are in a softint and have a pinned lwp, the fpu state is that |
441 | * is that of the pinned lwp, so save it there. | | 432 | * of the pinned lwp, so save it there. |
442 | */ | | 433 | */ |
443 | while ((l->l_pflag & LP_INTR) && (l->l_switchto != NULL)) | | 434 | while ((l->l_pflag & LP_INTR) && (l->l_switchto != NULL)) |
444 | l = l->l_switchto; | | 435 | l = l->l_switchto; |
445 | fpu_save_lwp(l); | | 436 | fpu_save_lwp(l); |
446 | | | 437 | |
447 | /* | | 438 | /* |
448 | * Clear CR0_TS, which fpu_save_lwp set if it saved anything -- | | 439 | * Clear CR0_TS, which fpu_save_lwp set if it saved anything -- |
449 | * otherwise the CPU will trap if we try to use the FPU under | | 440 | * otherwise the CPU will trap if we try to use the FPU under |
450 | * the false impression that there has been a task switch since | | 441 | * the false impression that there has been a task switch since |
451 | * the last FPU usage requiring that we save the FPU state. | | 442 | * the last FPU usage requiring that we save the FPU state. |
452 | */ | | 443 | */ |
453 | clts(); | | 444 | clts(); |
454 | | | 445 | |
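
For context, the caller-side contract is a plain bracket (usage sketch; fpu_kern_leave() is the counterpart and is not part of this hunk, and the callee name is hypothetical):

        fpu_kern_enter();
        /* non-preemptible, most interrupts blocked: safe to use
         * x87/SSE/AVX registers here */
        vector_crypto_round(dst, src, len);     /* hypothetical callee */
        fpu_kern_leave();
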
| @@ -512,27 +503,27 @@ kthread_fpu_enter_md(void) | | | @@ -512,27 +503,27 @@ kthread_fpu_enter_md(void) |
512 | clts(); | | 503 | clts(); |
513 | fpu_area_restore(&safe_fpu, x86_xsave_features, /*is_64bit*/false); | | 504 | fpu_area_restore(&safe_fpu, x86_xsave_features, /*is_64bit*/false); |
514 | } | | 505 | } |
515 | | | 506 | |
516 | void | | 507 | void |
517 | kthread_fpu_exit_md(void) | | 508 | kthread_fpu_exit_md(void) |
518 | { | | 509 | { |
519 | | | 510 | |
520 | /* Zero the FPU state and disable the FPU by setting CR0_TS. */ | | 511 | /* Zero the FPU state and disable the FPU by setting CR0_TS. */ |
521 | fpu_area_restore(&zero_fpu, x86_xsave_features, /*is_64bit*/false); | | 512 | fpu_area_restore(&zero_fpu, x86_xsave_features, /*is_64bit*/false); |
522 | stts(); | | 513 | stts(); |
523 | } | | 514 | } |
524 | | | 515 | |
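
These two functions are the machine-dependent halves of the MI kthread FPU interface, which lets a kernel thread keep FPU access across sleeps instead of bracketing every use with fpu_kern_enter()/fpu_kern_leave(). A usage sketch, assuming the kthread_fpu_enter(9)/kthread_fpu_exit(9) wrappers that call these hooks:

        int s;

        s = kthread_fpu_enter();        /* installs safe_fpu via the
                                         * _md hook above */
        /* FPU usable throughout, even across blocking operations */
        kthread_fpu_exit(s);            /* zeroes state, sets CR0_TS */
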
525 | /* ------------------------------------------------------------------------- */ | | 516 | /* -------------------------------------------------------------------------- */ |
526 | | | 517 | |
527 | /* | | 518 | /* |
528 | * The following table is used to ensure that the FPE_... value | | 519 | * The following table is used to ensure that the FPE_... value |
529 | * that is passed as a trapcode to the signal handler of the user | | 520 | * that is passed as a trapcode to the signal handler of the user |
530 | * process does not have more than one bit set. | | 521 | * process does not have more than one bit set. |
531 | * | | 522 | * |
532 | * Multiple bits may be set if SSE SIMD instructions generate errors | | 523 | * Multiple bits may be set if SSE SIMD instructions generate errors |
533 | * on more than one value or if the user process modifies the control | | 524 | * on more than one value or if the user process modifies the control |
534 | * word while a status word bit is already set (which is a sign | | 525 | * word while a status word bit is already set (which is a sign |
535 | * of bad coding). | | 526 | * of bad coding). |
536 | * We have no choice but to narrow them down to one bit, since we must | | 527 | * We have no choice but to narrow them down to one bit, since we must |
537 | * not send a trapcode that is not exactly one of the FPE_ macros. | | 528 | * not send a trapcode that is not exactly one of the FPE_ macros. |
538 | * | | 529 | * |
| @@ -666,27 +657,26 @@ fputrap(struct trapframe *frame) | | | @@ -666,27 +657,26 @@ fputrap(struct trapframe *frame) |
666 | void | | 657 | void |
667 | fpudna(struct trapframe *frame) | | 658 | fpudna(struct trapframe *frame) |
668 | { | | 659 | { |
669 | panic("fpudna from %s, ip %p, trapframe %p", | | 660 | panic("fpudna from %s, ip %p, trapframe %p", |
670 | USERMODE(frame->tf_cs) ? "userland" : "kernel", | | 661 | USERMODE(frame->tf_cs) ? "userland" : "kernel", |
671 | (void *)X86_TF_RIP(frame), frame); | | 662 | (void *)X86_TF_RIP(frame), frame); |
672 | } | | 663 | } |
673 | | | 664 | |
674 | /* -------------------------------------------------------------------------- */ | | 665 | /* -------------------------------------------------------------------------- */ |
675 | | | 666 | |
676 | static inline void | | 667 | static inline void |
677 | fpu_xstate_reload(union savefpu *fpu_save, uint64_t xstate) | | 668 | fpu_xstate_reload(union savefpu *fpu_save, uint64_t xstate) |
678 | { | | 669 | { |
679 | | | | |
680 | /* | | 670 | /* |
681 | * Force a reload of the given xstate during the next XRSTOR. | | 671 | * Force a reload of the given xstate during the next XRSTOR. |
682 | */ | | 672 | */ |
683 | if (x86_fpu_save >= FPU_SAVE_XSAVE) { | | 673 | if (x86_fpu_save >= FPU_SAVE_XSAVE) { |
684 | fpu_save->sv_xsave_hdr.xsh_xstate_bv |= xstate; | | 674 | fpu_save->sv_xsave_hdr.xsh_xstate_bv |= xstate; |
685 | } | | 675 | } |
686 | } | | 676 | } |
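
XRSTOR consults the save area's XSTATE_BV bitmap: a component whose bit is clear is reset to its init configuration rather than loaded from memory. So after writing a component's contents directly into the save area, its bit must be set or the write would be silently discarded. For example, using the XCR0_YMM_Hi128 constant that appears later in this diff:

        /* after depositing new YMM_Hi128 contents in the save area,
         * make the next XRSTOR load them instead of re-initializing */
        fpu_xstate_reload(fpu_save, XCR0_YMM_Hi128);
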
687 | | | 677 | |
688 | void | | 678 | void |
689 | fpu_set_default_cw(struct lwp *l, unsigned int x87_cw) | | 679 | fpu_set_default_cw(struct lwp *l, unsigned int x87_cw) |
690 | { | | 680 | { |
691 | union savefpu *fpu_save = fpu_lwp_area(l); | | 681 | union savefpu *fpu_save = fpu_lwp_area(l); |
692 | struct pcb *pcb = lwp_getpcb(l); | | 682 | struct pcb *pcb = lwp_getpcb(l); |
| @@ -865,27 +855,26 @@ process_read_xstate(struct lwp *l, struc | | | @@ -865,27 +855,26 @@ process_read_xstate(struct lwp *l, struc |
865 | COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128); | | 855 | COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128); |
866 | COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask); | | 856 | COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask); |
867 | COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256); | | 857 | COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256); |
868 | COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm); | | 858 | COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm); |
869 | | | 859 | |
870 | #undef COPY_COMPONENT | | 860 | #undef COPY_COMPONENT |
871 | | | 861 | |
872 | return 0; | | 862 | return 0; |
873 | } | | 863 | } |
874 | | | 864 | |
875 | int | | 865 | int |
876 | process_verify_xstate(const struct xstate *xstate) | | 866 | process_verify_xstate(const struct xstate *xstate) |
877 | { | | 867 | { |
878 | | | | |
879 | /* xstate_bv must be a subset of RFBM */ | | 868 | /* xstate_bv must be a subset of RFBM */ |
880 | if (xstate->xs_xstate_bv & ~xstate->xs_rfbm) | | 869 | if (xstate->xs_xstate_bv & ~xstate->xs_rfbm) |
881 | return EINVAL; | | 870 | return EINVAL; |
882 | | | 871 | |
883 | switch (x86_fpu_save) { | | 872 | switch (x86_fpu_save) { |
884 | case FPU_SAVE_FSAVE: | | 873 | case FPU_SAVE_FSAVE: |
885 | if ((xstate->xs_rfbm & ~XCR0_X87)) | | 874 | if ((xstate->xs_rfbm & ~XCR0_X87)) |
886 | return EINVAL; | | 875 | return EINVAL; |
887 | break; | | 876 | break; |
888 | case FPU_SAVE_FXSAVE: | | 877 | case FPU_SAVE_FXSAVE: |
889 | if ((xstate->xs_rfbm & ~(XCR0_X87 | XCR0_SSE))) | | 878 | if ((xstate->xs_rfbm & ~(XCR0_X87 | XCR0_SSE))) |
890 | return EINVAL; | | 879 | return EINVAL; |
891 | break; | | 880 | break; |
| @@ -895,30 +884,28 @@ process_verify_xstate(const struct xstat | | | @@ -895,30 +884,28 @@ process_verify_xstate(const struct xstat |
895 | return EINVAL; | | 884 | return EINVAL; |
896 | } | | 885 | } |
897 | | | 886 | |
898 | return 0; | | 887 | return 0; |
899 | } | | 888 | } |
900 | | | 889 | |
901 | int | | 890 | int |
902 | process_write_xstate(struct lwp *l, const struct xstate *xstate) | | 891 | process_write_xstate(struct lwp *l, const struct xstate *xstate) |
903 | { | | 892 | { |
904 | union savefpu *fpu_save = fpu_lwp_area(l); | | 893 | union savefpu *fpu_save = fpu_lwp_area(l); |
905 | | | 894 | |
906 | /* Convert data into legacy FSAVE format. */ | | 895 | /* Convert data into legacy FSAVE format. */ |
907 | if (x86_fpu_save == FPU_SAVE_FSAVE) { | | 896 | if (x86_fpu_save == FPU_SAVE_FSAVE) { |
908 | if (xstate->xs_xstate_bv & XCR0_X87) { | | 897 | if (xstate->xs_xstate_bv & XCR0_X87) |
909 | process_xmm_to_s87(&xstate->xs_fxsave, | | 898 | process_xmm_to_s87(&xstate->xs_fxsave, &fpu_save->sv_87); |
910 | &fpu_save->sv_87); | | | |
911 | } | | | |
912 | return 0; | | 899 | return 0; |
913 | } | | 900 | } |
914 | | | 901 | |
915 | /* If XSAVE is supported, make sure that xstate_bv is set correctly. */ | | 902 | /* If XSAVE is supported, make sure that xstate_bv is set correctly. */ |
916 | if (x86_fpu_save >= FPU_SAVE_XSAVE) { | | 903 | if (x86_fpu_save >= FPU_SAVE_XSAVE) { |
917 | /* | | 904 | /* |
918 | * Bit-wise "xstate->xs_rfbm ? xstate->xs_xstate_bv : | | 905 | * Bit-wise "xstate->xs_rfbm ? xstate->xs_xstate_bv : |
919 | * fpu_save->sv_xsave_hdr.xsh_xstate_bv" | | 906 | * fpu_save->sv_xsave_hdr.xsh_xstate_bv" |
920 | */ | | 907 | */ |
921 | fpu_save->sv_xsave_hdr.xsh_xstate_bv = | | 908 | fpu_save->sv_xsave_hdr.xsh_xstate_bv = |
922 | (fpu_save->sv_xsave_hdr.xsh_xstate_bv & ~xstate->xs_rfbm) | | | 909 | (fpu_save->sv_xsave_hdr.xsh_xstate_bv & ~xstate->xs_rfbm) | |
923 | xstate->xs_xstate_bv; | | 910 | xstate->xs_xstate_bv; |
924 | } | | 911 | } |
| @@ -931,36 +918,35 @@ process_write_xstate(struct lwp *l, cons | | | @@ -931,36 +918,35 @@ process_write_xstate(struct lwp *l, cons |
931 | memcpy(&fpu_save->sv_xmm, &xstate->xs_fxsave, 24); | | 918 | memcpy(&fpu_save->sv_xmm, &xstate->xs_fxsave, 24); |
932 | memcpy(fpu_save->sv_xmm.fx_87_ac, xstate->xs_fxsave.fx_87_ac, | | 919 | memcpy(fpu_save->sv_xmm.fx_87_ac, xstate->xs_fxsave.fx_87_ac, |
933 | sizeof(xstate->xs_fxsave.fx_87_ac)); | | 920 | sizeof(xstate->xs_fxsave.fx_87_ac)); |
934 | } | | 921 | } |
935 | | | 922 | |
936 | /* | | 923 | /* |
937 | * Copy MXCSR if either SSE or AVX state is requested, to match the | | 924 | * Copy MXCSR if either SSE or AVX state is requested, to match the |
938 | * XSAVE behavior for those flags. | | 925 | * XSAVE behavior for those flags. |
939 | */ | | 926 | */ |
940 | if (xstate->xs_xstate_bv & (XCR0_SSE|XCR0_YMM_Hi128)) { | | 927 | if (xstate->xs_xstate_bv & (XCR0_SSE|XCR0_YMM_Hi128)) { |
941 | /* | | 928 | /* |
942 | * Invalid bits in mxcsr or mxcsr_mask will cause faults. | | 929 | * Invalid bits in mxcsr or mxcsr_mask will cause faults. |
943 | */ | | 930 | */ |
944 | fpu_save->sv_xmm.fx_mxcsr_mask = | | 931 | fpu_save->sv_xmm.fx_mxcsr_mask = xstate->xs_fxsave.fx_mxcsr_mask |
945 | xstate->xs_fxsave.fx_mxcsr_mask & x86_fpu_mxcsr_mask; | | 932 | & x86_fpu_mxcsr_mask; |
946 | fpu_save->sv_xmm.fx_mxcsr = xstate->xs_fxsave.fx_mxcsr & | | 933 | fpu_save->sv_xmm.fx_mxcsr = xstate->xs_fxsave.fx_mxcsr & |
947 | fpu_save->sv_xmm.fx_mxcsr_mask; | | 934 | fpu_save->sv_xmm.fx_mxcsr_mask; |
948 | } | | 935 | } |
949 | | | 936 | |
950 | if (xstate->xs_xstate_bv & XCR0_SSE) { | | 937 | if (xstate->xs_xstate_bv & XCR0_SSE) { |
951 | memcpy(&fpu_save->sv_xsave_hdr.xsh_fxsave[160], | | 938 | memcpy(&fpu_save->sv_xsave_hdr.xsh_fxsave[160], |
952 | xstate->xs_fxsave.fx_xmm, | | 939 | xstate->xs_fxsave.fx_xmm, sizeof(xstate->xs_fxsave.fx_xmm)); |
953 | sizeof(xstate->xs_fxsave.fx_xmm)); | | | |
954 | } | | 940 | } |
955 | | | 941 | |
956 | #define COPY_COMPONENT(xcr0_val, xsave_val, field) \ | | 942 | #define COPY_COMPONENT(xcr0_val, xsave_val, field) \ |
957 | if (xstate->xs_xstate_bv & xcr0_val) { \ | | 943 | if (xstate->xs_xstate_bv & xcr0_val) { \ |
958 | KASSERT(x86_xsave_offsets[xsave_val] \ | | 944 | KASSERT(x86_xsave_offsets[xsave_val] \ |
959 | >= sizeof(struct xsave_header)); \ | | 945 | >= sizeof(struct xsave_header)); \ |
960 | KASSERT(x86_xsave_sizes[xsave_val] \ | | 946 | KASSERT(x86_xsave_sizes[xsave_val] \ |
961 | >= sizeof(xstate->field)); \ | | 947 | >= sizeof(xstate->field)); \ |
962 | memcpy((char *)fpu_save + x86_xsave_offsets[xsave_val], \ | | 948 | memcpy((char *)fpu_save + x86_xsave_offsets[xsave_val], \ |
963 | &xstate->field, sizeof(xstate->field)); \ | | 949 | &xstate->field, sizeof(xstate->field)); \ |
964 | } | | 950 | } |
965 | | | 951 | |
966 | COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128); | | 952 | COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128); |