Fri Mar 3 14:32:38 2023 UTC
Revert "x86/fpu.c: Sprinkle KNF."

The kthread_fpu_enter/exit changes broke some hardware; it is unclear why.
To be investigated before fixing and reapplying these changes.


(riastradh)
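
For context, a minimal sketch of the MI usage pattern behind the MD hooks
kthread_fpu_enter_md()/kthread_fpu_exit_md() touched below. It assumes the MI
kthread_fpu_enter()/kthread_fpu_exit() wrappers (from <sys/kthread.h>) set
LW_SYSTEM_FPU on the lwp and call the MD hooks to load a safe FPU state and to
zero it again; example_fpu_worker() and example_fpu_kernel() are made-up
names, not taken from this commit:

	#include <sys/param.h>
	#include <sys/kthread.h>

	/* Hypothetical FPU-using helper, e.g. a vectorized cipher pass. */
	static void example_fpu_kernel(void *);

	static void
	example_fpu_worker(void *arg)
	{

		/*
		 * Mark this system thread as allowed to keep live FPU
		 * state (LW_SYSTEM_FPU); the MD hook loads a safe
		 * initial FPU state.
		 */
		kthread_fpu_enter();

		example_fpu_kernel(arg);	/* may use SSE/AVX freely here */

		/* Zero the FPU state and give the FPU back. */
		kthread_fpu_exit();

		kthread_exit(0);
	}
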
diff -r1.83 -r1.84 src/sys/arch/x86/x86/fpu.c

cvs diff -r1.83 -r1.84 src/sys/arch/x86/x86/fpu.c

--- src/sys/arch/x86/x86/fpu.c 2023/02/25 18:28:57 1.83
+++ src/sys/arch/x86/x86/fpu.c 2023/03/03 14:32:38 1.84
@@ -1,14 +1,14 @@
-/* $NetBSD: fpu.c,v 1.83 2023/02/25 18:28:57 riastradh Exp $ */
+/* $NetBSD: fpu.c,v 1.84 2023/03/03 14:32:38 riastradh Exp $ */
 
 /*
  * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc. All
  * rights reserved.
  *
  * This code is derived from software developed for The NetBSD Foundation
  * by Andrew Doran and Maxime Villard.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
@@ -86,58 +86,55 @@
  * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * @(#)npx.c 7.2 (Berkeley) 5/12/91
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.83 2023/02/25 18:28:57 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.84 2023/03/03 14:32:38 riastradh Exp $");
 
 #include "opt_multiprocessor.h"
 
 #include <sys/param.h>
-#include <sys/types.h>
-
+#include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/cpu.h>
 #include <sys/file.h>
+#include <sys/proc.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
-#include <sys/proc.h>
 #include <sys/sysctl.h>
-#include <sys/systm.h>
 #include <sys/xcall.h>
 
 #include <machine/cpu.h>
-#include <machine/cpufunc.h>
-#include <machine/cputypes.h>
 #include <machine/cpuvar.h>
+#include <machine/cputypes.h>
 #include <machine/intr.h>
+#include <machine/cpufunc.h>
 #include <machine/pcb.h>
-#include <machine/specialreg.h>
 #include <machine/trap.h>
-
+#include <machine/specialreg.h>
 #include <x86/cpu.h>
 #include <x86/fpu.h>
 
 #ifdef XENPV
 #define clts() HYPERVISOR_fpu_taskswitch(0)
 #define stts() HYPERVISOR_fpu_taskswitch(1)
 #endif
 
 void fpu_handle_deferred(void);
 void fpu_switch(struct lwp *, struct lwp *);
 
 uint32_t x86_fpu_mxcsr_mask __read_mostly = 0;
 
 /*
  * True if this a thread that is allowed to use the FPU -- either a
  * user thread, or a system thread with LW_SYSTEM_FPU enabled.
  */
 static inline bool
 lwp_can_haz_fpu(struct lwp *l)
@@ -172,48 +169,45 @@ fpu_lwp_area(struct lwp *l)
 	return area;
 }
 
 static inline void
 fpu_save_lwp(struct lwp *l)
 {
 	struct pcb *pcb = lwp_getpcb(l);
 	union savefpu *area = &pcb->pcb_savefpu;
 	int s;
 
 	s = splvm();
 	if (l->l_md.md_flags & MDL_FPU_IN_CPU) {
 		KASSERT(lwp_can_haz_fpu(l));
-		fpu_area_save(area, x86_xsave_features,
-		    !(l->l_proc->p_flag & PK_32));
+		fpu_area_save(area, x86_xsave_features, !(l->l_proc->p_flag & PK_32));
 		l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
 	}
 	splx(s);
 }
 
 /*
  * Bring curlwp's FPU state in memory. It will get installed back in the CPU
  * when returning to userland.
  */
 void
 fpu_save(void)
 {
-
 	fpu_save_lwp(curlwp);
 }
 
 void
 fpuinit(struct cpu_info *ci)
 {
-
 	/*
 	 * This might not be strictly necessary since it will be initialized
 	 * for each process. However it does no harm.
 	 */
 	clts();
 	fninit();
 	stts();
 }
 
 void
 fpuinit_mxcsr_mask(void)
 {
 #ifndef XENPV
@@ -274,74 +268,71 @@ fpu_errata_amd(void)
 	fnclex();
 	fldummy();
 }
 
 #ifdef __x86_64__
 #define XS64(x) (is_64bit ? x##64 : x)
 #else
 #define XS64(x) x
 #endif
 
 void
 fpu_area_save(void *area, uint64_t xsave_features, bool is_64bit)
 {
-
 	switch (x86_fpu_save) {
 	case FPU_SAVE_FSAVE:
 		fnsave(area);
 		break;
 	case FPU_SAVE_FXSAVE:
 		XS64(fxsave)(area);
 		break;
 	case FPU_SAVE_XSAVE:
 		XS64(xsave)(area, xsave_features);
 		break;
 	case FPU_SAVE_XSAVEOPT:
 		XS64(xsaveopt)(area, xsave_features);
 		break;
 	}
 
 	stts();
 }
 
 void
 fpu_area_restore(const void *area, uint64_t xsave_features, bool is_64bit)
 {
-
 	clts();
 
 	switch (x86_fpu_save) {
 	case FPU_SAVE_FSAVE:
 		frstor(area);
 		break;
 	case FPU_SAVE_FXSAVE:
 		if (cpu_vendor == CPUVENDOR_AMD)
 			fpu_errata_amd();
 		XS64(fxrstor)(area);
 		break;
 	case FPU_SAVE_XSAVE:
 	case FPU_SAVE_XSAVEOPT:
 		if (cpu_vendor == CPUVENDOR_AMD)
 			fpu_errata_amd();
 		XS64(xrstor)(area, xsave_features);
 		break;
 	}
 }
 
 void
 fpu_handle_deferred(void)
 {
 	struct pcb *pcb = lwp_getpcb(curlwp);
-
 	fpu_area_restore(&pcb->pcb_savefpu, x86_xsave_features,
 	    !(curlwp->l_proc->p_flag & PK_32));
 }
 
 void
 fpu_switch(struct lwp *oldlwp, struct lwp *newlwp)
 {
 	struct cpu_info *ci __diagused = curcpu();
 	struct pcb *pcb;
 
 	KASSERTMSG(ci->ci_ilevel >= IPL_SCHED, "cpu%d ilevel=%d",
 	    cpu_index(ci), ci->ci_ilevel);
 
@@ -378,27 +369,27 @@ fpu_lwp_fork(struct lwp *l1, struct lwp
 
 void
 fpu_lwp_abandon(struct lwp *l)
 {
 	int s;
 
 	KASSERT(l == curlwp);
 	s = splvm();
 	l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
 	stts();
 	splx(s);
 }
 
-/* ------------------------------------------------------------------------- */
+/* -------------------------------------------------------------------------- */
 
 static const union savefpu safe_fpu __aligned(64) = {
 	.sv_xmm = {
 		.fx_mxcsr = __SAFE_MXCSR__,
 	},
 };
 static const union savefpu zero_fpu __aligned(64);
 
 /*
  * fpu_kern_enter()
  *
  * Begin using the FPU. Raises to splvm, disabling most
  * interrupts and rendering the thread non-preemptible; caller
@@ -427,28 +418,28 @@ fpu_kern_enter(void)
 #if 0
 	/*
 	 * Can't assert this because if the caller holds a spin lock at
 	 * IPL_VM, and previously held and released a spin lock at
 	 * higher IPL, the IPL remains raised above IPL_VM.
 	 */
 	KASSERTMSG(ci->ci_ilevel <= IPL_VM || cold, "ilevel=%d",
 	    ci->ci_ilevel);
 #endif
 	KASSERT(ci->ci_kfpu_spl == -1);
 	ci->ci_kfpu_spl = s;
 
 	/*
-	 * If we are in a softint and have a pinned lwp, the fpu state
-	 * is that of the pinned lwp, so save it there.
+	 * If we are in a softint and have a pinned lwp, the fpu state is that
+	 * of the pinned lwp, so save it there.
 	 */
 	while ((l->l_pflag & LP_INTR) && (l->l_switchto != NULL))
 		l = l->l_switchto;
 	fpu_save_lwp(l);
 
 	/*
 	 * Clear CR0_TS, which fpu_save_lwp set if it saved anything --
 	 * otherwise the CPU will trap if we try to use the FPU under
 	 * the false impression that there has been a task switch since
 	 * the last FPU usage requiring that we save the FPU state.
 	 */
 	clts();
 
@@ -512,27 +503,27 @@ kthread_fpu_enter_md(void)
 	clts();
 	fpu_area_restore(&safe_fpu, x86_xsave_features, /*is_64bit*/false);
 }
 
 void
 kthread_fpu_exit_md(void)
 {
 
 	/* Zero the FPU state and disable the FPU by setting CR0_TS. */
 	fpu_area_restore(&zero_fpu, x86_xsave_features, /*is_64bit*/false);
 	stts();
 }
 
-/* ------------------------------------------------------------------------- */
+/* -------------------------------------------------------------------------- */
 
 /*
  * The following table is used to ensure that the FPE_... value
 * that is passed as a trapcode to the signal handler of the user
 * process does not have more than one bit set.
 *
 * Multiple bits may be set if SSE simd instructions generate errors
 * on more than one value or if the user process modifies the control
 * word while a status word bit is already set (which this is a sign
 * of bad coding).
 * We have no choice than to narrow them down to one bit, since we must
 * not send a trapcode that is not exactly one of the FPE_ macros.
 *
@@ -666,27 +657,26 @@ fputrap(struct trapframe *frame)
 void
 fpudna(struct trapframe *frame)
 {
 	panic("fpudna from %s, ip %p, trapframe %p",
 	    USERMODE(frame->tf_cs) ? "userland" : "kernel",
 	    (void *)X86_TF_RIP(frame), frame);
 }
 
 /* -------------------------------------------------------------------------- */
 
 static inline void
 fpu_xstate_reload(union savefpu *fpu_save, uint64_t xstate)
 {
-
 	/*
 	 * Force a reload of the given xstate during the next XRSTOR.
 	 */
 	if (x86_fpu_save >= FPU_SAVE_XSAVE) {
 		fpu_save->sv_xsave_hdr.xsh_xstate_bv |= xstate;
 	}
 }
 
 void
 fpu_set_default_cw(struct lwp *l, unsigned int x87_cw)
 {
 	union savefpu *fpu_save = fpu_lwp_area(l);
 	struct pcb *pcb = lwp_getpcb(l);
@@ -865,27 +855,26 @@ process_read_xstate(struct lwp *l, struc
 	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
 	COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
 	COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
 	COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);
 
 #undef COPY_COMPONENT
 
 	return 0;
 }
 
 int
 process_verify_xstate(const struct xstate *xstate)
 {
-
 	/* xstate_bv must be a subset of RFBM */
 	if (xstate->xs_xstate_bv & ~xstate->xs_rfbm)
 		return EINVAL;
 
 	switch (x86_fpu_save) {
 	case FPU_SAVE_FSAVE:
 		if ((xstate->xs_rfbm & ~XCR0_X87))
 			return EINVAL;
 		break;
 	case FPU_SAVE_FXSAVE:
 		if ((xstate->xs_rfbm & ~(XCR0_X87 | XCR0_SSE)))
 			return EINVAL;
 		break;
@@ -895,30 +884,28 @@ process_verify_xstate(const struct xstat
 			return EINVAL;
 	}
 
 	return 0;
 }
 
 int
 process_write_xstate(struct lwp *l, const struct xstate *xstate)
 {
 	union savefpu *fpu_save = fpu_lwp_area(l);
 
 	/* Convert data into legacy FSAVE format. */
 	if (x86_fpu_save == FPU_SAVE_FSAVE) {
-		if (xstate->xs_xstate_bv & XCR0_X87) {
-			process_xmm_to_s87(&xstate->xs_fxsave,
-			    &fpu_save->sv_87);
-		}
+		if (xstate->xs_xstate_bv & XCR0_X87)
+			process_xmm_to_s87(&xstate->xs_fxsave, &fpu_save->sv_87);
 		return 0;
 	}
 
 	/* If XSAVE is supported, make sure that xstate_bv is set correctly. */
 	if (x86_fpu_save >= FPU_SAVE_XSAVE) {
 		/*
 		 * Bit-wise "xstate->xs_rfbm ? xstate->xs_xstate_bv :
 		 * fpu_save->sv_xsave_hdr.xsh_xstate_bv"
 		 */
 		fpu_save->sv_xsave_hdr.xsh_xstate_bv =
 		    (fpu_save->sv_xsave_hdr.xsh_xstate_bv & ~xstate->xs_rfbm) |
 		    xstate->xs_xstate_bv;
 	}
@@ -931,36 +918,35 @@ process_write_xstate(struct lwp *l, cons
 		memcpy(&fpu_save->sv_xmm, &xstate->xs_fxsave, 24);
 		memcpy(fpu_save->sv_xmm.fx_87_ac, xstate->xs_fxsave.fx_87_ac,
 		    sizeof(xstate->xs_fxsave.fx_87_ac));
 	}
 
 	/*
 	 * Copy MXCSR if either SSE or AVX state is requested, to match the
 	 * XSAVE behavior for those flags.
 	 */
 	if (xstate->xs_xstate_bv & (XCR0_SSE|XCR0_YMM_Hi128)) {
 		/*
 		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
 		 */
-		fpu_save->sv_xmm.fx_mxcsr_mask =
-		    xstate->xs_fxsave.fx_mxcsr_mask & x86_fpu_mxcsr_mask;
+		fpu_save->sv_xmm.fx_mxcsr_mask = xstate->xs_fxsave.fx_mxcsr_mask
+		    & x86_fpu_mxcsr_mask;
 		fpu_save->sv_xmm.fx_mxcsr = xstate->xs_fxsave.fx_mxcsr &
 		    fpu_save->sv_xmm.fx_mxcsr_mask;
 	}
 
 	if (xstate->xs_xstate_bv & XCR0_SSE) {
 		memcpy(&fpu_save->sv_xsave_hdr.xsh_fxsave[160],
-		    xstate->xs_fxsave.fx_xmm,
-		    sizeof(xstate->xs_fxsave.fx_xmm));
+		    xstate->xs_fxsave.fx_xmm, sizeof(xstate->xs_fxsave.fx_xmm));
 	}
 
 #define COPY_COMPONENT(xcr0_val, xsave_val, field) \
 	if (xstate->xs_xstate_bv & xcr0_val) { \
 		KASSERT(x86_xsave_offsets[xsave_val] \
 		    >= sizeof(struct xsave_header)); \
 		KASSERT(x86_xsave_sizes[xsave_val] \
 		    >= sizeof(xstate->field)); \
 		memcpy((char *)fpu_save + x86_xsave_offsets[xsave_val], \
 		    &xstate->field, sizeof(xstate->field)); \
 	}
 
 	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);