Wed Jul 7 03:30:35 2021 UTC ()
Provide a BWX version of alpha_copystr() and patch it into place if
the system supports the BWX extension.  The inner loop of the BWX
version is 42% shorter than the non-BWX version (7 vs 12 insns).


(thorpej)
diff -r1.138 -r1.139 src/sys/arch/alpha/alpha/locore.s
diff -r1.5 -r1.6 src/sys/arch/alpha/alpha/patch.c

cvs diff -r1.138 -r1.139 src/sys/arch/alpha/alpha/locore.s (expand / switch to unified diff)

--- src/sys/arch/alpha/alpha/locore.s 2021/07/07 02:44:04 1.138
+++ src/sys/arch/alpha/alpha/locore.s 2021/07/07 03:30:35 1.139
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: locore.s,v 1.138 2021/07/07 02:44:04 thorpej Exp $ */ 1/* $NetBSD: locore.s,v 1.139 2021/07/07 03:30:35 thorpej Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1999, 2000, 2019 The NetBSD Foundation, Inc. 4 * Copyright (c) 1999, 2000, 2019 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center. 9 * NASA Ames Research Center.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
@@ -57,27 +57,27 @@ @@ -57,27 +57,27 @@
57 * rights to redistribute these changes. 57 * rights to redistribute these changes.
58 */ 58 */
59 59
60.stabs __FILE__,100,0,0,kernel_text 60.stabs __FILE__,100,0,0,kernel_text
61 61
62#include "opt_ddb.h" 62#include "opt_ddb.h"
63#include "opt_kgdb.h" 63#include "opt_kgdb.h"
64#include "opt_multiprocessor.h" 64#include "opt_multiprocessor.h"
65#include "opt_lockdebug.h" 65#include "opt_lockdebug.h"
66#include "opt_compat_netbsd.h" 66#include "opt_compat_netbsd.h"
67 67
68#include <machine/asm.h> 68#include <machine/asm.h>
69 69
70__KERNEL_RCSID(0, "$NetBSD: locore.s,v 1.138 2021/07/07 02:44:04 thorpej Exp $"); 70__KERNEL_RCSID(0, "$NetBSD: locore.s,v 1.139 2021/07/07 03:30:35 thorpej Exp $");
71 71
72#include "assym.h" 72#include "assym.h"
73 73
74.stabs __FILE__,132,0,0,kernel_text 74.stabs __FILE__,132,0,0,kernel_text
75 75
76 /* don't reorder instructions; paranoia. */ 76 /* don't reorder instructions; paranoia. */
77 .set noreorder 77 .set noreorder
78 .text 78 .text
79 79
80 .macro bfalse reg, dst 80 .macro bfalse reg, dst
81 beq \reg, \dst 81 beq \reg, \dst
82 .endm 82 .endm
83 83
@@ -892,26 +892,64 @@ LEAF_NOPROFILE(lwp_trampoline, 0) @@ -892,26 +892,64 @@ LEAF_NOPROFILE(lwp_trampoline, 0)
892 mov s3, a1 /* a1 = new_lwp (that's us!) */ 892 mov s3, a1 /* a1 = new_lwp (that's us!) */
893 CALL(lwp_startup) /* lwp_startup(prev_lwp, new_lwp); */ 893 CALL(lwp_startup) /* lwp_startup(prev_lwp, new_lwp); */
894 mov s0, pv /* pv = func */ 894 mov s0, pv /* pv = func */
895 mov s1, ra /* ra = (probably exception_return()) */ 895 mov s1, ra /* ra = (probably exception_return()) */
896 mov s2, a0 /* a0 = arg */ 896 mov s2, a0 /* a0 = arg */
897 jmp zero, (pv) /* func(arg) */ 897 jmp zero, (pv) /* func(arg) */
898 END(lwp_trampoline) 898 END(lwp_trampoline)
899 899
900/**************************************************************************/ 900/**************************************************************************/
901 901
902/* 902/*
903 * alpha_copystr(const void *from, void *to, size_t len, size_t *donep) 903 * alpha_copystr(const void *from, void *to, size_t len, size_t *donep)
904 */ 904 */
 905 .arch ev56 /* presumably lets the assembler accept the byte insns (ldbu/stb) used below -- confirm */
 906LEAF(alpha_copystr_bwx, 4) /* BWX variant; alpha_patch() copies it over alpha_copystr when cpu_amask has ALPHA_AMASK_BWX */
 907 LDGP(pv)
 908
 909 mov a2, t0 /* t0 = i = len (kept to compute the count later) */
 910 beq a2, 5f /* if (len == 0), bail */
 911
 9121: ldbu t1, 0(a0) /* t1 = *from */
 913 subl a2, 1, a2 /* len-- -- NOTE(review): subl looks like a 32-bit op on a size_t; confirm that is intended */
 914 addq a0, 1, a0 /* from++ */
 915 stb t1, 0(a1) /* *to = t1 */
 916 beq t1, 2f /* if (t1 == '\0'), bail out; the NUL has already been stored */
 917 addq a1, 1, a1 /* to++ */
 918 bne a2, 1b /* if (len != 0), copy more */
 919
 9202: beq a3, 3f /* if (donep != NULL) */
 921 subl t0, a2, t0 /* *donep = (i - len); counts every byte stored, NUL included */
 922 stq t0, 0(a3) /* write the count through donep */
 9233: bne t1, 4f /* t1 != '\0' (no NUL was copied); leave in a huff */
 924
 925 mov zero, v0 /* return 0. */
 926 RET
 927
 9284: ldiq v0, ENAMETOOLONG /* ran out of len before a NUL was copied */
 929 RET
 930
 9315: ldiq t1, 1 /* len == 0: fool the test at 3: so we return ENAMETOOLONG */
 932 br zero, 2b
 933
 934 nop /* pad to same length as... */
 935 nop /* non-BWX version. */
 936 nop /* patchfunc() panics if the two sizes do not match */
 937 nop
 938 nop
 939 EXPORT(alpha_copystr_bwx_end) /* end address handed to patchfunc() */
 940 END(alpha_copystr_bwx)
 941 .arch ev4 /* presumably restores the baseline arch for the code that follows -- confirm */
 942
905LEAF(alpha_copystr, 4) 943LEAF(alpha_copystr, 4)
906 LDGP(pv) 944 LDGP(pv)
907 945
908 mov a2, t0 /* t0 = i = len */ 946 mov a2, t0 /* t0 = i = len */
909 beq a2, 5f /* if (len == 0), bail */ 947 beq a2, 5f /* if (len == 0), bail */
910 948
9111: ldq_u t1, 0(a0) /* t1 = *from */ 9491: ldq_u t1, 0(a0) /* t1 = *from */
912 extbl t1, a0, t1 950 extbl t1, a0, t1
913 ldq_u t3, 0(a1) /* set up t2 with quad around *to */ 951 ldq_u t3, 0(a1) /* set up t2 with quad around *to */
914 insbl t1, a1, t2 952 insbl t1, a1, t2
915 mskbl t3, a1, t3 953 mskbl t3, a1, t3
916 or t3, t2, t3 /* add *from to quad around *to */ 954 or t3, t2, t3 /* add *from to quad around *to */
917 stq_u t3, 0(a1) /* write out that quad */ 955 stq_u t3, 0(a1) /* write out that quad */
@@ -925,26 +963,27 @@ LEAF(alpha_copystr, 4) @@ -925,26 +963,27 @@ LEAF(alpha_copystr, 4)
9252: beq a3, 3f /* if (lenp != NULL) */ 9632: beq a3, 3f /* if (lenp != NULL) */
926 subl t0, a2, t0 /* *lenp = (i - len) */ 964 subl t0, a2, t0 /* *lenp = (i - len) */
927 stq t0, 0(a3) 965 stq t0, 0(a3)
9283: bne t1, 4f /* *from != '\0'; leave in a huff */ 9663: bne t1, 4f /* *from != '\0'; leave in a huff */
929 967
930 mov zero, v0 /* return 0. */ 968 mov zero, v0 /* return 0. */
931 RET 969 RET
932 970
9334: ldiq v0, ENAMETOOLONG 9714: ldiq v0, ENAMETOOLONG
934 RET 972 RET
935 973
9365: ldiq t1, 1 /* fool the test above... */ 9745: ldiq t1, 1 /* fool the test above... */
937 br zero, 2b 975 br zero, 2b
 976 EXPORT(alpha_copystr_end)
938 END(alpha_copystr) 977 END(alpha_copystr)
939 978
940NESTED(copyinstr, 4, 16, ra, IM_RA|IM_S0, 0) 979NESTED(copyinstr, 4, 16, ra, IM_RA|IM_S0, 0)
941 LDGP(pv) 980 LDGP(pv)
942 lda sp, -16(sp) /* set up stack frame */ 981 lda sp, -16(sp) /* set up stack frame */
943 stq ra, (16-8)(sp) /* save ra */ 982 stq ra, (16-8)(sp) /* save ra */
944 stq s0, (16-16)(sp) /* save s0 */ 983 stq s0, (16-16)(sp) /* save s0 */
945 ldiq t0, VM_MAX_ADDRESS /* make sure that src addr */ 984 ldiq t0, VM_MAX_ADDRESS /* make sure that src addr */
946 cmpult a0, t0, t1 /* is in user space. */ 985 cmpult a0, t0, t1 /* is in user space. */
947 beq t1, copyerr_efault /* if it's not, error out. */ 986 beq t1, copyerr_efault /* if it's not, error out. */
948 /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */ 987 /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
949 GET_CURLWP 988 GET_CURLWP
950 ldq s0, L_PCB(v0) /* s0 = pcb */ 989 ldq s0, L_PCB(v0) /* s0 = pcb */

cvs diff -r1.5 -r1.6 src/sys/arch/alpha/alpha/patch.c (expand / switch to unified diff)

--- src/sys/arch/alpha/alpha/patch.c 2020/09/04 03:41:49 1.5
+++ src/sys/arch/alpha/alpha/patch.c 2021/07/07 03:30:35 1.6
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: patch.c,v 1.5 2020/09/04 03:41:49 thorpej Exp $ */ 1/* $NetBSD: patch.c,v 1.6 2021/07/07 03:30:35 thorpej Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2007 The NetBSD Foundation, Inc. 4 * Copyright (c) 2007 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran and Jason R. Thorpe. 8 * by Andrew Doran and Jason R. Thorpe.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
@@ -25,48 +25,51 @@ @@ -25,48 +25,51 @@
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * Patch kernel code at boot time, depending on available CPU features 33 * Patch kernel code at boot time, depending on available CPU features
34 * and configuration. 34 * and configuration.
35 */ 35 */
36 36
37#include <sys/cdefs.h> 37#include <sys/cdefs.h>
38__KERNEL_RCSID(0, "$NetBSD: patch.c,v 1.5 2020/09/04 03:41:49 thorpej Exp $"); 38__KERNEL_RCSID(0, "$NetBSD: patch.c,v 1.6 2021/07/07 03:30:35 thorpej Exp $");
39 39
40#include "opt_multiprocessor.h" 40#include "opt_multiprocessor.h"
41 41
42#include <sys/types.h> 42#include <sys/types.h>
43#include <sys/systm.h> 43#include <sys/systm.h>
44#include <sys/lwp.h> 44#include <sys/lwp.h>
45 45
46#include <machine/cpu.h> 46#include <machine/cpu.h>
47#include <machine/alpha.h> 47#include <machine/alpha.h>
48#include <machine/intr.h> 48#include <machine/intr.h>
49 49
50void _membar_producer(void); 50void _membar_producer(void);
51void _membar_producer_end(void); 51void _membar_producer_end(void);
52void _membar_producer_mp(void); 52void _membar_producer_mp(void);
53void _membar_producer_mp_end(void); 53void _membar_producer_mp_end(void);
54 54
55void _membar_sync(void); 55void _membar_sync(void);
56void _membar_sync_end(void); 56void _membar_sync_end(void);
57void _membar_sync_mp(void); 57void _membar_sync_mp(void);
58void _membar_sync_mp_end(void); 58void _membar_sync_mp_end(void);
59 59
 60extern char alpha_copystr_bwx[], alpha_copystr_bwx_end[];
 61extern char alpha_copystr[], alpha_copystr_end[];
 62
60static void __attribute__((__unused__)) 63static void __attribute__((__unused__))
61patchfunc(void *from_s, void *from_e, void *to_s, void *to_e) 64patchfunc(void *from_s, void *from_e, void *to_s, void *to_e)
62{ 65{
63 int s; 66 int s;
64 67
65 s = splhigh(); 68 s = splhigh();
66 69
67 if ((uintptr_t)from_e - (uintptr_t)from_s != 70 if ((uintptr_t)from_e - (uintptr_t)from_s !=
68 (uintptr_t)to_e - (uintptr_t)to_s) 71 (uintptr_t)to_e - (uintptr_t)to_s)
69 panic("patchfunc: sizes do not match (from=%p)", from_s); 72 panic("patchfunc: sizes do not match (from=%p)", from_s);
70  73
71 memcpy(to_s, from_s, (uintptr_t)to_e - (uintptr_t)to_s); 74 memcpy(to_s, from_s, (uintptr_t)to_e - (uintptr_t)to_s);
72 alpha_pal_imb(); 75 alpha_pal_imb();
@@ -75,26 +78,31 @@ patchfunc(void *from_s, void *from_e, vo @@ -75,26 +78,31 @@ patchfunc(void *from_s, void *from_e, vo
75} 78}
76 79
77void 80void
78alpha_patch(bool is_mp) 81alpha_patch(bool is_mp)
79{ 82{
80 83
81 /* 84 /*
82 * We allow this function to be called multiple times 85 * We allow this function to be called multiple times
83 * (there is no harm in doing so), so long as other 86 * (there is no harm in doing so), so long as other
84 * CPUs have not yet actually hatched to start running 87 * CPUs have not yet actually hatched to start running
85 * kernel code. 88 * kernel code.
86 */ 89 */
87 90
 91 if (cpu_amask & ALPHA_AMASK_BWX) {
 92 patchfunc(alpha_copystr_bwx, alpha_copystr_bwx_end,
 93 alpha_copystr, alpha_copystr_end);
 94 }
 95
88#if defined(MULTIPROCESSOR) 96#if defined(MULTIPROCESSOR)
89 if (is_mp) { 97 if (is_mp) {
90 KASSERT(curcpu()->ci_flags & CPUF_PRIMARY); 98 KASSERT(curcpu()->ci_flags & CPUF_PRIMARY);
91 KASSERT((cpus_running & ~(1UL << cpu_number())) == 0); 99 KASSERT((cpus_running & ~(1UL << cpu_number())) == 0);
92 100
93 patchfunc(_membar_producer_mp, _membar_producer_mp_end, 101 patchfunc(_membar_producer_mp, _membar_producer_mp_end,
94 _membar_producer, _membar_producer_end); 102 _membar_producer, _membar_producer_end);
95 patchfunc(_membar_sync_mp, _membar_sync_mp_end, 103 patchfunc(_membar_sync_mp, _membar_sync_mp_end,
96 _membar_sync, _membar_sync_end); 104 _membar_sync, _membar_sync_end);
97 } 105 }
98#else 106#else
99 KASSERT(is_mp == false); 107 KASSERT(is_mp == false);
100#endif /* MULTIPROCESSOR */ 108#endif /* MULTIPROCESSOR */