Sun Jul 19 23:45:29 2009 UTC ()
revert changes that made new kernels hang in ACPI detection


(christos)
diff -r1.2 -r1.3 src/common/lib/libc/arch/x86_64/string/ffs.S
diff -r1.2 -r1.3 src/common/lib/libc/arch/x86_64/string/memchr.S
diff -r1.4 -r1.5 src/common/lib/libc/arch/x86_64/string/strchr.S

cvs diff -r1.2 -r1.3 src/common/lib/libc/arch/x86_64/string/ffs.S (expand / switch to unified diff)

--- src/common/lib/libc/arch/x86_64/string/ffs.S 2009/07/18 12:03:31 1.2
+++ src/common/lib/libc/arch/x86_64/string/ffs.S 2009/07/19 23:45:29 1.3
@@ -1,20 +1,21 @@ @@ -1,20 +1,21 @@
1/* 1/*
2 * Written by J.T. Conklin <jtc@NetBSD.org>. 2 * Written by J.T. Conklin <jtc@NetBSD.org>.
3 * Public domain. 3 * Public domain.
4 * Adapted for NetBSD/x86_64 by Frank van der Linden <fvdl@wasabisystems.com> 4 * Adapted for NetBSD/x86_64 by Frank van der Linden <fvdl@wasabisystems.com>
5 */ 5 */
6 6
7#include <machine/asm.h> 7#include <machine/asm.h>
8 8
9#if defined(LIBC_SCCS) 9#if defined(LIBC_SCCS)
10 RCSID("$NetBSD: ffs.S,v 1.2 2009/07/18 12:03:31 dsl Exp $") 10 RCSID("$NetBSD: ffs.S,v 1.3 2009/07/19 23:45:29 christos Exp $")
11#endif 11#endif
12 12
13ENTRY(ffs) 13ENTRY(ffs)
14 bsfl %edi,%eax 14 bsfl %edi,%eax
15 jz 1f /* ZF is set if all bits are 0 */ 15 jz L1 /* ZF is set if all bits are 0 */
16 incl %eax /* bits numbered from 1, not 0 */ 16 incl %eax /* bits numbered from 1, not 0 */
17 ret 17 ret
18 18
191: xorl %eax,%eax /* clear result */ 19 _ALIGN_TEXT
 20L1: xorl %eax,%eax /* clear result */
20 ret 21 ret

cvs diff -r1.2 -r1.3 src/common/lib/libc/arch/x86_64/string/memchr.S (expand / switch to unified diff)

--- src/common/lib/libc/arch/x86_64/string/memchr.S 2009/07/18 18:06:56 1.2
+++ src/common/lib/libc/arch/x86_64/string/memchr.S 2009/07/19 23:45:29 1.3
@@ -1,115 +1,111 @@ @@ -1,115 +1,111 @@
1/* $NetBSD: memchr.S,v 1.2 2009/07/18 18:06:56 dsl Exp $ */ 1/*
2 2 * Written by J.T. Conklin <jtc@acorntoolworks.com>
3/*- 3 * Public domain.
4 * Copyright (c) 2009 The NetBSD Foundation, Inc. 
5 * All rights reserved. 
6 * 
7 * This code is derived from software contributed to The NetBSD Foundation 
8 * by David Laight. 
9 * 
10 * Redistribution and use in source and binary forms, with or without 
11 * modification, are permitted provided that the following conditions 
12 * are met: 
13 * 1. Redistributions of source code must retain the above copyright 
14 * notice, this list of conditions and the following disclaimer. 
15 * 2. Redistributions in binary form must reproduce the above copyright 
16 * notice, this list of conditions and the following disclaimer in the 
17 * documentation and/or other materials provided with the distribution. 
18 * 
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
29 * POSSIBILITY OF SUCH DAMAGE. 
30 */ 4 */
31 5
32#include <machine/asm.h> 6#include <machine/asm.h>
33 7
34#if defined(LIBC_SCCS) 8#if defined(LIBC_SCCS)
35 RCSID("$NetBSD: memchr.S,v 1.2 2009/07/18 18:06:56 dsl Exp $") 9 RCSID("$NetBSD: memchr.S,v 1.3 2009/07/19 23:45:29 christos Exp $")
36#endif 10#endif
37 11
38/* 
39 * The instruction sequences used try to avoid data dependencies 
40 * between adjacent instructions (to allow parallel execution). 
41 * The 'imul' for %r9 could be put into the delay following the 
42 * memory read (ie inside the loop) at no obvious cost - except 
43 * that the loop is currently exactly 32 bytes - 2 fetch blocks!. 
44 * 
45 * I don't think aligning any of the other branch targets is useful. 
46 */ 
47 
48ENTRY(memchr) 12ENTRY(memchr)
49 movabsq $0x0101010101010101,%r8 13 movzbq %sil,%rcx
50 lea (%rdi,%rdx),%r10 /* limit of buffer to scan */ 
51 movzbq %sil,%rsi /* mask high bits! */ 
52 14
53 /* 'directpath' imuls can execute 3 at a time ... (amd) */ 15 /*
54 imul %r8,%rsi /* search byte replicated in word */ 16 * Align to word boundary.
55 imul $0x80,%r8,%r9 /* 0x8080808080808080 */ 17 * Consider unrolling loop?
56 test $7,%dil 18 */
57 jnz 20f /* jump if misaligned */ 19 testq %rdx,%rdx /* nbytes == 0? */
58 jmp 1f /* jump to avoid 4 nops (13 bytes) in gap */ 20 je .Lzero
59 21.Lalign:
60 _ALIGN_TEXT /* entire loop now in 32 aligned bytes */ 22 testb $7,%dil
611: 23 je .Lword_aligned
62 cmpq %r10,%rdi /* end of buffer ? */ 24 movq %rdi,%rax
63 jae 30f /* jump if so */ 25 cmpb (%rdi),%cl
 26 je .Ldone
 27 incq %rdi
 28 decq %rdx
 29 jnz .Lalign
 30 jmp .Lzero
 31
 32.Lword_aligned:
 33 /* copy char to all bytes in word */
 34 movb %cl,%ch
 35 movq %rcx,%rsi
 36 salq $16,%rcx
 37 orq %rsi,%rcx
 38 movq %rcx,%rsi
 39 salq $32,%rcx
 40 orq %rsi,%rcx
64 41
65 movq (%rdi),%rax /* value to check */ 42 movabsq $0x0101010101010101,%r8
662: 43 movabsq $0x8080808080808080,%r9
 44
 45 _ALIGN_TEXT
 46.Lloop:
 47 cmpq $7,%rdx /* nbytes >= 8? */
 48 jbe .Lbyte
 49 movq (%rdi),%rsi
67 addq $8,%rdi 50 addq $8,%rdi
68 xorq %rsi,%rax /* now looking for zeros */ 51 xorq %rcx,%rsi
69 mov %rax,%rcx 52 subq $8,%rdx
70 subq %r8,%rax /* x - 0x01 */ 53 subq %r8,%rsi
71 not %rcx 54 testq %r9,%rsi
72 andq %r9,%rax /* (x - 0x01) & 0x80 */ 55 je .Lloop
73 andq %rcx,%rax /* ((x - 0x01) & 0x80) & ~x */ 56
74 je 1b /* jump if not found */ 57 /*
75 58 * In rare cases, the above loop may exit prematurely. We must
76/* Found byte in word, get its address */ 59 * return to the loop if none of the bytes in the word are
77 bsf %rax,%rax 60 * equal to ch.
78 shr $3,%eax 61 */
79 lea -8(%rax,%rdi),%rax 62
80 cmpq %r10,%rax /* need to check not beyond buffer */ 63 leaq -8(%rdi),%rax
81 jae 30f 64 cmpb -8(%rdi),%cl /* 1st byte == ch? */
82 rep 65 je .Ldone
83 ret /* amd - no ret after jmp */ 66
84 67 leaq -7(%rdi),%rax
85/* Input misaligned, read aligned and kill low bits */ 68 cmpb -7(%rdi),%cl /* 2nd byte == ch? */
86/* (Getting a -1 is surprisingly hard work!) */ 69 je .Ldone
8720: 70
88 xor %eax,%eax /* zeros all 64 bits */ 71 leaq -6(%rdi),%rax
89 mov %dil,%cl /* misalignment amount 1..7 (+high bits )*/ 72 cmpb -6(%rdi),%cl /* 3rd byte == ch? */
90 test %rdx,%rdx /* zero length, don't read */ 73 je .Ldone
91 jz 30f 74
92 75 leaq -5(%rdi),%rax
93 and $~7,%dil /* %rdi now start of word */ 76 cmpb -5(%rdi),%cl /* 4th byte == ch? */
94 lea -1(%rax),%r11 /* all 0xff */ 77 je .Ldone
95 and $7,%cl /* 1..7 */ 78
96 79 leaq -4(%rdi),%rax
97 mov (%rdi),%rax /* word containing first byte */ 80 cmpb -4(%rdi),%cl /* 5th byte == ch? */
98 shl $3,%cl /* 8..56 */ 81 je .Ldone
99 test %r11,%rsi /* searching for 0xff */ 82
100 jz 25f 83 leaq -3(%rdi),%rax
101 84 cmpb -3(%rdi),%cl /* 6th byte == ch? */
102 /* Searching for other than 0xff, set low bytes */ 85 je .Ldone
103 shl %cl,%r11 /* 0xff in high (wanted) bytes */ 86
104 not %r11 /* 0xff in low (unwanted) bytes */ 87 leaq -2(%rdi),%rax
105 or %r11,%rax /* low bytes now set */ 88 cmpb -2(%rdi),%cl /* 7th byte == ch? */
106 jmp 2b 89 je .Ldone
107 90
10825: /* Searching for 0xff, clear low bytes */ 91 leaq -1(%rdi),%rax
109 shl %cl,%r11 /* 0xff in high (wanted) bytes */ 92 cmpb -1(%rdi),%cl /* 8th byte == ch? */
110 and %r11,%rax /* low bytes now zero */ 93 jne .Lloop
111 jmp 2b 94 ret
 95
 96.Lbyte:
 97 testq %rdx,%rdx
 98 je .Lzero
 99.Lbyte_loop:
 100 movq %rdi,%rax
 101 cmpb (%rdi),%cl
 102 je .Ldone
 103 incq %rdi
 104 decq %rdx
 105 jnz .Lbyte_loop
 106
 107.Lzero:
 108 xorq %rax,%rax
112 109
113/* Not found */ 110.Ldone:
11430: xorq %rax,%rax 
115 ret 111 ret

cvs diff -r1.4 -r1.5 src/common/lib/libc/arch/x86_64/string/strchr.S (expand / switch to unified diff)

--- src/common/lib/libc/arch/x86_64/string/strchr.S 2009/07/18 16:40:31 1.4
+++ src/common/lib/libc/arch/x86_64/string/strchr.S 2009/07/19 23:45:29 1.5
@@ -1,153 +1,133 @@ @@ -1,153 +1,133 @@
1/* $NetBSD: strchr.S,v 1.4 2009/07/18 16:40:31 dsl Exp $ */ 1/*
2 2 * Written by J.T. Conklin <jtc@acorntoolworks.com>
3/*- 3 * Public domain.
4 * Copyright (c) 2009 The NetBSD Foundation, Inc. 
5 * All rights reserved. 
6 * 
7 * This code is derived from software contributed to The NetBSD Foundation 
8 * by David Laight. 
9 * 
10 * Redistribution and use in source and binary forms, with or without 
11 * modification, are permitted provided that the following conditions 
12 * are met: 
13 * 1. Redistributions of source code must retain the above copyright 
14 * notice, this list of conditions and the following disclaimer. 
15 * 2. Redistributions in binary form must reproduce the above copyright 
16 * notice, this list of conditions and the following disclaimer in the 
17 * documentation and/or other materials provided with the distribution. 
18 * 
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
29 * POSSIBILITY OF SUCH DAMAGE. 
30 */ 4 */
31 5
32/* See comments in strlen.S about checking words for byte values */ 
33 
34#include <machine/asm.h> 6#include <machine/asm.h>
35 7
36#if defined(LIBC_SCCS) 8#if defined(LIBC_SCCS)
37 RCSID("$NetBSD: strchr.S,v 1.4 2009/07/18 16:40:31 dsl Exp $") 9 RCSID("$NetBSD: strchr.S,v 1.5 2009/07/19 23:45:29 christos Exp $")
38#endif 10#endif
39 11
40/* 
41 * On entry %rdi is the buffer and the low byte of %rsi (%sil) the 
42 * character to search for. 
43 * 
44 * Registers %rdx, %rcx, %r8-%r11 and %rax are also usable 
45 */ 
46 
47/* Uncomment below to get regression test to run this version but 
48 * have everything else use the trivial one below. */ 
49/* #define TEST_STRCHR */ 
50 
51#ifdef TEST_STRCHR 
52ENTRY(test_strchr) 
53#else 
54ENTRY(strchr) 12ENTRY(strchr)
55#endif 13 movzbq %sil,%rcx
56 movabsq $0x0101010101010101,%r8 
57 14
58 movzbq %sil,%rdx /* value to search for (c) */ 15 /*
59 /* These imul are 'directpath' on athlons, so are fast */ 16 * Align to word boundary.
60 imul $0x80,%r8,%r9 /* 0x8080808080808080 */ 17 * Consider unrolling loop?
61 imul %r8,%rdx /* (c) copied to all bytes */ 18 */
62 test $7,%dil 19.Lalign:
63 jnz 20f /* jump if misaligned */ 20 testb $7,%dil
64 21 je .Lword_aligned
65 _ALIGN_TEXT /* one byte nop */ 22 movb (%rdi),%dl
661: 23 cmpb %cl,%dl
67 movq (%rdi),%rax /* bytes to check (x) */ 24 je .Ldone
682: 25 incq %rdi
69 addq $8,%rdi 26 testb %dl,%dl
70 mov %rax,%r10 27 jne .Lalign
71 mov %rax,%r11 /* for 'char' check */ 28 jmp .Lzero
72 not %r10 /* invert of data (~x) */ 29
73 30.Lword_aligned:
74 xorq %rdx,%r11 /* convert 'char' test to one for NUL */ 31 /* copy char to all bytes in word */
75 subq %r8,%rax /* x - 0x10 */ 32 movb %cl,%ch
76 movq %r10,%rsi /* ~x */ 33 movq %rcx,%rdx
77 subq %r8,%r11 /* (x ^ c) - 0x10 */ 34 salq $16,%rcx
78/* 35 orq %rdx,%rcx
79 * Here we could check ((x - 0x10) | ((x ^ c) - 0x10)) & 0x80 36 movq %rcx,%rdx
80 * and short-circuit the case where no top bits are set, and 37 salq $32,%rcx
81 * we continue the loop. 38 orq %rdx,%rcx
82 * However it needs 3 more clocks that are difficult to interleave 
83 * in the existing dependency chain ... 
84 */ 
85 andq %r9,%rax /* (x - 0x10) & 0x80 */ 
86 xorq %rdx,%rsi /* c ^ ~x == ~(c ^ x) */ 
87 andq %r9,%r11 /* ((x ^ c) - 0x10) & 0x80 */ 
88 andq %r10,%rax /* (x - 0x10) & 0x80 & ~x */ 
89 jne 10f /* jump if string ends */ 
90 andq %rsi,%r11 /* ((x ^ c) - 0x10) & 0x80 & ~(x ^ c) */ 
91 je 1b /* jump if no match */ 
92 
93 /* Found char, since LE can use bit scan */ 
94 bsf %r11,%r11 /* 7, 15, 23 ... 63 */ 
958: shr $3,%r11 /* 0, 1, 2 .. 7 */ 
96 lea -8(%r11,%rdi),%rax 
97 ret 
98 39
99/* End of string, check whether char is before NUL */ 40 movabsq $0x0101010101010101,%r8
100 _ALIGN_TEXT /* adds three byte nop */ 41 movabsq $0x8080808080808080,%r9
10110: 
102 bsf %rax,%rax /* count to NUL */ 
103 andq %rsi,%r11 /* check for char in last 8 bytes */ 
104 je 11f 
105 bsf %r11,%r11 /* NUL and char - see which was first */ 
106 cmp %r11,%rax 
107 jae 8b /* return 'found' if same - searching for NUL */ 
10811: xor %eax,%eax /* char not found */ 
109 ret 
110 42
111/* Source misaligned: read aligned word and make low bytes invalid */ 43 /* Check whether any byte in the word is equal to ch or 0. */
112/* I (dsl) think a _ALIGN_TEXT here will slow things down! */ 44 _ALIGN_TEXT
11320: 45.Lloop:
114 xor %rcx,%rcx 46 movq (%rdi),%rdx
115 sub %dil,%cl /* Convert low address values 1..7 ... */ 47 addq $8,%rdi
116 sbb %rsi,%rsi /* carry was set, so %rsi now ~0u! */ 48 movq %rdx,%rsi
117 and $7,%cl /* ... to 7..1 */ 49 subq %r8,%rdx
118 and $~7,%dil /* move address to start of word */ 50 xorq %rcx,%rsi
119 shl $3,%cl /* now 56, 48 ... 16, 8 */ 51 subq %r8,%rsi
120 movq (%rdi),%rax /* aligned word containing first data */ 52 orq %rsi,%rdx
121 xor %rdx,%rsi /* invert of search pattern (~c) */ 53 testq %r9,%rdx
122 je 22f /* searching for 0xff */ 54 je .Lloop
12321: shr %cl,%rsi /* ~c in low bytes */ 55
124 or %rsi,%rax /* set some bits making low bytes invalid */ 56 /*
125 jmp 2b 57 * In rare cases, the above loop may exit prematurely. We must
126 58 * return to the loop if none of the bytes in the word match
127/* We are searching for 0xff, so can't use ~pattern for invalid value */ 59 * ch or are equal to 0.
12822: 60 */
129 mov %r8,%r10 /* 0x01 pattern */ 61
130 lea (%r8,%r8),%rsi /* 0x02 - bits gets set (above) */ 62 movb -8(%rdi),%dl
131 not %r10 /* now 0xfe */ 63 cmpb %cl,%dl /* 1st byte == ch? */
132 sar %cl,%r10 /* top bytes 0xff */ 64 jne 1f
133 and %r10,%rax /* clear lsb from unwanted low bytes */ 65 subq $8,%rdi
134 jmp 21b 66 jmp .Ldone
 671: testb %dl,%dl /* 1st byte == 0? */
 68 je .Lzero
 69
 70 movb -7(%rdi),%dl
 71 cmpb %cl,%dl /* 2nd byte == ch? */
 72 jne 1f
 73 subq $7,%rdi
 74 jmp .Ldone
 751: testb %dl,%dl /* 2nd byte == 0? */
 76 je .Lzero
 77
 78 movb -6(%rdi),%dl
 79 cmpb %cl,%dl /* 3rd byte == ch? */
 80 jne 1f
 81 subq $6,%rdi
 82 jmp .Ldone
 831: testb %dl,%dl /* 3rd byte == 0? */
 84 je .Lzero
 85
 86 movb -5(%rdi),%dl
 87 cmpb %cl,%dl /* 4th byte == ch? */
 88 jne 1f
 89 subq $5,%rdi
 90 jmp .Ldone
 911: testb %dl,%dl /* 4th byte == 0? */
 92 je .Lzero
 93
 94 movb -4(%rdi),%dl
 95 cmpb %cl,%dl /* 5th byte == ch? */
 96 jne 1f
 97 subq $4,%rdi
 98 jmp .Ldone
 991: testb %dl,%dl /* 5th byte == 0? */
 100 je .Lzero
 101
 102 movb -3(%rdi),%dl
 103 cmpb %cl,%dl /* 6th byte == ch? */
 104 jne 1f
 105 subq $3,%rdi
 106 jmp .Ldone
 1071: testb %dl,%dl /* 6th byte == 0? */
 108 je .Lzero
 109
 110 movb -2(%rdi),%dl
 111 cmpb %cl,%dl /* 7th byte == ch? */
 112 jne 1f
 113 subq $2,%rdi
 114 jmp .Ldone
 1151: testb %dl,%dl /* 7th byte == 0? */
 116 je .Lzero
 117
 118 movb -1(%rdi),%dl
 119 cmpb %cl,%dl /* 8th byte == ch? */
 120 jne 1f
 121 subq $1,%rdi
 122 jmp .Ldone
 1231: testb %dl,%dl /* 8th byte == 0? */
 124 jne .Lloop
 125
 126.Lzero:
 127 /* If a ch wasn't found, return 0. */
 128 xorq %rdi,%rdi
135 129
136#ifdef TEST_STRCHR 130.Ldone:
137/* Trivial version for bug-fixing above */ 131 movq %rdi,%rax
138ENTRY(strchr) 
139 movq %rsi,%rdx 
140 movq %rdi,%rsi 
1411: 
142 lodsb 
143 cmp %al,%dl 
144 je 2f 
145 test %al,%al 
146 jne 1b 
147 xor %eax,%eax 
148 ret 
1492: lea -1(%rsi),%rax 
150 ret 132 ret
151#endif 
152 
153STRONG_ALIAS(index,strchr) 133STRONG_ALIAS(index,strchr)