Put back dsl's string changes, but fix memchr.S to use cmp so that the condition code is set (and fix the comments 0x10->0x01). From Anon Ymous. We need a test for memchr(x, -1)...
diff -r1.3 -r1.4 src/common/lib/libc/arch/x86_64/string/ffs.S
(christos)
--- src/common/lib/libc/arch/x86_64/string/ffs.S 2009/07/19 23:45:29 1.3
+++ src/common/lib/libc/arch/x86_64/string/ffs.S 2009/07/20 15:21:00 1.4
@@ -1,21 +1,20 @@ | @@ -1,21 +1,20 @@ | |||
1 | /* | 1 | /* | |
2 | * Written by J.T. Conklin <jtc@NetBSD.org>. | 2 | * Written by J.T. Conklin <jtc@NetBSD.org>. | |
3 | * Public domain. | 3 | * Public domain. | |
4 | * Adapted for NetBSD/x86_64 by Frank van der Linden <fvdl@wasabisystems.com> | 4 | * Adapted for NetBSD/x86_64 by Frank van der Linden <fvdl@wasabisystems.com> | |
5 | */ | 5 | */ | |
6 | 6 | |||
7 | #include <machine/asm.h> | 7 | #include <machine/asm.h> | |
8 | 8 | |||
9 | #if defined(LIBC_SCCS) | 9 | #if defined(LIBC_SCCS) | |
10 | RCSID("$NetBSD: ffs.S,v 1.3 2009/07/19 23:45:29 christos Exp $") | 10 | RCSID("$NetBSD: ffs.S,v 1.4 2009/07/20 15:21:00 christos Exp $") | |
11 | #endif | 11 | #endif | |
12 | 12 | |||
13 | ENTRY(ffs) | 13 | ENTRY(ffs) | |
14 | bsfl %edi,%eax | 14 | bsfl %edi,%eax | |
15 | jz L1 /* ZF is set if all bits are 0 */ | 15 | jz 1f /* ZF is set if all bits are 0 */ | |
16 | incl %eax /* bits numbered from 1, not 0 */ | 16 | incl %eax /* bits numbered from 1, not 0 */ | |
17 | ret | 17 | ret | |
18 | 18 | |||
19 | _ALIGN_TEXT | 19 | 1: xorl %eax,%eax /* clear result */ | |
20 | L1: xorl %eax,%eax /* clear result */ | |||
21 | ret | 20 | ret |
--- src/common/lib/libc/arch/x86_64/string/memchr.S 2009/07/19 23:45:29 1.3
+++ src/common/lib/libc/arch/x86_64/string/memchr.S 2009/07/20 15:21:00 1.4
@@ -1,111 +1,115 @@ | @@ -1,111 +1,115 @@ | |||
1 | /* | 1 | /* $NetBSD: memchr.S,v 1.4 2009/07/20 15:21:00 christos Exp $ */ | |
2 | * Written by J.T. Conklin <jtc@acorntoolworks.com> | 2 | ||
3 | * Public domain. | 3 | /*- | |
4 | * Copyright (c) 2009 The NetBSD Foundation, Inc. | |||
5 | * All rights reserved. | |||
6 | * | |||
7 | * This code is derived from software contributed to The NetBSD Foundation | |||
8 | * by David Laight. | |||
9 | * | |||
10 | * Redistribution and use in source and binary forms, with or without | |||
11 | * modification, are permitted provided that the following conditions | |||
12 | * are met: | |||
13 | * 1. Redistributions of source code must retain the above copyright | |||
14 | * notice, this list of conditions and the following disclaimer. | |||
15 | * 2. Redistributions in binary form must reproduce the above copyright | |||
16 | * notice, this list of conditions and the following disclaimer in the | |||
17 | * documentation and/or other materials provided with the distribution. | |||
18 | * | |||
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |||
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |||
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |||
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |||
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
29 | * POSSIBILITY OF SUCH DAMAGE. | |||
4 | */ | 30 | */ | |
5 | 31 | |||
6 | #include <machine/asm.h> | 32 | #include <machine/asm.h> | |
7 | 33 | |||
8 | #if defined(LIBC_SCCS) | 34 | #if defined(LIBC_SCCS) | |
9 | RCSID("$NetBSD: memchr.S,v 1.3 2009/07/19 23:45:29 christos Exp $") | 35 | RCSID("$NetBSD: memchr.S,v 1.4 2009/07/20 15:21:00 christos Exp $") | |
10 | #endif | 36 | #endif | |
11 | 37 | |||
12 | ENTRY(memchr) | 38 | /* | |
13 | movzbq %sil,%rcx | 39 | * The instruction sequences used try to avoid data dependencies | |
14 | 40 | * between adjacent instructions (to allow parallel execution). | ||
15 | /* | 41 | * The 'imul' for %r9 could be put into the delay following the | |
16 | * Align to word boundary. | 42 | * memory read (ie inside the loop) at no obvious cost - except | |
17 | * Consider unrolling loop? | 43 | * that the loop is currently exactly 32 bytes - 2 fetch blocks!. | |
18 | */ | 44 | * | |
19 | testq %rdx,%rdx /* nbytes == 0? */ | 45 | * I don't think aligning any of the other branch targets is useful. | |
20 | je .Lzero | 46 | */ | |
21 | .Lalign: | |||
22 | testb $7,%dil | |||
23 | je .Lword_aligned | |||
24 | movq %rdi,%rax | |||
25 | cmpb (%rdi),%cl | |||
26 | je .Ldone | |||
27 | incq %rdi | |||
28 | decq %rdx | |||
29 | jnz .Lalign | |||
30 | jmp .Lzero | |||
31 | ||||
32 | .Lword_aligned: | |||
33 | /* copy char to all bytes in word */ | |||
34 | movb %cl,%ch | |||
35 | movq %rcx,%rsi | |||
36 | salq $16,%rcx | |||
37 | orq %rsi,%rcx | |||
38 | movq %rcx,%rsi | |||
39 | salq $32,%rcx | |||
40 | orq %rsi,%rcx | |||
41 | 47 | |||
48 | ENTRY(memchr) | |||
42 | movabsq $0x0101010101010101,%r8 | 49 | movabsq $0x0101010101010101,%r8 | |
43 | movabsq $0x8080808080808080,%r9 | 50 | lea (%rdi,%rdx),%r10 /* limit of buffer to scan */ | |
51 | movzbq %sil,%rsi /* mask high bits! */ | |||
44 | 52 | |||
45 | _ALIGN_TEXT | 53 | /* 'directpath' imuls can execute 3 at a time ... (amd) */ | |
46 | .Lloop: | 54 | imul %r8,%rsi /* search byte replicated in word */ | |
47 | cmpq $7,%rdx /* nbytes > 7? */ | 55 | imul $0x80,%r8,%r9 /* 0x8080808080808080 */ | |
48 | jbe .Lbyte | 56 | test $7,%dil | |
49 | movq (%rdi),%rsi | 57 | jnz 20f /* jump if misaligned */ | |
50 | addq $8,%rdi | 58 | jmp 1f /* jump to avoid 4 nops (13 bytes) in gap */ | |
51 | xorq %rcx,%rsi | 59 | ||
52 | subq $8,%rdx | 60 | _ALIGN_TEXT /* entire loop now in 32 aligned bytes */ | |
53 | subq %r8,%rsi | 61 | 1: | |
54 | testq %r9,%rsi | 62 | cmpq %r10,%rdi /* end of buffer ? */ | |
55 | je .Lloop | 63 | jae 30f /* jump if so */ | |
56 | ||||
57 | /* | |||
58 | * In rare cases, the above loop may exit prematurely. We must | |||
59 | * return to the loop if none of the bytes in the word are | |||
60 | * equal to ch. | |||
61 | */ | |||
62 | ||||
63 | leaq -8(%rdi),%rax | |||
64 | cmpb -8(%rdi),%cl /* 1st byte == ch? */ | |||
65 | je .Ldone | |||
66 | ||||
67 | leaq -7(%rdi),%rax | |||
68 | cmpb -7(%rdi),%cl /* 2nd byte == ch? */ | |||
69 | je .Ldone | |||
70 | ||||
71 | leaq -6(%rdi),%rax | |||
72 | cmpb -6(%rdi),%cl /* 3rd byte == ch? */ | |||
73 | je .Ldone | |||
74 | ||||
75 | leaq -5(%rdi),%rax | |||
76 | cmpb -5(%rdi),%cl /* 4th byte == ch? */ | |||
77 | je .Ldone | |||
78 | ||||
79 | leaq -4(%rdi),%rax | |||
80 | cmpb -4(%rdi),%cl /* 5th byte == ch? */ | |||
81 | je .Ldone | |||
82 | ||||
83 | leaq -3(%rdi),%rax | |||
84 | cmpb -3(%rdi),%cl /* 6th byte == ch? */ | |||
85 | je .Ldone | |||
86 | ||||
87 | leaq -2(%rdi),%rax | |||
88 | cmpb -2(%rdi),%cl /* 7th byte == ch? */ | |||
89 | je .Ldone | |||
90 | ||||
91 | leaq -1(%rdi),%rax | |||
92 | cmpb -1(%rdi),%cl /* 8th byte == ch? */ | |||
93 | jne .Lloop | |||
94 | ret | |||
95 | 64 | |||
96 | .Lbyte: | 65 | movq (%rdi),%rax /* value to check */ | |
97 | testq %rdx,%rdx | 66 | 2: | |
98 | je .Lzero | 67 | addq $8,%rdi | |
99 | .Lbyte_loop: | 68 | xorq %rsi,%rax /* now looking for zeros */ | |
100 | movq %rdi,%rax | 69 | mov %rax,%rcx | |
101 | cmpb (%rdi),%cl | 70 | subq %r8,%rax /* x - 0x01 */ | |
102 | je .Ldone | 71 | not %rcx | |
103 | incq %rdi | 72 | andq %r9,%rax /* (x - 0x01) & 0x80 */ | |
104 | decq %rdx | 73 | andq %rcx,%rax /* ((x - 0x01) & 0x80) & ~x */ | |
105 | jnz .Lbyte_loop | 74 | je 1b /* jump if not found */ | |
106 | 75 | |||
107 | .Lzero: | 76 | /* Found byte in word, get its address */ | |
108 | xorq %rax,%rax | 77 | bsf %rax,%rax | |
78 | shr $3,%eax | |||
79 | lea -8(%rax,%rdi),%rax | |||
80 | cmpq %r10,%rax /* need to check not beyond buffer */ | |||
81 | jae 30f | |||
82 | rep | |||
83 | ret /* amd - no ret after jmp */ | |||
84 | ||||
85 | /* Input misaligned, read aligned and kill low bits */ | |||
86 | /* (Getting a -1 is surprisingly hard work!) */ | |||
87 | 20: | |||
88 | xor %eax,%eax /* zeros all 64 bits */ | |||
89 | mov %dil,%cl /* misalignment amount 1..7 (+high bits )*/ | |||
90 | test %rdx,%rdx /* zero length, don't read */ | |||
91 | jz 30f | |||
92 | ||||
93 | and $~7,%dil /* %rdi now start of word */ | |||
94 | lea -1(%rax),%r11 /* all 0xff */ | |||
95 | and $7,%cl /* 1..7 */ | |||
96 | ||||
97 | mov (%rdi),%rax /* word containing first byte */ | |||
98 | shl $3,%cl /* 8..56 */ | |||
99 | cmp %r11,%rsi /* searching for 0xff */ | |||
100 | jz 25f | |||
101 | ||||
102 | /* Searching for other than 0xff, set low bytes */ | |||
103 | shl %cl,%r11 /* 0xff in high (wanted) bytes */ | |||
104 | not %r11 /* 0xff in low (unwanted) bytes */ | |||
105 | or %r11,%rax /* low bytes now set */ | |||
106 | jmp 2b | |||
107 | ||||
108 | 25: /* Searching for 0xff, clear low bytes */ | |||
109 | shl %cl,%r11 /* 0xff in high (wanted) bytes */ | |||
110 | and %r11,%rax /* low bytes now zero */ | |||
111 | jmp 2b | |||
109 | 112 | |||
110 | .Ldone: | 113 | /* Not found */ | |
114 | 30: xorq %rax,%rax | |||
111 | ret | 115 | ret |
--- src/common/lib/libc/arch/x86_64/string/strchr.S 2009/07/19 23:45:29 1.5
+++ src/common/lib/libc/arch/x86_64/string/strchr.S 2009/07/20 15:21:00 1.6
@@ -1,133 +1,153 @@ | @@ -1,133 +1,153 @@ | |||
1 | /* | 1 | /* $NetBSD: strchr.S,v 1.6 2009/07/20 15:21:00 christos Exp $ */ | |
2 | * Written by J.T. Conklin <jtc@acorntoolworks.com> | 2 | ||
3 | * Public domain. | 3 | /*- | |
4 | * Copyright (c) 2009 The NetBSD Foundation, Inc. | |||
5 | * All rights reserved. | |||
6 | * | |||
7 | * This code is derived from software contributed to The NetBSD Foundation | |||
8 | * by David Laight. | |||
9 | * | |||
10 | * Redistribution and use in source and binary forms, with or without | |||
11 | * modification, are permitted provided that the following conditions | |||
12 | * are met: | |||
13 | * 1. Redistributions of source code must retain the above copyright | |||
14 | * notice, this list of conditions and the following disclaimer. | |||
15 | * 2. Redistributions in binary form must reproduce the above copyright | |||
16 | * notice, this list of conditions and the following disclaimer in the | |||
17 | * documentation and/or other materials provided with the distribution. | |||
18 | * | |||
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |||
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |||
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |||
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |||
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
29 | * POSSIBILITY OF SUCH DAMAGE. | |||
4 | */ | 30 | */ | |
5 | 31 | |||
32 | /* See comments in strlen.S about checking words for byte values */ | |||
33 | ||||
6 | #include <machine/asm.h> | 34 | #include <machine/asm.h> | |
7 | 35 | |||
8 | #if defined(LIBC_SCCS) | 36 | #if defined(LIBC_SCCS) | |
9 | RCSID("$NetBSD: strchr.S,v 1.5 2009/07/19 23:45:29 christos Exp $") | 37 | RCSID("$NetBSD: strchr.S,v 1.6 2009/07/20 15:21:00 christos Exp $") | |
10 | #endif | 38 | #endif | |
11 | 39 | |||
12 | ENTRY(strchr) | 40 | /* | |
13 | movzbq %sil,%rcx | 41 | * On entry %rdi is the buffer and the low byte of %rsi (%sil) the | |
14 | 42 | * character to search for. | ||
15 | /* | 43 | * | |
16 | * Align to word boundary. | 44 | * Registers %rdx, %rcx, %r8-%r11 and %rax are also usable | |
17 | * Consider unrolling loop? | 45 | */ | |
18 | */ | |||
19 | .Lalign: | |||
20 | testb $7,%dil | |||
21 | je .Lword_aligned | |||
22 | movb (%rdi),%dl | |||
23 | cmpb %cl,%dl | |||
24 | je .Ldone | |||
25 | incq %rdi | |||
26 | testb %dl,%dl | |||
27 | jne .Lalign | |||
28 | jmp .Lzero | |||
29 | ||||
30 | .Lword_aligned: | |||
31 | /* copy char to all bytes in word */ | |||
32 | movb %cl,%ch | |||
33 | movq %rcx,%rdx | |||
34 | salq $16,%rcx | |||
35 | orq %rdx,%rcx | |||
36 | movq %rcx,%rdx | |||
37 | salq $32,%rcx | |||
38 | orq %rdx,%rcx | |||
39 | 46 | |||
47 | /* Uncomment below to get regression test to run this version but | |||
48 | * have everything else use the trivial one below. */ | |||
49 | /* #define TEST_STRCHR */ | |||
50 | ||||
51 | #ifdef TEST_STRCHR | |||
52 | ENTRY(test_strchr) | |||
53 | #else | |||
54 | ENTRY(strchr) | |||
55 | #endif | |||
40 | movabsq $0x0101010101010101,%r8 | 56 | movabsq $0x0101010101010101,%r8 | |
41 | movabsq $0x8080808080808080,%r9 | |||
42 | 57 | |||
43 | /* Check whether any byte in the word is equal to ch or 0. */ | 58 | movzbq %sil,%rdx /* value to search for (c) */ | |
44 | _ALIGN_TEXT | 59 | /* These imul are 'directpath' on athlons, so are fast */ | |
45 | .Lloop: | 60 | imul $0x80,%r8,%r9 /* 0x8080808080808080 */ | |
46 | movq (%rdi),%rdx | 61 | imul %r8,%rdx /* (c) copied to all bytes */ | |
62 | test $7,%dil | |||
63 | jnz 20f /* jump if misaligned */ | |||
64 | ||||
65 | _ALIGN_TEXT /* one byte nop */ | |||
66 | 1: | |||
67 | movq (%rdi),%rax /* bytes to check (x) */ | |||
68 | 2: | |||
47 | addq $8,%rdi | 69 | addq $8,%rdi | |
48 | movq %rdx,%rsi | 70 | mov %rax,%r10 | |
49 | subq %r8,%rdx | 71 | mov %rax,%r11 /* for 'char' check */ | |
50 | xorq %rcx,%rsi | 72 | not %r10 /* invert of data (~x) */ | |
51 | subq %r8,%rsi | 73 | ||
52 | orq %rsi,%rdx | 74 | xorq %rdx,%r11 /* convert 'char' test to one for NUL */ | |
53 | testq %r9,%rdx | 75 | subq %r8,%rax /* x - 0x01 */ | |
54 | je .Lloop | 76 | movq %r10,%rsi /* ~x */ | |
55 | 77 | subq %r8,%r11 /* (x ^ c) - 0x01 */ | ||
56 | /* | 78 | /* | |
57 | * In rare cases, the above loop may exit prematurely. We must | 79 | * Here we could check ((x - 0x01) | ((x ^ c) - 0x01)) & 0x80 | |
58 | * return to the loop if none of the bytes in the word match | 80 | * and short-circuit the case where no top bits are set, and | |
59 | * ch or are equal to 0. | 81 | * we continue the loop. | |
60 | */ | 82 | * However it needs 3 more clocks that are difficult to interleave | |
61 | 83 | * in the existing dependency chain ... | ||
62 | movb -8(%rdi),%dl | 84 | */ | |
63 | cmpb %cl,%dl /* 1st byte == ch? */ | 85 | andq %r9,%rax /* (x - 0x01) & 0x80 */ | |
64 | jne 1f | 86 | xorq %rdx,%rsi /* c ^ ~x == ~(c ^ x) */ | |
65 | subq $8,%rdi | 87 | andq %r9,%r11 /* ((x ^ c) - 0x01) & 0x80 */ | |
66 | jmp .Ldone | 88 | andq %r10,%rax /* (x - 0x01) & 0x80 & ~x */ | |
67 | 1: testb %dl,%dl /* 1st byte == 0? */ | 89 | jne 10f /* jump if string ends */ | |
68 | je .Lzero | 90 | andq %rsi,%r11 /* ((x ^ c) - 0x01) & 0x80 & ~(x ^ c) */ | |
69 | 91 | je 1b /* jump if no match */ | ||
70 | movb -7(%rdi),%dl | 92 | ||
71 | cmpb %cl,%dl /* 2nd byte == ch? */ | 93 | /* Found char, since LE can use bit scan */ | |
72 | jne 1f | 94 | bsf %r11,%r11 /* 7, 15, 23 ... 63 */ | |
73 | subq $7,%rdi | 95 | 8: shr $3,%r11 /* 0, 1, 2 .. 7 */ | |
74 | jmp .Ldone | 96 | lea -8(%r11,%rdi),%rax | |
75 | 1: testb %dl,%dl /* 2nd byte == 0? */ | 97 | ret | |
76 | je .Lzero | |||
77 | ||||
78 | movb -6(%rdi),%dl | |||
79 | cmpb %cl,%dl /* 3rd byte == ch? */ | |||
80 | jne 1f | |||
81 | subq $6,%rdi | |||
82 | jmp .Ldone | |||
83 | 1: testb %dl,%dl /* 3rd byte == 0? */ | |||
84 | je .Lzero | |||
85 | ||||
86 | movb -5(%rdi),%dl | |||
87 | cmpb %cl,%dl /* 4th byte == ch? */ | |||
88 | jne 1f | |||
89 | subq $5,%rdi | |||
90 | jmp .Ldone | |||
91 | 1: testb %dl,%dl /* 4th byte == 0? */ | |||
92 | je .Lzero | |||
93 | ||||
94 | movb -4(%rdi),%dl | |||
95 | cmpb %cl,%dl /* 5th byte == ch? */ | |||
96 | jne 1f | |||
97 | subq $4,%rdi | |||
98 | jmp .Ldone | |||
99 | 1: testb %dl,%dl /* 5th byte == 0? */ | |||
100 | je .Lzero | |||
101 | ||||
102 | movb -3(%rdi),%dl | |||
103 | cmpb %cl,%dl /* 6th byte == ch? */ | |||
104 | jne 1f | |||
105 | subq $3,%rdi | |||
106 | jmp .Ldone | |||
107 | 1: testb %dl,%dl /* 6th byte == 0? */ | |||
108 | je .Lzero | |||
109 | ||||
110 | movb -2(%rdi),%dl | |||
111 | cmpb %cl,%dl /* 7th byte == ch? */ | |||
112 | jne 1f | |||
113 | subq $2,%rdi | |||
114 | jmp .Ldone | |||
115 | 1: testb %dl,%dl /* 7th byte == 0? */ | |||
116 | je .Lzero | |||
117 | ||||
118 | movb -1(%rdi),%dl | |||
119 | cmpb %cl,%dl /* 8th byte == ch? */ | |||
120 | jne 1f | |||
121 | subq $1,%rdi | |||
122 | jmp .Ldone | |||
123 | 1: testb %dl,%dl /* 8th byte == 0? */ | |||
124 | jne .Lloop | |||
125 | ||||
126 | .Lzero: | |||
127 | /* If a ch wasn't found, return 0. */ | |||
128 | xorq %rdi,%rdi | |||
129 | 98 | |||
130 | .Ldone: | 99 | /* End of string, check whether char is before NUL */ | |
131 | movq %rdi,%rax | 100 | _ALIGN_TEXT /* adds three byte nop */ | |
101 | 10: | |||
102 | bsf %rax,%rax /* count to NUL */ | |||
103 | andq %rsi,%r11 /* check for char in last 8 bytes */ | |||
104 | je 11f | |||
105 | bsf %r11,%r11 /* NUL and char - see which was first */ | |||
106 | cmp %r11,%rax | |||
107 | jae 8b /* return 'found' if same - searching for NUL */ | |||
108 | 11: xor %eax,%eax /* char not found */ | |||
132 | ret | 109 | ret | |
110 | ||||
111 | /* Source misaligned: read aligned word and make low bytes invalid */ | |||
112 | /* I (dsl) think a _ALIGN_TEXT here will slow things down! */ | |||
113 | 20: | |||
114 | xor %rcx,%rcx | |||
115 | sub %dil,%cl /* Convert low address values 1..7 ... */ | |||
116 | sbb %rsi,%rsi /* carry was set, so %rsi now ~0u! */ | |||
117 | and $7,%cl /* ... to 7..1 */ | |||
118 | and $~7,%dil /* move address to start of word */ | |||
119 | shl $3,%cl /* now 56, 48 ... 16, 8 */ | |||
120 | movq (%rdi),%rax /* aligned word containing first data */ | |||
121 | xor %rdx,%rsi /* invert of search pattern (~c) */ | |||
122 | je 22f /* searching for 0xff */ | |||
123 | 21: shr %cl,%rsi /* ~c in low bytes */ | |||
124 | or %rsi,%rax /* set some bits making low bytes invalid */ | |||
125 | jmp 2b | |||
126 | ||||
127 | /* We are searching for 0xff, so can't use ~pattern for invalid value */ | |||
128 | 22: | |||
129 | mov %r8,%r10 /* 0x01 pattern */ | |||
130 | lea (%r8,%r8),%rsi /* 0x02 - bits gets set (above) */ | |||
131 | not %r10 /* now 0xfe */ | |||
132 | sar %cl,%r10 /* top bytes 0xff */ | |||
133 | and %r10,%rax /* clear lsb from unwanted low bytes */ | |||
134 | jmp 21b | |||
135 | ||||
136 | #ifdef TEST_STRCHR | |||
137 | /* Trivial version for bug-fixing above */ | |||
138 | ENTRY(strchr) | |||
139 | movq %rsi,%rdx | |||
140 | movq %rdi,%rsi | |||
141 | 1: | |||
142 | lodsb | |||
143 | cmp %al,%dl | |||
144 | je 2f | |||
145 | test %al,%al | |||
146 | jne 1b | |||
147 | xor %eax,%eax | |||
148 | ret | |||
149 | 2: lea -1(%rsi),%rax | |||
150 | ret | |||
151 | #endif | |||
152 | ||||
133 | STRONG_ALIAS(index,strchr) | 153 | STRONG_ALIAS(index,strchr) |