| @@ -1,153 +1,133 @@ | | | @@ -1,153 +1,133 @@ |
1 | /* $NetBSD: strchr.S,v 1.4 2009/07/18 16:40:31 dsl Exp $ */ | | 1 | /* |
2 | | | 2 | * Written by J.T. Conklin <jtc@acorntoolworks.com> |
3 | /*- | | 3 | * Public domain. |
4 | * Copyright (c) 2009 The NetBSD Foundation, Inc. | | | |
5 | * All rights reserved. | | | |
6 | * | | | |
7 | * This code is derived from software contributed to The NetBSD Foundation | | | |
8 | * by David Laight. | | | |
9 | * | | | |
10 | * Redistribution and use in source and binary forms, with or without | | | |
11 | * modification, are permitted provided that the following conditions | | | |
12 | * are met: | | | |
13 | * 1. Redistributions of source code must retain the above copyright | | | |
14 | * notice, this list of conditions and the following disclaimer. | | | |
15 | * 2. Redistributions in binary form must reproduce the above copyright | | | |
16 | * notice, this list of conditions and the following disclaimer in the | | | |
17 | * documentation and/or other materials provided with the distribution. | | | |
18 | * | | | |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | | |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | | |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | | |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | | |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | | |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | | |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | | |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | | |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | | |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | | |
29 | * POSSIBILITY OF SUCH DAMAGE. | | | |
30 | */ | | 4 | */ |
31 | | | 5 | |
32 | /* See comments in strlen.S about checking words for byte values */ | | | |
33 | | | | |
34 | #include <machine/asm.h> | | 6 | #include <machine/asm.h> |
35 | | | 7 | |
36 | #if defined(LIBC_SCCS) | | 8 | #if defined(LIBC_SCCS) |
37 | RCSID("$NetBSD: strchr.S,v 1.4 2009/07/18 16:40:31 dsl Exp $") | | 9 | RCSID("$NetBSD: strchr.S,v 1.5 2009/07/19 23:45:29 christos Exp $") |
38 | #endif | | 10 | #endif |
39 | | | 11 | |
40 | /* | | | |
41 | * On entry %rdi is the buffer and the low byte of %rsi (%sil) the | | | |
42 | * character to search for. | | | |
43 | * | | | |
44 | * Registers %rdx, %rcx, %r8-%r11 and %rax are also usable | | | |
45 | */ | | | |
46 | | | | |
47 | /* Uncomment below to get regression test to run this version but | | | |
48 | * have everything else use the trivial one below. */ | | | |
49 | /* #define TEST_STRCHR */ | | | |
50 | | | | |
51 | #ifdef TEST_STRCHR | | | |
52 | ENTRY(test_strchr) | | | |
53 | #else | | | |
54 | ENTRY(strchr) | | 12 | ENTRY(strchr) |
55 | #endif | | 13 | movzbq %sil,%rcx |
56 | movabsq $0x0101010101010101,%r8 | | | |
57 | | | 14 | |
58 | movzbq %sil,%rdx /* value to search for (c) */ | | 15 | /* |
59 | /* These imul are 'directpath' on athlons, so are fast */ | | 16 | * Align to word boundary. |
60 | imul $0x80,%r8,%r9 /* 0x8080808080808080 */ | | 17 | * Consider unrolling loop? |
61 | imul %r8,%rdx /* (c) copied to all bytes */ | | 18 | */ |
62 | test $7,%dil | | 19 | .Lalign: |
63 | jnz 20f /* jump if misaligned */ | | 20 | testb $7,%dil |
64 | | | 21 | je .Lword_aligned |
65 | _ALIGN_TEXT /* one byte nop */ | | 22 | movb (%rdi),%dl |
66 | 1: | | 23 | cmpb %cl,%dl |
67 | movq (%rdi),%rax /* bytes to check (x) */ | | 24 | je .Ldone |
68 | 2: | | 25 | incq %rdi |
69 | addq $8,%rdi | | 26 | testb %dl,%dl |
70 | mov %rax,%r10 | | 27 | jne .Lalign |
71 | mov %rax,%r11 /* for 'char' check */ | | 28 | jmp .Lzero |
72 | not %r10 /* invert of data (~x) */ | | 29 | |
73 | | | 30 | .Lword_aligned: |
74 | xorq %rdx,%r11 /* convert 'char' test to one for NUL */ | | 31 | /* copy char to all bytes in word */ |
75 | subq %r8,%rax /* x - 0x10 */ | | 32 | movb %cl,%ch |
76 | movq %r10,%rsi /* ~x */ | | 33 | movq %rcx,%rdx |
77 | subq %r8,%r11 /* (x ^ c) - 0x10 */ | | 34 | salq $16,%rcx |
78 | /* | | 35 | orq %rdx,%rcx |
79 | * Here we could check ((x - 0x10) | ((x ^ c) - 0x10)) & 0x80 | | 36 | movq %rcx,%rdx |
80 | * and short-circuit the case where no top bits are set, and | | 37 | salq $32,%rcx |
81 | * we continue the loop. | | 38 | orq %rdx,%rcx |
82 | * However it needs 3 more clocks that are difficult to interleave | | | |
83 | * in the existing dependency chain ... | | | |
84 | */ | | | |
85 | andq %r9,%rax /* (x - 0x10) & 0x80 */ | | | |
86 | xorq %rdx,%rsi /* c ^ ~x == ~(c ^ x) */ | | | |
87 | andq %r9,%r11 /* ((x ^ c) - 0x10) & 0x80 */ | | | |
88 | andq %r10,%rax /* (x - 0x10) & 0x80 & ~x */ | | | |
89 | jne 10f /* jump if string ends */ | | | |
90 | andq %rsi,%r11 /* ((x ^ c) - 0x10) & 0x80 & ~(x ^ c) */ | | | |
91 | je 1b /* jump if no match */ | | | |
92 | | | | |
93 | /* Found char, since LE can use bit scan */ | | | |
94 | bsf %r11,%r11 /* 7, 15, 23 ... 63 */ | | | |
95 | 8: shr $3,%r11 /* 0, 1, 2 .. 7 */ | | | |
96 | lea -8(%r11,%rdi),%rax | | | |
97 | ret | | | |
98 | | | 39 | |
99 | /* End of string, check whether char is before NUL */ | | 40 | movabsq $0x0101010101010101,%r8 |
100 | _ALIGN_TEXT /* adds three byte nop */ | | 41 | movabsq $0x8080808080808080,%r9 |
101 | 10: | | | |
102 | bsf %rax,%rax /* count to NUL */ | | | |
103 | andq %rsi,%r11 /* check for char in last 8 bytes */ | | | |
104 | je 11f | | | |
105 | bsf %r11,%r11 /* NUL and char - see which was first */ | | | |
106 | cmp %r11,%rax | | | |
107 | jae 8b /* return 'found' if same - searching for NUL */ | | | |
108 | 11: xor %eax,%eax /* char not found */ | | | |
109 | ret | | | |
110 | | | 42 | |
111 | /* Source misaligned: read aligned word and make low bytes invalid */ | | 43 | /* Check whether any byte in the word is equal to ch or 0. */ |
112 | /* I (dsl) think a _ALIGN_TEXT here will slow things down! */ | | 44 | _ALIGN_TEXT |
113 | 20: | | 45 | .Lloop: |
114 | xor %rcx,%rcx | | 46 | movq (%rdi),%rdx |
115 | sub %dil,%cl /* Convert low address values 1..7 ... */ | | 47 | addq $8,%rdi |
116 | sbb %rsi,%rsi /* carry was set, so %rsi now ~0u! */ | | 48 | movq %rdx,%rsi |
117 | and $7,%cl /* ... to 7..1 */ | | 49 | subq %r8,%rdx |
118 | and $~7,%dil /* move address to start of word */ | | 50 | xorq %rcx,%rsi |
119 | shl $3,%cl /* now 56, 48 ... 16, 8 */ | | 51 | subq %r8,%rsi |
120 | movq (%rdi),%rax /* aligned word containing first data */ | | 52 | orq %rsi,%rdx |
121 | xor %rdx,%rsi /* invert of search pattern (~c) */ | | 53 | testq %r9,%rdx |
122 | je 22f /* searching for 0xff */ | | 54 | je .Lloop |
123 | 21: shr %cl,%rsi /* ~c in low bytes */ | | 55 | |
124 | or %rsi,%rax /* set some bits making low bytes invalid */ | | 56 | /* |
125 | jmp 2b | | 57 | * In rare cases, the above loop may exit prematurely. We must |
126 | | | 58 | * return to the loop if none of the bytes in the word match |
127 | /* We are searching for 0xff, so can't use ~pattern for invalid value */ | | 59 | * ch or are equal to 0. |
128 | 22: | | 60 | */ |
129 | mov %r8,%r10 /* 0x01 pattern */ | | 61 | |
130 | lea (%r8,%r8),%rsi /* 0x02 - bits gets set (above) */ | | 62 | movb -8(%rdi),%dl |
131 | not %r10 /* now 0xfe */ | | 63 | cmpb %cl,%dl /* 1st byte == ch? */ |
132 | sar %cl,%r10 /* top bytes 0xff */ | | 64 | jne 1f |
133 | and %r10,%rax /* clear lsb from unwanted low bytes */ | | 65 | subq $8,%rdi |
134 | jmp 21b | | 66 | jmp .Ldone |
| | | 67 | 1: testb %dl,%dl /* 1st byte == 0? */ |
| | | 68 | je .Lzero |
| | | 69 | |
| | | 70 | movb -7(%rdi),%dl |
| | | 71 | cmpb %cl,%dl /* 2nd byte == ch? */ |
| | | 72 | jne 1f |
| | | 73 | subq $7,%rdi |
| | | 74 | jmp .Ldone |
| | | 75 | 1: testb %dl,%dl /* 2nd byte == 0? */ |
| | | 76 | je .Lzero |
| | | 77 | |
| | | 78 | movb -6(%rdi),%dl |
| | | 79 | cmpb %cl,%dl /* 3rd byte == ch? */ |
| | | 80 | jne 1f |
| | | 81 | subq $6,%rdi |
| | | 82 | jmp .Ldone |
| | | 83 | 1: testb %dl,%dl /* 3rd byte == 0? */ |
| | | 84 | je .Lzero |
| | | 85 | |
| | | 86 | movb -5(%rdi),%dl |
| | | 87 | cmpb %cl,%dl /* 4th byte == ch? */ |
| | | 88 | jne 1f |
| | | 89 | subq $5,%rdi |
| | | 90 | jmp .Ldone |
| | | 91 | 1: testb %dl,%dl /* 4th byte == 0? */ |
| | | 92 | je .Lzero |
| | | 93 | |
| | | 94 | movb -4(%rdi),%dl |
| | | 95 | cmpb %cl,%dl /* 5th byte == ch? */ |
| | | 96 | jne 1f |
| | | 97 | subq $4,%rdi |
| | | 98 | jmp .Ldone |
| | | 99 | 1: testb %dl,%dl /* 5th byte == 0? */ |
| | | 100 | je .Lzero |
| | | 101 | |
| | | 102 | movb -3(%rdi),%dl |
| | | 103 | cmpb %cl,%dl /* 6th byte == ch? */ |
| | | 104 | jne 1f |
| | | 105 | subq $3,%rdi |
| | | 106 | jmp .Ldone |
| | | 107 | 1: testb %dl,%dl /* 6th byte == 0? */ |
| | | 108 | je .Lzero |
| | | 109 | |
| | | 110 | movb -2(%rdi),%dl |
| | | 111 | cmpb %cl,%dl /* 7th byte == ch? */ |
| | | 112 | jne 1f |
| | | 113 | subq $2,%rdi |
| | | 114 | jmp .Ldone |
| | | 115 | 1: testb %dl,%dl /* 7th byte == 0? */ |
| | | 116 | je .Lzero |
| | | 117 | |
| | | 118 | movb -1(%rdi),%dl |
| | | 119 | cmpb %cl,%dl /* 8th byte == ch? */ |
| | | 120 | jne 1f |
| | | 121 | subq $1,%rdi |
| | | 122 | jmp .Ldone |
| | | 123 | 1: testb %dl,%dl /* 8th byte == 0? */ |
| | | 124 | jne .Lloop |
| | | 125 | |
| | | 126 | .Lzero: |
| | | 127 | /* If a ch wasn't found, return 0. */ |
| | | 128 | xorq %rdi,%rdi |
135 | | | 129 | |
136 | #ifdef TEST_STRCHR | | 130 | .Ldone: |
137 | /* Trivial version for bug-fixing above */ | | 131 | movq %rdi,%rax |
138 | ENTRY(strchr) | | | |
139 | movq %rsi,%rdx | | | |
140 | movq %rdi,%rsi | | | |
141 | 1: | | | |
142 | lodsb | | | |
143 | cmp %al,%dl | | | |
144 | je 2f | | | |
145 | test %al,%al | | | |
146 | jne 1b | | | |
147 | xor %eax,%eax | | | |
148 | ret | | | |
149 | 2: lea -1(%rsi),%rax | | | |
150 | ret | | 132 | ret |
151 | #endif | | | |
152 | | | | |
153 | STRONG_ALIAS(index,strchr) | | 133 | STRONG_ALIAS(index,strchr) |