Sun Jul 19 23:45:29 2009 UTC ()
revert changes that made new kernels hang in ACPI detection


(christos)
diff -r1.2 -r1.3 src/common/lib/libc/arch/x86_64/string/ffs.S
diff -r1.2 -r1.3 src/common/lib/libc/arch/x86_64/string/memchr.S
diff -r1.4 -r1.5 src/common/lib/libc/arch/x86_64/string/strchr.S

cvs diff -r1.2 -r1.3 src/common/lib/libc/arch/x86_64/string/ffs.S (expand / switch to context diff)
--- src/common/lib/libc/arch/x86_64/string/ffs.S 2009/07/18 12:03:31 1.2
+++ src/common/lib/libc/arch/x86_64/string/ffs.S 2009/07/19 23:45:29 1.3
@@ -7,14 +7,15 @@
 #include <machine/asm.h>
 
 #if defined(LIBC_SCCS)
-	RCSID("$NetBSD: ffs.S,v 1.2 2009/07/18 12:03:31 dsl Exp $")
+	RCSID("$NetBSD: ffs.S,v 1.3 2009/07/19 23:45:29 christos Exp $")
 #endif
 
 ENTRY(ffs)
 	bsfl	%edi,%eax
-	jz	1f	 		/* ZF is set if all bits are 0 */
+	jz	L1	 		/* ZF is set if all bits are 0 */
 	incl	%eax			/* bits numbered from 1, not 0 */
 	ret
 
-1:	xorl	%eax,%eax		/* clear result */
+	_ALIGN_TEXT
+L1:	xorl	%eax,%eax		/* clear result */
 	ret

cvs diff -r1.2 -r1.3 src/common/lib/libc/arch/x86_64/string/memchr.S (expand / switch to context diff)
--- src/common/lib/libc/arch/x86_64/string/memchr.S 2009/07/18 18:06:56 1.2
+++ src/common/lib/libc/arch/x86_64/string/memchr.S 2009/07/19 23:45:29 1.3
@@ -1,115 +1,111 @@
-/*	$NetBSD: memchr.S,v 1.2 2009/07/18 18:06:56 dsl Exp $	*/
-
-/*-
- * Copyright (c) 2009 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by David Laight.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+/*
+ * Written by J.T. Conklin <jtc@acorntoolworks.com>
+ * Public domain.
  */
 
 #include <machine/asm.h>
 
 #if defined(LIBC_SCCS)
-	RCSID("$NetBSD: memchr.S,v 1.2 2009/07/18 18:06:56 dsl Exp $")
+	RCSID("$NetBSD: memchr.S,v 1.3 2009/07/19 23:45:29 christos Exp $")
 #endif
 
-/*
- * The instruction sequences used try to avoid data dependencies
- * between adjacent instructions (to allow parallel execution).
- * The 'imul' for %r9 could be put into the delay following the
- * memory read (ie inside the loop) at no obvious cost - except
- * that the loop is currently exactly 32 bytes - 2 fetch blocks!.
- *
- * I don't think aligning any of the other branch targets is useful.
- */
-
 ENTRY(memchr)
-	movabsq	$0x0101010101010101,%r8
-	lea	(%rdi,%rdx),%r10	/* limit of buffer to scan */
-	movzbq	%sil,%rsi	/* mask high bits! */
+	movzbq	%sil,%rcx
 
-	/* 'directpath' imuls can execute 3 at a time ... (amd) */
-	imul	%r8,%rsi	/* search byte replicated in word */
-	imul	$0x80,%r8,%r9	/* 0x8080808080808080 */
-	test	$7,%dil
-	jnz	20f		/* jump if misaligned */
-	jmp	1f		/* jump to avoid 4 nops (13 bytes) in gap */
+	/*
+	 * Align to word boundary.
+	 * Consider unrolling loop?
+	 */
+	testq	%rdx,%rdx	/* nbytes == 0? */
+	je	.Lzero
+.Lalign:
+	testb	$7,%dil
+	je	.Lword_aligned
+	movq	%rdi,%rax
+	cmpb	(%rdi),%cl
+	je	.Ldone
+	incq	%rdi
+	decq	%rdx
+	jnz	.Lalign
+	jmp	.Lzero
 
-	_ALIGN_TEXT		/* entire loop now in 32 aligned bytes */
-1:
-	cmpq	%r10,%rdi	/* end of buffer ? */
-	jae	30f		/* jump if so */
+.Lword_aligned:
+	/* copy char to all bytes in word */
+	movb	%cl,%ch
+	movq	%rcx,%rsi
+	salq	$16,%rcx
+	orq	%rsi,%rcx
+	movq	%rcx,%rsi
+	salq	$32,%rcx
+	orq	%rsi,%rcx
 
-	movq	(%rdi),%rax	/* value to check */
-2:
+	movabsq	$0x0101010101010101,%r8
+	movabsq	$0x8080808080808080,%r9
+
+	_ALIGN_TEXT
+.Lloop:
+	cmpq	$7,%rdx		/* nbytes > 8 */
+	jbe	.Lbyte
+	movq	(%rdi),%rsi
 	addq	$8,%rdi
-	xorq	%rsi,%rax	/* now looking for zeros */
-	mov	%rax,%rcx
-	subq	%r8,%rax	/* x - 0x10 */
-	not	%rcx
-	andq	%r9,%rax	/* (x - 0x10) & 0x80 */
-	andq	%rcx,%rax	/* ((x - 0x10) & 0x80) ^ ~x */
-	je	1b		/* jump if not found */
+	xorq	%rcx,%rsi
+	subq	$8,%rdx
+	subq	%r8,%rsi
+	testq	%r9,%rsi
+	je	.Lloop
 
-/* Found byte in word, get its address */
-	bsf	%rax,%rax
-	shr	$3,%eax
-	lea	-8(%rax,%rdi),%rax
-	cmpq	%r10,%rax	/* need to check not beyond buffer */
-	jae	30f
-	rep
-	ret			/* amd - no ret after jmp */
+	/*
+	 * In rare cases, the above loop may exit prematurely. We must
+	 * return to the loop if none of the bytes in the word are
+	 * equal to ch.
+	 */
 
-/* Input misaligned, read aligned and kill low bits */
-/* (Getting a -1 is surprisingly hard work!) */
-20:
-	xor	%eax,%eax	/* zeros all 64 bits */
-	mov	%dil,%cl	/* misalignment amount 1..7 (+high bits )*/
-	test	%rdx,%rdx	/* zero length, don't read */
-	jz	30f
+	leaq	-8(%rdi),%rax
+	cmpb	-8(%rdi),%cl	/* 1st byte == ch? */
+	je	.Ldone
 
-	and	$~7,%dil	/* %rdi now start of word */
-	lea	-1(%rax),%r11	/* all 0xff */
-	and	$7,%cl		/* 1..7 */
+	leaq	-7(%rdi),%rax
+	cmpb	-7(%rdi),%cl	/* 2nd byte == ch? */
+	je	.Ldone
 
-	mov	(%rdi),%rax	/* word containing first byte */
-	shl	$3,%cl		/* 8..56 */
-	test	%r11,%rsi	/* searching for 0xff */
-	jz	25f
+	leaq	-6(%rdi),%rax
+	cmpb	-6(%rdi),%cl	/* 3rd byte == ch? */
+	je	.Ldone
 
-	/* Searching for other than 0xff, set low bytes */
-	shl	%cl,%r11	/* 0xff in high (wanted) bytes */
-	not	%r11		/* 0xff in low (unwanted) bytes */
-	or	%r11,%rax	/* low bytes now set */
-	jmp	2b
+	leaq	-5(%rdi),%rax
+	cmpb	-5(%rdi),%cl	/* 4th byte == ch? */
+	je	.Ldone
 
-25:	/* Searching for 0xff, clear low bytes */
-	shl	%cl,%r11	/* 0xff in high (wanted) bytes */
-	and	%r11,%rax	/* low bytes now zero */
-	jmp	2b
+	leaq	-4(%rdi),%rax
+	cmpb	-4(%rdi),%cl	/* 5th byte == ch? */
+	je	.Ldone
 
-/* Not found */
-30:	xorq	%rax,%rax
+	leaq	-3(%rdi),%rax
+	cmpb	-3(%rdi),%cl	/* 6th byte == ch? */
+	je	.Ldone
+
+	leaq	-2(%rdi),%rax
+	cmpb	-2(%rdi),%cl	/* 7th byte == ch? */
+	je	.Ldone
+
+	leaq	-1(%rdi),%rax
+	cmpb	-1(%rdi),%cl	/* 7th byte == ch? */
+	jne	.Lloop
+	ret
+
+.Lbyte:
+	testq	%rdx,%rdx
+	je	.Lzero
+.Lbyte_loop:
+	movq	%rdi,%rax
+	cmpb	(%rdi),%cl
+	je	.Ldone
+	incq	%rdi
+	decq	%rdx
+	jnz	.Lbyte_loop
+
+.Lzero:
+	xorq	%rax,%rax
+
+.Ldone:
 	ret

cvs diff -r1.4 -r1.5 src/common/lib/libc/arch/x86_64/string/strchr.S (expand / switch to context diff)
--- src/common/lib/libc/arch/x86_64/string/strchr.S 2009/07/18 16:40:31 1.4
+++ src/common/lib/libc/arch/x86_64/string/strchr.S 2009/07/19 23:45:29 1.5
@@ -1,153 +1,133 @@
-/*	$NetBSD: strchr.S,v 1.4 2009/07/18 16:40:31 dsl Exp $	*/
-
-/*-
- * Copyright (c) 2009 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by David Laight.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+/*
+ * Written by J.T. Conklin <jtc@acorntoolworks.com>
+ * Public domain.
  */
 
-/* See comments in strlen.S about checking words for byte values */
-
 #include <machine/asm.h>
 
 #if defined(LIBC_SCCS)
-	RCSID("$NetBSD: strchr.S,v 1.4 2009/07/18 16:40:31 dsl Exp $")
+	RCSID("$NetBSD: strchr.S,v 1.5 2009/07/19 23:45:29 christos Exp $")
 #endif
 
-/*
- * On entry %rdi is the buffer and the low byte of %rsi (%sil) the
- * character to search for.
- *
- * Registers %rdx, %rcx, %r8-%r11 and %rax are also usable
- */
+ENTRY(strchr)
+	movzbq	%sil,%rcx
 
-/* Uncomment below to get regression test to run this version but
- * have everything else use the trivial one below. */
-/* #define TEST_STRCHR */
+	/*
+	 * Align to word boundary.
+	 * Consider unrolling loop?
+	 */
+.Lalign:
+	testb	$7,%dil
+	je	.Lword_aligned
+	movb	(%rdi),%dl
+	cmpb	%cl,%dl
+	je	.Ldone
+	incq	%rdi
+	testb	%dl,%dl
+	jne	.Lalign
+	jmp	.Lzero
 
-#ifdef TEST_STRCHR
-ENTRY(test_strchr)
-#else
-ENTRY(strchr)
-#endif
+.Lword_aligned:
+	/* copy char to all bytes in word */
+	movb	%cl,%ch
+	movq	%rcx,%rdx
+	salq	$16,%rcx
+	orq	%rdx,%rcx
+	movq	%rcx,%rdx
+	salq	$32,%rcx
+	orq	%rdx,%rcx
+
 	movabsq	$0x0101010101010101,%r8
+	movabsq	$0x8080808080808080,%r9
 
-	movzbq	%sil,%rdx	/* value to search for (c) */
-	/* These imul are 'directpath' on athlons, so are fast */
-	imul	$0x80,%r8,%r9	/* 0x8080808080808080 */
-	imul	%r8,%rdx	/* (c) copied to all bytes */
-	test	$7,%dil
-	jnz	20f		/* jump if misaligned */
-
-	_ALIGN_TEXT		/* one byte nop */
-1:
-	movq	(%rdi),%rax	/* bytes to check (x) */
-2:
+	/* Check whether any byte in the word is equal to ch or 0. */
+	_ALIGN_TEXT
+.Lloop:
+	movq	(%rdi),%rdx
 	addq	$8,%rdi
-	mov	%rax,%r10
-	mov	%rax,%r11	/* for 'char' check */
-	not	%r10		/* invert of data (~x) */
+	movq	%rdx,%rsi
+	subq	%r8,%rdx
+	xorq	%rcx,%rsi
+	subq	%r8,%rsi
+	orq	%rsi,%rdx
+	testq	%r9,%rdx
+	je	.Lloop
 
-	xorq	%rdx,%r11	/* convert 'char' test to one for NUL */
-	subq	%r8,%rax	/* x - 0x10 */
-	movq	%r10,%rsi	/* ~x */
-	subq	%r8,%r11	/* (x ^ c) - 0x10 */
-/*
- * Here we could check ((x - 0x10) | ((x ^ c) - 0x10)) & 0x80
- * and short-circuit the case where no top bits are set, and
- * we continue the loop.
- * However it needs 3 more clocks that are difficult to interleave
- * in the existing dependency chain ...
- */
-	andq	%r9,%rax	/* (x - 0x10) & 0x80 */
-	xorq	%rdx,%rsi	/* c ^ ~x == ~(c ^ x) */
-	andq	%r9,%r11	/* ((x ^ c) - 0x10) & 0x80 */
-	andq	%r10,%rax	/* (x - 0x10) & 0x80 & ~x */
-	jne	10f		/* jump if string ends */
-	andq	%rsi,%r11	/* ((x ^ c) - 0x10) & 0x80 & ~(x ^ c) */
-	je	1b		/* jump if no match */
+	/*
+	 * In rare cases, the above loop may exit prematurely. We must
+	 * return to the loop if none of the bytes in the word match
+	 * ch or are equal to 0.
+	 */
 
-	/* Found char, since LE can use bit scan */
-	bsf	%r11,%r11	/* 7, 15, 23 ... 63 */
-8:	shr	$3,%r11		/* 0, 1, 2 .. 7 */
-	lea	-8(%r11,%rdi),%rax
-	ret
+	movb	-8(%rdi),%dl
+	cmpb	%cl,%dl		/* 1st byte == ch? */
+	jne	1f
+	subq	$8,%rdi
+	jmp	.Ldone
+1:	testb	%dl,%dl		/* 1st byte == 0? */
+	je	.Lzero
 
-/* End of string, check whether char is before NUL */
-	_ALIGN_TEXT		/* adds three byte nop */
-10:
-	bsf	%rax,%rax	/* count to NUL */
-	andq	%rsi,%r11	/* check for char in last 8 bytes */
-	je	11f
-	bsf	%r11,%r11	/* NUL and char - see which was first */
-	cmp	%r11,%rax
-	jae	8b		/* return 'found' if same - searching for NUL */
-11:	xor	%eax,%eax	/* char not found */
-	ret
+	movb	-7(%rdi),%dl
+	cmpb	%cl,%dl		/* 2nd byte == ch? */
+	jne	1f
+	subq	$7,%rdi
+	jmp	.Ldone
+1:	testb	%dl,%dl		/* 2nd byte == 0? */
+	je	.Lzero
 
-/* Source misaligned: read aligned word and make low bytes invalid */
-/* I (dsl) think a _ALIGN_TEXT here will slow things down! */
-20:
-	xor	%rcx,%rcx
-	sub	%dil,%cl	/* Convert low address values 1..7 ... */
-	sbb	%rsi,%rsi	/* carry was set, so %rsi now ~0u! */
-	and	$7,%cl		/* ... to 7..1 */
-	and	$~7,%dil	/* move address to start of word */
-	shl	$3,%cl		/* now 56, 48 ... 16, 8 */
-	movq	(%rdi),%rax	/* aligned word containing first data */
-	xor	%rdx,%rsi	/* invert of search pattern (~c) */
-	je	22f		/* searching for 0xff */
-21:	shr	%cl,%rsi	/* ~c in low bytes */
-	or	%rsi,%rax	/* set some bits making low bytes invalid */
-	jmp	2b
+	movb	-6(%rdi),%dl
+	cmpb	%cl,%dl		/* 3rd byte == ch? */
+	jne	1f
+	subq	$6,%rdi
+	jmp	.Ldone
+1:	testb	%dl,%dl		/* 3rd byte == 0? */
+	je	.Lzero
 
-/* We are searching for 0xff, so can't use ~pattern for invalid value */
-22:
-	mov	%r8,%r10	/* 0x01 pattern */
-	lea	(%r8,%r8),%rsi	/* 0x02 - bits gets set (above) */
-	not	%r10		/* now 0xfe */
-	sar	%cl,%r10	/* top bytes 0xff */
-	and	%r10,%rax	/* clear lsb from unwanted low bytes */
-	jmp	21b
+	movb	-5(%rdi),%dl
+	cmpb	%cl,%dl		/* 4th byte == ch? */
+	jne	1f
+	subq	$5,%rdi
+	jmp	.Ldone
+1:	testb	%dl,%dl		/* 4th byte == 0? */
+	je	.Lzero
 
-#ifdef TEST_STRCHR
-/* Trivial version for bug-fixing above */
-ENTRY(strchr)
-	movq	%rsi,%rdx
-	movq	%rdi,%rsi
-1:
-	lodsb
-	cmp	%al,%dl
-	je	2f
-	test	%al,%al
-	jne	1b
-	xor	%eax,%eax
-	ret
-2:	lea	-1(%rsi),%rax
-	ret
-#endif
+	movb	-4(%rdi),%dl
+	cmpb	%cl,%dl		/* 5th byte == ch? */
+	jne	1f
+	subq	$4,%rdi
+	jmp	.Ldone
+1:	testb	%dl,%dl		/* 5th byte == 0? */
+	je	.Lzero
 
+	movb	-3(%rdi),%dl
+	cmpb	%cl,%dl		/* 6th byte == ch? */
+	jne	1f
+	subq	$3,%rdi
+	jmp	.Ldone
+1:	testb	%dl,%dl		/* 6th byte == 0? */
+	je	.Lzero
+
+	movb	-2(%rdi),%dl
+	cmpb	%cl,%dl		/* 7th byte == ch? */
+	jne	1f
+	subq	$2,%rdi
+	jmp	.Ldone
+1:	testb	%dl,%dl		/* 7th byte == 0? */
+	je	.Lzero
+
+	movb	-1(%rdi),%dl
+	cmpb	%cl,%dl		/* 8th byte == ch? */
+	jne	1f
+	subq	$1,%rdi
+	jmp	.Ldone
+1:	testb	%dl,%dl		/* 8th byte == 0? */
+	jne	.Lloop
+
+.Lzero:
+	/* If a ch wasn't found, return 0. */
+	xorq	%rdi,%rdi
+
+.Ldone:
+	movq	%rdi,%rax
+	ret
 STRONG_ALIAS(index,strchr)