Mon Jun 29 23:29:40 2020 UTC
Add x86 AES-NI support.
Limited to amd64 for now. In principle, AES-NI should work in 32-bit
mode, and there may even be some 32-bit-only CPUs that support
AES-NI, but that requires work to adapt the assembly.
(riastradh)
diff -r1.111 -r1.112 src/sys/arch/x86/conf/files.x86
diff -r1.107 -r1.108 src/sys/arch/x86/x86/identcpu.c
diff -r0 -r1.1 src/sys/crypto/aes/arch/x86/aes_ni.c
diff -r0 -r1.1 src/sys/crypto/aes/arch/x86/aes_ni.h
diff -r0 -r1.1 src/sys/crypto/aes/arch/x86/aes_ni_64.S
diff -r0 -r1.1 src/sys/crypto/aes/arch/x86/files.aesni
--- src/sys/arch/x86/conf/files.x86 2020/05/06 19:45:12 1.111
+++ src/sys/arch/x86/conf/files.x86 2020/06/29 23:29:39 1.112
@@ -1,14 +1,14 @@
-# $NetBSD: files.x86,v 1.111 2020/05/06 19:45:12 bouyer Exp $
+# $NetBSD: files.x86,v 1.112 2020/06/29 23:29:39 riastradh Exp $
 
 # options for MP configuration through the MP spec
 defflag opt_mpbios.h MPBIOS MPDEBUG MPBIOS_SCANPCI
 defparam opt_mpbios.h MPVERBOSE
 
 # MTRR support
 defflag MTRR
 
 # Interrupt debug
 defflag opt_intrdebug.h INTRDEBUG
 
 # PCI fixup options
 defflag opt_pcifixup.h PCI_ADDR_FIXUP PCI_BUS_FIXUP
@@ -155,13 +155,16 @@ file arch/x86/x86/x86_ipmi.c ipmi needs
 
 file arch/x86/x86/vga_post.c vga_post
 
 file arch/x86/pci/pci_machdep.c pci
 #file arch/x86/pci/pci_ranges.c pci
 file arch/x86/pci/pci_intr_machdep.c pci
 file arch/x86/pci/pci_msi_machdep.c pci & ! no_pci_msi_msix
 file arch/x86/pci/msipic.c pci & ! no_pci_msi_msix
 
 file arch/x86/pci/pciide_machdep.c pciide_common
 
 file arch/x86/pci/pci_bus_fixup.c pci_bus_fixup
 file arch/x86/pci/pci_addr_fixup.c pci_addr_fixup
+
+# AES-NI
+include "crypto/aes/arch/x86/files.aesni"
--- src/sys/arch/x86/x86/identcpu.c 2020/04/25 15:26:18 1.107
+++ src/sys/arch/x86/x86/identcpu.c 2020/06/29 23:29:39 1.108
@@ -1,14 +1,14 @@
-/* $NetBSD: identcpu.c,v 1.107 2020/04/25 15:26:18 bouyer Exp $ */
+/* $NetBSD: identcpu.c,v 1.108 2020/06/29 23:29:39 riastradh Exp $ */
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2006, 2007, 2008 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Frank van der Linden, and by Jason R. Thorpe.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
@@ -20,35 +20,37 @@
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: identcpu.c,v 1.107 2020/04/25 15:26:18 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: identcpu.c,v 1.108 2020/06/29 23:29:39 riastradh Exp $");
 
 #include "opt_xen.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/device.h>
 #include <sys/cpu.h>
 
+#include <crypto/aes/arch/x86/aes_ni.h>
+
 #include <uvm/uvm_extern.h>
 
 #include <machine/specialreg.h>
 #include <machine/pio.h>
 #include <machine/cpu.h>
 
 #include <x86/cputypes.h>
 #include <x86/cacheinfo.h>
 #include <x86/cpuvar.h>
 #include <x86/fpu.h>
 
 #include <x86/x86/vmtreg.h> /* for vmt_hvcall() */
 #include <x86/x86/vmtvar.h> /* for vmt_hvcall() */
@@ -985,26 +987,30 @@ cpu_probe(struct cpu_info *ci)
 	}
 
 	ci->ci_feat_val[0] &= ~CPUID_FEAT_BLACKLIST;
 	if (ci == &cpu_info_primary) {
 		/* If first. Boot Processor is the cpu_feature reference. */
 		for (i = 0; i < __arraycount(cpu_feature); i++) {
 			cpu_feature[i] = ci->ci_feat_val[i];
 		}
 		identify_hypervisor();
 #ifndef XENPV
 		/* Early patch of text segment. */
 		x86_patch(true);
 #endif
+#ifdef __x86_64__	/* not yet implemented on i386 */
+		if (cpu_feature[1] & CPUID2_AES)
+			aes_md_init(&aes_ni_impl);
+#endif
 	} else {
 		/*
 		 * If not first. Warn about cpu_feature mismatch for
 		 * secondary CPUs.
 		 */
 		for (i = 0; i < __arraycount(cpu_feature); i++) {
 			if (cpu_feature[i] != ci->ci_feat_val[i])
 				aprint_error_dev(ci->ci_dev,
 				    "feature mismatch: cpu_feature[%d] is "
 				    "%#x, but CPU reported %#x\n",
 				    i, cpu_feature[i], ci->ci_feat_val[i]);
 		}
 	}
/* $NetBSD: aes_ni.c,v 1.1 2020/06/29 23:29:40 riastradh Exp $ */
/*-
* Copyright (c) 2020 The NetBSD Foundation, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(1, "$NetBSD: aes_ni.c,v 1.1 2020/06/29 23:29:40 riastradh Exp $");
#include <sys/types.h>
#include <sys/systm.h>
#include <crypto/aes/aes.h>
#include <crypto/aes/arch/x86/aes_ni.h>
#include <x86/cpuvar.h>
#include <x86/fpu.h>
#include <x86/specialreg.h>
static void
aesni_setenckey(struct aesenc *enc, const uint8_t key[static 16],
uint32_t nrounds)
{
switch (nrounds) {
case 10:
aesni_setenckey128(enc, key);
break;
case 12:
aesni_setenckey192(enc, key);
break;
case 14:
aesni_setenckey256(enc, key);
break;
default:
panic("invalid AES rounds: %u", nrounds);
}
}
static void
aesni_setenckey_impl(struct aesenc *enc, const uint8_t key[static 16],
uint32_t nrounds)
{
fpu_kern_enter();
aesni_setenckey(enc, key, nrounds);
fpu_kern_leave();
}
static void
aesni_setdeckey_impl(struct aesdec *dec, const uint8_t key[static 16],
uint32_t nrounds)
{
struct aesenc enc;
fpu_kern_enter();
aesni_setenckey(&enc, key, nrounds);
aesni_enctodec(&enc, dec, nrounds);
fpu_kern_leave();
explicit_memset(&enc, 0, sizeof enc);
}
static void
aesni_enc_impl(const struct aesenc *enc, const uint8_t in[static 16],
uint8_t out[static 16], uint32_t nrounds)
{
fpu_kern_enter();
aesni_enc(enc, in, out, nrounds);
fpu_kern_leave();
}
static void
aesni_dec_impl(const struct aesdec *dec, const uint8_t in[static 16],
uint8_t out[static 16], uint32_t nrounds)
{
fpu_kern_enter();
aesni_dec(dec, in, out, nrounds);
fpu_kern_leave();
}
static void
aesni_cbc_enc_impl(const struct aesenc *enc, const uint8_t in[static 16],
uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
uint32_t nrounds)
{
KASSERT(nbytes % 16 == 0);
fpu_kern_enter();
aesni_cbc_enc(enc, in, out, nbytes, iv, nrounds);
fpu_kern_leave();
}
static void
aesni_cbc_dec_impl(const struct aesdec *dec, const uint8_t in[static 16],
uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
uint32_t nrounds)
{
KASSERT(nbytes % 16 == 0);
fpu_kern_enter();
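	/*
	 * Decrypt the odd-size head (up to seven blocks) one block at
	 * a time; aesni_cbc_dec1 leaves the updated IV behind for
	 * aesni_cbc_dec8 to chain from.
	 */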
if (nbytes % 128) {
aesni_cbc_dec1(dec, in, out, nbytes % 128, iv, nrounds);
in += nbytes % 128;
out += nbytes % 128;
nbytes -= nbytes % 128;
}
KASSERT(nbytes % 128 == 0);
if (nbytes)
aesni_cbc_dec8(dec, in, out, nbytes, iv, nrounds);
fpu_kern_leave();
}
static void
aesni_xts_enc_impl(const struct aesenc *enc, const uint8_t in[static 16],
uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
uint32_t nrounds)
{
KASSERT(nbytes % 16 == 0);
fpu_kern_enter();
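	/*
	 * Same split as CBC decryption above: handle the odd-size
	 * head one block at a time, then the rest in 8-block groups.
	 * aesni_xts_enc1 stores the updated tweak through iv for
	 * aesni_xts_enc8 to continue from.
	 */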
if (nbytes % 128) {
aesni_xts_enc1(enc, in, out, nbytes % 128, iv, nrounds);
in += nbytes % 128;
out += nbytes % 128;
nbytes -= nbytes % 128;
}
KASSERT(nbytes % 128 == 0);
if (nbytes)
aesni_xts_enc8(enc, in, out, nbytes, iv, nrounds);
fpu_kern_leave();
}
static void
aesni_xts_dec_impl(const struct aesdec *dec, const uint8_t in[static 16],
uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
uint32_t nrounds)
{
KASSERT(nbytes % 16 == 0);
fpu_kern_enter();
if (nbytes % 128) {
aesni_xts_dec1(dec, in, out, nbytes % 128, iv, nrounds);
in += nbytes % 128;
out += nbytes % 128;
nbytes -= nbytes % 128;
}
KASSERT(nbytes % 128 == 0);
if (nbytes)
aesni_xts_dec8(dec, in, out, nbytes, iv, nrounds);
fpu_kern_leave();
}
static int
aesni_xts_update_selftest(void)
{
static const struct {
uint8_t in[16], out[16];
} cases[] = {
{{1}, {2}},
{{0,0,0,0x80}, {0,0,0,0,1}},
{{0,0,0,0,0,0,0,0x80}, {0,0,0,0,0,0,0,0,1}},
{{0,0,0,0x80,0,0,0,0x80}, {0,0,0,0,1,0,0,0,1}},
{{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x80}, {0x87}},
{{0,0,0,0,0,0,0,0x80,0,0,0,0,0,0,0,0x80},
{0x87,0,0,0,0,0,0,0,1}},
{{0,0,0,0x80,0,0,0,0,0,0,0,0,0,0,0,0x80}, {0x87,0,0,0,1}},
{{0,0,0,0x80,0,0,0,0x80,0,0,0,0,0,0,0,0x80},
{0x87,0,0,0,1,0,0,0,1}},
};
unsigned i;
uint8_t tweak[16];
for (i = 0; i < __arraycount(cases); i++) {
aesni_xts_update(cases[i].in, tweak);
if (memcmp(tweak, cases[i].out, 16))
return -1;
}
/* Success! */
return 0;
}
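/*
 * For reference: the tweak update implemented by aesni_xts_mulx (and
 * exercised by the vectors above) is multiplication by x in GF(2^128)
 * modulo x^128 + x^7 + x^2 + x + 1, treating the tweak as a 128-bit
 * little-endian integer.  A portable C sketch of the same operation
 * follows; it is purely illustrative (the name xts_update_ref is
 * ours, and nothing in the driver calls it).
 */
static void __unused
xts_update_ref(const uint8_t in[static 16], uint8_t out[static 16])
{
	uint8_t carry = in[15] >> 7;	/* coefficient of x^127 */
	unsigned i;

	/* Shift the 128-bit little-endian integer left by one bit. */
	for (i = 15; i > 0; i--)
		out[i] = (in[i] << 1) | (in[i - 1] >> 7);

	/* A carry out of x^127 reduces by x^128 = x^7 + x^2 + x + 1. */
	out[0] = (in[0] << 1) ^ (carry ? 0x87 : 0);
}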
static int
aesni_probe(void)
{
int result = 0;
/* Verify that the CPU supports AES-NI. */
if ((cpu_feature[1] & CPUID2_AES) == 0)
return -1;
fpu_kern_enter();
/* Verify that our XTS tweak update logic works. */
if (aesni_xts_update_selftest())
result = -1;
fpu_kern_leave();
return result;
}
struct aes_impl aes_ni_impl = {
.ai_name = "Intel AES-NI",
.ai_probe = aesni_probe,
.ai_setenckey = aesni_setenckey_impl,
.ai_setdeckey = aesni_setdeckey_impl,
.ai_enc = aesni_enc_impl,
.ai_dec = aesni_dec_impl,
.ai_cbc_enc = aesni_cbc_enc_impl,
.ai_cbc_dec = aesni_cbc_dec_impl,
.ai_xts_enc = aesni_xts_enc_impl,
.ai_xts_dec = aesni_xts_dec_impl,
};
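/*
 * aes_ni_impl is selected at boot from cpu_probe in
 * arch/x86/x86/identcpu.c, via aes_md_init(&aes_ni_impl), when CPUID
 * reports AES-NI support (cpu_feature[1] & CPUID2_AES).
 */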
/* $NetBSD: aes_ni.h,v 1.1 2020/06/29 23:29:40 riastradh Exp $ */
/*-
* Copyright (c) 2020 The NetBSD Foundation, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _CRYPTO_AES_ARCH_X86_AES_NI_H
#define _CRYPTO_AES_ARCH_X86_AES_NI_H
#include <sys/types.h>
#include <crypto/aes/aes.h>
/* Assembly routines */
void aesni_setenckey128(struct aesenc *, const uint8_t[static 16]);
void aesni_setenckey192(struct aesenc *, const uint8_t[static 24]);
void aesni_setenckey256(struct aesenc *, const uint8_t[static 32]);
void aesni_enctodec(const struct aesenc *, struct aesdec *, uint32_t);
void aesni_enc(const struct aesenc *, const uint8_t[static 16],
uint8_t[static 16], uint32_t);
void aesni_dec(const struct aesdec *, const uint8_t[static 16],
uint8_t[static 16], uint32_t);
void aesni_cbc_enc(const struct aesenc *, const uint8_t[static 16],
uint8_t[static 16], size_t, uint8_t[static 16], uint32_t);
void aesni_cbc_dec1(const struct aesdec *, const uint8_t[static 16],
uint8_t[static 16], size_t, const uint8_t[static 16], uint32_t);
void aesni_cbc_dec8(const struct aesdec *, const uint8_t[static 128],
uint8_t[static 128], size_t, const uint8_t[static 16], uint32_t);
void aesni_xts_enc1(const struct aesenc *, const uint8_t[static 16],
uint8_t[static 16], size_t, uint8_t[static 16], uint32_t);
void aesni_xts_enc8(const struct aesenc *, const uint8_t[static 128],
uint8_t[static 128], size_t, uint8_t[static 16], uint32_t);
void aesni_xts_dec1(const struct aesdec *, const uint8_t[static 16],
uint8_t[static 16], size_t, uint8_t[static 16], uint32_t);
void aesni_xts_dec8(const struct aesdec *, const uint8_t[static 128],
uint8_t[static 128], size_t, uint8_t[static 16], uint32_t);
void aesni_xts_update(const uint8_t[static 16], uint8_t[static 16]);
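/* AES-NI implementation record, for registration with aes_md_init */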
extern struct aes_impl aes_ni_impl;
#endif /* _CRYPTO_AES_ARCH_X86_AES_NI_H */
/* $NetBSD: aes_ni_64.S,v 1.1 2020/06/29 23:29:40 riastradh Exp $ */
/*-
* Copyright (c) 2020 The NetBSD Foundation, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
/*
* MOVDQA/MOVDQU are Move Double Quadword (Aligned/Unaligned), defined
* to operate on integers; MOVAPS/MOVUPS are Move (Aligned/Unaligned)
* Packed Single, defined to operate on binary32 floats. They have
* exactly the same architectural effects (move a 128-bit quantity from
* memory into an xmm register).
*
* In principle, they might have different microarchitectural effects
* so that MOVAPS/MOVUPS might incur a penalty when the register is
* later used for integer paths, but in practice they don't. So we use
* the one whose instruction encoding is shorter -- MOVAPS/MOVUPS.
*/
#define movdqa movaps
#define movdqu movups
/*
* aesni_setenckey128(struct aesenc *enckey@rdi, const uint8_t key[16] @rsi)
*
* Expand a 16-byte AES-128 key into 10 round keys.
*
* Standard ABI calling convention.
*/
ENTRY(aesni_setenckey128)
movdqu (%rsi),%xmm0 /* load master key into %xmm0 */
movdqa %xmm0,(%rdi) /* store master key as the first round key */
lea 0x10(%rdi),%rdi /* advance %rdi to next round key */
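	/*
	 * Each aeskeygenassist immediate below is an AES round
	 * constant RCON[i] = x^(i-1) in GF(2^8): 0x01, 0x02, 0x04,
	 * ..., 0x80, 0x1b, 0x36.
	 */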
aeskeygenassist $0x1,%xmm0,%xmm2
call aesni_expand128
aeskeygenassist $0x2,%xmm0,%xmm2
call aesni_expand128
aeskeygenassist $0x4,%xmm0,%xmm2
call aesni_expand128
aeskeygenassist $0x8,%xmm0,%xmm2
call aesni_expand128
aeskeygenassist $0x10,%xmm0,%xmm2
call aesni_expand128
aeskeygenassist $0x20,%xmm0,%xmm2
call aesni_expand128
aeskeygenassist $0x40,%xmm0,%xmm2
call aesni_expand128
aeskeygenassist $0x80,%xmm0,%xmm2
call aesni_expand128
aeskeygenassist $0x1b,%xmm0,%xmm2
call aesni_expand128
aeskeygenassist $0x36,%xmm0,%xmm2
call aesni_expand128
ret
END(aesni_setenckey128)
/*
* aesni_setenckey192(struct aesenc *enckey@rdi, const uint8_t key[24] @rsi)
*
* Expand a 24-byte AES-192 key into 12 round keys.
*
* Standard ABI calling convention.
*/
ENTRY(aesni_setenckey192)
movdqu (%rsi),%xmm0 /* load master key [0:128) into %xmm0 */
movq 0x10(%rsi),%xmm1 /* load master key [128:192) into %xmm1 */
movdqa %xmm0,(%rdi) /* store master key [0:128) as round key */
lea 0x10(%rdi),%rdi /* advance %rdi to next round key */
aeskeygenassist $0x1,%xmm1,%xmm2
call aesni_expand192a
aeskeygenassist $0x2,%xmm0,%xmm2
call aesni_expand192b
aeskeygenassist $0x4,%xmm1,%xmm2
call aesni_expand192a
aeskeygenassist $0x8,%xmm0,%xmm2
call aesni_expand192b
aeskeygenassist $0x10,%xmm1,%xmm2
call aesni_expand192a
aeskeygenassist $0x20,%xmm0,%xmm2
call aesni_expand192b
aeskeygenassist $0x40,%xmm1,%xmm2
call aesni_expand192a
aeskeygenassist $0x80,%xmm0,%xmm2
call aesni_expand192b
ret
END(aesni_setenckey192)
/*
* aesni_setenckey256(struct aesenc *enckey@rdi, const uint8_t key[32] @rsi)
*
* Expand a 32-byte AES-256 key into 14 round keys.
*
* Standard ABI calling convention.
*/
ENTRY(aesni_setenckey256)
movdqu (%rsi),%xmm0 /* load master key [0:128) into %xmm0 */
movdqu 0x10(%rsi),%xmm1 /* load master key [128:256) into %xmm1 */
movdqa %xmm0,(%rdi) /* store master key [0:128) as round key */
movdqa %xmm1,0x10(%rdi) /* store master key [128:256) as round key */
lea 0x20(%rdi),%rdi /* advance %rdi to next round key */
aeskeygenassist $0x1,%xmm1,%xmm2
call aesni_expand256a
aeskeygenassist $0x1,%xmm0,%xmm2
call aesni_expand256b
aeskeygenassist $0x2,%xmm1,%xmm2
call aesni_expand256a
aeskeygenassist $0x2,%xmm0,%xmm2
call aesni_expand256b
aeskeygenassist $0x4,%xmm1,%xmm2
call aesni_expand256a
aeskeygenassist $0x4,%xmm0,%xmm2
call aesni_expand256b
aeskeygenassist $0x8,%xmm1,%xmm2
call aesni_expand256a
aeskeygenassist $0x8,%xmm0,%xmm2
call aesni_expand256b
aeskeygenassist $0x10,%xmm1,%xmm2
call aesni_expand256a
aeskeygenassist $0x10,%xmm0,%xmm2
call aesni_expand256b
aeskeygenassist $0x20,%xmm1,%xmm2
call aesni_expand256a
aeskeygenassist $0x20,%xmm0,%xmm2
call aesni_expand256b
aeskeygenassist $0x40,%xmm1,%xmm2
call aesni_expand256a
ret
END(aesni_setenckey256)
/*
* aesni_expand128(uint128_t *rkp@rdi, uint128_t prk@xmm0,
* uint128_t keygenassist@xmm2)
*
* 1. Compute the AES-128 round key using the previous round key.
* 2. Store it at *rkp.
* 3. Set %xmm0 to it.
* 4. Advance %rdi to point at the next round key.
*
* Internal ABI. On entry:
*
* %rdi = rkp, pointer to round key to compute
* %xmm0 = (prk[0], prk[1], prk[2], prk[3])
* %xmm2 = (xxx, xxx, xxx, t = Rot(SubWord(prk[3])) ^ RCON)
*
* On exit:
*
* %rdi = &rkp[1], rkp advanced by one round key
* %xmm0 = rk, the round key we just computed
* %xmm2 = garbage
* %xmm4 = garbage
* %xmm5 = garbage
* %xmm6 = garbage
*
* Note: %xmm1 is preserved (as are %xmm3 and %xmm7 through %xmm15,
* and all other registers).
*/
.text
_ALIGN_TEXT
.type aesni_expand128,@function
aesni_expand128:
/*
* %xmm2 := (%xmm2[3], %xmm2[3], %xmm2[3], %xmm2[3]),
* i.e., set each word of %xmm2 to t := Rot(SubWord(prk[3])) ^ RCON.
*/
pshufd $0b11111111,%xmm2,%xmm2
/*
* %xmm4 := (0, prk[0], prk[1], prk[2])
* %xmm5 := (0, 0, prk[0], prk[1])
* %xmm6 := (0, 0, 0, prk[0])
*/
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm5
movdqa %xmm0,%xmm6
pslldq $4,%xmm4
pslldq $8,%xmm5
pslldq $12,%xmm6
/*
* %xmm0 := (rk[0] = t ^ prk[0],
* rk[1] = t ^ prk[0] ^ prk[1],
* rk[2] = t ^ prk[0] ^ prk[1] ^ prk[2],
* rk[3] = t ^ prk[0] ^ prk[1] ^ prk[2] ^ prk[3])
*/
pxor %xmm2,%xmm0
pxor %xmm4,%xmm0
pxor %xmm5,%xmm0
pxor %xmm6,%xmm0
movdqa %xmm0,(%rdi) /* store round key */
lea 0x10(%rdi),%rdi /* advance to next round key address */
ret
END(aesni_expand128)
/*
* aesni_expand192a(uint128_t *rkp@rdi, uint128_t prk@xmm0,
* uint64_t rklo@xmm1, uint128_t keygenassist@xmm2)
*
* Set even-numbered AES-192 round key.
*
* Internal ABI. On entry:
*
* %rdi = rkp, pointer to two round keys to compute
* %xmm0 = (prk[0], prk[1], prk[2], prk[3])
* %xmm1 = (rklo[0], rklo[1], xxx, xxx)
* %xmm2 = (xxx, t = Rot(SubWord(rklo[1])) ^ RCON, xxx, xxx)
*
* On exit:
*
* %rdi = &rkp[2], rkp advanced by two round keys
* %xmm0 = nrk, second round key we just computed
* %xmm1 = rk, first round key we just computed
* %xmm2 = garbage
* %xmm4 = garbage
* %xmm5 = garbage
* %xmm6 = garbage
* %xmm7 = garbage
*/
.text
_ALIGN_TEXT
.type aesni_expand192a,@function
aesni_expand192a:
/*
* %xmm2 := (%xmm2[1], %xmm2[1], %xmm2[1], %xmm2[1]),
* i.e., set each word of %xmm2 to t := Rot(SubWord(rklo[1])) ^ RCON.
*/
pshufd $0b01010101,%xmm2,%xmm2
/*
* We need to compute:
*
* rk[0] := rklo[0]
* rk[1] := rklo[1]
* rk[2] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0]
* rk[3] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ prk[1]
* nrk[0] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ prk[1] ^ prk[2]
* nrk[1] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3]
* nrk[2] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3] ^ rklo[0]
* nrk[3] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3] ^ rklo[0]
* ^ rklo[1]
*/
/*
* %xmm4 := (prk[0], prk[1], prk[2], prk[3])
* %xmm5 := (0, prk[0], prk[1], prk[2])
* %xmm6 := (0, 0, prk[0], prk[1])
* %xmm7 := (0, 0, 0, prk[0])
*/
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm5
movdqa %xmm0,%xmm6
movdqa %xmm0,%xmm7
pslldq $4,%xmm5
pslldq $8,%xmm6
pslldq $12,%xmm7
/* %xmm4 := (rk[2], rk[3], nrk[0], nrk[1]) */
pxor %xmm2,%xmm4
pxor %xmm5,%xmm4
pxor %xmm6,%xmm4
pxor %xmm7,%xmm4
/*
* At this point, rk is split across %xmm1 (rk[0],rk[1],...) and
* %xmm4 (rk[2],rk[3],...); nrk is in %xmm4 (...,nrk[0],nrk[1]);
* and we have yet to compute nrk[2] or nrk[3], which requires
* rklo[0] and rklo[1] in %xmm1 (rklo[0], rklo[1], ...). We need
* nrk to end up in %xmm0 at the end, so gather rk into %xmm1 and
* nrk into %xmm0.
*/
/* %xmm0 := (nrk[0], nrk[1], nrk[1], nrk[1]) */
pshufd $0b11111110,%xmm4,%xmm0
/*
* %xmm6 := (0, 0, rklo[0], rklo[1])
* %xmm7 := (0, 0, 0, rklo[0])
*/
movdqa %xmm1,%xmm6
movdqa %xmm1,%xmm7
pslldq $8,%xmm6
pslldq $12,%xmm7
/*
* %xmm0 := (nrk[0],
* nrk[1],
* nrk[2] = nrk[1] ^ rklo[0],
* nrk[3] = nrk[1] ^ rklo[0] ^ rklo[1])
*/
pxor %xmm6,%xmm0
pxor %xmm7,%xmm0
/* %xmm1 := (rk[0], rk[1], rk[2], rk[3]) */
shufps $0b01000100,%xmm4,%xmm1
movdqa %xmm1,(%rdi) /* store round key */
movdqa %xmm0,0x10(%rdi) /* store next round key */
lea 0x20(%rdi),%rdi /* advance two round keys */
ret
END(aesni_expand192a)
/*
* aesni_expand192b(uint128_t *roundkey@rdi, uint128_t prk@xmm0,
* uint128_t keygenassist@xmm2)
*
* Set odd-numbered AES-192 round key.
*
* Internal ABI. On entry:
*
* %rdi = rkp, pointer to round key to compute
* %xmm0 = (prk[0], prk[1], prk[2], prk[3])
* %xmm1 = (xxx, xxx, pprk[2], pprk[3])
* %xmm2 = (xxx, xxx, xxx, t = Rot(Sub(prk[3])) ^ RCON)
*
* On exit:
*
* %rdi = &rkp[1], rkp advanced by one round key
* %xmm0 = rk, the round key we just computed
* %xmm1 = (nrk[0], nrk[1], xxx, xxx), half of next round key
* %xmm2 = garbage
* %xmm4 = garbage
* %xmm5 = garbage
* %xmm6 = garbage
* %xmm7 = garbage
*/
.text
_ALIGN_TEXT
.type aesni_expand192b,@function
aesni_expand192b:
/*
* %xmm2 := (%xmm2[3], %xmm2[3], %xmm2[3], %xmm2[3]),
* i.e., set each word of %xmm2 to t := Rot(Sub(prk[3])) ^ RCON.
*/
pshufd $0b11111111,%xmm2,%xmm2
/*
* We need to compute:
*
* rk[0] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2]
* rk[1] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3]
* rk[2] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3] ^ prk[0]
* rk[3] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3] ^ prk[0]
* ^ prk[1]
* nrk[0] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3] ^ prk[0]
* ^ prk[1] ^ prk[2]
* nrk[1] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3] ^ prk[0]
* ^ prk[1] ^ prk[2] ^ prk[3]
*/
/* %xmm1 := (pprk[2], pprk[3], prk[0], prk[1]) */
shufps $0b01001110,%xmm0,%xmm1
/*
* %xmm5 := (0, pprk[2], pprk[3], prk[0])
* %xmm6 := (0, 0, pprk[2], pprk[3])
* %xmm7 := (0, 0, 0, pprk[2])
*/
movdqa %xmm1,%xmm5
movdqa %xmm1,%xmm6
movdqa %xmm1,%xmm7
pslldq $4,%xmm5
pslldq $8,%xmm6
pslldq $12,%xmm7
/* %xmm1 := (rk[0], rk[1], rk[2], rk[3]) */
pxor %xmm2,%xmm1
pxor %xmm5,%xmm1
pxor %xmm6,%xmm1
pxor %xmm7,%xmm1
/* %xmm4 := (prk[2], prk[3], xxx, xxx) */
pshufd $0b00001110,%xmm0,%xmm4
/* %xmm5 := (0, prk[2], xxx, xxx) */
movdqa %xmm4,%xmm5
pslldq $4,%xmm5
/* %xmm0 := (rk[0], rk[1], rk[2], rk[3]) */
movdqa %xmm1,%xmm0
/* %xmm1 := (rk[3], rk[3], xxx, xxx) */
shufps $0b00001111,%xmm1,%xmm1
/*
* %xmm1 := (nrk[0] = rk[3] ^ prk[2],
* nrk[1] = rk[3] ^ prk[2] ^ prk[3],
* xxx,
* xxx)
*/
pxor %xmm4,%xmm1
pxor %xmm5,%xmm1
movdqa %xmm0,(%rdi) /* store round key */
lea 0x10(%rdi),%rdi /* advance to next round key address */
ret
END(aesni_expand192b)
/*
* aesni_expand256a(uint128_t *rkp@rdi, uint128_t pprk@xmm0,
* uint128_t prk@xmm1, uint128_t keygenassist@xmm2)
*
* Set even-numbered AES-256 round key.
*
* Internal ABI. On entry:
*
* %rdi = rkp, pointer to round key to compute
* %xmm0 = (pprk[0], pprk[1], pprk[2], pprk[3])
* %xmm1 = (prk[0], prk[1], prk[2], prk[3])
* %xmm2 = (xxx, xxx, xxx, t = Rot(SubWord(prk[3])) ^ RCON)
*
* On exit:
*
* %rdi = &rkp[1], rkp advanced by one round key
* %xmm0 = rk, the round key we just computed
* %xmm1 = prk, previous round key, preserved from entry
* %xmm2 = garbage
* %xmm4 = garbage
* %xmm5 = garbage
* %xmm6 = garbage
*
* The computation turns out to be the same as for AES-128; the
* previous round key does not figure into it, only the
* previous-previous round key.
*/
aesni_expand256a = aesni_expand128
/*
* aesni_expand256b(uint128_t *rkp@rdi, uint128_t prk@xmm0,
* uint128_t pprk@xmm1, uint128_t keygenassist@xmm2)
*
* Set odd-numbered AES-256 round key.
*
* Internal ABI. On entry:
*
* %rdi = rkp, pointer to round key to compute
* %xmm0 = (prk[0], prk[1], prk[2], prk[3])
* %xmm1 = (pprk[0], pprk[1], pprk[2], pprk[3])
* %xmm2 = (xxx, xxx, t = Sub(prk[3]), xxx)
*
* On exit:
*
* %rdi = &rkp[1], rkp advanced by one round key
* %xmm0 = prk, previous round key, preserved from entry
* %xmm1 = rk, the round key we just computed
* %xmm2 = garbage
* %xmm4 = garbage
* %xmm5 = garbage
* %xmm6 = garbage
*/
.text
_ALIGN_TEXT
.type aesni_expand256b,@function
aesni_expand256b:
/*
* %xmm2 := (%xmm2[2], %xmm2[2], %xmm2[2], %xmm2[2]),
* i.e., set each word of %xmm2 to t := Sub(prk[3]).
*/
pshufd $0b10101010,%xmm2,%xmm2
/*
* %xmm4 := (0, pprk[0], pprk[1], pprk[2])
* %xmm5 := (0, 0, pprk[0], pprk[1])
* %xmm6 := (0, 0, 0, pprk[0])
*/
movdqa %xmm1,%xmm4
movdqa %xmm1,%xmm5
movdqa %xmm1,%xmm6
pslldq $4,%xmm4
pslldq $8,%xmm5
pslldq $12,%xmm6
/*
* %xmm0 := (rk[0] = t ^ pprk[0],
* rk[1] = t ^ pprk[0] ^ pprk[1],
* rk[2] = t ^ pprk[0] ^ pprk[1] ^ pprk[2],
* rk[3] = t ^ pprk[0] ^ pprk[1] ^ pprk[2] ^ pprk[3])
*/
pxor %xmm2,%xmm1
pxor %xmm4,%xmm1
pxor %xmm5,%xmm1
pxor %xmm6,%xmm1
movdqa %xmm1,(%rdi) /* store round key */
lea 0x10(%rdi),%rdi /* advance to next round key address */
ret
END(aesni_expand256b)
/*
* aesni_enctodec(const struct aesenc *enckey@rdi, struct aesdec *deckey@rsi,
* uint32_t nrounds@rdx)
*
* Convert AES encryption round keys to AES decryption round keys.
* `nrounds' must be between 10 and 14.
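*
* The decryption schedule is the encryption schedule in reverse, with
* InvMixColumns (AESIMC) applied to every round key except the first
* and last, as the equivalent inverse cipher used by AESDEC expects.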
*
* Standard ABI calling convention.
*/
ENTRY(aesni_enctodec)
shl $4,%edx /* rdx := byte offset of last round key */
movdqa (%rdi,%rdx),%xmm0 /* load last round key */
movdqa %xmm0,(%rsi) /* store last round key verbatim */
1: sub $0x10,%rdx /* advance to next round key */
lea 0x10(%rsi),%rsi
jz 2f /* stop if this is the last one */
movdqa (%rdi,%rdx),%xmm0 /* load round key */
aesimc %xmm0,%xmm0 /* convert encryption to decryption */
movdqa %xmm0,(%rsi) /* store round key */
jmp 1b
2: movdqa (%rdi),%xmm0 /* load first round key */
movdqa %xmm0,(%rsi) /* store first round key verbatim */
ret
END(aesni_enctodec)
/*
* aesni_enc(const struct aesenc *enckey@rdi, const uint8_t in[16] @rsi,
* uint8_t out[16] @rdx, uint32_t nrounds@ecx)
*
* Encrypt a single block.
*
* Standard ABI calling convention.
*/
ENTRY(aesni_enc)
movdqu (%rsi),%xmm0
call aesni_enc1
movdqu %xmm0,(%rdx)
ret
END(aesni_enc)
/*
* aesni_dec(const struct aesdec *deckey@rdi, const uint8_t in[16] @rsi,
* uint8_t out[16] @rdx, uint32_t nrounds@ecx)
*
* Decrypt a single block.
*
* Standard ABI calling convention.
*/
ENTRY(aesni_dec)
movdqu (%rsi),%xmm0
call aesni_dec1
movdqu %xmm0,(%rdx)
ret
END(aesni_dec)
/*
* aesni_cbc_enc(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
* uint8_t *out@rdx, size_t nbytes@rcx, uint8_t iv[16] @r8,
* uint32_t nrounds@r9d)
*
* Encrypt a contiguous sequence of blocks with AES-CBC.
*
* nbytes must be an integral multiple of 16.
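*
* CBC encryption is inherently sequential (each block's input depends
* on the previous ciphertext block), so there is no 8-way variant of
* this routine.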
*
* Standard ABI calling convention.
*/
ENTRY(aesni_cbc_enc)
cmp $0,%rcx
jz 2f
mov %rcx,%r10 /* r10 := nbytes */
movdqu (%r8),%xmm0 /* xmm0 := chaining value */
1: movdqu (%rsi),%xmm1 /* xmm1 := plaintext block */
lea 0x10(%rsi),%rsi
pxor %xmm1,%xmm0 /* xmm0 := cv ^ ptxt */
mov %r9d,%ecx /* ecx := nrounds */
call aesni_enc1 /* xmm0 := ciphertext block */
movdqu %xmm0,(%rdx)
lea 0x10(%rdx),%rdx
sub $0x10,%r10
jnz 1b /* repeat if r10 is nonzero */
movdqu %xmm0,(%r8) /* store chaining value */
2: ret
END(aesni_cbc_enc)
/*
* aesni_cbc_dec1(const struct aesdec *deckey@rdi, const uint8_t *in@rsi,
* uint8_t *out@rdx, size_t nbytes@rcx, const uint8_t iv[16] @r8,
* uint32_t nrounds@r9)
*
* Decrypt a contiguous sequence of blocks with AES-CBC.
*
* nbytes must be a positive integral multiple of 16. This routine
* is not vectorized; use aesni_cbc_dec8 for >=8 blocks at once.
*
* Standard ABI calling convention.
*/
ENTRY(aesni_cbc_dec1)
push %rbp /* create stack frame uint128[1] */
mov %rsp,%rbp
sub $0x10,%rsp
movdqu (%r8),%xmm8 /* xmm8 := iv */
movdqa %xmm8,(%rsp) /* save iv */
mov %rcx,%r10 /* r10 := nbytes */
movdqu -0x10(%rsi,%r10),%xmm0 /* xmm0 := last ciphertext block */
movdqu %xmm0,(%r8) /* update iv */
1: mov %r9d,%ecx /* ecx := nrounds */
call aesni_dec1 /* xmm0 := cv ^ ptxt */
sub $0x10,%r10
jz 2f /* first block if r10 is now zero */
movdqu -0x10(%rsi,%r10),%xmm8 /* xmm8 := chaining value */
pxor %xmm8,%xmm0 /* xmm0 := ptxt */
movdqu %xmm0,(%rdx,%r10) /* store plaintext block */
movdqa %xmm8,%xmm0 /* move cv = ciphertext block */
jmp 1b
2: pxor (%rsp),%xmm0 /* xmm0 := ptxt */
movdqu %xmm0,(%rdx) /* store first plaintext block */
leave
ret
END(aesni_cbc_dec1)
/*
* aesni_cbc_dec8(const struct aesdec *deckey@rdi, const uint8_t *in@rsi,
* uint8_t *out@rdx, size_t nbytes@rcx, const uint8_t iv[16] @r8,
* uint32_t nrounds@r9)
*
* Decrypt a contiguous sequence of 8-block units with AES-CBC.
*
* nbytes must be a positive integral multiple of 128.
*
* Standard ABI calling convention.
*/
ENTRY(aesni_cbc_dec8)
push %rbp /* create stack frame uint128[1] */
mov %rsp,%rbp
sub $0x10,%rsp
movdqu (%r8),%xmm8 /* xmm8 := iv */
movdqa %xmm8,(%rsp) /* save iv */
mov %rcx,%r10 /* r10 := nbytes */
movdqu -0x10(%rsi,%r10),%xmm7 /* xmm7 := ciphertext block[n-1] */
movdqu %xmm7,(%r8) /* update iv */
1: movdqu -0x20(%rsi,%r10),%xmm6 /* xmm6 := ciphertext block[n-2] */
movdqu -0x30(%rsi,%r10),%xmm5 /* xmm5 := ciphertext block[n-3] */
movdqu -0x40(%rsi,%r10),%xmm4 /* xmm4 := ciphertext block[n-4] */
movdqu -0x50(%rsi,%r10),%xmm3 /* xmm3 := ciphertext block[n-5] */
movdqu -0x60(%rsi,%r10),%xmm2 /* xmm2 := ciphertext block[n-6] */
movdqu -0x70(%rsi,%r10),%xmm1 /* xmm1 := ciphertext block[n-7] */
movdqu -0x80(%rsi,%r10),%xmm0 /* xmm0 := ciphertext block[n-8] */
movdqa %xmm6,%xmm15 /* xmm[8+i] := cv[i], 0<i<8 */
movdqa %xmm5,%xmm14
movdqa %xmm4,%xmm13
movdqa %xmm3,%xmm12
movdqa %xmm2,%xmm11
movdqa %xmm1,%xmm10
movdqa %xmm0,%xmm9
mov %r9d,%ecx /* ecx := nrounds */
call aesni_dec8 /* xmm[i] := cv[i] ^ ptxt[i], 0<=i<8 */
pxor %xmm15,%xmm7 /* xmm[i] := ptxt[i], 0<i<8 */
pxor %xmm14,%xmm6
pxor %xmm13,%xmm5
pxor %xmm12,%xmm4
pxor %xmm11,%xmm3
pxor %xmm10,%xmm2
pxor %xmm9,%xmm1
movdqu %xmm7,-0x10(%rdx,%r10) /* store plaintext blocks */
movdqu %xmm6,-0x20(%rdx,%r10)
movdqu %xmm5,-0x30(%rdx,%r10)
movdqu %xmm4,-0x40(%rdx,%r10)
movdqu %xmm3,-0x50(%rdx,%r10)
movdqu %xmm2,-0x60(%rdx,%r10)
movdqu %xmm1,-0x70(%rdx,%r10)
sub $0x80,%r10
jz 2f /* first block if r10 is now zero */
movdqu -0x10(%rsi,%r10),%xmm7 /* xmm7 := cv[0] */
pxor %xmm7,%xmm0 /* xmm0 := ptxt[0] */
movdqu %xmm0,(%rdx,%r10) /* store plaintext block */
jmp 1b
2: pxor (%rsp),%xmm0 /* xmm0 := ptxt[0] */
movdqu %xmm0,(%rdx) /* store first plaintext block */
leave
ret
END(aesni_cbc_dec8)
/*
* aesni_xts_enc1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
* uint8_t *out@rdx, size_t nbytes@rcx, uint8_t tweak[16] @r8,
* uint32_t nrounds@r9d)
*
* Encrypt a contiguous sequence of blocks with AES-XTS.
*
* nbytes must be a positive integral multiple of 16. This routine
* is not vectorized; use aesni_xts_enc8 for >=8 blocks at once.
*
* Standard ABI calling convention.
*/
ENTRY(aesni_xts_enc1)
mov %rcx,%r10 /* r10 := nbytes */
movdqu (%r8),%xmm15 /* xmm15 := tweak */
1: movdqu (%rsi),%xmm0 /* xmm0 := ptxt */
lea 0x10(%rsi),%rsi /* advance %rsi to next block */
pxor %xmm15,%xmm0 /* xmm0 := ptxt ^ tweak */
mov %r9d,%ecx /* ecx := nrounds */
call aesni_enc1 /* xmm0 := AES(ptxt ^ tweak) */
pxor %xmm15,%xmm0 /* xmm0 := AES(ptxt ^ tweak) ^ tweak */
movdqu %xmm0,(%rdx) /* store ciphertext block */
lea 0x10(%rdx),%rdx /* advance %rdx to next block */
call aesni_xts_mulx /* xmm15 *= x; trash xmm0 */
sub $0x10,%r10
jnz 1b /* repeat if more blocks */
movdqu %xmm15,(%r8) /* update tweak */
ret
END(aesni_xts_enc1)
/*
* aesni_xts_enc8(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
* uint8_t *out@rdx, size_t nbytes@rcx, uint8_t tweak[16] @r8,
* uint32_t nrounds@r9d)
*
* Encrypt a contiguous sequence of blocks with AES-XTS.
*
* nbytes must be a positive integral multiple of 128.
*
* Standard ABI calling convention.
*/
ENTRY(aesni_xts_enc8)
push %rbp /* create stack frame uint128[1] */
mov %rsp,%rbp
sub $0x10,%rsp
mov %rcx,%r10 /* r10 := nbytes */
movdqu (%r8),%xmm15 /* xmm15 := tweak[0] */
1: movdqa %xmm15,%xmm8 /* xmm8 := tweak[0] */
call aesni_xts_mulx /* xmm15 := tweak[1] */
movdqa %xmm15,%xmm9 /* xmm9 := tweak[1] */
call aesni_xts_mulx /* xmm15 := tweak[2] */
movdqa %xmm15,%xmm10 /* xmm10 := tweak[2] */
call aesni_xts_mulx /* xmm15 := tweak[3] */
movdqa %xmm15,%xmm11 /* xmm11 := tweak[3] */
call aesni_xts_mulx /* xmm15 := tweak[4] */
movdqa %xmm15,%xmm12 /* xmm12 := tweak[4] */
call aesni_xts_mulx /* xmm15 := tweak[5] */
movdqa %xmm15,%xmm13 /* xmm13 := tweak[5] */
call aesni_xts_mulx /* xmm15 := tweak[6] */
movdqa %xmm15,%xmm14 /* xmm14 := tweak[6] */
call aesni_xts_mulx /* xmm15 := tweak[7] */
movdqu (%rsi),%xmm0 /* xmm[i] := ptxt[i] */
movdqu 0x10(%rsi),%xmm1
movdqu 0x20(%rsi),%xmm2
movdqu 0x30(%rsi),%xmm3
movdqu 0x40(%rsi),%xmm4
movdqu 0x50(%rsi),%xmm5
movdqu 0x60(%rsi),%xmm6
movdqu 0x70(%rsi),%xmm7
lea 0x80(%rsi),%rsi /* advance rsi to next block group */
movdqa %xmm8,(%rsp) /* save tweak[0] */
pxor %xmm8,%xmm0 /* xmm[i] := ptxt[i] ^ tweak[i] */
pxor %xmm9,%xmm1
pxor %xmm10,%xmm2
pxor %xmm11,%xmm3
pxor %xmm12,%xmm4
pxor %xmm13,%xmm5
pxor %xmm14,%xmm6
pxor %xmm15,%xmm7
mov %r9d,%ecx /* ecx := nrounds */
call aesni_enc8 /* xmm[i] := AES(ptxt[i] ^ tweak[i]) */
pxor (%rsp),%xmm0 /* xmm[i] := AES(...) ^ tweak[i] */
pxor %xmm9,%xmm1
pxor %xmm10,%xmm2
pxor %xmm11,%xmm3
pxor %xmm12,%xmm4
pxor %xmm13,%xmm5
pxor %xmm14,%xmm6
pxor %xmm15,%xmm7
movdqu %xmm0,(%rdx) /* store ciphertext blocks */
movdqu %xmm1,0x10(%rdx)
movdqu %xmm2,0x20(%rdx)
movdqu %xmm3,0x30(%rdx)
movdqu %xmm4,0x40(%rdx)
movdqu %xmm5,0x50(%rdx)
movdqu %xmm6,0x60(%rdx)
movdqu %xmm7,0x70(%rdx)
lea 0x80(%rdx),%rdx /* advance rdx to next block group */
call aesni_xts_mulx /* xmm15 := tweak[8] */
sub $0x80,%r10
jnz 1b /* repeat if more block groups */
movdqu %xmm15,(%r8) /* update tweak */
leave
ret
END(aesni_xts_enc8)
/*
* aesni_xts_dec1(const struct aesdec *deckey@rdi, const uint8_t *in@rsi,
* uint8_t *out@rdx, size_t nbytes@rcx, uint8_t tweak[16] @r8,
* uint32_t nrounds@r9d)
*
* Decrypt a contiguous sequence of blocks with AES-XTS.
*
* nbytes must be a positive integral multiple of 16. This routine
* is not vectorized; use aesni_xts_dec8 for >=8 blocks at once.
*
* Standard ABI calling convention.
*/
ENTRY(aesni_xts_dec1)
mov %rcx,%r10 /* r10 := nbytes */
movdqu (%r8),%xmm15 /* xmm15 := tweak */
1: movdqu (%rsi),%xmm0 /* xmm0 := ctxt */
lea 0x10(%rsi),%rsi /* advance %rsi to next block */
pxor %xmm15,%xmm0 /* xmm0 := ctxt ^ tweak */
mov %r9d,%ecx /* ecx := nrounds */
call aesni_dec1 /* xmm0 := AES^-1(ctxt ^ tweak) */
pxor %xmm15,%xmm0 /* xmm0 := AES^-1(ctxt ^ tweak) ^ tweak */
movdqu %xmm0,(%rdx) /* store plaintext block */
lea 0x10(%rdx),%rdx /* advance %rdx to next block */
call aesni_xts_mulx /* xmm15 *= x; trash xmm0 */
sub $0x10,%r10
jnz 1b /* repeat if more blocks */
movdqu %xmm15,(%r8) /* update tweak */
ret
END(aesni_xts_dec1)
/*
* aesni_xts_dec8(const struct aesdec *deckey@rdi, const uint8_t *in@rsi,
* uint8_t *out@rdx, size_t nbytes@rcx, uint8_t tweak[16] @r8,
* uint32_t nrounds@r9d)
*
* Decrypt a contiguous sequence of blocks with AES-XTS.
*
* nbytes must be a positive integral multiple of 128.
*
* Standard ABI calling convention.
*/
ENTRY(aesni_xts_dec8)
push %rbp /* create stack frame uint128[1] */
mov %rsp,%rbp
sub $0x10,%rsp
mov %rcx,%r10 /* r10 := nbytes */
movdqu (%r8),%xmm15 /* xmm15 := tweak[0] */
1: movdqa %xmm15,%xmm8 /* xmm8 := tweak[0] */
call aesni_xts_mulx /* xmm15 := tweak[1] */
movdqa %xmm15,%xmm9 /* xmm9 := tweak[1] */
call aesni_xts_mulx /* xmm15 := tweak[2] */
movdqa %xmm15,%xmm10 /* xmm10 := tweak[2] */
call aesni_xts_mulx /* xmm15 := tweak[3] */
movdqa %xmm15,%xmm11 /* xmm11 := tweak[3] */
call aesni_xts_mulx /* xmm15 := tweak[4] */
movdqa %xmm15,%xmm12 /* xmm12 := tweak[4] */
call aesni_xts_mulx /* xmm15 := tweak[5] */
movdqa %xmm15,%xmm13 /* xmm13 := tweak[5] */
call aesni_xts_mulx /* xmm15 := tweak[6] */
movdqa %xmm15,%xmm14 /* xmm14 := tweak[6] */
call aesni_xts_mulx /* xmm15 := tweak[7] */
movdqu (%rsi),%xmm0 /* xmm[i] := ctxt[i] */
movdqu 0x10(%rsi),%xmm1
movdqu 0x20(%rsi),%xmm2
movdqu 0x30(%rsi),%xmm3
movdqu 0x40(%rsi),%xmm4
movdqu 0x50(%rsi),%xmm5
movdqu 0x60(%rsi),%xmm6
movdqu 0x70(%rsi),%xmm7
lea 0x80(%rsi),%rsi /* advance rsi to next block group */
movdqa %xmm8,(%rsp) /* save tweak[0] */
pxor %xmm8,%xmm0 /* xmm[i] := ctxt[i] ^ tweak[i] */
pxor %xmm9,%xmm1
pxor %xmm10,%xmm2
pxor %xmm11,%xmm3
pxor %xmm12,%xmm4
pxor %xmm13,%xmm5
pxor %xmm14,%xmm6
pxor %xmm15,%xmm7
mov %r9d,%ecx /* ecx := nrounds */
call aesni_dec8 /* xmm[i] := AES^-1(ctxt[i] ^ tweak[i]) */
pxor (%rsp),%xmm0 /* xmm[i] := AES^-1(...) ^ tweak[i] */
pxor %xmm9,%xmm1
pxor %xmm10,%xmm2
pxor %xmm11,%xmm3
pxor %xmm12,%xmm4
pxor %xmm13,%xmm5
pxor %xmm14,%xmm6
pxor %xmm15,%xmm7
movdqu %xmm0,(%rdx) /* store plaintext blocks */
movdqu %xmm1,0x10(%rdx)
movdqu %xmm2,0x20(%rdx)
movdqu %xmm3,0x30(%rdx)
movdqu %xmm4,0x40(%rdx)
movdqu %xmm5,0x50(%rdx)
movdqu %xmm6,0x60(%rdx)
movdqu %xmm7,0x70(%rdx)
lea 0x80(%rdx),%rdx /* advance rdx to next block group */
call aesni_xts_mulx /* xmm15 := tweak[8] */
sub $0x80,%r10
jnz 1b /* repeat if more block groups */
movdqu %xmm15,(%r8) /* update tweak */
leave
ret
END(aesni_xts_dec8)
/*
* aesni_xts_mulx(tweak@xmm15)
*
* Multiply xmm15 by x, modulo x^128 + x^7 + x^2 + x + 1, in place.
* Uses %xmm0 as temporary.
*/
.text
_ALIGN_TEXT
.type aesni_xts_mulx,@function
aesni_xts_mulx:
/*
* Simultaneously determine
* (a) whether the high bit of the low quadword must be
* shifted into the low bit of the high quadword, and
* (b) whether the high bit of the high quadword must be
* carried into x^128 = x^7 + x^2 + x + 1.
*/
pxor %xmm0,%xmm0 /* xmm0 := 0 */
pcmpgtq %xmm15,%xmm0 /* xmm0[i] := -1 if 0 > xmm15[i] else 0 */
pshufd $0b01001110,%xmm0,%xmm0 /* swap halves of xmm0 */
pand xtscarry(%rip),%xmm0 /* copy xtscarry according to mask */
psllq $1,%xmm15 /* shift */
pxor %xmm0,%xmm15 /* incorporate (a) and (b) */
ret
END(aesni_xts_mulx)
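/*
 * xtscarry below: the 0x87 = x^7 + x^2 + x + 1 in the low quadword
 * reduces a carry out of x^127, and the 1 in the high quadword
 * carries the top bit of the low quadword into the bottom bit of the
 * high quadword.
 */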
.section .rodata
.align 16
.type xtscarry,@object
xtscarry:
.byte 0x87,0,0,0, 0,0,0,0, 1,0,0,0, 0,0,0,0
END(xtscarry)
/*
* aesni_xts_update(const uint8_t in[16] @rdi, uint8_t out[16] @rsi)
*
* Update an AES-XTS tweak.
*
* Standard ABI calling convention.
*/
ENTRY(aesni_xts_update)
movdqu (%rdi),%xmm15
call aesni_xts_mulx
movdqu %xmm15,(%rsi)
ret
END(aesni_xts_update)
/*
* aesni_enc1(const struct aesenc *enckey@rdi, uint128_t block@xmm0,
* uint32_t nrounds@ecx)
*
* Encrypt a single AES block in %xmm0.
*
* Internal ABI. Uses %rax and %xmm8 as temporaries. Destroys %ecx.
*/
.text
_ALIGN_TEXT
.type aesni_enc1,@function
aesni_enc1:
pxor (%rdi),%xmm0 /* xor in first round key */
shl $4,%ecx /* ecx := total byte size of round keys */
lea 0x10(%rdi,%rcx),%rax /* rax := end of round key array */
neg %rcx /* rcx := byte offset of round key from end */
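	/*
	 * Loop invariant: %rax + %rcx points at the next round key,
	 * and %rcx reaches zero exactly when the last round key has
	 * been loaded, so one register serves as both index and
	 * counter.
	 */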
1: movdqa (%rax,%rcx),%xmm8 /* load round key */
add $0x10,%rcx
jz 2f /* stop if this is the last one */
aesenc %xmm8,%xmm0
jmp 1b
2: aesenclast %xmm8,%xmm0
ret
END(aesni_enc1)
/*
* aesni_enc8(const struct aesenc *enckey@rdi, uint128_t block0@xmm0, ...,
* block7@xmm7, uint32_t nrounds@ecx)
*
* Encrypt eight AES blocks in %xmm0 through %xmm7 in parallel.
*
* Internal ABI. Uses %rax and %xmm8 as temporaries. Destroys %ecx.
*/
.text
_ALIGN_TEXT
.type aesni_enc8,@function
aesni_enc8:
movdqa (%rdi),%xmm8 /* xor in first round key */
pxor %xmm8,%xmm0
pxor %xmm8,%xmm1
pxor %xmm8,%xmm2
pxor %xmm8,%xmm3
pxor %xmm8,%xmm4
pxor %xmm8,%xmm5
pxor %xmm8,%xmm6
pxor %xmm8,%xmm7
shl $4,%ecx /* ecx := total byte size of round keys */
lea 0x10(%rdi,%rcx),%rax /* rax := end of round key array */
neg %rcx /* rcx := byte offset of round key from end */
1: movdqa (%rax,%rcx),%xmm8 /* load round key */
add $0x10,%rcx
jz 2f /* stop if this is the last one */
aesenc %xmm8,%xmm0
aesenc %xmm8,%xmm1
aesenc %xmm8,%xmm2
aesenc %xmm8,%xmm3
aesenc %xmm8,%xmm4
aesenc %xmm8,%xmm5
aesenc %xmm8,%xmm6
aesenc %xmm8,%xmm7
jmp 1b
2: aesenclast %xmm8,%xmm0
aesenclast %xmm8,%xmm1
aesenclast %xmm8,%xmm2
aesenclast %xmm8,%xmm3
aesenclast %xmm8,%xmm4
aesenclast %xmm8,%xmm5
aesenclast %xmm8,%xmm6
aesenclast %xmm8,%xmm7
ret
END(aesni_enc8)
/*
* aesni_dec1(const struct aesdec *deckey@rdi, uint128_t block@xmm0,
* uint32_t nrounds@ecx)
*
* Decrypt a single AES block in %xmm0.
*
* Internal ABI. Uses %rax and %xmm8 as temporaries. Destroys %ecx.
*/
.text
_ALIGN_TEXT
.type aesni_dec1,@function
aesni_dec1:
pxor (%rdi),%xmm0 /* xor in first round key */
shl $4,%ecx /* ecx := total byte size of round keys */
lea 0x10(%rdi,%rcx),%rax /* rax := end of round key array */
neg %rcx /* rcx := byte offset of round key from end */
1: movdqa (%rax,%rcx),%xmm8 /* load round key */
add $0x10,%rcx
jz 2f /* stop if this is the last one */
aesdec %xmm8,%xmm0
jmp 1b
2: aesdeclast %xmm8,%xmm0
ret
END(aesni_dec1)
/*
* aesni_dec8(const struct aesdec *deckey@rdi, uint128_t block0@xmm0, ...,
* block7@xmm7, uint32_t nrounds@ecx)
*
* Decrypt eight AES blocks in %xmm0 through %xmm7 in parallel.
*
* Internal ABI. Uses %rax and %xmm8 as temporaries. Destroys %ecx.
*/
.text
_ALIGN_TEXT
.type aesni_dec8,@function
aesni_dec8:
movdqa (%rdi),%xmm8 /* xor in first round key */
pxor %xmm8,%xmm0
pxor %xmm8,%xmm1
pxor %xmm8,%xmm2
pxor %xmm8,%xmm3
pxor %xmm8,%xmm4
pxor %xmm8,%xmm5
pxor %xmm8,%xmm6
pxor %xmm8,%xmm7
shl $4,%ecx /* ecx := total byte size of round keys */
lea 0x10(%rdi,%rcx),%rax /* rax := end of round key array */
neg %rcx /* rcx := byte offset of round key from end */
1: movdqa (%rax,%rcx),%xmm8 /* load round key */
add $0x10,%rcx
jz 2f /* stop if this is the last one */
aesdec %xmm8,%xmm0
aesdec %xmm8,%xmm1
aesdec %xmm8,%xmm2
aesdec %xmm8,%xmm3
aesdec %xmm8,%xmm4
aesdec %xmm8,%xmm5
aesdec %xmm8,%xmm6
aesdec %xmm8,%xmm7
jmp 1b
2: aesdeclast %xmm8,%xmm0
aesdeclast %xmm8,%xmm1
aesdeclast %xmm8,%xmm2
aesdeclast %xmm8,%xmm3
aesdeclast %xmm8,%xmm4
aesdeclast %xmm8,%xmm5
aesdeclast %xmm8,%xmm6
aesdeclast %xmm8,%xmm7
ret
END(aesni_dec8)
# $NetBSD: files.aesni,v 1.1 2020/06/29 23:29:40 riastradh Exp $
ifdef amd64 # amd64-only for now; i386 left as exercise for reader
file crypto/aes/arch/x86/aes_ni.c aes
file crypto/aes/arch/x86/aes_ni_64.S aes
endif