lib/crypto: arm64/aes: Migrate optimized code into library
Move the ARM64 optimized AES key expansion and single-block AES
en/decryption code into lib/crypto/, wire it up to the AES library API,
and remove the superseded crypto_cipher algorithms. The result is that
both the AES library and crypto_cipher APIs are now optimized for ARM64,
whereas previously only crypto_cipher was (and the optimizations weren't
enabled by default, which this fixes as well).

Note: to see the diff from arch/arm64/crypto/aes-ce-glue.c to
lib/crypto/arm64/aes.h, view this commit with 'git show -M10'.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260112192035.10427-12-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
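[Editor's note] For orientation, a minimal sketch, not part of the commit, of how a kernel caller uses the AES library API that this change accelerates. It assumes only the long-standing <crypto/aes.h> interface (struct crypto_aes_ctx, aes_expandkey(), aes_encrypt()), not the new key-preparation types this series introduces; the function name is illustrative.

	/*
	 * Hedged sketch: single-block AES via the kernel's AES library
	 * API (CRYPTO_LIB_AES).  With this commit, these calls reach the
	 * ARM64-optimized code on arm64 instead of the generic C tables.
	 */
	#include <crypto/aes.h>
	#include <linux/string.h>

	static int example_aes_one_block(const u8 *key, unsigned int key_len,
					 const u8 in[AES_BLOCK_SIZE],
					 u8 out[AES_BLOCK_SIZE])
	{
		struct crypto_aes_ctx ctx;
		int err;

		err = aes_expandkey(&ctx, key, key_len); /* 16/24/32-byte keys */
		if (err)
			return err;

		aes_encrypt(&ctx, out, in);		/* one 16-byte block */

		memzero_explicit(&ctx, sizeof(ctx));	/* wipe expanded key */
		return 0;
	}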
lib/crypto/Kconfig

@@ -15,6 +15,7 @@ config CRYPTO_LIB_AES_ARCH
 	bool
 	depends on CRYPTO_LIB_AES && !UML && !KMSAN
 	default y if ARM
+	default y if ARM64
 
 config CRYPTO_LIB_AESCFB
 	tristate
lib/crypto/Makefile

@@ -24,6 +24,11 @@ CFLAGS_aes.o += -I$(src)/$(SRCARCH)
 
 libaes-$(CONFIG_ARM) += arm/aes-cipher-core.o
 
+ifeq ($(CONFIG_ARM64),y)
+libaes-y += arm64/aes-cipher-core.o
+libaes-$(CONFIG_KERNEL_MODE_NEON) += arm64/aes-ce-core.o
+endif
+
 endif # CONFIG_CRYPTO_LIB_AES_ARCH
 
 ################################################################################
lib/crypto/arm64/aes-ce-core.S (new file, 84 lines)
@@ -0,0 +1,84 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.arch		armv8-a+crypto

SYM_FUNC_START(__aes_ce_encrypt)
	sub		w3, w3, #2
	ld1		{v0.16b}, [x2]
	ld1		{v1.4s}, [x0], #16
	cmp		w3, #10
	bmi		0f
	bne		3f
	mov		v3.16b, v1.16b
	b		2f
0:	mov		v2.16b, v1.16b
	ld1		{v3.4s}, [x0], #16
1:	aese		v0.16b, v2.16b
	aesmc		v0.16b, v0.16b
2:	ld1		{v1.4s}, [x0], #16
	aese		v0.16b, v3.16b
	aesmc		v0.16b, v0.16b
3:	ld1		{v2.4s}, [x0], #16
	subs		w3, w3, #3
	aese		v0.16b, v1.16b
	aesmc		v0.16b, v0.16b
	ld1		{v3.4s}, [x0], #16
	bpl		1b
	aese		v0.16b, v2.16b
	eor		v0.16b, v0.16b, v3.16b
	st1		{v0.16b}, [x1]
	ret
SYM_FUNC_END(__aes_ce_encrypt)

SYM_FUNC_START(__aes_ce_decrypt)
	sub		w3, w3, #2
	ld1		{v0.16b}, [x2]
	ld1		{v1.4s}, [x0], #16
	cmp		w3, #10
	bmi		0f
	bne		3f
	mov		v3.16b, v1.16b
	b		2f
0:	mov		v2.16b, v1.16b
	ld1		{v3.4s}, [x0], #16
1:	aesd		v0.16b, v2.16b
	aesimc		v0.16b, v0.16b
2:	ld1		{v1.4s}, [x0], #16
	aesd		v0.16b, v3.16b
	aesimc		v0.16b, v0.16b
3:	ld1		{v2.4s}, [x0], #16
	subs		w3, w3, #3
	aesd		v0.16b, v1.16b
	aesimc		v0.16b, v0.16b
	ld1		{v3.4s}, [x0], #16
	bpl		1b
	aesd		v0.16b, v2.16b
	eor		v0.16b, v0.16b, v3.16b
	st1		{v0.16b}, [x1]
	ret
SYM_FUNC_END(__aes_ce_decrypt)

/*
 * __aes_ce_sub() - use the aese instruction to perform the AES sbox
 *                  substitution on each byte in 'input'
 */
SYM_FUNC_START(__aes_ce_sub)
	dup		v1.4s, w0
	movi		v0.16b, #0
	aese		v0.16b, v1.16b
	umov		w0, v0.s[0]
	ret
SYM_FUNC_END(__aes_ce_sub)

SYM_FUNC_START(__aes_ce_invert)
	ld1		{v0.4s}, [x1]
	aesimc		v1.16b, v0.16b
	st1		{v1.4s}, [x0]
	ret
SYM_FUNC_END(__aes_ce_invert)
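[Editor's note] To make the trick in __aes_ce_sub() concrete: AESE computes SubBytes(ShiftRows(state ^ key)), and ShiftRows is a no-op on a state whose four columns are identical, so duplicating the input word across a zeroed state and running one AESE yields plain SubWord(input). A hedged C model follows; it assumes the crypto_aes_sbox[] table that lib/crypto/aes.c exports via <crypto/aes.h>.

	/*
	 * Hedged C model of __aes_ce_sub(): apply the AES S-box to each
	 * of the four bytes of a 32-bit word (FIPS-197 SubWord()).
	 */
	#include <crypto/aes.h>

	static u32 aes_sub_model(u32 input)
	{
		u32 out = 0;
		int i;

		for (i = 0; i < 4; i++)
			out |= (u32)crypto_aes_sbox[(input >> (8 * i)) & 0xff]
				<< (8 * i);
		return out;
	}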
lib/crypto/arm64/aes-cipher-core.S (new file, 132 lines)
@@ -0,0 +1,132 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text

	rk		.req	x0
	out		.req	x1
	in		.req	x2
	rounds		.req	x3
	tt		.req	x2

	.macro		__pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
	.ifc		\op\shift, b0
	ubfiz		\reg0, \in0, #2, #8
	ubfiz		\reg1, \in1e, #2, #8
	.else
	ubfx		\reg0, \in0, #\shift, #8
	ubfx		\reg1, \in1e, #\shift, #8
	.endif

	/*
	 * AArch64 cannot do byte size indexed loads from a table containing
	 * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
	 * valid instruction.  So perform the shift explicitly first for the
	 * high bytes (the low byte is shifted implicitly by using ubfiz rather
	 * than ubfx above)
	 */
	.ifnc		\op, b
	ldr		\reg0, [tt, \reg0, uxtw #2]
	ldr		\reg1, [tt, \reg1, uxtw #2]
	.else
	.if		\shift > 0
	lsl		\reg0, \reg0, #2
	lsl		\reg1, \reg1, #2
	.endif
	ldrb		\reg0, [tt, \reg0, uxtw]
	ldrb		\reg1, [tt, \reg1, uxtw]
	.endif
	.endm

	.macro		__pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
	ubfx		\reg0, \in0, #\shift, #8
	ubfx		\reg1, \in1d, #\shift, #8
	ldr\op		\reg0, [tt, \reg0, uxtw #\sz]
	ldr\op		\reg1, [tt, \reg1, uxtw #\sz]
	.endm

	.macro		__hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
	ldp		\out0, \out1, [rk], #8

	__pair\enc	\sz, \op, w12, w13, \in0, \in1, \in3, 0
	__pair\enc	\sz, \op, w14, w15, \in1, \in2, \in0, 8
	__pair\enc	\sz, \op, w16, w17, \in2, \in3, \in1, 16
	__pair\enc	\sz, \op, \t0, \t1, \in3, \in0, \in2, 24

	eor		\out0, \out0, w12
	eor		\out1, \out1, w13
	eor		\out0, \out0, w14, ror #24
	eor		\out1, \out1, w15, ror #24
	eor		\out0, \out0, w16, ror #16
	eor		\out1, \out1, w17, ror #16
	eor		\out0, \out0, \t0, ror #8
	eor		\out1, \out1, \t1, ror #8
	.endm

	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
	.endm

	.macro		do_crypt, round, ttab, ltab, bsz
	ldp		w4, w5, [in]
	ldp		w6, w7, [in, #8]
	ldp		w8, w9, [rk], #16
	ldp		w10, w11, [rk, #-8]

CPU_BE(	rev		w4, w4		)
CPU_BE(	rev		w5, w5		)
CPU_BE(	rev		w6, w6		)
CPU_BE(	rev		w7, w7		)

	eor		w4, w4, w8
	eor		w5, w5, w9
	eor		w6, w6, w10
	eor		w7, w7, w11

	adr_l		tt, \ttab

	tbnz		rounds, #1, 1f

0:	\round		w8, w9, w10, w11, w4, w5, w6, w7
	\round		w4, w5, w6, w7, w8, w9, w10, w11

1:	subs		rounds, rounds, #4
	\round		w8, w9, w10, w11, w4, w5, w6, w7
	b.ls		3f
2:	\round		w4, w5, w6, w7, w8, w9, w10, w11
	b		0b
3:	adr_l		tt, \ltab
	\round		w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b

CPU_BE(	rev		w4, w4		)
CPU_BE(	rev		w5, w5		)
CPU_BE(	rev		w6, w6		)
CPU_BE(	rev		w7, w7		)

	stp		w4, w5, [out]
	stp		w6, w7, [out, #8]
	ret
	.endm

SYM_FUNC_START(__aes_arm64_encrypt)
	do_crypt	fround, aes_enc_tab, aes_enc_tab + 1, 2
SYM_FUNC_END(__aes_arm64_encrypt)

	.align		5
SYM_FUNC_START(__aes_arm64_decrypt)
	do_crypt	iround, aes_dec_tab, crypto_aes_inv_sbox, 0
SYM_FUNC_END(__aes_arm64_decrypt)
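[Editor's note] A hedged C model of one output column of the __hround macro above, to show what the table lookups and rotations compute. Here ft[] stands in for the 256-entry u32 encryption table (aes_enc_tab in this commit), and the ror32() amounts mirror the ror #24/#16/#8 operands in the assembly.

	/*
	 * Hedged C model of one forward-round output word from __hround:
	 * round key XORed with four rotated table lookups, one per input
	 * byte position.  ror32() is the kernel helper from
	 * <linux/bitops.h>.
	 */
	#include <linux/bitops.h>
	#include <linux/types.h>

	static u32 fround_column_model(const u32 *ft, u32 rk,
				       u32 in0, u32 in1, u32 in2, u32 in3)
	{
		return rk ^ ft[in0 & 0xff]
			  ^ ror32(ft[(in1 >> 8) & 0xff], 24)
			  ^ ror32(ft[(in2 >> 16) & 0xff], 16)
			  ^ ror32(ft[(in3 >> 24) & 0xff], 8);
	}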
lib/crypto/arm64/aes.h (new file, 164 lines)
@@ -0,0 +1,164 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * AES block cipher, optimized for ARM64
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 * Copyright 2026 Google LLC
 */

#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/unaligned.h>
#include <linux/cpufeature.h>

static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_aes);

struct aes_block {
	u8 b[AES_BLOCK_SIZE];
};

asmlinkage void __aes_arm64_encrypt(const u32 rk[], u8 out[AES_BLOCK_SIZE],
				    const u8 in[AES_BLOCK_SIZE], int rounds);
asmlinkage void __aes_arm64_decrypt(const u32 inv_rk[], u8 out[AES_BLOCK_SIZE],
				    const u8 in[AES_BLOCK_SIZE], int rounds);
asmlinkage void __aes_ce_encrypt(const u32 rk[], u8 out[AES_BLOCK_SIZE],
				 const u8 in[AES_BLOCK_SIZE], int rounds);
asmlinkage void __aes_ce_decrypt(const u32 inv_rk[], u8 out[AES_BLOCK_SIZE],
				 const u8 in[AES_BLOCK_SIZE], int rounds);
asmlinkage u32 __aes_ce_sub(u32 l);
asmlinkage void __aes_ce_invert(struct aes_block *out,
				const struct aes_block *in);

/*
 * Expand an AES key using the crypto extensions if supported and usable, or
 * generic code otherwise.  The expanded key format is compatible between the
 * two cases.  The outputs are @rndkeys (required) and @inv_rndkeys (optional).
 */
static void aes_expandkey_arm64(u32 rndkeys[], u32 *inv_rndkeys,
				const u8 *in_key, int key_len, int nrounds)
{
	/*
	 * The AES key schedule round constants
	 */
	static u8 const rcon[] = {
		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
	};

	u32 kwords = key_len / sizeof(u32);
	struct aes_block *key_enc, *key_dec;
	int i, j;

	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) ||
	    !static_branch_likely(&have_aes) || unlikely(!may_use_simd())) {
		aes_expandkey_generic(rndkeys, inv_rndkeys, in_key, key_len);
		return;
	}

	for (i = 0; i < kwords; i++)
		rndkeys[i] = get_unaligned_le32(&in_key[i * sizeof(u32)]);

	scoped_ksimd() {
		for (i = 0; i < sizeof(rcon); i++) {
			u32 *rki = &rndkeys[i * kwords];
			u32 *rko = rki + kwords;

			rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^
				 rcon[i] ^ rki[0];
			rko[1] = rko[0] ^ rki[1];
			rko[2] = rko[1] ^ rki[2];
			rko[3] = rko[2] ^ rki[3];

			if (key_len == AES_KEYSIZE_192) {
				if (i >= 7)
					break;
				rko[4] = rko[3] ^ rki[4];
				rko[5] = rko[4] ^ rki[5];
			} else if (key_len == AES_KEYSIZE_256) {
				if (i >= 6)
					break;
				rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
				rko[5] = rko[4] ^ rki[5];
				rko[6] = rko[5] ^ rki[6];
				rko[7] = rko[6] ^ rki[7];
			}
		}

		/*
		 * Generate the decryption keys for the Equivalent Inverse
		 * Cipher.  This involves reversing the order of the round
		 * keys, and applying the Inverse Mix Columns transformation on
		 * all but the first and the last one.
		 */
		if (inv_rndkeys) {
			key_enc = (struct aes_block *)rndkeys;
			key_dec = (struct aes_block *)inv_rndkeys;
			j = nrounds;

			key_dec[0] = key_enc[j];
			for (i = 1, j--; j > 0; i++, j--)
				__aes_ce_invert(key_dec + i, key_enc + j);
			key_dec[i] = key_enc[0];
		}
	}
}

static void aes_preparekey_arch(union aes_enckey_arch *k,
				union aes_invkey_arch *inv_k,
				const u8 *in_key, int key_len, int nrounds)
{
	aes_expandkey_arm64(k->rndkeys, inv_k ? inv_k->inv_rndkeys : NULL,
			    in_key, key_len, nrounds);
}

/*
 * This is here temporarily until the remaining AES mode implementations are
 * migrated from arch/arm64/crypto/ to lib/crypto/arm64/.
 */
int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
		     unsigned int key_len)
{
	if (aes_check_keylen(key_len) != 0)
		return -EINVAL;
	ctx->key_length = key_len;
	aes_expandkey_arm64(ctx->key_enc, ctx->key_dec, in_key, key_len,
			    6 + key_len / 4);
	return 0;
}
EXPORT_SYMBOL(ce_aes_expandkey);

static void aes_encrypt_arch(const struct aes_enckey *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
	    static_branch_likely(&have_aes) && likely(may_use_simd())) {
		scoped_ksimd()
			__aes_ce_encrypt(key->k.rndkeys, out, in, key->nrounds);
	} else {
		__aes_arm64_encrypt(key->k.rndkeys, out, in, key->nrounds);
	}
}

static void aes_decrypt_arch(const struct aes_key *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
	    static_branch_likely(&have_aes) && likely(may_use_simd())) {
		scoped_ksimd()
			__aes_ce_decrypt(key->inv_k.inv_rndkeys, out, in,
					 key->nrounds);
	} else {
		__aes_arm64_decrypt(key->inv_k.inv_rndkeys, out, in,
				    key->nrounds);
	}
}

#ifdef CONFIG_KERNEL_MODE_NEON
#define aes_mod_init_arch aes_mod_init_arch
static void aes_mod_init_arch(void)
{
	if (cpu_have_named_feature(AES))
		static_branch_enable(&have_aes);
}
#endif /* CONFIG_KERNEL_MODE_NEON */
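[Editor's note] For intuition about the scoped_ksimd() loop in aes_expandkey_arm64() above, a hedged scalar model of one AES-128 expansion step. sub_word() is a hypothetical stand-in for __aes_ce_sub() (apply the AES S-box to each byte of a word; see the model after aes-ce-core.S).

	/*
	 * Hedged model of one AES-128 key-expansion step: rki[] is the
	 * current round key, rko[] receives the next one.
	 */
	#include <linux/bitops.h>
	#include <linux/types.h>

	extern u32 sub_word(u32 w);	/* assumed: per-byte S-box lookup */

	static void aes128_expand_step_model(const u32 rki[4], u32 rko[4],
					     u8 rcon_i)
	{
		/* FIPS-197: SubWord(RotWord(w[i-1])) ^ Rcon ^ w[i-4] */
		rko[0] = ror32(sub_word(rki[3]), 8) ^ rcon_i ^ rki[0];
		rko[1] = rko[0] ^ rki[1];
		rko[2] = rko[1] ^ rki[2];
		rko[3] = rko[2] ^ rki[3];
	}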
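[Editor's note] Finally, the dispatch shape shared by aes_encrypt_arch() and aes_decrypt_arch() above, reduced to a hedged standalone sketch. The names op_fast(), op_scalar(), and have_fast_path are illustrative, not from the commit: a static key flipped once at init selects the fast path, and may_use_simd() guards contexts where the NEON register state cannot be touched.

	/* Hedged sketch of the static-key dispatch pattern used above. */
	#include <asm/simd.h>
	#include <linux/cpufeature.h>
	#include <linux/jump_label.h>

	static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_fast_path);

	static void op_fast(void)   { /* e.g. __aes_ce_encrypt(), needs NEON */ }
	static void op_scalar(void) { /* e.g. __aes_arm64_encrypt(), always safe */ }

	static void op_dispatch(void)
	{
		/* Branch is patched to a no-op once the key is enabled. */
		if (static_branch_likely(&have_fast_path) &&
		    likely(may_use_simd()))
			op_fast();	/* real code wraps this in scoped_ksimd() */
		else
			op_scalar();
	}

	static void op_init(void)
	{
		if (cpu_have_named_feature(AES))	/* arm64 feature check */
			static_branch_enable(&have_fast_path);
	}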