lib/crypto: arm64/aes: Migrate optimized code into library

Move the ARM64 optimized AES key expansion and single-block AES
en/decryption code into lib/crypto/, wire it up to the AES library API,
and remove the superseded crypto_cipher algorithms.

The result is that both the AES library and crypto_cipher APIs are now
optimized for ARM64, whereas previously only crypto_cipher was (and the
optimizations weren't enabled by default, which this fixes as well).

Note: to see the diff from arch/arm64/crypto/aes-ce-glue.c to
lib/crypto/arm64/aes.h, view this commit with 'git show -M10'.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260112192035.10427-12-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
This commit is contained in:
Eric Biggers
2026-01-12 11:20:09 -08:00
parent fa2297750c
commit 2b1ef7aeeb
13 changed files with 181 additions and 290 deletions

View File

@@ -15,6 +15,7 @@ config CRYPTO_LIB_AES_ARCH
bool
depends on CRYPTO_LIB_AES && !UML && !KMSAN
default y if ARM
default y if ARM64
config CRYPTO_LIB_AESCFB
tristate

View File

@@ -24,6 +24,11 @@ CFLAGS_aes.o += -I$(src)/$(SRCARCH)
libaes-$(CONFIG_ARM) += arm/aes-cipher-core.o
# arm64: aes-cipher-core.o is always added to libaes; aes-ce-core.o is added
# only when CONFIG_KERNEL_MODE_NEON is enabled.
ifeq ($(CONFIG_ARM64),y)
libaes-y += arm64/aes-cipher-core.o
libaes-$(CONFIG_KERNEL_MODE_NEON) += arm64/aes-ce-core.o
endif
endif # CONFIG_CRYPTO_LIB_AES_ARCH
################################################################################

View File

@@ -0,0 +1,84 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.arch armv8-a+crypto
/*
 * __aes_ce_encrypt() - encrypt a single 16-byte block using the aese/aesmc
 * instructions.
 *
 * Register use (presumably matching the C prototype's rk, out, in, rounds):
 *   x0 - round keys, consumed 16 bytes at a time with post-increment
 *   x1 - destination block
 *   x2 - source block
 *   w3 - number of rounds
 *
 * The loop body performs three rounds per iteration. It is entered at label
 * 0, 2 or 3 depending on the round count so that the loop always ends right
 * before the final-round sequence.
 */
SYM_FUNC_START(__aes_ce_encrypt)
/* w3 = rounds - 2, compared against 10 below to pick the entry point */
sub w3, w3, #2
/* v0 = state (input block), v1 = first round key */
ld1 {v0.16b}, [x2]
ld1 {v1.4s}, [x0], #16
cmp w3, #10
/* rounds < 12: enter at the top of the loop body */
bmi 0f
/* rounds > 12: enter at the third round of the loop body */
bne 3f
/* rounds == 12: enter at the second round of the loop body */
mov v3.16b, v1.16b
b 2f
0: mov v2.16b, v1.16b
ld1 {v3.4s}, [x0], #16
1: aese v0.16b, v2.16b
aesmc v0.16b, v0.16b
2: ld1 {v1.4s}, [x0], #16
aese v0.16b, v3.16b
aesmc v0.16b, v0.16b
3: ld1 {v2.4s}, [x0], #16
/* the flags set here are consumed by the bpl at the end of the loop body */
subs w3, w3, #3
aese v0.16b, v1.16b
aesmc v0.16b, v0.16b
ld1 {v3.4s}, [x0], #16
bpl 1b
/* final round: aese without aesmc, then XOR with the last key loaded */
aese v0.16b, v2.16b
eor v0.16b, v0.16b, v3.16b
st1 {v0.16b}, [x1]
ret
SYM_FUNC_END(__aes_ce_encrypt)
/*
 * __aes_ce_decrypt() - decrypt a single 16-byte block using the aesd/aesimc
 * instructions.
 *
 * Same structure and register use as __aes_ce_encrypt(): x0 walks the round
 * keys (presumably the inverse key schedule, per the C prototype's inv_rk),
 * x1 is the destination block, x2 the source block and w3 the round count.
 */
SYM_FUNC_START(__aes_ce_decrypt)
/* w3 = rounds - 2, compared against 10 below to pick the entry point */
sub w3, w3, #2
/* v0 = state (input block), v1 = first round key */
ld1 {v0.16b}, [x2]
ld1 {v1.4s}, [x0], #16
cmp w3, #10
/* rounds < 12: enter at the top of the loop body */
bmi 0f
/* rounds > 12: enter at the third round of the loop body */
bne 3f
/* rounds == 12: enter at the second round of the loop body */
mov v3.16b, v1.16b
b 2f
0: mov v2.16b, v1.16b
ld1 {v3.4s}, [x0], #16
1: aesd v0.16b, v2.16b
aesimc v0.16b, v0.16b
2: ld1 {v1.4s}, [x0], #16
aesd v0.16b, v3.16b
aesimc v0.16b, v0.16b
3: ld1 {v2.4s}, [x0], #16
/* the flags set here are consumed by the bpl at the end of the loop body */
subs w3, w3, #3
aesd v0.16b, v1.16b
aesimc v0.16b, v0.16b
ld1 {v3.4s}, [x0], #16
bpl 1b
/* final round: aesd without aesimc, then XOR with the last key loaded */
aesd v0.16b, v2.16b
eor v0.16b, v0.16b, v3.16b
st1 {v0.16b}, [x1]
ret
SYM_FUNC_END(__aes_ce_decrypt)
/*
 * __aes_ce_sub() - use the aese instruction to perform the AES sbox
 * substitution on each byte of the 32-bit value passed in w0
 *
 * The result is returned in w0.
 */
SYM_FUNC_START(__aes_ce_sub)
/* replicate the input word into all four 32-bit lanes of v1 */
dup v1.4s, w0
/* start from an all-zero state so that aese operates on v1's bytes only */
movi v0.16b, #0
aese v0.16b, v1.16b
/* all lanes held the same word, so lane 0 carries the substituted word */
umov w0, v0.s[0]
ret
SYM_FUNC_END(__aes_ce_sub)
/*
 * __aes_ce_invert() - apply the aesimc instruction to one 16-byte block
 *
 * Loads the block at [x1], applies aesimc and stores the result at [x0]
 * (out and in respectively, going by the C prototype).
 */
SYM_FUNC_START(__aes_ce_invert)
ld1 {v0.4s}, [x1]
aesimc v1.16b, v0.16b
st1 {v1.4s}, [x0]
ret
SYM_FUNC_END(__aes_ce_invert)

View File

@@ -0,0 +1,132 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Scalar AES core transform
*
* Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>
.text
/*
 * Register aliases used by the macros below. Note that 'in' and 'tt' both
 * name x2: do_crypt reads the whole input block through 'in' first and only
 * afterwards loads a table address into 'tt'.
 */
rk .req x0
out .req x1
in .req x2
rounds .req x3
tt .req x2
/*
 * __pair1 - table lookup pair selected by __hround when its 'enc' argument
 * is 1 (as passed by fround).
 *
 * Extracts the byte at bit offset \shift from \in0 and \in1e and replaces
 * \reg0 and \reg1 with the table entries at those indexes. \in1d and \sz are
 * accepted for signature compatibility with __pair0 but are not used here.
 * With \op empty a 32-bit entry is loaded; with \op == b a single byte is
 * loaded from a table whose entries are 4 bytes apart.
 */
.macro __pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
.ifc \op\shift, b0
ubfiz \reg0, \in0, #2, #8
ubfiz \reg1, \in1e, #2, #8
.else
ubfx \reg0, \in0, #\shift, #8
ubfx \reg1, \in1e, #\shift, #8
.endif
/*
* AArch64 cannot do byte size indexed loads from a table containing
* 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
* valid instruction. So perform the shift explicitly first for the
* high bytes (the low byte is shifted implicitly by using ubfiz rather
* than ubfx above)
*/
.ifnc \op, b
ldr \reg0, [tt, \reg0, uxtw #2]
ldr \reg1, [tt, \reg1, uxtw #2]
.else
.if \shift > 0
lsl \reg0, \reg0, #2
lsl \reg1, \reg1, #2
.endif
ldrb \reg0, [tt, \reg0, uxtw]
ldrb \reg1, [tt, \reg1, uxtw]
.endif
.endm
/*
 * __pair0 - table lookup pair selected by __hround when its 'enc' argument
 * is 0 (as passed by iround).
 *
 * Extracts the byte at bit offset \shift from \in0 and \in1d (\in1e is
 * unused) and loads the table entries at those indexes into \reg0 and \reg1.
 * \op is appended to the ldr mnemonic and \sz is the index shift, so the
 * caller chooses between word loads and byte loads from a byte table.
 */
.macro __pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
ubfx \reg0, \in0, #\shift, #8
ubfx \reg1, \in1d, #\shift, #8
ldr\op \reg0, [tt, \reg0, uxtw #\sz]
ldr\op \reg1, [tt, \reg1, uxtw #\sz]
.endm
/*
 * __hround - compute two of the four output words of one round.
 *
 * Loads two round key words from [rk] (advancing rk by 8) into \out0 and
 * \out1, performs table lookups for the bytes at bit offsets 0, 8, 16 and 24
 * of the inputs via __pair1 or __pair0 (selected by \enc), and XORs the
 * rotated lookup results into the outputs. w12-w17 are used as scratch
 * registers; \t0 and \t1 hold the results of the last lookup pair.
 */
.macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
ldp \out0, \out1, [rk], #8
__pair\enc \sz, \op, w12, w13, \in0, \in1, \in3, 0
__pair\enc \sz, \op, w14, w15, \in1, \in2, \in0, 8
__pair\enc \sz, \op, w16, w17, \in2, \in3, \in1, 16
__pair\enc \sz, \op, \t0, \t1, \in3, \in0, \in2, 24
eor \out0, \out0, w12
eor \out1, \out1, w13
eor \out0, \out0, w14, ror #24
eor \out1, \out1, w15, ror #24
eor \out0, \out0, w16, ror #16
eor \out1, \out1, w17, ror #16
eor \out0, \out0, \t0, ror #8
eor \out1, \out1, \t1, ror #8
.endm
/*
 * fround - one full round built from two __hround invocations with enc=1
 * (i.e. using __pair1). The first half uses \out2/\out3 as temporaries
 * before they are written; the second half reuses the input registers
 * \in1/\in2 as temporaries. \sz defaults to 2 and \op to empty.
 */
.macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
__hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
__hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
.endm
/*
 * iround - one full round built from two __hround invocations with enc=0
 * (i.e. using __pair0). Compared with fround, the inputs are passed to
 * __hround in the order in0, in3, in2, in1 rather than in0, in1, in2, in3.
 * The second half reuses the input registers \in1/\in0 as temporaries.
 * \sz defaults to 2 and \op to empty.
 */
.macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
__hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
__hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
.endm
/*
 * do_crypt - body shared by __aes_arm64_encrypt and __aes_arm64_decrypt.
 *
 *   \round - round macro to expand (fround or iround)
 *   \ttab  - lookup table used for all rounds but the last
 *   \ltab  - lookup table used for the last round (byte loads)
 *   \bsz   - index shift passed to the last round
 *
 * The state lives in w4-w7 and w8-w11, which alternate as the input and
 * output of successive rounds.
 */
.macro do_crypt, round, ttab, ltab, bsz
/* load the input block and the first four round key words */
ldp w4, w5, [in]
ldp w6, w7, [in, #8]
ldp w8, w9, [rk], #16
ldp w10, w11, [rk, #-8]
/* byte-swap the input words (presumably emitted on big-endian only) */
CPU_BE( rev w4, w4 )
CPU_BE( rev w5, w5 )
CPU_BE( rev w6, w6 )
CPU_BE( rev w7, w7 )
/* initial round key addition */
eor w4, w4, w8
eor w5, w5, w9
eor w6, w6, w10
eor w7, w7, w11
/* from here on x2 holds the table address instead of the input pointer */
adr_l tt, \ttab
/* round counts with bit 1 set skip the first two rounds of the loop */
tbnz rounds, #1, 1f
0: \round w8, w9, w10, w11, w4, w5, w6, w7
\round w4, w5, w6, w7, w8, w9, w10, w11
/*
 * The flags set by this subs are tested by the b.ls after the next round;
 * the round macros expand only to loads, bitfield extracts, shifts and eor,
 * none of which write the flags.
 */
1: subs rounds, rounds, #4
\round w8, w9, w10, w11, w4, w5, w6, w7
b.ls 3f
2: \round w4, w5, w6, w7, w8, w9, w10, w11
b 0b
/* last round: switch tables and use byte-sized loads */
3: adr_l tt, \ltab
\round w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b
/* byte-swap the output words (presumably emitted on big-endian only) */
CPU_BE( rev w4, w4 )
CPU_BE( rev w5, w5 )
CPU_BE( rev w6, w6 )
CPU_BE( rev w7, w7 )
stp w4, w5, [out]
stp w6, w7, [out, #8]
ret
.endm
/*
 * __aes_arm64_encrypt() - scalar single-block encryption.
 *
 * Uses aes_enc_tab for the main rounds. The last round performs byte loads
 * at aes_enc_tab + 1 with 4-byte spacing, i.e. it reads byte 1 of each
 * 32-bit table entry (presumably the plain sbox value - TODO confirm against
 * the table definition).
 */
SYM_FUNC_START(__aes_arm64_encrypt)
do_crypt fround, aes_enc_tab, aes_enc_tab + 1, 2
SYM_FUNC_END(__aes_arm64_encrypt)
.align 5
/*
 * __aes_arm64_decrypt() - scalar single-block decryption.
 *
 * Uses aes_dec_tab for the main rounds and byte loads from
 * crypto_aes_inv_sbox, indexed without scaling, for the last round.
 */
SYM_FUNC_START(__aes_arm64_decrypt)
do_crypt iround, aes_dec_tab, crypto_aes_inv_sbox, 0
SYM_FUNC_END(__aes_arm64_decrypt)

164
lib/crypto/arm64/aes.h Normal file
View File

@@ -0,0 +1,164 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* AES block cipher, optimized for ARM64
*
* Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
* Copyright 2026 Google LLC
*/
#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/unaligned.h>
#include <linux/cpufeature.h>
/*
 * Enabled by aes_mod_init_arch() when cpu_have_named_feature(AES) is true.
 * The functions in this file test it before calling any __aes_ce_*() routine.
 */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_aes);
/*
 * One AES block worth of bytes. Used by aes_expandkey_arm64() to address and
 * copy round keys as whole AES_BLOCK_SIZE-byte units.
 */
struct aes_block {
u8 b[AES_BLOCK_SIZE];
};
/*
 * Scalar single-block routines (assembly). They are the fallback used by
 * aes_encrypt_arch() and aes_decrypt_arch() when the AES instructions cannot
 * be used.
 */
asmlinkage void __aes_arm64_encrypt(const u32 rk[], u8 out[AES_BLOCK_SIZE],
const u8 in[AES_BLOCK_SIZE], int rounds);
asmlinkage void __aes_arm64_decrypt(const u32 inv_rk[], u8 out[AES_BLOCK_SIZE],
const u8 in[AES_BLOCK_SIZE], int rounds);
/*
 * Single-block routines using the AES instructions (assembly). Callers in
 * this file invoke them only inside scoped_ksimd().
 */
asmlinkage void __aes_ce_encrypt(const u32 rk[], u8 out[AES_BLOCK_SIZE],
const u8 in[AES_BLOCK_SIZE], int rounds);
asmlinkage void __aes_ce_decrypt(const u32 inv_rk[], u8 out[AES_BLOCK_SIZE],
const u8 in[AES_BLOCK_SIZE], int rounds);
/* Key schedule helper: returns @l with the AES sbox applied to its bytes. */
asmlinkage u32 __aes_ce_sub(u32 l);
/* Key schedule helper: writes the aesimc transform of block @in to @out. */
asmlinkage void __aes_ce_invert(struct aes_block *out,
const struct aes_block *in);
/*
 * Compute the AES key schedule for @in_key (@key_len bytes) into @rndkeys
 * and, if @inv_rndkeys is non-NULL, the decryption round keys as well.
 *
 * The AES instructions are used when they are supported and SIMD is usable
 * in the current context; otherwise the generic code does the work. Both
 * paths produce the same expanded key format.
 */
static void aes_expandkey_arm64(u32 rndkeys[], u32 *inv_rndkeys,
				const u8 *in_key, int key_len, int nrounds)
{
	/* Round constants of the AES key schedule. */
	static u8 const rcon[] = {
		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
	};
	u32 kwords = key_len / sizeof(u32);
	int i, n;

	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) ||
	    !static_branch_likely(&have_aes) || unlikely(!may_use_simd())) {
		aes_expandkey_generic(rndkeys, inv_rndkeys, in_key, key_len);
		return;
	}

	/* The schedule starts with the key itself, as little-endian words. */
	for (i = 0; i < kwords; i++)
		rndkeys[i] = get_unaligned_le32(in_key + i * sizeof(u32));

	scoped_ksimd() {
		for (i = 0; i < sizeof(rcon); i++) {
			u32 *prev = rndkeys + i * kwords;
			u32 *next = prev + kwords;

			next[0] = ror32(__aes_ce_sub(prev[kwords - 1]), 8) ^
				  rcon[i] ^ prev[0];
			for (n = 1; n < 4; n++)
				next[n] = next[n - 1] ^ prev[n];

			if (key_len == AES_KEYSIZE_192) {
				if (i >= 7)
					break;
				for (n = 4; n < 6; n++)
					next[n] = next[n - 1] ^ prev[n];
			} else if (key_len == AES_KEYSIZE_256) {
				if (i >= 6)
					break;
				next[4] = __aes_ce_sub(next[3]) ^ prev[4];
				for (n = 5; n < 8; n++)
					next[n] = next[n - 1] ^ prev[n];
			}
		}

		/*
		 * Decryption keys for the Equivalent Inverse Cipher: the
		 * encryption round keys in reverse order, with Inverse Mix
		 * Columns applied to every key except the first and the last.
		 */
		if (inv_rndkeys) {
			struct aes_block *enc = (struct aes_block *)rndkeys;
			struct aes_block *dec = (struct aes_block *)inv_rndkeys;

			dec[0] = enc[nrounds];
			for (i = 1; i < nrounds; i++)
				__aes_ce_invert(&dec[i], &enc[nrounds - i]);
			dec[i] = enc[0];
		}
	}
}
static void aes_preparekey_arch(union aes_enckey_arch *k,
union aes_invkey_arch *inv_k,
const u8 *in_key, int key_len, int nrounds)
{
aes_expandkey_arm64(k->rndkeys, inv_k ? inv_k->inv_rndkeys : NULL,
in_key, key_len, nrounds);
}
/*
 * Temporary export: kept only until the remaining AES mode implementations
 * have moved from arch/arm64/crypto/ to lib/crypto/arm64/.
 *
 * Validates @key_len, records it in @ctx and fills in both the encryption
 * and decryption key schedules. Returns 0 on success or -EINVAL if the key
 * length is rejected by aes_check_keylen().
 */
int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
		     unsigned int key_len)
{
	int nrounds;

	if (aes_check_keylen(key_len))
		return -EINVAL;

	nrounds = 6 + key_len / 4;
	ctx->key_length = key_len;
	aes_expandkey_arm64(ctx->key_enc, ctx->key_dec, in_key, key_len,
			    nrounds);
	return 0;
}
EXPORT_SYMBOL(ce_aes_expandkey);
/*
 * Encrypt one block. The AES instructions are used when they are supported
 * and SIMD is usable in the current context; otherwise the scalar assembly
 * implementation is used.
 */
static void aes_encrypt_arch(const struct aes_enckey *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) ||
	    !static_branch_likely(&have_aes) || unlikely(!may_use_simd())) {
		__aes_arm64_encrypt(key->k.rndkeys, out, in, key->nrounds);
		return;
	}

	scoped_ksimd()
		__aes_ce_encrypt(key->k.rndkeys, out, in, key->nrounds);
}
/*
 * Decrypt one block. The AES instructions are used when they are supported
 * and SIMD is usable in the current context; otherwise the scalar assembly
 * implementation is used. Both paths read the inverse round keys.
 */
static void aes_decrypt_arch(const struct aes_key *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) ||
	    !static_branch_likely(&have_aes) || unlikely(!may_use_simd())) {
		__aes_arm64_decrypt(key->inv_k.inv_rndkeys, out, in,
				    key->nrounds);
		return;
	}

	scoped_ksimd()
		__aes_ce_decrypt(key->inv_k.inv_rndkeys, out, in,
				 key->nrounds);
}
#ifdef CONFIG_KERNEL_MODE_NEON
#define aes_mod_init_arch aes_mod_init_arch
/* Enable the AES-instruction code paths if the CPU reports the AES feature. */
static void aes_mod_init_arch(void)
{
	if (!cpu_have_named_feature(AES))
		return;

	static_branch_enable(&have_aes);
}
#endif /* CONFIG_KERNEL_MODE_NEON */