Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2025-09-04 20:19:47 +08:00)
lib/crypto: arm64/sha1: Migrate optimized code into library
Instead of exposing the arm64-optimized SHA-1 code via arm64-specific crypto_shash algorithms, just implement the sha1_blocks() library function. This is much simpler, it makes the SHA-1 library functions arm64-optimized, and it fixes the longstanding issue where the arm64-optimized SHA-1 code was disabled by default. SHA-1 still remains available through crypto_shash, but individual architectures no longer need to handle it.

Remove support for SHA-1 finalization from the assembly code, since the library does not yet support architecture-specific overrides of the finalization. (Support for that has been omitted for now, for simplicity and because it usually isn't performance-critical.)

To match sha1_blocks(), change the type of the nblocks parameter and the return value of __sha1_ce_transform() from int to size_t, and update the assembly code accordingly.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20250712232329.818226-9-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
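The new calling convention is worth spelling out: __sha1_ce_transform() now returns the number of blocks it has not yet processed (nonzero only when cond_yield requests a voluntary reschedule), so the C glue simply loops until it returns zero. Below is a minimal sketch of that contract, mirroring the new lib/crypto/arm64/sha1.h glue shown further down; struct sha1_block_state and SHA1_BLOCK_SIZE are assumed to come from <crypto/sha1.h>, and the sha1_ce_process() wrapper name is purely illustrative.

#include <asm/neon.h>
#include <crypto/sha1.h>
#include <linux/linkage.h>
#include <linux/types.h>

/* Takes and returns size_t instead of int, matching sha1_blocks(). */
asmlinkage size_t __sha1_ce_transform(struct sha1_block_state *state,
				      const u8 *data, size_t nblocks);

/* Process nblocks 64-byte blocks, allowing a reschedule between chunks. */
static void sha1_ce_process(struct sha1_block_state *state,
			    const u8 *data, size_t nblocks)
{
	do {
		size_t rem;

		kernel_neon_begin();
		rem = __sha1_ce_transform(state, data, nblocks); /* blocks left */
		kernel_neon_end();
		data += (nblocks - rem) * SHA1_BLOCK_SIZE;
		nblocks = rem;
	} while (nblocks);
}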
parent 70cb6ca58f
commit 00d549bb89
arch/arm64/configs/defconfig

@@ -1743,7 +1743,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_ANSI_CPRNG=y
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_GHASH_ARM64_CE=y
-CONFIG_CRYPTO_SHA1_ARM64_CE=y
 CONFIG_CRYPTO_SHA3_ARM64=m
 CONFIG_CRYPTO_SM3_ARM64_CE=m
 CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
arch/arm64/crypto/Kconfig

@@ -25,17 +25,6 @@ config CRYPTO_NHPOLY1305_NEON
 	  Architecture: arm64 using:
 	  - NEON (Advanced SIMD) extensions
 
-config CRYPTO_SHA1_ARM64_CE
-	tristate "Hash functions: SHA-1 (ARMv8 Crypto Extensions)"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_HASH
-	select CRYPTO_SHA1
-	help
-	  SHA-1 secure hash algorithm (FIPS 180)
-
-	  Architecture: arm64 using:
-	  - ARMv8 Crypto Extensions
-
 config CRYPTO_SHA3_ARM64
 	tristate "Hash functions: SHA-3 (ARMv8.2 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
arch/arm64/crypto/Makefile

@@ -5,9 +5,6 @@
 # Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 #
 
-obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o
-sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
-
 obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o
 sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
 
arch/arm64/crypto/sha1-ce-glue.c (deleted, 118 lines)

@@ -1,118 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
- *
- * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- */
-
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <crypto/sha1.h>
-#include <crypto/sha1_base.h>
-#include <linux/cpufeature.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("sha1");
-
-struct sha1_ce_state {
-	struct sha1_state	sst;
-	u32			finalize;
-};
-
-extern const u32 sha1_ce_offsetof_count;
-extern const u32 sha1_ce_offsetof_finalize;
-
-asmlinkage int __sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
-				   int blocks);
-
-static void sha1_ce_transform(struct sha1_state *sst, u8 const *src,
-			      int blocks)
-{
-	while (blocks) {
-		int rem;
-
-		kernel_neon_begin();
-		rem = __sha1_ce_transform(container_of(sst,
-						       struct sha1_ce_state,
-						       sst), src, blocks);
-		kernel_neon_end();
-		src += (blocks - rem) * SHA1_BLOCK_SIZE;
-		blocks = rem;
-	}
-}
-
-const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count);
-const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize);
-
-static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
-			  unsigned int len)
-{
-	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
-
-	sctx->finalize = 0;
-	return sha1_base_do_update_blocks(desc, data, len, sha1_ce_transform);
-}
-
-static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
-			 unsigned int len, u8 *out)
-{
-	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
-	bool finalized = false;
-
-	/*
-	 * Allow the asm code to perform the finalization if there is no
-	 * partial data and the input is a round multiple of the block size.
-	 */
-	if (len >= SHA1_BLOCK_SIZE) {
-		unsigned int remain = len - round_down(len, SHA1_BLOCK_SIZE);
-
-		finalized = !remain;
-		sctx->finalize = finalized;
-		sha1_base_do_update_blocks(desc, data, len, sha1_ce_transform);
-		data += len - remain;
-		len = remain;
-	}
-	if (!finalized) {
-		sctx->finalize = 0;
-		sha1_base_do_finup(desc, data, len, sha1_ce_transform);
-	}
-	return sha1_base_finish(desc, out);
-}
-
-static struct shash_alg alg = {
-	.init			= sha1_base_init,
-	.update			= sha1_ce_update,
-	.finup			= sha1_ce_finup,
-	.descsize		= sizeof(struct sha1_ce_state),
-	.statesize		= SHA1_STATE_SIZE,
-	.digestsize		= SHA1_DIGEST_SIZE,
-	.base			= {
-		.cra_name		= "sha1",
-		.cra_driver_name	= "sha1-ce",
-		.cra_priority		= 200,
-		.cra_flags		= CRYPTO_AHASH_ALG_BLOCK_ONLY |
-					  CRYPTO_AHASH_ALG_FINUP_MAX,
-		.cra_blocksize		= SHA1_BLOCK_SIZE,
-		.cra_module		= THIS_MODULE,
-	}
-};
-
-static int __init sha1_ce_mod_init(void)
-{
-	return crypto_register_shash(&alg);
-}
-
-static void __exit sha1_ce_mod_fini(void)
-{
-	crypto_unregister_shash(&alg);
-}
-
-module_cpu_feature_match(SHA1, sha1_ce_mod_init);
-module_exit(sha1_ce_mod_fini);
lib/crypto/Kconfig

@@ -147,6 +147,7 @@ config CRYPTO_LIB_SHA1_ARCH
 	bool
 	depends on CRYPTO_LIB_SHA1 && !UML
 	default y if ARM
+	default y if ARM64 && KERNEL_MODE_NEON
 
 config CRYPTO_LIB_SHA256
 	tristate
lib/crypto/Makefile

@@ -76,6 +76,7 @@ libsha1-y += arm/sha1-armv4-large.o
 libsha1-$(CONFIG_KERNEL_MODE_NEON) += arm/sha1-armv7-neon.o \
 				      arm/sha1-ce-core.o
 endif
+libsha1-$(CONFIG_ARM64) += arm64/sha1-ce-core.o
 endif # CONFIG_CRYPTO_LIB_SHA1_ARCH
 
 ################################################################################
arch/arm64/crypto/sha1-ce-core.S → lib/crypto/arm64/sha1-ce-core.S (renamed)

@@ -62,8 +62,8 @@
 	.endm
 
 	/*
-	 * int __sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
-	 *			   int blocks)
+	 * size_t __sha1_ce_transform(struct sha1_block_state *state,
+	 *			      const u8 *data, size_t nblocks);
 	 */
 SYM_FUNC_START(__sha1_ce_transform)
 	/* load round constants */
@@ -76,20 +76,16 @@ SYM_FUNC_START(__sha1_ce_transform)
 	ld1		{dgav.4s}, [x0]
 	ldr		dgb, [x0, #16]
 
-	/* load sha1_ce_state::finalize */
-	ldr_l		w4, sha1_ce_offsetof_finalize, x4
-	ldr		w4, [x0, x4]
-
 	/* load input */
 0:	ld1		{v8.4s-v11.4s}, [x1], #64
-	sub		w2, w2, #1
+	sub		x2, x2, #1
 
 CPU_LE(	rev32		v8.16b, v8.16b	)
 CPU_LE(	rev32		v9.16b, v9.16b	)
 CPU_LE(	rev32		v10.16b, v10.16b	)
 CPU_LE(	rev32		v11.16b, v11.16b	)
 
-1:	add		t0.4s, v8.4s, k0.4s
+	add		t0.4s, v8.4s, k0.4s
 	mov		dg0v.16b, dgav.16b
 
 	add_update	c, ev, k0, 8, 9, 10, 11, dgb
@@ -120,31 +116,15 @@ CPU_LE(	rev32		v11.16b, v11.16b	)
 	add		dgbv.2s, dgbv.2s, dg1v.2s
 	add		dgav.4s, dgav.4s, dg0v.4s
 
-	cbz		w2, 2f
-	cond_yield	3f, x5, x6
-	b		0b
+	/* return early if voluntary preemption is needed */
+	cond_yield	1f, x5, x6
 
-	/*
-	 * Final block: add padding and total bit count.
-	 * Skip if the input size was not a round multiple of the block size,
-	 * the padding is handled by the C code in that case.
-	 */
-2:	cbz		x4, 3f
-	ldr_l		w4, sha1_ce_offsetof_count, x4
-	ldr		x4, [x0, x4]
-	movi		v9.2d, #0
-	mov		x8, #0x80000000
-	movi		v10.2d, #0
-	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
-	fmov		d8, x8
-	mov		x4, #0
-	mov		v11.d[0], xzr
-	mov		v11.d[1], x7
-	b		1b
+	/* handled all input blocks? */
+	cbnz		x2, 0b
 
 	/* store new state */
-3:	st1		{dgav.4s}, [x0]
+1:	st1		{dgav.4s}, [x0]
 	str		dgb, [x0, #16]
-	mov		w0, w2
+	mov		x0, x2
 	ret
 SYM_FUNC_END(__sha1_ce_transform)
lib/crypto/arm64/sha1.h (new file, 39 lines)

@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SHA-1 optimized for ARM64
+ *
+ * Copyright 2025 Google LLC
+ */
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <linux/cpufeature.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce);
+
+asmlinkage size_t __sha1_ce_transform(struct sha1_block_state *state,
+				      const u8 *data, size_t nblocks);
+
+static void sha1_blocks(struct sha1_block_state *state,
+			const u8 *data, size_t nblocks)
+{
+	if (static_branch_likely(&have_ce) && likely(may_use_simd())) {
+		do {
+			size_t rem;
+
+			kernel_neon_begin();
+			rem = __sha1_ce_transform(state, data, nblocks);
+			kernel_neon_end();
+			data += (nblocks - rem) * SHA1_BLOCK_SIZE;
+			nblocks = rem;
+		} while (nblocks);
+	} else {
+		sha1_blocks_generic(state, data, nblocks);
+	}
+}
+
+#define sha1_mod_init_arch sha1_mod_init_arch
+static inline void sha1_mod_init_arch(void)
+{
+	if (cpu_have_named_feature(SHA1))
+		static_branch_enable(&have_ce);
+}
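With the block function wired into the library, existing users of the SHA-1 library API pick up the Crypto Extensions code automatically; nothing arch-specific is needed at the call site. A hedged usage sketch follows: it assumes the one-shot sha1() helper and SHA1_DIGEST_SIZE from <crypto/sha1.h> provided by the library this series builds on, and demo_sha1_digest() is an illustrative name only.

#include <crypto/sha1.h>
#include <linux/printk.h>
#include <linux/types.h>

static void demo_sha1_digest(const u8 *msg, size_t len)
{
	u8 digest[SHA1_DIGEST_SIZE];

	/*
	 * The library splits the input into 64-byte blocks and feeds them to
	 * sha1_blocks(), which on arm64 now dispatches to __sha1_ce_transform()
	 * when the SHA1 CPU feature and NEON are usable, falling back to
	 * sha1_blocks_generic() otherwise.
	 */
	sha1(msg, len, digest);

	print_hex_dump_bytes("sha1: ", DUMP_PREFIX_NONE, digest, sizeof(digest));
}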