Merge tag 'libcrypto-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux

Pull crypto library updates from Eric Biggers:

 - Add support for verifying ML-DSA signatures.

   ML-DSA (Module-Lattice-Based Digital Signature Algorithm) is a
   recently-standardized post-quantum (quantum-resistant) signature
   algorithm. It was known as Dilithium pre-standardization.

   The first use case in the kernel will be module signing. But there
   are also other users of RSA and ECDSA signatures in the kernel that
   might want to upgrade to ML-DSA eventually.

 - Improve the AES library:

     - Make the AES key expansion and single block encryption and
       decryption functions use the architecture-optimized AES code.
       Enable these optimizations by default.

     - Support preparing an AES key for encryption-only, using about
       half as much memory as a bidirectional key.

     - Replace the existing two generic implementations of AES with a
       single one.

 - Simplify how Adiantum message hashing is implemented. Remove the
   "nhpoly1305" crypto_shash in favor of direct lib/crypto/ support for
   NH hashing, and enable optimizations by default.

* tag 'libcrypto-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux: (53 commits)
  lib/crypto: mldsa: Clarify the documentation for mldsa_verify() slightly
  lib/crypto: aes: Drop 'volatile' from aes_sbox and aes_inv_sbox
  lib/crypto: aes: Remove old AES en/decryption functions
  lib/crypto: aesgcm: Use new AES library API
  lib/crypto: aescfb: Use new AES library API
  crypto: omap - Use new AES library API
  crypto: inside-secure - Use new AES library API
  crypto: drbg - Use new AES library API
  crypto: crypto4xx - Use new AES library API
  crypto: chelsio - Use new AES library API
  crypto: ccp - Use new AES library API
  crypto: x86/aes-gcm - Use new AES library API
  crypto: arm64/ghash - Use new AES library API
  crypto: arm/ghash - Use new AES library API
  staging: rtl8723bs: core: Use new AES library API
  net: phy: mscc: macsec: Use new AES library API
  chelsio: Use new AES library API
  Bluetooth: SMP: Use new AES library API
  crypto: x86/aes - Remove the superseded AES-NI crypto_cipher
  lib/crypto: x86/aes: Add AES-NI optimization
  ...
This commit is contained in:
Linus Torvalds
2026-02-10 08:31:09 -08:00
141 changed files with 6668 additions and 5264 deletions

View File

@@ -11,6 +11,18 @@ config CRYPTO_LIB_UTILS
config CRYPTO_LIB_AES
tristate
config CRYPTO_LIB_AES_ARCH
bool
depends on CRYPTO_LIB_AES && !UML && !KMSAN
default y if ARM
default y if ARM64
default y if PPC && (SPE || (PPC64 && VSX))
default y if RISCV && 64BIT && TOOLCHAIN_HAS_VECTOR_CRYPTO && \
RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS
default y if S390
default y if SPARC64
default y if X86
config CRYPTO_LIB_AESCFB
tristate
select CRYPTO_LIB_AES
@@ -101,6 +113,26 @@ config CRYPTO_LIB_MD5_ARCH
default y if PPC
default y if SPARC64
config CRYPTO_LIB_MLDSA
tristate
select CRYPTO_LIB_SHA3
help
The ML-DSA library functions. Select this if your module uses any of
the functions from <crypto/mldsa.h>.
config CRYPTO_LIB_NH
tristate
help
Implementation of the NH almost-universal hash function, specifically
the variant of NH used in Adiantum.
config CRYPTO_LIB_NH_ARCH
bool
depends on CRYPTO_LIB_NH && !UML && !KMSAN
default y if ARM && KERNEL_MODE_NEON
default y if ARM64 && KERNEL_MODE_NEON
default y if X86_64
config CRYPTO_LIB_POLY1305
tristate
help

View File

@@ -15,8 +15,47 @@ obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o
obj-$(CONFIG_CRYPTO_LIB_UTILS) += libcryptoutils.o
libcryptoutils-y := memneq.o utils.o
obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
libaes-y := aes.o
################################################################################
obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
libaes-y := aes.o
ifeq ($(CONFIG_CRYPTO_LIB_AES_ARCH),y)
CFLAGS_aes.o += -I$(src)/$(SRCARCH)
libaes-$(CONFIG_ARM) += arm/aes-cipher-core.o
ifeq ($(CONFIG_ARM64),y)
libaes-y += arm64/aes-cipher-core.o
libaes-$(CONFIG_KERNEL_MODE_NEON) += arm64/aes-ce-core.o
endif
ifeq ($(CONFIG_PPC),y)
ifeq ($(CONFIG_SPE),y)
libaes-y += powerpc/aes-spe-core.o \
powerpc/aes-spe-keys.o \
powerpc/aes-spe-modes.o \
powerpc/aes-tab-4k.o
else
libaes-y += powerpc/aesp8-ppc.o
aes-perlasm-flavour-y := linux-ppc64
aes-perlasm-flavour-$(CONFIG_PPC64_ELF_ABI_V2) := linux-ppc64-elfv2
aes-perlasm-flavour-$(CONFIG_CPU_LITTLE_ENDIAN) := linux-ppc64le
quiet_cmd_perlasm_aes = PERLASM $@
cmd_perlasm_aes = $(PERL) $< $(aes-perlasm-flavour-y) $@
# Use if_changed instead of cmd, in case the flavour changed.
$(obj)/powerpc/aesp8-ppc.S: $(src)/powerpc/aesp8-ppc.pl FORCE
$(call if_changed,perlasm_aes)
targets += powerpc/aesp8-ppc.S
OBJECT_FILES_NON_STANDARD_powerpc/aesp8-ppc.o := y
endif # !CONFIG_SPE
endif # CONFIG_PPC
libaes-$(CONFIG_RISCV) += riscv/aes-riscv64-zvkned.o
libaes-$(CONFIG_SPARC) += sparc/aes_asm.o
libaes-$(CONFIG_X86) += x86/aes-aesni.o
endif # CONFIG_CRYPTO_LIB_AES_ARCH
################################################################################
obj-$(CONFIG_CRYPTO_LIB_AESCFB) += libaescfb.o
libaescfb-y := aescfb.o
@@ -126,6 +165,22 @@ endif # CONFIG_CRYPTO_LIB_MD5_ARCH
################################################################################
obj-$(CONFIG_CRYPTO_LIB_MLDSA) += libmldsa.o
libmldsa-y := mldsa.o
################################################################################
obj-$(CONFIG_CRYPTO_LIB_NH) += libnh.o
libnh-y := nh.o
ifeq ($(CONFIG_CRYPTO_LIB_NH_ARCH),y)
CFLAGS_nh.o += -I$(src)/$(SRCARCH)
libnh-$(CONFIG_ARM) += arm/nh-neon-core.o
libnh-$(CONFIG_ARM64) += arm64/nh-neon-core.o
libnh-$(CONFIG_X86) += x86/nh-sse2.o x86/nh-avx2.o
endif
################################################################################
obj-$(CONFIG_CRYPTO_LIB_POLY1305) += libpoly1305.o
libpoly1305-y := poly1305.o
ifeq ($(CONFIG_ARCH_SUPPORTS_INT128),y)

View File

@@ -1,19 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2017-2019 Linaro Ltd <ard.biesheuvel@linaro.org>
* Copyright 2026 Google LLC
*/
#include <crypto/aes.h>
#include <linux/cache.h>
#include <linux/crypto.h>
#include <linux/export.h>
#include <linux/module.h>
#include <linux/unaligned.h>
/*
* Emit the sbox as volatile const to prevent the compiler from doing
* constant folding on sbox references involving fixed indexes.
*/
static volatile const u8 ____cacheline_aligned aes_sbox[] = {
static const u8 ____cacheline_aligned aes_sbox[] = {
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
@@ -48,7 +46,7 @@ static volatile const u8 ____cacheline_aligned aes_sbox[] = {
0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
};
static volatile const u8 ____cacheline_aligned aes_inv_sbox[] = {
static const u8 ____cacheline_aligned aes_inv_sbox[] = {
0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
@@ -89,6 +87,110 @@ extern const u8 crypto_aes_inv_sbox[256] __alias(aes_inv_sbox);
EXPORT_SYMBOL(crypto_aes_sbox);
EXPORT_SYMBOL(crypto_aes_inv_sbox);
/* aes_enc_tab[i] contains MixColumn([SubByte(i), 0, 0, 0]). */
const u32 ____cacheline_aligned aes_enc_tab[256] = {
0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6,
0xb16f6fde, 0x54c5c591, 0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56,
0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec, 0x45caca8f, 0x9d82821f,
0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb,
0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453,
0x967272e4, 0x5bc0c09b, 0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c,
0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83, 0x5c343468, 0xf4a5a551,
0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a,
0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637,
0x0f05050a, 0xb59a9a2f, 0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df,
0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea, 0x1b090912, 0x9e83831d,
0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b,
0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd,
0x712f2f5e, 0x97848413, 0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1,
0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6, 0xbe6a6ad4, 0x46cbcb8d,
0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85,
0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a,
0x55333366, 0x94858511, 0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe,
0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b, 0xf35151a2, 0xfea3a35d,
0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1,
0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5,
0x0ef3f3fd, 0x6dd2d2bf, 0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3,
0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e, 0x57c4c493, 0xf2a7a755,
0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6,
0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54,
0xab90903b, 0x8388880b, 0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428,
0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad, 0x3be0e0db, 0x56323264,
0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8,
0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531,
0x37e4e4d3, 0x8b7979f2, 0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda,
0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949, 0xb46c6cd8, 0xfa5656ac,
0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810,
0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657,
0xc7b4b473, 0x51c6c697, 0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e,
0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f, 0x907070e0, 0x423e3e7c,
0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c,
0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199,
0x271d1d3a, 0xb99e9e27, 0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122,
0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433, 0xb69b9b2d, 0x221e1e3c,
0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5,
0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7,
0xc6424284, 0xb86868d0, 0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e,
0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c,
};
EXPORT_SYMBOL(aes_enc_tab);
/* aes_dec_tab[i] contains InvMixColumn([InvSubByte(i), 0, 0, 0]). */
const u32 ____cacheline_aligned aes_dec_tab[256] = {
0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f,
0xab58faac, 0x9303e34b, 0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5,
0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5, 0x495ab1de, 0x671bba25,
0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b,
0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458,
0x2969e049, 0x44c8c98e, 0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927,
0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d, 0x184adf63, 0x82311ae5,
0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9,
0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72,
0x578f1fe3, 0x2aab5566, 0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3,
0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed, 0x2b1ccf8a, 0x92b479a7,
0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4,
0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040,
0x069f715e, 0x51106ebd, 0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d,
0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060, 0x24fb9819, 0x97e9bdd6,
0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879,
0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32,
0xac70111e, 0x4e725a6c, 0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36,
0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624, 0xb1670a0c, 0x0fe75793,
0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c,
0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2,
0xb9a8b62d, 0xc8a91e14, 0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3,
0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b, 0x7629438b, 0xdcc623cb,
0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684,
0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc,
0xec52860d, 0xd0e3c177, 0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947,
0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322, 0xc74e4987, 0xc1d138d9,
0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f,
0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890,
0x5ef7392e, 0xf5afc382, 0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf,
0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb, 0x097826cd, 0xf418596e,
0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef,
0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a,
0x3094a5c6, 0xc066a235, 0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733,
0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117, 0x8dd64d76, 0x4db0ef43,
0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546,
0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92,
0x335610e9, 0x1347d66d, 0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb,
0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a, 0x59dfd29c, 0x3f73f255,
0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478,
0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc,
0x8b493c28, 0x41950dff, 0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664,
0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0,
};
EXPORT_SYMBOL(aes_dec_tab);
/* Prefetch data into L1 cache. @mem should be cacheline-aligned. */
static __always_inline void aes_prefetch(const void *mem, size_t len)
{
/*
 * Touch one byte per cacheline through a volatile pointer so the
 * compiler cannot elide the loads.  This pulls the whole table into the
 * D-cache before any secret-dependent lookups are done.
 */
for (size_t i = 0; i < len; i += L1_CACHE_BYTES)
*(volatile const u8 *)(mem + i);
/* Compiler barrier: keep the prefetch reads before later table loads. */
barrier();
}
static u32 mul_by_x(u32 w)
{
u32 x = w & 0x7f7f7f7f;
@@ -145,22 +247,6 @@ static u32 inv_mix_columns(u32 x)
return mix_columns(x ^ y ^ ror32(y, 16));
}
static __always_inline u32 subshift(u32 in[], int pos)
{
return (aes_sbox[in[pos] & 0xff]) ^
(aes_sbox[(in[(pos + 1) % 4] >> 8) & 0xff] << 8) ^
(aes_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
(aes_sbox[(in[(pos + 3) % 4] >> 24) & 0xff] << 24);
}
static __always_inline u32 inv_subshift(u32 in[], int pos)
{
return (aes_inv_sbox[in[pos] & 0xff]) ^
(aes_inv_sbox[(in[(pos + 3) % 4] >> 8) & 0xff] << 8) ^
(aes_inv_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
(aes_inv_sbox[(in[(pos + 1) % 4] >> 24) & 0xff] << 24);
}
static u32 subw(u32 in)
{
return (aes_sbox[in & 0xff]) ^
@@ -169,38 +255,17 @@ static u32 subw(u32 in)
(aes_sbox[(in >> 24) & 0xff] << 24);
}
/**
* aes_expandkey - Expands the AES key as described in FIPS-197
* @ctx: The location where the computed key will be stored.
* @in_key: The supplied key.
* @key_len: The length of the supplied key.
*
* Returns 0 on success. The function fails only if an invalid key size (or
* pointer) is supplied.
* The expanded key size is 240 bytes (max of 14 rounds with a unique 16 bytes
* key schedule plus a 16 bytes key which is used before the first round).
* The decryption key is prepared for the "Equivalent Inverse Cipher" as
* described in FIPS-197. The first slot (16 bytes) of each key (enc or dec) is
* for the initial combination, the second slot for the first round and so on.
*/
int aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
unsigned int key_len)
static void aes_expandkey_generic(u32 rndkeys[], u32 *inv_rndkeys,
const u8 *in_key, int key_len)
{
u32 kwords = key_len / sizeof(u32);
u32 rc, i, j;
int err;
err = aes_check_keylen(key_len);
if (err)
return err;
ctx->key_length = key_len;
for (i = 0; i < kwords; i++)
ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
rndkeys[i] = get_unaligned_le32(&in_key[i * sizeof(u32)]);
for (i = 0, rc = 1; i < 10; i++, rc = mul_by_x(rc)) {
u32 *rki = ctx->key_enc + (i * kwords);
u32 *rki = &rndkeys[i * kwords];
u32 *rko = rki + kwords;
rko[0] = ror32(subw(rki[kwords - 1]), 8) ^ rc ^ rki[0];
@@ -229,129 +294,239 @@ int aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
* the Inverse Mix Columns transformation to all but the first and
* the last one.
*/
ctx->key_dec[0] = ctx->key_enc[key_len + 24];
ctx->key_dec[1] = ctx->key_enc[key_len + 25];
ctx->key_dec[2] = ctx->key_enc[key_len + 26];
ctx->key_dec[3] = ctx->key_enc[key_len + 27];
if (inv_rndkeys) {
inv_rndkeys[0] = rndkeys[key_len + 24];
inv_rndkeys[1] = rndkeys[key_len + 25];
inv_rndkeys[2] = rndkeys[key_len + 26];
inv_rndkeys[3] = rndkeys[key_len + 27];
for (i = 4, j = key_len + 20; j > 0; i += 4, j -= 4) {
ctx->key_dec[i] = inv_mix_columns(ctx->key_enc[j]);
ctx->key_dec[i + 1] = inv_mix_columns(ctx->key_enc[j + 1]);
ctx->key_dec[i + 2] = inv_mix_columns(ctx->key_enc[j + 2]);
ctx->key_dec[i + 3] = inv_mix_columns(ctx->key_enc[j + 3]);
for (i = 4, j = key_len + 20; j > 0; i += 4, j -= 4) {
inv_rndkeys[i] = inv_mix_columns(rndkeys[j]);
inv_rndkeys[i + 1] = inv_mix_columns(rndkeys[j + 1]);
inv_rndkeys[i + 2] = inv_mix_columns(rndkeys[j + 2]);
inv_rndkeys[i + 3] = inv_mix_columns(rndkeys[j + 3]);
}
inv_rndkeys[i] = rndkeys[0];
inv_rndkeys[i + 1] = rndkeys[1];
inv_rndkeys[i + 2] = rndkeys[2];
inv_rndkeys[i + 3] = rndkeys[3];
}
}
ctx->key_dec[i] = ctx->key_enc[0];
ctx->key_dec[i + 1] = ctx->key_enc[1];
ctx->key_dec[i + 2] = ctx->key_enc[2];
ctx->key_dec[i + 3] = ctx->key_enc[3];
/**
 * aes_expandkey - Expand the AES key as described in FIPS-197
 * @ctx: The location where the computed key schedule will be stored.
 * @in_key: The supplied AES key.
 * @key_len: The length of @in_key in bytes: 16, 24, or 32.
 *
 * Expands both the encryption round keys (ctx->key_enc) and the decryption
 * round keys for the "Equivalent Inverse Cipher" (ctx->key_dec).
 *
 * Return: 0 on success, or -EINVAL if @key_len is not a valid AES key length.
 */
int aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
unsigned int key_len)
{
if (aes_check_keylen(key_len) != 0)
return -EINVAL;
ctx->key_length = key_len;
aes_expandkey_generic(ctx->key_enc, ctx->key_dec, in_key, key_len);
return 0;
}
EXPORT_SYMBOL(aes_expandkey);
/**
* aes_encrypt - Encrypt a single AES block
* @ctx: Context struct containing the key schedule
* @out: Buffer to store the ciphertext
* @in: Buffer containing the plaintext
*/
void aes_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in)
static __always_inline u32 enc_quarterround(const u32 w[4], int i, u32 rk)
{
const u32 *rkp = ctx->key_enc + 4;
int rounds = 6 + ctx->key_length / 4;
u32 st0[4], st1[4];
int round;
return rk ^ aes_enc_tab[(u8)w[i]] ^
rol32(aes_enc_tab[(u8)(w[(i + 1) % 4] >> 8)], 8) ^
rol32(aes_enc_tab[(u8)(w[(i + 2) % 4] >> 16)], 16) ^
rol32(aes_enc_tab[(u8)(w[(i + 3) % 4] >> 24)], 24);
}
st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
st0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4);
st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);
/*
 * Compute output column @i of the *final* encryption round (ShiftRows,
 * SubBytes, AddRoundKey -- no MixColumns).  Since aes_enc_tab[j] is
 * MixColumn([SubByte(j), 0, 0, 0]), bits 8..15 of each entry equal
 * SubByte(j) itself; the 0x0000ff00 masks and shifts recover the plain
 * S-box output without a separate sbox lookup (keeping all accesses in the
 * already-prefetched aes_enc_tab).
 */
static __always_inline u32 enclast_quarterround(const u32 w[4], int i, u32 rk)
{
return rk ^ ((aes_enc_tab[(u8)w[i]] & 0x0000ff00) >> 8) ^
(aes_enc_tab[(u8)(w[(i + 1) % 4] >> 8)] & 0x0000ff00) ^
((aes_enc_tab[(u8)(w[(i + 2) % 4] >> 16)] & 0x0000ff00) << 8) ^
((aes_enc_tab[(u8)(w[(i + 3) % 4] >> 24)] & 0x0000ff00) << 16);
}
static void __maybe_unused aes_encrypt_generic(const u32 rndkeys[], int nrounds,
u8 out[AES_BLOCK_SIZE],
const u8 in[AES_BLOCK_SIZE])
{
const u32 *rkp = rndkeys;
int n = nrounds - 1;
u32 w[4];
w[0] = get_unaligned_le32(&in[0]) ^ *rkp++;
w[1] = get_unaligned_le32(&in[4]) ^ *rkp++;
w[2] = get_unaligned_le32(&in[8]) ^ *rkp++;
w[3] = get_unaligned_le32(&in[12]) ^ *rkp++;
/*
* Force the compiler to emit data independent Sbox references,
* by xoring the input with Sbox values that are known to add up
* to zero. This pulls the entire Sbox into the D-cache before any
* data dependent lookups are done.
* Prefetch the table before doing data and key-dependent loads from it.
*
* This is intended only as a basic constant-time hardening measure that
* avoids interfering with performance too much. Its effectiveness is
* not guaranteed. For proper constant-time AES, a CPU that supports
* AES instructions should be used instead.
*/
st0[0] ^= aes_sbox[ 0] ^ aes_sbox[ 64] ^ aes_sbox[134] ^ aes_sbox[195];
st0[1] ^= aes_sbox[16] ^ aes_sbox[ 82] ^ aes_sbox[158] ^ aes_sbox[221];
st0[2] ^= aes_sbox[32] ^ aes_sbox[ 96] ^ aes_sbox[160] ^ aes_sbox[234];
st0[3] ^= aes_sbox[48] ^ aes_sbox[112] ^ aes_sbox[186] ^ aes_sbox[241];
aes_prefetch(aes_enc_tab, sizeof(aes_enc_tab));
for (round = 0;; round += 2, rkp += 8) {
st1[0] = mix_columns(subshift(st0, 0)) ^ rkp[0];
st1[1] = mix_columns(subshift(st0, 1)) ^ rkp[1];
st1[2] = mix_columns(subshift(st0, 2)) ^ rkp[2];
st1[3] = mix_columns(subshift(st0, 3)) ^ rkp[3];
do {
u32 w0 = enc_quarterround(w, 0, *rkp++);
u32 w1 = enc_quarterround(w, 1, *rkp++);
u32 w2 = enc_quarterround(w, 2, *rkp++);
u32 w3 = enc_quarterround(w, 3, *rkp++);
if (round == rounds - 2)
break;
w[0] = w0;
w[1] = w1;
w[2] = w2;
w[3] = w3;
} while (--n);
st0[0] = mix_columns(subshift(st1, 0)) ^ rkp[4];
st0[1] = mix_columns(subshift(st1, 1)) ^ rkp[5];
st0[2] = mix_columns(subshift(st1, 2)) ^ rkp[6];
st0[3] = mix_columns(subshift(st1, 3)) ^ rkp[7];
}
put_unaligned_le32(enclast_quarterround(w, 0, *rkp++), &out[0]);
put_unaligned_le32(enclast_quarterround(w, 1, *rkp++), &out[4]);
put_unaligned_le32(enclast_quarterround(w, 2, *rkp++), &out[8]);
put_unaligned_le32(enclast_quarterround(w, 3, *rkp++), &out[12]);
}
put_unaligned_le32(subshift(st1, 0) ^ rkp[4], out);
put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4);
put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8);
put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12);
/*
 * Compute output column @i of a regular decryption round using aes_dec_tab
 * (each entry is InvMixColumn([InvSubByte(j), 0, 0, 0])), then XOR in the
 * round key @rk.  Note the (i + 3), (i + 2), (i + 1) source columns: the
 * inverse ShiftRows rotates rows in the opposite direction from the
 * encryption quarterround.
 */
static __always_inline u32 dec_quarterround(const u32 w[4], int i, u32 rk)
{
return rk ^ aes_dec_tab[(u8)w[i]] ^
rol32(aes_dec_tab[(u8)(w[(i + 3) % 4] >> 8)], 8) ^
rol32(aes_dec_tab[(u8)(w[(i + 2) % 4] >> 16)], 16) ^
rol32(aes_dec_tab[(u8)(w[(i + 1) % 4] >> 24)], 24);
}
/*
 * Compute output column @i of the *final* decryption round, which omits
 * InvMixColumns.  Unlike dec_quarterround() this must use the raw inverse
 * S-box (aes_inv_sbox) directly, since aes_dec_tab entries have the
 * InvMixColumn already folded in.
 */
static __always_inline u32 declast_quarterround(const u32 w[4], int i, u32 rk)
{
return rk ^ aes_inv_sbox[(u8)w[i]] ^
((u32)aes_inv_sbox[(u8)(w[(i + 3) % 4] >> 8)] << 8) ^
((u32)aes_inv_sbox[(u8)(w[(i + 2) % 4] >> 16)] << 16) ^
((u32)aes_inv_sbox[(u8)(w[(i + 1) % 4] >> 24)] << 24);
}
/*
 * Decrypt a single 16-byte AES block using the portable table-based C
 * implementation.  @inv_rndkeys is the "Equivalent Inverse Cipher" key
 * schedule produced by aes_expandkey_generic(); @nrounds is 10, 12, or 14.
 * __maybe_unused: when CONFIG_CRYPTO_LIB_AES_ARCH is enabled, the included
 * arch header may provide aes_decrypt_arch() without referencing this.
 */
static void __maybe_unused aes_decrypt_generic(const u32 inv_rndkeys[],
int nrounds,
u8 out[AES_BLOCK_SIZE],
const u8 in[AES_BLOCK_SIZE])
{
const u32 *rkp = inv_rndkeys;
int n = nrounds - 1;
u32 w[4];
/* Initial AddRoundKey: load the block little-endian and XOR key words. */
w[0] = get_unaligned_le32(&in[0]) ^ *rkp++;
w[1] = get_unaligned_le32(&in[4]) ^ *rkp++;
w[2] = get_unaligned_le32(&in[8]) ^ *rkp++;
w[3] = get_unaligned_le32(&in[12]) ^ *rkp++;
/*
 * Basic constant-time hardening: pull the table into the D-cache before
 * any data/key-dependent loads from it (see aes_encrypt_generic()).
 */
aes_prefetch(aes_dec_tab, sizeof(aes_dec_tab));
/* nrounds - 1 full rounds; all four columns read the previous state. */
do {
u32 w0 = dec_quarterround(w, 0, *rkp++);
u32 w1 = dec_quarterround(w, 1, *rkp++);
u32 w2 = dec_quarterround(w, 2, *rkp++);
u32 w3 = dec_quarterround(w, 3, *rkp++);
w[0] = w0;
w[1] = w1;
w[2] = w2;
w[3] = w3;
} while (--n);
/* The final round uses aes_inv_sbox, so prefetch that table too. */
aes_prefetch(aes_inv_sbox, sizeof(aes_inv_sbox));
put_unaligned_le32(declast_quarterround(w, 0, *rkp++), &out[0]);
put_unaligned_le32(declast_quarterround(w, 1, *rkp++), &out[4]);
put_unaligned_le32(declast_quarterround(w, 2, *rkp++), &out[8]);
put_unaligned_le32(declast_quarterround(w, 3, *rkp++), &out[12]);
}
/*
* Note: the aes_prepare*key_* names reflect the fact that the implementation
* might not actually expand the key. (The s390 code for example doesn't.)
* Where the key is expanded we use the more specific names aes_expandkey_*.
*
* aes_preparekey_arch() is passed an optional pointer 'inv_k' which points to
* the area to store the prepared decryption key. It will be NULL if the user
* is requesting encryption-only. aes_preparekey_arch() is also passed a valid
* 'key_len' and 'nrounds', corresponding to AES-128, AES-192, or AES-256.
*/
#ifdef CONFIG_CRYPTO_LIB_AES_ARCH
/* An arch-specific implementation of AES is available. Include it. */
#include "aes.h" /* $(SRCARCH)/aes.h */
#else
/* No arch-specific implementation of AES is available. Use generic code. */
/*
 * Generic fallback for aes_preparekey_arch(): expand the key with the
 * portable C key schedule.  @inv_k is NULL for encryption-only keys, in
 * which case no decryption schedule is computed.  @nrounds is unused here;
 * the generic expansion derives everything it needs from @key_len.
 */
static void aes_preparekey_arch(union aes_enckey_arch *k,
union aes_invkey_arch *inv_k,
const u8 *in_key, int key_len, int nrounds)
{
aes_expandkey_generic(k->rndkeys, inv_k ? inv_k->inv_rndkeys : NULL,
in_key, key_len);
}
/* Generic fallback: encrypt one block with the table-based C code. */
static void aes_encrypt_arch(const struct aes_enckey *key,
u8 out[AES_BLOCK_SIZE],
const u8 in[AES_BLOCK_SIZE])
{
aes_encrypt_generic(key->k.rndkeys, key->nrounds, out, in);
}
/*
 * Generic fallback: decrypt one block with the table-based C code.  Takes
 * the bidirectional struct aes_key, since decryption needs inv_k.
 */
static void aes_decrypt_arch(const struct aes_key *key,
u8 out[AES_BLOCK_SIZE],
const u8 in[AES_BLOCK_SIZE])
{
aes_decrypt_generic(key->inv_k.inv_rndkeys, key->nrounds, out, in);
}
#endif
/*
 * Common implementation of aes_preparekey() and aes_prepareenckey().
 * Validates the key length, records it along with the round count
 * (6 + key_len / 4 = 10/12/14 for AES-128/192/256), then hands off to the
 * arch hook.  @inv_k is NULL when the caller wants an encryption-only key.
 */
static int __aes_preparekey(struct aes_enckey *enc_key,
union aes_invkey_arch *inv_k,
const u8 *in_key, size_t key_len)
{
if (aes_check_keylen(key_len) != 0)
return -EINVAL;
enc_key->len = key_len;
enc_key->nrounds = 6 + key_len / 4;
aes_preparekey_arch(&enc_key->k, inv_k, in_key, key_len,
enc_key->nrounds);
return 0;
}
/*
 * Prepare a bidirectional (encrypt + decrypt) AES key.
 * Returns 0 on success or -EINVAL for an invalid key length.
 *
 * NOTE(review): the cast relies on struct aes_key beginning with the same
 * layout as struct aes_enckey -- confirm against the definitions in
 * <crypto/aes.h>.
 */
int aes_preparekey(struct aes_key *key, const u8 *in_key, size_t key_len)
{
return __aes_preparekey((struct aes_enckey *)key, &key->inv_k,
in_key, key_len);
}
EXPORT_SYMBOL(aes_preparekey);
/*
 * Prepare an encryption-only AES key.  Passing a NULL inverse-key pointer
 * skips the decryption schedule, using about half the memory of a
 * bidirectional struct aes_key.  Returns 0 or -EINVAL.
 */
int aes_prepareenckey(struct aes_enckey *key, const u8 *in_key, size_t key_len)
{
return __aes_preparekey(key, NULL, in_key, key_len);
}
EXPORT_SYMBOL(aes_prepareenckey);
/**
 * aes_encrypt - Encrypt a single AES block
 * @key: The prepared key; aes_encrypt_arg is declared in the header and
 *       presumably accepts both struct aes_key and struct aes_enckey
 *       callers -- TODO confirm against <crypto/aes.h>.
 * @out: Buffer to store the 16-byte ciphertext block.
 * @in: Buffer containing the 16-byte plaintext block.
 */
void aes_encrypt(aes_encrypt_arg key, u8 out[AES_BLOCK_SIZE],
const u8 in[AES_BLOCK_SIZE])
{
aes_encrypt_arch(key.enc_key, out, in);
}
EXPORT_SYMBOL(aes_encrypt);
/**
* aes_decrypt - Decrypt a single AES block
* @ctx: Context struct containing the key schedule
* @out: Buffer to store the plaintext
* @in: Buffer containing the ciphertext
*/
void aes_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in)
void aes_decrypt(const struct aes_key *key, u8 out[AES_BLOCK_SIZE],
const u8 in[AES_BLOCK_SIZE])
{
const u32 *rkp = ctx->key_dec + 4;
int rounds = 6 + ctx->key_length / 4;
u32 st0[4], st1[4];
int round;
st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
st0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4);
st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);
/*
* Force the compiler to emit data independent Sbox references,
* by xoring the input with Sbox values that are known to add up
* to zero. This pulls the entire Sbox into the D-cache before any
* data dependent lookups are done.
*/
st0[0] ^= aes_inv_sbox[ 0] ^ aes_inv_sbox[ 64] ^ aes_inv_sbox[129] ^ aes_inv_sbox[200];
st0[1] ^= aes_inv_sbox[16] ^ aes_inv_sbox[ 83] ^ aes_inv_sbox[150] ^ aes_inv_sbox[212];
st0[2] ^= aes_inv_sbox[32] ^ aes_inv_sbox[ 96] ^ aes_inv_sbox[160] ^ aes_inv_sbox[236];
st0[3] ^= aes_inv_sbox[48] ^ aes_inv_sbox[112] ^ aes_inv_sbox[187] ^ aes_inv_sbox[247];
for (round = 0;; round += 2, rkp += 8) {
st1[0] = inv_mix_columns(inv_subshift(st0, 0)) ^ rkp[0];
st1[1] = inv_mix_columns(inv_subshift(st0, 1)) ^ rkp[1];
st1[2] = inv_mix_columns(inv_subshift(st0, 2)) ^ rkp[2];
st1[3] = inv_mix_columns(inv_subshift(st0, 3)) ^ rkp[3];
if (round == rounds - 2)
break;
st0[0] = inv_mix_columns(inv_subshift(st1, 0)) ^ rkp[4];
st0[1] = inv_mix_columns(inv_subshift(st1, 1)) ^ rkp[5];
st0[2] = inv_mix_columns(inv_subshift(st1, 2)) ^ rkp[6];
st0[3] = inv_mix_columns(inv_subshift(st1, 3)) ^ rkp[7];
}
put_unaligned_le32(inv_subshift(st1, 0) ^ rkp[4], out);
put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4);
put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8);
put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12);
aes_decrypt_arch(key, out, in);
}
EXPORT_SYMBOL(aes_decrypt);
MODULE_DESCRIPTION("Generic AES library");
#ifdef aes_mod_init_arch
/*
 * If the arch code (included via aes.h) defines aes_mod_init_arch, run it
 * at subsys_initcall time, e.g. to detect CPU features and select the
 * fastest implementation.
 */
static int __init aes_mod_init(void)
{
aes_mod_init_arch();
return 0;
}
subsys_initcall(aes_mod_init);
/* Empty exit function so the module can be unloaded when built as one. */
static void __exit aes_mod_exit(void)
{
}
module_exit(aes_mod_exit);
#endif
MODULE_DESCRIPTION("AES block cipher");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_AUTHOR("Eric Biggers <ebiggers@kernel.org>");
MODULE_LICENSE("GPL v2");

View File

@@ -11,7 +11,7 @@
#include <linux/module.h>
#include <asm/irqflags.h>
static void aescfb_encrypt_block(const struct crypto_aes_ctx *ctx, void *dst,
static void aescfb_encrypt_block(const struct aes_enckey *key, void *dst,
const void *src)
{
unsigned long flags;
@@ -25,27 +25,27 @@ static void aescfb_encrypt_block(const struct crypto_aes_ctx *ctx, void *dst,
* interrupts disabled.
*/
local_irq_save(flags);
aes_encrypt(ctx, dst, src);
aes_encrypt(key, dst, src);
local_irq_restore(flags);
}
/**
* aescfb_encrypt - Perform AES-CFB encryption on a block of data
*
* @ctx: The AES-CFB key schedule
* @key: The AES-CFB key schedule
* @dst: Pointer to the ciphertext output buffer
* @src: Pointer the plaintext (may equal @dst for encryption in place)
* @len: The size in bytes of the plaintext and ciphertext.
* @iv: The initialization vector (IV) to use for this block of data
*/
void aescfb_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
void aescfb_encrypt(const struct aes_enckey *key, u8 *dst, const u8 *src,
int len, const u8 iv[AES_BLOCK_SIZE])
{
u8 ks[AES_BLOCK_SIZE];
const u8 *v = iv;
while (len > 0) {
aescfb_encrypt_block(ctx, ks, v);
aescfb_encrypt_block(key, ks, v);
crypto_xor_cpy(dst, src, ks, min(len, AES_BLOCK_SIZE));
v = dst;
@@ -61,18 +61,18 @@ EXPORT_SYMBOL(aescfb_encrypt);
/**
* aescfb_decrypt - Perform AES-CFB decryption on a block of data
*
* @ctx: The AES-CFB key schedule
* @key: The AES-CFB key schedule
* @dst: Pointer to the plaintext output buffer
* @src: Pointer the ciphertext (may equal @dst for decryption in place)
* @len: The size in bytes of the plaintext and ciphertext.
* @iv: The initialization vector (IV) to use for this block of data
*/
void aescfb_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
void aescfb_decrypt(const struct aes_enckey *key, u8 *dst, const u8 *src,
int len, const u8 iv[AES_BLOCK_SIZE])
{
u8 ks[2][AES_BLOCK_SIZE];
aescfb_encrypt_block(ctx, ks[0], iv);
aescfb_encrypt_block(key, ks[0], iv);
for (int i = 0; len > 0; i ^= 1) {
if (len > AES_BLOCK_SIZE)
@@ -81,7 +81,7 @@ void aescfb_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
* performing the XOR, as that may update in place and
* overwrite the ciphertext.
*/
aescfb_encrypt_block(ctx, ks[!i], src);
aescfb_encrypt_block(key, ks[!i], src);
crypto_xor_cpy(dst, src, ks[i], min(len, AES_BLOCK_SIZE));
@@ -214,15 +214,15 @@ static struct {
static int __init libaescfb_init(void)
{
for (int i = 0; i < ARRAY_SIZE(aescfb_tv); i++) {
struct crypto_aes_ctx ctx;
struct aes_enckey key;
u8 buf[64];
if (aes_expandkey(&ctx, aescfb_tv[i].key, aescfb_tv[i].klen)) {
pr_err("aes_expandkey() failed on vector %d\n", i);
if (aes_prepareenckey(&key, aescfb_tv[i].key, aescfb_tv[i].klen)) {
pr_err("aes_prepareenckey() failed on vector %d\n", i);
return -ENODEV;
}
aescfb_encrypt(&ctx, buf, aescfb_tv[i].ptext, aescfb_tv[i].len,
aescfb_encrypt(&key, buf, aescfb_tv[i].ptext, aescfb_tv[i].len,
aescfb_tv[i].iv);
if (memcmp(buf, aescfb_tv[i].ctext, aescfb_tv[i].len)) {
pr_err("aescfb_encrypt() #1 failed on vector %d\n", i);
@@ -230,14 +230,14 @@ static int __init libaescfb_init(void)
}
/* decrypt in place */
aescfb_decrypt(&ctx, buf, buf, aescfb_tv[i].len, aescfb_tv[i].iv);
aescfb_decrypt(&key, buf, buf, aescfb_tv[i].len, aescfb_tv[i].iv);
if (memcmp(buf, aescfb_tv[i].ptext, aescfb_tv[i].len)) {
pr_err("aescfb_decrypt() failed on vector %d\n", i);
return -ENODEV;
}
/* encrypt in place */
aescfb_encrypt(&ctx, buf, buf, aescfb_tv[i].len, aescfb_tv[i].iv);
aescfb_encrypt(&key, buf, buf, aescfb_tv[i].len, aescfb_tv[i].iv);
if (memcmp(buf, aescfb_tv[i].ctext, aescfb_tv[i].len)) {
pr_err("aescfb_encrypt() #2 failed on vector %d\n", i);

View File

@@ -12,7 +12,7 @@
#include <linux/module.h>
#include <asm/irqflags.h>
static void aesgcm_encrypt_block(const struct crypto_aes_ctx *ctx, void *dst,
static void aesgcm_encrypt_block(const struct aes_enckey *key, void *dst,
const void *src)
{
unsigned long flags;
@@ -26,7 +26,7 @@ static void aesgcm_encrypt_block(const struct crypto_aes_ctx *ctx, void *dst,
* effective when running with interrupts disabled.
*/
local_irq_save(flags);
aes_encrypt(ctx, dst, src);
aes_encrypt(key, dst, src);
local_irq_restore(flags);
}
@@ -49,12 +49,12 @@ int aesgcm_expandkey(struct aesgcm_ctx *ctx, const u8 *key,
int ret;
ret = crypto_gcm_check_authsize(authsize) ?:
aes_expandkey(&ctx->aes_ctx, key, keysize);
aes_prepareenckey(&ctx->aes_key, key, keysize);
if (ret)
return ret;
ctx->authsize = authsize;
aesgcm_encrypt_block(&ctx->aes_ctx, &ctx->ghash_key, kin);
aesgcm_encrypt_block(&ctx->aes_key, &ctx->ghash_key, kin);
return 0;
}
@@ -97,7 +97,7 @@ static void aesgcm_mac(const struct aesgcm_ctx *ctx, const u8 *src, int src_len,
aesgcm_ghash(&ghash, &ctx->ghash_key, &tail, sizeof(tail));
ctr[3] = cpu_to_be32(1);
aesgcm_encrypt_block(&ctx->aes_ctx, buf, ctr);
aesgcm_encrypt_block(&ctx->aes_key, buf, ctr);
crypto_xor_cpy(authtag, buf, (u8 *)&ghash, ctx->authsize);
memzero_explicit(&ghash, sizeof(ghash));
@@ -119,7 +119,7 @@ static void aesgcm_crypt(const struct aesgcm_ctx *ctx, u8 *dst, const u8 *src,
* len', this cannot happen, so no explicit test is necessary.
*/
ctr[3] = cpu_to_be32(n++);
aesgcm_encrypt_block(&ctx->aes_ctx, buf, ctr);
aesgcm_encrypt_block(&ctx->aes_key, buf, ctr);
crypto_xor_cpy(dst, src, buf, min(len, AES_BLOCK_SIZE));
dst += AES_BLOCK_SIZE;

View File

@@ -0,0 +1,201 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Scalar AES core transform
*
* Copyright (C) 2017 Linaro Ltd.
* Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>
.text
.align 5
rk .req r0
rounds .req r1
in .req r2
out .req r3
ttab .req ip
t0 .req lr
t1 .req r2
t2 .req r3
.macro __select, out, in, idx
.if __LINUX_ARM_ARCH__ < 7
and \out, \in, #0xff << (8 * \idx)
.else
ubfx \out, \in, #(8 * \idx), #8
.endif
.endm
.macro __load, out, in, idx, sz, op
.if __LINUX_ARM_ARCH__ < 7 && \idx > 0
ldr\op \out, [ttab, \in, lsr #(8 * \idx) - \sz]
.else
ldr\op \out, [ttab, \in, lsl #\sz]
.endif
.endm
.macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
__select \out0, \in0, 0
__select t0, \in1, 1
__load \out0, \out0, 0, \sz, \op
__load t0, t0, 1, \sz, \op
.if \enc
__select \out1, \in1, 0
__select t1, \in2, 1
.else
__select \out1, \in3, 0
__select t1, \in0, 1
.endif
__load \out1, \out1, 0, \sz, \op
__select t2, \in2, 2
__load t1, t1, 1, \sz, \op
__load t2, t2, 2, \sz, \op
eor \out0, \out0, t0, ror #24
__select t0, \in3, 3
.if \enc
__select \t3, \in3, 2
__select \t4, \in0, 3
.else
__select \t3, \in1, 2
__select \t4, \in2, 3
.endif
__load \t3, \t3, 2, \sz, \op
__load t0, t0, 3, \sz, \op
__load \t4, \t4, 3, \sz, \op
.ifnb \oldcpsr
/*
* This is the final round and we're done with all data-dependent table
* lookups, so we can safely re-enable interrupts.
*/
restore_irqs \oldcpsr
.endif
eor \out1, \out1, t1, ror #24
eor \out0, \out0, t2, ror #16
ldm rk!, {t1, t2}
eor \out1, \out1, \t3, ror #16
eor \out0, \out0, t0, ror #8
eor \out1, \out1, \t4, ror #8
eor \out0, \out0, t1
eor \out1, \out1, t2
.endm
.macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
__hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
__hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
.endm
.macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
__hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
__hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
.endm
.macro do_crypt, round, ttab, ltab, bsz
push {r3-r11, lr}
// Load keys first, to reduce latency in case they're not cached yet.
ldm rk!, {r8-r11}
ldr r4, [in]
ldr r5, [in, #4]
ldr r6, [in, #8]
ldr r7, [in, #12]
#ifdef CONFIG_CPU_BIG_ENDIAN
rev_l r4, t0
rev_l r5, t0
rev_l r6, t0
rev_l r7, t0
#endif
eor r4, r4, r8
eor r5, r5, r9
eor r6, r6, r10
eor r7, r7, r11
mov_l ttab, \ttab
/*
* Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
* L1 cache, assuming cacheline size >= 32. This is a hardening measure
* intended to make cache-timing attacks more difficult. They may not
* be fully prevented, however; see the paper
* https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
* ("Cache-timing attacks on AES") for a discussion of the many
* difficulties involved in writing truly constant-time AES software.
*/
save_and_disable_irqs t0
.set i, 0
.rept 1024 / 128
ldr r8, [ttab, #i + 0]
ldr r9, [ttab, #i + 32]
ldr r10, [ttab, #i + 64]
ldr r11, [ttab, #i + 96]
.set i, i + 128
.endr
push {t0} // oldcpsr
tst rounds, #2
bne 1f
0: \round r8, r9, r10, r11, r4, r5, r6, r7
\round r4, r5, r6, r7, r8, r9, r10, r11
1: subs rounds, rounds, #4
\round r8, r9, r10, r11, r4, r5, r6, r7
bls 2f
\round r4, r5, r6, r7, r8, r9, r10, r11
b 0b
2: .ifb \ltab
add ttab, ttab, #1
.else
mov_l ttab, \ltab
// Prefetch inverse S-box for final round; see explanation above
.set i, 0
.rept 256 / 64
ldr t0, [ttab, #i + 0]
ldr t1, [ttab, #i + 32]
.set i, i + 64
.endr
.endif
pop {rounds} // oldcpsr
\round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
#ifdef CONFIG_CPU_BIG_ENDIAN
rev_l r4, t0
rev_l r5, t0
rev_l r6, t0
rev_l r7, t0
#endif
ldr out, [sp]
str r4, [out]
str r5, [out, #4]
str r6, [out, #8]
str r7, [out, #12]
pop {r3-r11, pc}
.align 3
.ltorg
.endm
ENTRY(__aes_arm_encrypt)
	// Forward cipher using the 'ft' table.  The ltab argument is left
	// blank, so do_crypt's .ifb path offsets ttab by one byte for the
	// final (S-box only) round; bsz=2 selects 32-bit table loads.
	do_crypt	fround, aes_enc_tab,, 2
ENDPROC(__aes_arm_encrypt)
.align 5
ENTRY(__aes_arm_decrypt)
	// Inverse cipher using the 'it' table; the final round indexes the
	// separate inverse S-box with byte loads (bsz=0).
	do_crypt	iround, aes_dec_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)

56
lib/crypto/arm/aes.h Normal file
View File

@@ -0,0 +1,56 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* AES block cipher, optimized for ARM
*
* Copyright (C) 2017 Linaro Ltd.
* Copyright 2026 Google LLC
*/
asmlinkage void __aes_arm_encrypt(const u32 rk[], int rounds,
const u8 in[AES_BLOCK_SIZE],
u8 out[AES_BLOCK_SIZE]);
asmlinkage void __aes_arm_decrypt(const u32 inv_rk[], int rounds,
const u8 in[AES_BLOCK_SIZE],
u8 out[AES_BLOCK_SIZE]);
/*
 * Expand the AES key schedule for the scalar ARM implementation.
 *
 * Fills in the encryption round keys in @k and, when @inv_k is non-NULL,
 * the decryption round keys as well.  @nrounds is unused on this
 * architecture: the generic expansion takes only @key_len.
 */
static void aes_preparekey_arch(union aes_enckey_arch *k,
				union aes_invkey_arch *inv_k,
				const u8 *in_key, int key_len, int nrounds)
{
	aes_expandkey_generic(k->rndkeys, inv_k ? inv_k->inv_rndkeys : NULL,
			      in_key, key_len);
}
/*
 * Encrypt one 16-byte block with the scalar ARM assembly implementation.
 *
 * The assembly uses 32-bit loads and stores, so on CPUs without efficient
 * unaligned access the block is bounced through a 4-byte-aligned stack
 * buffer whenever either pointer is misaligned.
 */
static void aes_encrypt_arch(const struct aes_enckey *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
	    IS_ALIGNED((uintptr_t)out | (uintptr_t)in, 4)) {
		__aes_arm_encrypt(key->k.rndkeys, key->nrounds, in, out);
	} else {
		u8 tmp[AES_BLOCK_SIZE] __aligned(4);

		memcpy(tmp, in, AES_BLOCK_SIZE);
		__aes_arm_encrypt(key->k.rndkeys, key->nrounds, tmp, tmp);
		memcpy(out, tmp, AES_BLOCK_SIZE);
	}
}
/*
 * Decrypt one 16-byte block with the scalar ARM assembly implementation.
 *
 * Mirrors aes_encrypt_arch(): the assembly uses 32-bit accesses, so data
 * is bounced through a 4-byte-aligned stack buffer when either pointer is
 * misaligned and the CPU lacks efficient unaligned access.
 */
static void aes_decrypt_arch(const struct aes_key *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
	    IS_ALIGNED((uintptr_t)out | (uintptr_t)in, 4)) {
		__aes_arm_decrypt(key->inv_k.inv_rndkeys, key->nrounds, in,
				  out);
	} else {
		u8 tmp[AES_BLOCK_SIZE] __aligned(4);

		memcpy(tmp, in, AES_BLOCK_SIZE);
		__aes_arm_decrypt(key->inv_k.inv_rndkeys, key->nrounds, tmp,
				  tmp);
		memcpy(out, tmp, AES_BLOCK_SIZE);
	}
}

View File

@@ -0,0 +1,116 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* NH - ε-almost-universal hash function, NEON accelerated version
*
* Copyright 2018 Google LLC
*
* Author: Eric Biggers <ebiggers@google.com>
*/
#include <linux/linkage.h>
.text
.fpu neon
KEY .req r0
MESSAGE .req r1
MESSAGE_LEN .req r2
HASH .req r3
PASS0_SUMS .req q0
PASS0_SUM_A .req d0
PASS0_SUM_B .req d1
PASS1_SUMS .req q1
PASS1_SUM_A .req d2
PASS1_SUM_B .req d3
PASS2_SUMS .req q2
PASS2_SUM_A .req d4
PASS2_SUM_B .req d5
PASS3_SUMS .req q3
PASS3_SUM_A .req d6
PASS3_SUM_B .req d7
K0 .req q4
K1 .req q5
K2 .req q6
K3 .req q7
T0 .req q8
T0_L .req d16
T0_H .req d17
T1 .req q9
T1_L .req d18
T1_H .req d19
T2 .req q10
T2_L .req d20
T2_H .req d21
T3 .req q11
T3_L .req d22
T3_H .req d23
.macro _nh_stride k0, k1, k2, k3
// Load next message stride
vld1.8 {T3}, [MESSAGE]!
// Load next key stride
vld1.32 {\k3}, [KEY]!
// Add message words to key words
vadd.u32 T0, T3, \k0
vadd.u32 T1, T3, \k1
vadd.u32 T2, T3, \k2
vadd.u32 T3, T3, \k3
// Multiply 32x32 => 64 and accumulate
vmlal.u32 PASS0_SUMS, T0_L, T0_H
vmlal.u32 PASS1_SUMS, T1_L, T1_H
vmlal.u32 PASS2_SUMS, T2_L, T2_H
vmlal.u32 PASS3_SUMS, T3_L, T3_H
.endm
/*
* void nh_neon(const u32 *key, const u8 *message, size_t message_len,
* __le64 hash[NH_NUM_PASSES])
*
* It's guaranteed that message_len % 16 == 0.
*/
ENTRY(nh_neon)
	// Load the first three key strides and zero the pass accumulators.
	// K3 is loaded on demand inside _nh_stride.
	vld1.32		{K0,K1}, [KEY]!
	vmov.u64	PASS0_SUMS, #0
	vmov.u64	PASS1_SUMS, #0
	vld1.32		{K2}, [KEY]!
	vmov.u64	PASS2_SUMS, #0
	vmov.u64	PASS3_SUMS, #0

	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	blt		.Lloop4_done
.Lloop4:
	// Main loop: hash 64 message bytes (four 16-byte strides) per
	// iteration, rotating the key registers between strides.
	_nh_stride	K0, K1, K2, K3
	_nh_stride	K1, K2, K3, K0
	_nh_stride	K2, K3, K0, K1
	_nh_stride	K3, K0, K1, K2
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	bge		.Lloop4
.Lloop4_done:
	// Hash the remaining 0 to 3 strides (message_len % 16 == 0).
	ands		MESSAGE_LEN, MESSAGE_LEN, #63
	beq		.Ldone
	_nh_stride	K0, K1, K2, K3

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K1, K2, K3, K0

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K2, K3, K0, K1
.Ldone:
	// Sum the accumulators for each pass, then store the sums to 'hash'
	vadd.u64	T0_L, PASS0_SUM_A, PASS0_SUM_B
	vadd.u64	T0_H, PASS1_SUM_A, PASS1_SUM_B
	vadd.u64	T1_L, PASS2_SUM_A, PASS2_SUM_B
	vadd.u64	T1_H, PASS3_SUM_A, PASS3_SUM_B
	vst1.8		{T0-T1}, [HASH]
	bx		lr
ENDPROC(nh_neon)

33
lib/crypto/arm/nh.h Normal file
View File

@@ -0,0 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* ARM32 accelerated implementation of NH
*
* Copyright 2018 Google LLC
*/
#include <asm/neon.h>
#include <asm/simd.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len,
__le64 hash[NH_NUM_PASSES]);
/*
 * NEON-accelerated NH.  Returns true if the hash was computed, or false
 * if the caller must fall back to the portable implementation (NEON
 * absent or currently unusable, or the message is shorter than 64 bytes).
 */
static bool nh_arch(const u32 *key, const u8 *message, size_t message_len,
		    __le64 hash[NH_NUM_PASSES])
{
	if (!static_branch_likely(&have_neon) || message_len < 64 ||
	    !may_use_simd())
		return false;

	scoped_ksimd()
		nh_neon(key, message, message_len, hash);
	return true;
}
#define nh_mod_init_arch nh_mod_init_arch
/* One-time init: enable the NEON NH path when the CPU advertises NEON. */
static void nh_mod_init_arch(void)
{
	if (elf_hwcap & HWCAP_NEON)
		static_branch_enable(&have_neon);
}

View File

@@ -0,0 +1,84 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.arch armv8-a+crypto
// void __aes_ce_encrypt(const u32 rk[], u8 out[16], const u8 in[16],
//			 int rounds)
// x0 = encryption round keys, x1 = out, x2 = in, w3 = number of rounds
SYM_FUNC_START(__aes_ce_encrypt)
	sub		w3, w3, #2	// first/last rounds handled outside loop
	ld1		{v0.16b}, [x2]	// load input block
	ld1		{v1.4s}, [x0], #16
	cmp		w3, #10
	bmi		0f		// AES-128 (rounds == 10)
	bne		3f		// AES-256 (rounds == 14)
	mov		v3.16b, v1.16b	// AES-192 (rounds == 12) falls through
	b		2f
0:	mov		v2.16b, v1.16b
	ld1		{v3.4s}, [x0], #16
1:	aese		v0.16b, v2.16b	// AddRoundKey + SubBytes + ShiftRows
	aesmc		v0.16b, v0.16b	// MixColumns
2:	ld1		{v1.4s}, [x0], #16
	aese		v0.16b, v3.16b
	aesmc		v0.16b, v0.16b
3:	ld1		{v2.4s}, [x0], #16
	subs		w3, w3, #3	// three rounds consumed per pass
	aese		v0.16b, v1.16b
	aesmc		v0.16b, v0.16b
	ld1		{v3.4s}, [x0], #16
	bpl		1b
	aese		v0.16b, v2.16b	// final round: no MixColumns
	eor		v0.16b, v0.16b, v3.16b	// xor with last round key
	st1		{v0.16b}, [x1]	// store output block
	ret
SYM_FUNC_END(__aes_ce_encrypt)
// void __aes_ce_decrypt(const u32 inv_rk[], u8 out[16], const u8 in[16],
//			 int rounds)
// x0 = decryption round keys, x1 = out, x2 = in, w3 = number of rounds
SYM_FUNC_START(__aes_ce_decrypt)
	sub		w3, w3, #2	// first/last rounds handled outside loop
	ld1		{v0.16b}, [x2]	// load input block
	ld1		{v1.4s}, [x0], #16
	cmp		w3, #10
	bmi		0f		// AES-128 (rounds == 10)
	bne		3f		// AES-256 (rounds == 14)
	mov		v3.16b, v1.16b	// AES-192 (rounds == 12) falls through
	b		2f
0:	mov		v2.16b, v1.16b
	ld1		{v3.4s}, [x0], #16
1:	aesd		v0.16b, v2.16b	// AddRoundKey + InvSubBytes + InvShiftRows
	aesimc		v0.16b, v0.16b	// InvMixColumns
2:	ld1		{v1.4s}, [x0], #16
	aesd		v0.16b, v3.16b
	aesimc		v0.16b, v0.16b
3:	ld1		{v2.4s}, [x0], #16
	subs		w3, w3, #3	// three rounds consumed per pass
	aesd		v0.16b, v1.16b
	aesimc		v0.16b, v0.16b
	ld1		{v3.4s}, [x0], #16
	bpl		1b
	aesd		v0.16b, v2.16b	// final round: no InvMixColumns
	eor		v0.16b, v0.16b, v3.16b	// xor with last round key
	st1		{v0.16b}, [x1]	// store output block
	ret
SYM_FUNC_END(__aes_ce_decrypt)
/*
* __aes_ce_sub() - use the aese instruction to perform the AES sbox
* substitution on each byte in 'input'
*/
// u32 __aes_ce_sub(u32 l) - returns the input word with the AES S-box
// applied to each of its four bytes (used by the key schedule).
SYM_FUNC_START(__aes_ce_sub)
	dup		v1.4s, w0	// broadcast input word to all lanes
	movi		v0.16b, #0	// zero state: aese reduces to
	aese		v0.16b, v1.16b	// SubBytes(ShiftRows(v1)); identical
					// lanes make ShiftRows a per-word no-op
	umov		w0, v0.s[0]	// return the substituted word
	ret
SYM_FUNC_END(__aes_ce_sub)
// void __aes_ce_invert(struct aes_block *out, const struct aes_block *in)
// out[x0] = InvMixColumns(in[x1]); used to derive decryption round keys
// for the Equivalent Inverse Cipher.
SYM_FUNC_START(__aes_ce_invert)
	ld1		{v0.4s}, [x1]
	aesimc		v1.16b, v0.16b
	st1		{v1.4s}, [x0]
	ret
SYM_FUNC_END(__aes_ce_invert)

View File

@@ -0,0 +1,132 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Scalar AES core transform
*
* Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>
.text
rk .req x0
out .req x1
in .req x2
rounds .req x3
tt .req x2
.macro __pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
.ifc \op\shift, b0
ubfiz \reg0, \in0, #2, #8
ubfiz \reg1, \in1e, #2, #8
.else
ubfx \reg0, \in0, #\shift, #8
ubfx \reg1, \in1e, #\shift, #8
.endif
/*
* AArch64 cannot do byte size indexed loads from a table containing
* 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
* valid instruction. So perform the shift explicitly first for the
* high bytes (the low byte is shifted implicitly by using ubfiz rather
* than ubfx above)
*/
.ifnc \op, b
ldr \reg0, [tt, \reg0, uxtw #2]
ldr \reg1, [tt, \reg1, uxtw #2]
.else
.if \shift > 0
lsl \reg0, \reg0, #2
lsl \reg1, \reg1, #2
.endif
ldrb \reg0, [tt, \reg0, uxtw]
ldrb \reg1, [tt, \reg1, uxtw]
.endif
.endm
.macro __pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
ubfx \reg0, \in0, #\shift, #8
ubfx \reg1, \in1d, #\shift, #8
ldr\op \reg0, [tt, \reg0, uxtw #\sz]
ldr\op \reg1, [tt, \reg1, uxtw #\sz]
.endm
.macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
ldp \out0, \out1, [rk], #8
__pair\enc \sz, \op, w12, w13, \in0, \in1, \in3, 0
__pair\enc \sz, \op, w14, w15, \in1, \in2, \in0, 8
__pair\enc \sz, \op, w16, w17, \in2, \in3, \in1, 16
__pair\enc \sz, \op, \t0, \t1, \in3, \in0, \in2, 24
eor \out0, \out0, w12
eor \out1, \out1, w13
eor \out0, \out0, w14, ror #24
eor \out1, \out1, w15, ror #24
eor \out0, \out0, w16, ror #16
eor \out1, \out1, w17, ror #16
eor \out0, \out0, \t0, ror #8
eor \out1, \out1, \t1, ror #8
.endm
.macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
__hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
__hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
.endm
.macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
__hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
__hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
.endm
.macro do_crypt, round, ttab, ltab, bsz
ldp w4, w5, [in]
ldp w6, w7, [in, #8]
ldp w8, w9, [rk], #16
ldp w10, w11, [rk, #-8]
CPU_BE( rev w4, w4 )
CPU_BE( rev w5, w5 )
CPU_BE( rev w6, w6 )
CPU_BE( rev w7, w7 )
eor w4, w4, w8
eor w5, w5, w9
eor w6, w6, w10
eor w7, w7, w11
adr_l tt, \ttab
tbnz rounds, #1, 1f
0: \round w8, w9, w10, w11, w4, w5, w6, w7
\round w4, w5, w6, w7, w8, w9, w10, w11
1: subs rounds, rounds, #4
\round w8, w9, w10, w11, w4, w5, w6, w7
b.ls 3f
2: \round w4, w5, w6, w7, w8, w9, w10, w11
b 0b
3: adr_l tt, \ltab
\round w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b
CPU_BE( rev w4, w4 )
CPU_BE( rev w5, w5 )
CPU_BE( rev w6, w6 )
CPU_BE( rev w7, w7 )
stp w4, w5, [out]
stp w6, w7, [out, #8]
ret
.endm
SYM_FUNC_START(__aes_arm64_encrypt)
	// Forward cipher: 'ft' table for inner rounds; the final round reads
	// single S-box bytes at a one-byte offset into the same table.
	do_crypt	fround, aes_enc_tab, aes_enc_tab + 1, 2
SYM_FUNC_END(__aes_arm64_encrypt)
.align 5
SYM_FUNC_START(__aes_arm64_decrypt)
	// Inverse cipher: 'it' table for inner rounds; the final round uses
	// the separate inverse S-box with byte loads (bsz=0).
	do_crypt	iround, aes_dec_tab, crypto_aes_inv_sbox, 0
SYM_FUNC_END(__aes_arm64_decrypt)

164
lib/crypto/arm64/aes.h Normal file
View File

@@ -0,0 +1,164 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* AES block cipher, optimized for ARM64
*
* Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
* Copyright 2026 Google LLC
*/
#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/unaligned.h>
#include <linux/cpufeature.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_aes);
struct aes_block {
u8 b[AES_BLOCK_SIZE];
};
asmlinkage void __aes_arm64_encrypt(const u32 rk[], u8 out[AES_BLOCK_SIZE],
const u8 in[AES_BLOCK_SIZE], int rounds);
asmlinkage void __aes_arm64_decrypt(const u32 inv_rk[], u8 out[AES_BLOCK_SIZE],
const u8 in[AES_BLOCK_SIZE], int rounds);
asmlinkage void __aes_ce_encrypt(const u32 rk[], u8 out[AES_BLOCK_SIZE],
const u8 in[AES_BLOCK_SIZE], int rounds);
asmlinkage void __aes_ce_decrypt(const u32 inv_rk[], u8 out[AES_BLOCK_SIZE],
const u8 in[AES_BLOCK_SIZE], int rounds);
asmlinkage u32 __aes_ce_sub(u32 l);
asmlinkage void __aes_ce_invert(struct aes_block *out,
const struct aes_block *in);
/*
* Expand an AES key using the crypto extensions if supported and usable or
* generic code otherwise. The expanded key format is compatible between the
* two cases. The outputs are @rndkeys (required) and @inv_rndkeys (optional).
*/
static void aes_expandkey_arm64(u32 rndkeys[], u32 *inv_rndkeys,
				const u8 *in_key, int key_len, int nrounds)
{
	/*
	 * The AES key schedule round constants
	 */
	static u8 const rcon[] = {
		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
	};
	u32 kwords = key_len / sizeof(u32);	/* 4, 6, or 8 key words */
	struct aes_block *key_enc, *key_dec;
	int i, j;

	/* Fall back to the generic C expansion when the Crypto Extensions
	 * are unavailable or NEON cannot be used in this context. */
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) ||
	    !static_branch_likely(&have_aes) || unlikely(!may_use_simd())) {
		aes_expandkey_generic(rndkeys, inv_rndkeys, in_key, key_len);
		return;
	}

	/* The raw key words (little-endian) form the first round key(s). */
	for (i = 0; i < kwords; i++)
		rndkeys[i] = get_unaligned_le32(&in_key[i * sizeof(u32)]);

	scoped_ksimd() {
		for (i = 0; i < sizeof(rcon); i++) {
			u32 *rki = &rndkeys[i * kwords];	/* input stride */
			u32 *rko = rki + kwords;		/* output stride */

			/* g() transform: RotWord + SubWord (via __aes_ce_sub)
			 * + round constant, then chain the xors. */
			rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^
				 rcon[i] ^ rki[0];
			rko[1] = rko[0] ^ rki[1];
			rko[2] = rko[1] ^ rki[2];
			rko[3] = rko[2] ^ rki[3];

			if (key_len == AES_KEYSIZE_192) {
				if (i >= 7)
					break;
				rko[4] = rko[3] ^ rki[4];
				rko[5] = rko[4] ^ rki[5];
			} else if (key_len == AES_KEYSIZE_256) {
				if (i >= 6)
					break;
				/* AES-256 applies SubWord mid-stride too. */
				rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
				rko[5] = rko[4] ^ rki[5];
				rko[6] = rko[5] ^ rki[6];
				rko[7] = rko[6] ^ rki[7];
			}
		}

		/*
		 * Generate the decryption keys for the Equivalent Inverse
		 * Cipher.  This involves reversing the order of the round
		 * keys, and applying the Inverse Mix Columns transformation on
		 * all but the first and the last one.
		 */
		if (inv_rndkeys) {
			key_enc = (struct aes_block *)rndkeys;
			key_dec = (struct aes_block *)inv_rndkeys;
			j = nrounds;

			key_dec[0] = key_enc[j];
			for (i = 1, j--; j > 0; i++, j--)
				__aes_ce_invert(key_dec + i, key_enc + j);
			key_dec[i] = key_enc[0];
		}
	}
}
/*
 * Expand the AES key schedule for ARM64.
 *
 * Fills in the encryption round keys in @k and, when @inv_k is non-NULL,
 * the decryption round keys for the Equivalent Inverse Cipher.
 */
static void aes_preparekey_arch(union aes_enckey_arch *k,
				union aes_invkey_arch *inv_k,
				const u8 *in_key, int key_len, int nrounds)
{
	aes_expandkey_arm64(k->rndkeys, inv_k ? inv_k->inv_rndkeys : NULL,
			    in_key, key_len, nrounds);
}
/*
* This is here temporarily until the remaining AES mode implementations are
* migrated from arch/arm64/crypto/ to lib/crypto/arm64/.
*/
/*
 * Expand @in_key into the legacy crypto_aes_ctx layout, generating both
 * the encryption and decryption round keys.
 *
 * Return: 0 on success, or -EINVAL if @key_len is not a valid AES key
 * length.
 */
int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
		     unsigned int key_len)
{
	unsigned int nrounds = 6 + key_len / 4;

	if (aes_check_keylen(key_len))
		return -EINVAL;

	ctx->key_length = key_len;
	aes_expandkey_arm64(ctx->key_enc, ctx->key_dec, in_key, key_len,
			    nrounds);
	return 0;
}
EXPORT_SYMBOL(ce_aes_expandkey);
/*
 * Encrypt one 16-byte block.  Uses the ARMv8 Crypto Extensions when the
 * CPU has them and NEON is currently usable; otherwise falls back to the
 * scalar table-based assembly implementation.
 */
static void aes_encrypt_arch(const struct aes_enckey *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) ||
	    !static_branch_likely(&have_aes) || !may_use_simd()) {
		__aes_arm64_encrypt(key->k.rndkeys, out, in, key->nrounds);
		return;
	}

	scoped_ksimd()
		__aes_ce_encrypt(key->k.rndkeys, out, in, key->nrounds);
}
/*
 * Decrypt one 16-byte block.  Uses the ARMv8 Crypto Extensions when the
 * CPU has them and NEON is currently usable; otherwise falls back to the
 * scalar table-based assembly implementation.
 */
static void aes_decrypt_arch(const struct aes_key *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) ||
	    !static_branch_likely(&have_aes) || !may_use_simd()) {
		__aes_arm64_decrypt(key->inv_k.inv_rndkeys, out, in,
				    key->nrounds);
		return;
	}

	scoped_ksimd()
		__aes_ce_decrypt(key->inv_k.inv_rndkeys, out, in,
				 key->nrounds);
}
#ifdef CONFIG_KERNEL_MODE_NEON
#define aes_mod_init_arch aes_mod_init_arch
/* One-time init: enable the Crypto Extensions paths when the CPU has AES. */
static void aes_mod_init_arch(void)
{
	if (cpu_have_named_feature(AES))
		static_branch_enable(&have_aes);
}
#endif /* CONFIG_KERNEL_MODE_NEON */

View File

@@ -0,0 +1,103 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* NH - ε-almost-universal hash function, ARM64 NEON accelerated version
*
* Copyright 2018 Google LLC
*
* Author: Eric Biggers <ebiggers@google.com>
*/
#include <linux/linkage.h>
KEY .req x0
MESSAGE .req x1
MESSAGE_LEN .req x2
HASH .req x3
PASS0_SUMS .req v0
PASS1_SUMS .req v1
PASS2_SUMS .req v2
PASS3_SUMS .req v3
K0 .req v4
K1 .req v5
K2 .req v6
K3 .req v7
T0 .req v8
T1 .req v9
T2 .req v10
T3 .req v11
T4 .req v12
T5 .req v13
T6 .req v14
T7 .req v15
.macro _nh_stride k0, k1, k2, k3
// Load next message stride
ld1 {T3.16b}, [MESSAGE], #16
// Load next key stride
ld1 {\k3\().4s}, [KEY], #16
// Add message words to key words
add T0.4s, T3.4s, \k0\().4s
add T1.4s, T3.4s, \k1\().4s
add T2.4s, T3.4s, \k2\().4s
add T3.4s, T3.4s, \k3\().4s
// Multiply 32x32 => 64 and accumulate
mov T4.d[0], T0.d[1]
mov T5.d[0], T1.d[1]
mov T6.d[0], T2.d[1]
mov T7.d[0], T3.d[1]
umlal PASS0_SUMS.2d, T0.2s, T4.2s
umlal PASS1_SUMS.2d, T1.2s, T5.2s
umlal PASS2_SUMS.2d, T2.2s, T6.2s
umlal PASS3_SUMS.2d, T3.2s, T7.2s
.endm
/*
* void nh_neon(const u32 *key, const u8 *message, size_t message_len,
* __le64 hash[NH_NUM_PASSES])
*
* It's guaranteed that message_len % 16 == 0.
*/
SYM_FUNC_START(nh_neon)
	// Load the first three key strides and zero the pass accumulators.
	// K3 is loaded on demand inside _nh_stride.
	ld1		{K0.4s,K1.4s}, [KEY], #32
	movi		PASS0_SUMS.2d, #0
	movi		PASS1_SUMS.2d, #0
	ld1		{K2.4s}, [KEY], #16
	movi		PASS2_SUMS.2d, #0
	movi		PASS3_SUMS.2d, #0

	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	blt		.Lloop4_done
.Lloop4:
	// Main loop: hash 64 message bytes (four 16-byte strides) per
	// iteration, rotating the key registers between strides.
	_nh_stride	K0, K1, K2, K3
	_nh_stride	K1, K2, K3, K0
	_nh_stride	K2, K3, K0, K1
	_nh_stride	K3, K0, K1, K2
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	bge		.Lloop4
.Lloop4_done:
	// Hash the remaining 0 to 3 strides (message_len % 16 == 0).
	ands		MESSAGE_LEN, MESSAGE_LEN, #63
	beq		.Ldone
	_nh_stride	K0, K1, K2, K3

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K1, K2, K3, K0

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K2, K3, K0, K1
.Ldone:
	// Sum the accumulators for each pass, then store the sums to 'hash'
	addp		T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d
	addp		T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
	st1		{T0.16b,T1.16b}, [HASH]
	ret
SYM_FUNC_END(nh_neon)

34
lib/crypto/arm64/nh.h Normal file
View File

@@ -0,0 +1,34 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* ARM64 accelerated implementation of NH
*
* Copyright 2018 Google LLC
*/
#include <asm/hwcap.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len,
__le64 hash[NH_NUM_PASSES]);
/*
 * NEON-accelerated NH.  Returns true if the hash was computed, or false
 * if the caller must fall back to the portable implementation (NEON
 * absent or currently unusable, or the message is shorter than 64 bytes).
 */
static bool nh_arch(const u32 *key, const u8 *message, size_t message_len,
		    __le64 hash[NH_NUM_PASSES])
{
	if (!static_branch_likely(&have_neon) || message_len < 64 ||
	    !may_use_simd())
		return false;

	scoped_ksimd()
		nh_neon(key, message, message_len, hash);
	return true;
}
#define nh_mod_init_arch nh_mod_init_arch
/* One-time init: enable the NEON NH path when the CPU has Advanced SIMD. */
static void nh_mod_init_arch(void)
{
	if (cpu_have_named_feature(ASIMD))
		static_branch_enable(&have_neon);
}

458
lib/crypto/fips-mldsa.h Normal file
View File

@@ -0,0 +1,458 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/* ML-DSA test vector extracted from leancrypto */
#include <linux/fips.h>
static const u8 fips_test_mldsa65_signature[] __initconst __maybe_unused = {
0xda, 0xcf, 0x8d, 0x67, 0x59, 0x60, 0x6c, 0x39, 0x2d, 0x89, 0xb6, 0xa1,
0xf3, 0x8c, 0x70, 0xcf, 0x25, 0x86, 0x21, 0xa1, 0x9f, 0x20, 0x9e, 0xf5,
0xd2, 0xdd, 0xbd, 0x99, 0xfa, 0xe4, 0xab, 0x77, 0x31, 0x65, 0x18, 0xa1,
0xd1, 0x3f, 0x21, 0x70, 0x36, 0xe1, 0xf9, 0x5c, 0x28, 0xb6, 0x7d, 0x34,
0xae, 0x66, 0xc9, 0x1c, 0x8e, 0xc6, 0xf9, 0x45, 0x8c, 0xa9, 0xb2, 0xfb,
0x0f, 0x5b, 0xb8, 0xf9, 0xf5, 0xe2, 0x37, 0x79, 0x12, 0xda, 0xa7, 0x72,
0x9e, 0x0d, 0xf8, 0x88, 0x5b, 0x34, 0x49, 0x6c, 0xed, 0xa3, 0x7f, 0x86,
0xd3, 0xd9, 0x2f, 0x44, 0x08, 0x0d, 0xb7, 0xdb, 0x4a, 0xce, 0x02, 0x14,
0x02, 0xd6, 0x40, 0x75, 0xe3, 0xc0, 0x97, 0xfc, 0x6c, 0x6a, 0x88, 0x29,
0x0c, 0xe2, 0x3a, 0x2b, 0x28, 0x82, 0x8f, 0x27, 0x09, 0x69, 0x91, 0xc6,
0xc3, 0xb7, 0x07, 0x61, 0x86, 0x8d, 0x89, 0x8a, 0xd5, 0x00, 0x3b, 0x4b,
0xfc, 0x6f, 0xb3, 0x3f, 0x4c, 0x93, 0x31, 0xfc, 0x88, 0x53, 0x26, 0xea,
0xe5, 0x3a, 0xfc, 0xc1, 0x59, 0x16, 0xf0, 0xb7, 0xac, 0xde, 0x1e, 0xd8,
0x74, 0x85, 0x72, 0xd9, 0xbb, 0xbe, 0x76, 0x32, 0x25, 0x9d, 0x21, 0xbc,
0xfd, 0x8d, 0x32, 0xfe, 0xae, 0x24, 0xe5, 0x4a, 0xcc, 0x5d, 0x15, 0x23,
0xd3, 0x57, 0xe7, 0xa9, 0x2c, 0x31, 0xd7, 0xc5, 0x6b, 0x70, 0x6c, 0x22,
0x5a, 0x13, 0x1f, 0x76, 0x13, 0x78, 0x6f, 0xac, 0x42, 0x4c, 0x46, 0x81,
0xa2, 0x20, 0x91, 0x30, 0xed, 0xcb, 0x90, 0xfe, 0x3c, 0xa3, 0xc7, 0xb4,
0x1f, 0x21, 0x1d, 0x98, 0x74, 0x6a, 0x3e, 0xc8, 0xcc, 0xd2, 0x68, 0x87,
0x69, 0xa9, 0xdf, 0x50, 0xd5, 0x0a, 0x8e, 0x10, 0x54, 0xab, 0xea, 0x65,
0x2a, 0x52, 0xd7, 0x22, 0xae, 0x2f, 0x1e, 0xc3, 0x16, 0x58, 0x20, 0x18,
0x6d, 0x35, 0x46, 0x31, 0x43, 0x5d, 0x62, 0xfb, 0xb1, 0x47, 0x32, 0xfa,
0x14, 0xcc, 0x51, 0xa3, 0xcd, 0x99, 0x4f, 0x97, 0x0f, 0xca, 0x24, 0x93,
0x17, 0xea, 0xa3, 0xf3, 0x1f, 0xbe, 0xb5, 0xa3, 0xac, 0x80, 0xcc, 0x20,
0x3b, 0xa6, 0xd3, 0x32, 0x72, 0x4e, 0xd9, 0x25, 0xf9, 0xc2, 0x24, 0x15,
0xbd, 0x1e, 0x1e, 0x41, 0x8c, 0x18, 0x8c, 0x58, 0xe8, 0x75, 0x20, 0xff,
0xa3, 0xf4, 0xd4, 0xab, 0x75, 0x78, 0x4e, 0xbb, 0x7c, 0x94, 0x93, 0x28,
0x5b, 0x07, 0x3a, 0x3c, 0xc9, 0xf1, 0x55, 0x3e, 0x33, 0xed, 0xf8, 0x72,
0x55, 0xab, 0x5a, 0xea, 0xbe, 0x65, 0xfa, 0x81, 0x50, 0xc0, 0x9d, 0x2d,
0xfb, 0x04, 0x25, 0x7c, 0xb9, 0xee, 0xe2, 0xa3, 0x00, 0x44, 0xd3, 0x9d,
0xee, 0x4f, 0x80, 0x77, 0xfb, 0x26, 0x6b, 0x07, 0xd0, 0xff, 0x82, 0x39,
0x0e, 0x2b, 0x47, 0xa3, 0xe7, 0x3e, 0xc5, 0x4e, 0x15, 0x8a, 0x48, 0x28,
0xfb, 0xf7, 0xa4, 0x86, 0xfb, 0x77, 0x60, 0xcd, 0xc5, 0x68, 0x96, 0xd7,
0x4c, 0x3c, 0xf2, 0x51, 0x71, 0x79, 0x2e, 0x2e, 0x57, 0x10, 0xa7, 0xfc,
0xd1, 0xd4, 0x61, 0x71, 0x81, 0x85, 0x74, 0x09, 0x7d, 0x80, 0xd0, 0xc2,
0xe9, 0xff, 0xb7, 0x88, 0x53, 0x74, 0x1e, 0xb0, 0xca, 0x65, 0x48, 0x8e,
0xdb, 0x59, 0x3a, 0xcb, 0x80, 0xeb, 0xfd, 0xd2, 0xc9, 0x38, 0x43, 0xae,
0x76, 0xf2, 0xbb, 0x51, 0xb2, 0xcb, 0xe6, 0x85, 0x31, 0xb5, 0x62, 0xd4,
0x5e, 0x48, 0x08, 0xf1, 0x40, 0x5b, 0x16, 0x83, 0x5e, 0xa5, 0x9c, 0x6b,
0x91, 0x49, 0x44, 0xff, 0x3b, 0xa9, 0x2b, 0xf3, 0x06, 0x33, 0x9e, 0x6e,
0x3c, 0x66, 0x7e, 0x27, 0xa2, 0x59, 0x7b, 0xe3, 0xb6, 0xb4, 0x28, 0xeb,
0x93, 0x35, 0x87, 0xac, 0x0e, 0x0b, 0x7e, 0xbc, 0x35, 0x28, 0x72, 0x1f,
0x26, 0x59, 0xd0, 0x1f, 0x63, 0xe4, 0x86, 0x5d, 0x70, 0xf3, 0xa8, 0xa4,
0xb8, 0xcd, 0xb3, 0xf8, 0x8d, 0xaa, 0x41, 0xd2, 0xcc, 0x0b, 0x15, 0x66,
0x22, 0x83, 0x92, 0xe3, 0x0b, 0xf9, 0xea, 0xa0, 0x33, 0xa1, 0x4e, 0x92,
0xae, 0x81, 0x95, 0xa4, 0x58, 0x3f, 0xa9, 0x15, 0x52, 0xf9, 0xda, 0xb7,
0x10, 0x8d, 0xc6, 0xab, 0x77, 0xe9, 0xbe, 0xad, 0xc9, 0x3a, 0x6a, 0x8d,
0x92, 0x6c, 0x69, 0xff, 0x31, 0x49, 0x25, 0x04, 0xc8, 0x93, 0x6f, 0xc8,
0xe7, 0x60, 0x7a, 0x76, 0xb5, 0xc1, 0x07, 0xef, 0xa3, 0x39, 0xa6, 0xf2,
0x36, 0x04, 0xde, 0x3c, 0x4a, 0x4e, 0x96, 0xbd, 0x64, 0x26, 0x80, 0x01,
0x88, 0x47, 0xd2, 0xa4, 0x46, 0xcd, 0xe1, 0x30, 0x7f, 0xa3, 0x00, 0x11,
0x38, 0x55, 0xfa, 0xeb, 0x10, 0xeb, 0xa0, 0x65, 0x04, 0x09, 0xc8, 0xde,
0x9c, 0x73, 0xba, 0x0c, 0xbd, 0xd3, 0xa5, 0x84, 0x5e, 0xb9, 0x3b, 0xd4,
0x94, 0xbd, 0xa6, 0x53, 0xbe, 0x93, 0x69, 0x3e, 0xaa, 0x32, 0x31, 0x06,
0xc8, 0x1b, 0x4a, 0x48, 0xb5, 0x17, 0x85, 0xbf, 0x72, 0xec, 0xf5, 0x29,
0x8a, 0xd8, 0xeb, 0x99, 0x8b, 0x74, 0x84, 0x57, 0x8c, 0xe1, 0x85, 0x94,
0xa0, 0xbc, 0x7a, 0x14, 0xf0, 0xf4, 0x8b, 0x25, 0x37, 0x43, 0xa1, 0x34,
0x09, 0x71, 0xca, 0x5c, 0x9f, 0x08, 0x38, 0xd9, 0x9c, 0x0c, 0x0e, 0xcb,
0xe4, 0xad, 0x4b, 0x2a, 0x89, 0x67, 0xf8, 0x29, 0x6c, 0x69, 0x0e, 0x5d,
0xca, 0xfa, 0xa6, 0x6b, 0x0e, 0xb5, 0x94, 0x17, 0x71, 0xf0, 0xc9, 0xcd,
0x02, 0x1d, 0xa5, 0xd5, 0xc6, 0xa7, 0xbc, 0x5f, 0x6e, 0x67, 0x43, 0x68,
0xce, 0xac, 0x54, 0x81, 0x2a, 0x25, 0x22, 0x52, 0x35, 0xad, 0x7b, 0xd5,
0x06, 0x8c, 0x00, 0xfb, 0xca, 0xc4, 0x0a, 0x49, 0x1e, 0xc8, 0xeb, 0x77,
0xc1, 0x63, 0x23, 0x96, 0xbd, 0x35, 0xfa, 0x13, 0xae, 0xbf, 0x1d, 0x1e,
0x69, 0x8d, 0xb3, 0xe3, 0x07, 0xde, 0x4e, 0xd0, 0x12, 0xa9, 0xc3, 0x36,
0x30, 0x46, 0xef, 0x92, 0x76, 0x17, 0x8f, 0x10, 0xe7, 0xba, 0x99, 0x4b,
0xdf, 0xad, 0xb8, 0x11, 0x80, 0xdf, 0xe7, 0xfd, 0x80, 0x64, 0xf7, 0x2a,
0xac, 0x60, 0x2a, 0x54, 0x8f, 0x4f, 0xaf, 0xaf, 0x60, 0xf9, 0x67, 0x20,
0x80, 0x53, 0x5c, 0xb6, 0x81, 0xa6, 0x2a, 0x74, 0x2d, 0xc5, 0x74, 0x2a,
0x95, 0x26, 0x13, 0x17, 0x01, 0xdd, 0x31, 0xac, 0x5a, 0x05, 0xda, 0xde,
0xba, 0xf6, 0x37, 0x13, 0x8d, 0xe4, 0xa8, 0x93, 0x46, 0x9e, 0xa9, 0x82,
0x24, 0x7e, 0xc8, 0xda, 0x63, 0x89, 0xcd, 0x33, 0xc9, 0xf7, 0xf9, 0x71,
0x35, 0xe6, 0xa5, 0x5f, 0x6b, 0x3b, 0xbb, 0x0c, 0xe0, 0xa4, 0x0b, 0xe3,
0x29, 0xc0, 0xae, 0x8e, 0xce, 0x03, 0x09, 0x73, 0x0e, 0x1e, 0x9c, 0xe9,
0x59, 0xb6, 0x8b, 0x78, 0x67, 0x32, 0x8b, 0xf1, 0x93, 0xcc, 0x72, 0x1b,
0x6f, 0xa2, 0xf1, 0x04, 0x9c, 0xfa, 0x98, 0x02, 0xca, 0xdf, 0x35, 0x3c,
0x38, 0xac, 0xa8, 0xdb, 0x90, 0xae, 0xaa, 0xf9, 0x70, 0xfb, 0xed, 0xbd,
0xa6, 0x25, 0x14, 0x58, 0x09, 0x8a, 0x36, 0xaf, 0x41, 0x09, 0x19, 0xcb,
0xd3, 0x25, 0x5d, 0x0e, 0xe6, 0x20, 0x14, 0x71, 0x24, 0x79, 0x19, 0x55,
0xaf, 0x51, 0x5b, 0xa4, 0xc0, 0x93, 0x9e, 0xdd, 0x88, 0x31, 0x13, 0x96,
0xbf, 0xca, 0x0a, 0xd7, 0xbc, 0xc4, 0x00, 0xa1, 0x10, 0x2d, 0x92, 0x79,
0xf9, 0x14, 0xdb, 0xd2, 0xba, 0x74, 0xfa, 0xa8, 0xe5, 0x40, 0x14, 0xc2,
0x56, 0x3c, 0x7f, 0x50, 0x07, 0x60, 0x86, 0x93, 0x51, 0x2e, 0xf9, 0x70,
0x61, 0x70, 0x0e, 0xa4, 0x87, 0x75, 0xcc, 0x6c, 0x72, 0xb7, 0x68, 0x23,
0xb7, 0x3d, 0x76, 0xaf, 0x96, 0x9b, 0x4a, 0xe5, 0x12, 0x28, 0x4a, 0x8f,
0x79, 0x34, 0xff, 0xec, 0x92, 0xeb, 0x6b, 0xaf, 0xc9, 0xbd, 0xc1, 0x77,
0x07, 0xd0, 0xfa, 0x55, 0x57, 0x10, 0x0c, 0xad, 0x29, 0x2a, 0x79, 0xd6,
0x09, 0x9e, 0x7d, 0x18, 0xd4, 0xd6, 0xdd, 0x72, 0x1a, 0x8f, 0x24, 0x11,
0x70, 0xd2, 0x52, 0x36, 0x0f, 0x38, 0x79, 0x38, 0x4a, 0x02, 0x4f, 0x73,
0x2a, 0xaa, 0x6a, 0xb5, 0x0c, 0x72, 0x32, 0x85, 0x21, 0x76, 0x1a, 0x8a,
0x7d, 0x51, 0x0e, 0xf1, 0xf9, 0x19, 0xfa, 0x6b, 0x9b, 0x22, 0x71, 0x8c,
0x13, 0xcc, 0xba, 0x7d, 0xee, 0xd8, 0x34, 0xf6, 0x85, 0x60, 0xe1, 0xe4,
0x59, 0x6e, 0x32, 0x60, 0xd9, 0xfa, 0xb7, 0x56, 0x54, 0x25, 0xd1, 0x73,
0x6a, 0xf2, 0xa0, 0xc7, 0xa0, 0x67, 0x10, 0x89, 0x9c, 0x27, 0x5f, 0x7f,
0x2e, 0x5a, 0x29, 0x70, 0x7a, 0x7b, 0xaf, 0x21, 0xd0, 0xf4, 0x06, 0xb9,
0x2d, 0xf1, 0xb8, 0x32, 0xed, 0xc5, 0xc9, 0xac, 0x2f, 0x54, 0x0a, 0xf9,
0x08, 0x39, 0x39, 0x7d, 0x1d, 0xaf, 0xb4, 0x5f, 0x4d, 0x75, 0xc3, 0xe8,
0x52, 0x3a, 0x47, 0x72, 0x2c, 0xa9, 0x2d, 0xcb, 0x74, 0x06, 0xfe, 0x69,
0xd3, 0xf3, 0x1a, 0xb2, 0xd3, 0x01, 0xed, 0x6c, 0xc1, 0xca, 0x4f, 0xaf,
0x11, 0x9b, 0xa2, 0x27, 0x2a, 0x59, 0x56, 0x58, 0xdf, 0x79, 0x8b, 0xc9,
0x87, 0xe9, 0x58, 0x81, 0x48, 0xc6, 0xb6, 0x7d, 0x60, 0x54, 0x87, 0x9c,
0x61, 0xbb, 0x4b, 0xbb, 0x61, 0xac, 0x0a, 0x5a, 0x66, 0x7e, 0x70, 0x8b,
0xfd, 0x92, 0x76, 0x4a, 0xa9, 0xa5, 0xc3, 0xf4, 0xf2, 0x93, 0x48, 0xc4,
0xf3, 0x91, 0x2b, 0x60, 0x04, 0x0e, 0xb0, 0x6b, 0x60, 0x5e, 0xf0, 0xf1,
0x54, 0x41, 0x56, 0xdc, 0x25, 0x57, 0xc3, 0xb6, 0x0b, 0x5e, 0x15, 0xb5,
0x2a, 0x36, 0x4f, 0xe7, 0x1d, 0x70, 0xa8, 0xa7, 0xec, 0xd6, 0x74, 0xba,
0xa4, 0x79, 0x83, 0x7c, 0x9e, 0x1a, 0x5d, 0x32, 0xc8, 0xcb, 0x41, 0xca,
0x04, 0xec, 0x0b, 0x18, 0x54, 0xe1, 0x67, 0xbf, 0xa8, 0x7a, 0xc3, 0x0f,
0x27, 0x2a, 0xaf, 0x2a, 0x41, 0x19, 0x1f, 0xe8, 0xa2, 0xe8, 0xfa, 0xfc,
0x88, 0x41, 0x46, 0xc3, 0x1c, 0x44, 0xe5, 0xee, 0x47, 0xec, 0xfe, 0xbf,
0xb8, 0x29, 0x2e, 0xae, 0x47, 0x0a, 0x42, 0x69, 0x8a, 0x9a, 0x94, 0x97,
0x9e, 0xf5, 0xb6, 0x37, 0x1c, 0x10, 0xc2, 0x99, 0xa8, 0xe9, 0x9e, 0x0e,
0x6e, 0xb5, 0xbe, 0xba, 0x1f, 0x77, 0xa6, 0x35, 0x02, 0x1e, 0x8c, 0xe6,
0x02, 0x53, 0xe2, 0x9a, 0xdd, 0x09, 0x6e, 0x9b, 0x7a, 0x36, 0x4f, 0x38,
0x8d, 0x4c, 0xa4, 0xb4, 0xff, 0x90, 0x76, 0x0d, 0x11, 0x7d, 0xe1, 0xe9,
0x7f, 0x2a, 0x4a, 0x80, 0xe0, 0xd8, 0x3c, 0x23, 0xd2, 0xa5, 0xe5, 0x39,
0x77, 0xbf, 0x3d, 0x71, 0x0d, 0x45, 0xbb, 0x39, 0x66, 0x1a, 0x4d, 0x59,
0xb7, 0xd0, 0x0a, 0xee, 0x87, 0xee, 0x1f, 0xcf, 0x6f, 0xc2, 0x50, 0xb1,
0xa5, 0x4c, 0xee, 0x40, 0x69, 0xd7, 0x36, 0x38, 0x14, 0xcd, 0x6a, 0x9a,
0x90, 0x40, 0xad, 0x76, 0xf1, 0xa6, 0xd4, 0x3c, 0x75, 0x10, 0xba, 0xcb,
0xab, 0x22, 0x28, 0x5f, 0x0c, 0xe0, 0xee, 0xf4, 0xfd, 0x61, 0x52, 0x0a,
0x59, 0xfe, 0x61, 0xc5, 0x40, 0xf9, 0x91, 0x8e, 0x36, 0x29, 0x63, 0x6c,
0x6e, 0x45, 0xa5, 0x42, 0xe3, 0x36, 0x90, 0xe7, 0x90, 0x9f, 0x58, 0xbb,
0xf9, 0x1b, 0xee, 0x2c, 0xbb, 0x3a, 0xfd, 0x3d, 0xbe, 0x3d, 0x45, 0xf0,
0xc2, 0x18, 0xaa, 0x46, 0x10, 0x23, 0xe9, 0x63, 0xba, 0x7f, 0xc2, 0xe1,
0xf4, 0x05, 0xdd, 0x4a, 0x7c, 0xa8, 0xab, 0xa9, 0xbd, 0x6f, 0xdf, 0x48,
0x59, 0x11, 0xd4, 0xba, 0x75, 0xb6, 0x22, 0xd4, 0xd7, 0x35, 0x6f, 0x27,
0x70, 0xc7, 0x3d, 0x90, 0x06, 0x39, 0x2a, 0x16, 0xd0, 0x8b, 0xd7, 0xfb,
0x5e, 0x85, 0x2e, 0xb0, 0xd8, 0xc7, 0xdb, 0xe5, 0x24, 0x3a, 0x6e, 0xc4,
0x5e, 0xd4, 0x22, 0x25, 0x14, 0xee, 0xa5, 0x30, 0x8b, 0xd6, 0x27, 0x61,
0x33, 0x13, 0x46, 0x0b, 0x26, 0x45, 0xa6, 0xb4, 0xfa, 0x8d, 0xa3, 0xf2,
0x27, 0xd2, 0xc5, 0x04, 0xaa, 0x96, 0xa4, 0x55, 0xfa, 0x40, 0xf1, 0xfc,
0x66, 0x33, 0x9e, 0x4b, 0x39, 0x75, 0xae, 0x7f, 0x52, 0x87, 0x7b, 0x8a,
0xf9, 0x7d, 0x5f, 0x8a, 0x7e, 0xf7, 0xfe, 0xc4, 0x7f, 0xf4, 0xf6, 0x9a,
0x86, 0x78, 0x21, 0x02, 0x94, 0x9e, 0x50, 0x2d, 0xdc, 0xd6, 0xa5, 0x53,
0xf1, 0xef, 0x06, 0xe8, 0xb5, 0x46, 0x81, 0xcc, 0x91, 0x4f, 0x37, 0xee,
0x27, 0xcb, 0x91, 0xad, 0xff, 0x1d, 0xd1, 0x00, 0xa8, 0x96, 0x22, 0xaa,
0x63, 0x23, 0x2a, 0x7a, 0x75, 0x6f, 0xe9, 0x2d, 0x26, 0xde, 0x11, 0x97,
0x4b, 0x17, 0x3f, 0xde, 0x51, 0x1a, 0x22, 0xed, 0x38, 0x6f, 0x3e, 0x7a,
0xd0, 0xd6, 0x60, 0x06, 0x7e, 0x3f, 0xa4, 0x29, 0xfa, 0x18, 0x91, 0xda,
0x73, 0x38, 0xe3, 0xe3, 0xb5, 0xc0, 0x5b, 0x4e, 0xe8, 0x94, 0xea, 0x45,
0x6e, 0x5b, 0x50, 0xaa, 0x38, 0xb6, 0x6f, 0xdb, 0x90, 0x1b, 0x3b, 0x82,
0xbb, 0x0d, 0x38, 0xe3, 0xca, 0xd9, 0xf1, 0x2e, 0x27, 0x4c, 0x2c, 0x5a,
0x42, 0xdf, 0x44, 0xc8, 0x07, 0xe4, 0x95, 0xb5, 0xec, 0x91, 0x34, 0x1c,
0x9a, 0x0c, 0x50, 0x1a, 0xce, 0x67, 0xe4, 0x4b, 0x87, 0x61, 0x43, 0x95,
0x95, 0xb8, 0x8a, 0xf4, 0xc9, 0x92, 0x33, 0x33, 0xe3, 0xfe, 0x98, 0x2a,
0xae, 0x8e, 0xf2, 0x6b, 0x13, 0x7c, 0xe4, 0x44, 0x40, 0x66, 0xea, 0x0c,
0xe4, 0xdb, 0x16, 0x65, 0xa8, 0x8b, 0x37, 0x08, 0xec, 0x1e, 0xfc, 0xa6,
0xd0, 0x9b, 0x9e, 0x0a, 0xd2, 0xe3, 0xcf, 0x5d, 0xb2, 0xaf, 0x8e, 0x05,
0x7d, 0x8d, 0x84, 0xbc, 0x9f, 0xb1, 0xe6, 0x6a, 0x2e, 0x4b, 0x6d, 0x64,
0x91, 0x17, 0x9d, 0xb5, 0x35, 0x15, 0x02, 0xe9, 0x1b, 0x85, 0xc1, 0x89,
0xc2, 0x5a, 0x32, 0x3a, 0x80, 0x78, 0x5e, 0xcc, 0x50, 0x26, 0xf5, 0x11,
0x01, 0x79, 0xf3, 0xaf, 0xb6, 0x40, 0x00, 0x73, 0x8f, 0xeb, 0x5a, 0xd1,
0x26, 0x00, 0xe2, 0xa3, 0xcd, 0xfd, 0xaa, 0x15, 0x5b, 0x98, 0x2a, 0x76,
0x41, 0x07, 0xc2, 0xde, 0xb6, 0x71, 0xe7, 0xc3, 0xe9, 0x92, 0xb3, 0xd8,
0xfe, 0xaf, 0x12, 0x61, 0x86, 0x5b, 0x6e, 0x74, 0x45, 0x7b, 0x9b, 0x6f,
0x1a, 0x13, 0x84, 0xf6, 0x31, 0x5f, 0x5b, 0x6c, 0xde, 0x47, 0xb8, 0x73,
0x32, 0xc7, 0x94, 0x92, 0xa5, 0xc3, 0x65, 0xdf, 0x96, 0x6c, 0xfd, 0xb7,
0x80, 0xfb, 0x47, 0xba, 0x6e, 0x43, 0xb3, 0x7e, 0x86, 0xc9, 0x97, 0x45,
0xde, 0x3f, 0x3a, 0xf6, 0xb0, 0x9e, 0x9a, 0xcb, 0xfd, 0xf2, 0x5c, 0xba,
0x6e, 0x3f, 0xed, 0xfa, 0x74, 0x84, 0xe2, 0xb1, 0xae, 0x66, 0x57, 0x0b,
0x96, 0x6c, 0x77, 0xe4, 0x8a, 0x67, 0x97, 0xc7, 0xe0, 0x44, 0xb2, 0x83,
0x2d, 0x3c, 0x2e, 0x01, 0x19, 0x2e, 0x4c, 0x74, 0xe1, 0x35, 0x73, 0xeb,
0x85, 0x63, 0x8c, 0x3a, 0xb8, 0xbc, 0x25, 0x6a, 0x8d, 0xaf, 0xd2, 0xfb,
0xef, 0xd3, 0x12, 0x93, 0x0b, 0x39, 0xfa, 0x66, 0xbe, 0x3b, 0xfd, 0x6c,
0x0b, 0xbb, 0xb2, 0x5a, 0x78, 0xa1, 0xcf, 0x8c, 0x7d, 0x60, 0x55, 0xeb,
0x33, 0x4e, 0x8e, 0xf9, 0x19, 0x4d, 0x42, 0xd4, 0xf8, 0xd8, 0xba, 0xad,
0x0a, 0x6e, 0x62, 0xd4, 0xe1, 0x6a, 0xcc, 0xea, 0x09, 0x91, 0x8e, 0x62,
0xc9, 0x1e, 0x9e, 0x48, 0xaa, 0xde, 0xf7, 0xa2, 0x5a, 0xcb, 0x83, 0x20,
0xe8, 0xf5, 0xd1, 0xfe, 0x9d, 0x18, 0x2f, 0xd6, 0xf8, 0x97, 0x17, 0xce,
0xc2, 0x05, 0x08, 0xef, 0x61, 0x70, 0x9d, 0x95, 0x79, 0x59, 0x4c, 0x06,
0x24, 0x3d, 0x24, 0x69, 0xff, 0x46, 0xda, 0xbc, 0x71, 0x7a, 0x74, 0x93,
0x58, 0xf5, 0xc8, 0x91, 0xfb, 0x66, 0xed, 0x78, 0x8f, 0xf8, 0x28, 0xa8,
0x1d, 0xa5, 0x3a, 0x13, 0x76, 0xc2, 0xcc, 0xba, 0xb9, 0x56, 0x29, 0x74,
0xd6, 0x14, 0x75, 0x58, 0xe6, 0x2e, 0x79, 0x6e, 0x9d, 0x41, 0x94, 0x8a,
0xcf, 0xf1, 0xb1, 0xe0, 0x36, 0xe5, 0x89, 0x9a, 0x95, 0xa1, 0x11, 0xd1,
0xbe, 0x45, 0xe4, 0xb3, 0xb0, 0x62, 0x32, 0x1d, 0xba, 0xe0, 0xde, 0x57,
0x81, 0x0e, 0x01, 0x9b, 0x52, 0x3d, 0xd5, 0xde, 0x3b, 0x3a, 0xdd, 0x8f,
0xe3, 0x2e, 0xce, 0x1e, 0x89, 0x4d, 0x81, 0xf0, 0xf6, 0x20, 0x63, 0x7a,
0x4c, 0xbb, 0x66, 0xe0, 0xbe, 0x2b, 0xee, 0xd0, 0x3b, 0x60, 0x1e, 0x65,
0xd1, 0x2c, 0x7c, 0x5c, 0x6c, 0x16, 0x5b, 0x90, 0xc8, 0x05, 0x10, 0xf2,
0xde, 0x33, 0x90, 0x35, 0x69, 0x24, 0x3f, 0xc1, 0x8f, 0x1e, 0x4a, 0x60,
0xf1, 0x03, 0x65, 0x46, 0x40, 0x76, 0xe9, 0x83, 0x97, 0xda, 0x0b, 0xb8,
0x22, 0xfa, 0x55, 0x99, 0xfd, 0x18, 0x24, 0xd2, 0x66, 0xb0, 0x7b, 0x70,
0x56, 0x93, 0xad, 0x09, 0x95, 0x8e, 0x1f, 0x2f, 0xe8, 0x12, 0x55, 0xd4,
0x1f, 0xde, 0x09, 0x85, 0x05, 0xd1, 0xd5, 0x10, 0x2c, 0x8c, 0x6b, 0x53,
0x28, 0xce, 0x06, 0xc5, 0x52, 0x0f, 0xfa, 0x09, 0x09, 0x23, 0x1b, 0xe3,
0xbf, 0xb1, 0x89, 0x72, 0x26, 0x0d, 0xa6, 0xbb, 0x7d, 0x9e, 0xdc, 0xf8,
0xf5, 0x0b, 0x8c, 0xe0, 0xbc, 0x97, 0x3b, 0x72, 0xdd, 0xf5, 0x9d, 0xc5,
0xb6, 0x37, 0x2c, 0x76, 0x5b, 0x58, 0x67, 0xdb, 0xed, 0x3b, 0x6e, 0xe5,
0xe5, 0x6d, 0x6f, 0x0d, 0x7e, 0xff, 0xa9, 0x57, 0x4a, 0x84, 0x85, 0x82,
0xac, 0x00, 0x50, 0xa3, 0x4f, 0x87, 0xfe, 0x2a, 0x40, 0x52, 0x54, 0x81,
0x69, 0x42, 0x0b, 0x0c, 0xd7, 0x18, 0x98, 0x01, 0x8c, 0x5a, 0xa2, 0xf4,
0xe8, 0x61, 0xd1, 0x38, 0xfd, 0x0f, 0x63, 0x75, 0xd3, 0x4b, 0x1d, 0xdc,
0xdf, 0xb2, 0xeb, 0x94, 0x97, 0x5c, 0x2a, 0xb4, 0x12, 0x5c, 0x49, 0x2b,
0xfc, 0xd0, 0x8d, 0xfb, 0xe7, 0xb3, 0xcb, 0x0f, 0x3c, 0x2e, 0x04, 0x36,
0xa8, 0x03, 0xc9, 0xd7, 0x11, 0x2d, 0x2a, 0x93, 0xff, 0xda, 0x26, 0xb0,
0x54, 0x7e, 0xaf, 0x30, 0x7d, 0xce, 0x46, 0x8a, 0x3d, 0x7c, 0xa4, 0x7a,
0x2c, 0xfa, 0xba, 0xa1, 0xc9, 0x41, 0xd3, 0xb8, 0x84, 0x03, 0x78, 0xdd,
0xe9, 0x57, 0x19, 0x62, 0x62, 0xff, 0x5b, 0x3b, 0x48, 0x62, 0x0e, 0xee,
0x19, 0xb0, 0x32, 0x6e, 0x6a, 0x07, 0xd8, 0x4e, 0x25, 0x76, 0xa7, 0xe3,
0x98, 0xa1, 0x6f, 0xb6, 0x99, 0x32, 0x67, 0x7d, 0x46, 0x42, 0x4a, 0x82,
0xd1, 0x29, 0x1b, 0x87, 0xeb, 0x4b, 0x9e, 0xdf, 0x69, 0x75, 0xbd, 0x4f,
0xd3, 0xde, 0xc9, 0x83, 0xe6, 0xd6, 0xea, 0x03, 0x81, 0x12, 0xf3, 0x5d,
0x99, 0xf1, 0xb1, 0xd9, 0x3e, 0xbe, 0xf3, 0xa8, 0xdc, 0xb6, 0xf8, 0x4b,
0x9e, 0x26, 0x3f, 0xf0, 0x7c, 0xb3, 0xf4, 0xca, 0x00, 0x6c, 0x6c, 0xe5,
0x43, 0xa1, 0xfd, 0x3a, 0xf8, 0x8e, 0xe3, 0x9f, 0x88, 0xc5, 0x44, 0xfd,
0x24, 0x69, 0x76, 0xd5, 0xcb, 0xdc, 0x9d, 0x12, 0xf3, 0x13, 0x7e, 0xe7,
0xc3, 0xa8, 0x6a, 0xb2, 0xe0, 0xb3, 0x1d, 0xab, 0x3b, 0xc9, 0x77, 0x3d,
0x0f, 0xc3, 0xbe, 0x4b, 0x8b, 0x28, 0xbd, 0x7c, 0xe6, 0xb2, 0x06, 0x1f,
0xf9, 0x8f, 0x16, 0x62, 0xbf, 0xc7, 0x55, 0x73, 0xd4, 0xf1, 0x5a, 0x95,
0x80, 0xa3, 0x4e, 0xaa, 0x60, 0x17, 0x3c, 0xc9, 0x5e, 0xd4, 0x0c, 0x56,
0x7a, 0x77, 0x8e, 0x7f, 0x67, 0x08, 0x2f, 0xd9, 0x21, 0x19, 0xfd, 0x86,
0x8c, 0x23, 0x8d, 0xf6, 0x92, 0x1f, 0x36, 0x2c, 0x7c, 0x83, 0xbd, 0x2f,
0x6c, 0x63, 0x7c, 0xb7, 0x93, 0x74, 0x1b, 0xc2, 0x95, 0x34, 0x26, 0x1e,
0x07, 0x87, 0x3a, 0xb6, 0xe2, 0x39, 0x71, 0x9b, 0x20, 0xcd, 0x63, 0xf0,
0xbf, 0x48, 0xb5, 0x0e, 0x49, 0x86, 0x50, 0x80, 0xbd, 0xd6, 0x0e, 0xab,
0xd5, 0x69, 0x1b, 0xa4, 0xb3, 0x63, 0x3c, 0x8f, 0xcb, 0x42, 0xdb, 0xd7,
0x1a, 0xf4, 0xdf, 0x9e, 0x25, 0xfc, 0xd4, 0x00, 0xcb, 0xec, 0x57, 0x69,
0x30, 0x15, 0x4d, 0x7a, 0x69, 0x28, 0x2f, 0x2b, 0x34, 0x26, 0xd1, 0xe7,
0x01, 0x42, 0x5e, 0x02, 0xe2, 0x75, 0xe8, 0x52, 0x8a, 0xb4, 0x71, 0xfa,
0xc3, 0x3d, 0xe6, 0xac, 0xeb, 0xf3, 0x93, 0xe0, 0x37, 0xcd, 0x66, 0x92,
0x66, 0x2c, 0xfe, 0x4b, 0xd6, 0x3c, 0xf1, 0x57, 0xe5, 0xcf, 0xf5, 0xd0,
0xdb, 0x0e, 0x1f, 0x82, 0x65, 0x3b, 0xab, 0x69, 0x42, 0x53, 0x7d, 0xa4,
0x7c, 0xb7, 0x86, 0xeb, 0x23, 0x45, 0xa8, 0x4a, 0x73, 0xfc, 0x38, 0xc6,
0xe5, 0x2c, 0xab, 0x80, 0xfb, 0x23, 0xb2, 0x0c, 0x53, 0x28, 0x21, 0x37,
0x54, 0x9c, 0x72, 0x51, 0x0f, 0x44, 0x50, 0xd3, 0xe1, 0xd5, 0xb2, 0x27,
0x83, 0xb6, 0xe9, 0x4d, 0x64, 0x5c, 0x17, 0x0f, 0xe0, 0x13, 0xe4, 0x26,
0x6b, 0xd0, 0xd8, 0x25, 0xe3, 0x69, 0x6a, 0x95, 0x3f, 0x4a, 0x4e, 0xa0,
0x58, 0xbc, 0x28, 0x47, 0x8b, 0x68, 0xe4, 0x41, 0x90, 0x46, 0x1b, 0x84,
0xa0, 0x7b, 0x46, 0x46, 0x03, 0xee, 0x21, 0x0d, 0x34, 0xed, 0xff, 0x15,
0x57, 0x06, 0xdf, 0x71, 0x09, 0xb2, 0x66, 0x0d, 0x6e, 0xcc, 0xa5, 0x0c,
0xaf, 0x3f, 0x24, 0x8f, 0xd1, 0xc8, 0x44, 0x86, 0xaf, 0xbf, 0xeb, 0x2f,
0xb9, 0xee, 0xa7, 0xcf, 0xe4, 0xe8, 0xec, 0x47, 0x09, 0xd8, 0x95, 0x9e,
0x3c, 0xda, 0x92, 0x41, 0x61, 0xf5, 0xc3, 0xec, 0x00, 0xe4, 0xa3, 0x0d,
0x4a, 0xb3, 0xf6, 0x82, 0x05, 0x38, 0x70, 0x6a, 0xd1, 0x28, 0x2c, 0xb3,
0xc6, 0xbb, 0x38, 0xb3, 0x06, 0x7f, 0xd6, 0x4c, 0xe7, 0xfb, 0xef, 0x0d,
0x52, 0x66, 0xbe, 0xd8, 0xa6, 0x6f, 0xe8, 0xd9, 0x42, 0x4f, 0xad, 0xe8,
0xe8, 0x6c, 0xf9, 0xe9, 0x42, 0xd9, 0x66, 0x6e, 0xec, 0xfe, 0xf5, 0x91,
0xbf, 0x0a, 0x98, 0xd8, 0x7b, 0x23, 0x12, 0xa6, 0x04, 0xa8, 0xb3, 0x61,
0x13, 0x65, 0xc0, 0xe2, 0x82, 0xb9, 0xb2, 0x38, 0x07, 0x06, 0xca, 0x64,
0x6c, 0x23, 0x93, 0x60, 0x1d, 0x4d, 0x38, 0x5e, 0x8e, 0x90, 0x16, 0x4a,
0xfd, 0xb3, 0xcd, 0x84, 0x9c, 0xa5, 0xfa, 0x73, 0x2d, 0xcb, 0x87, 0x31,
0x3d, 0xf8, 0xfc, 0xeb, 0xa7, 0x56, 0x2f, 0x5b, 0x95, 0x9a, 0xc6, 0x82,
0x29, 0x86, 0x47, 0xe2, 0xc2, 0x84, 0x01, 0xaf, 0xc8, 0x0b, 0x2d, 0xfb,
0x34, 0xba, 0x5d, 0x9d, 0xd1, 0x85, 0xd5, 0x1e, 0x63, 0xcb, 0x3c, 0xa8,
0xfa, 0x79, 0xef, 0x12, 0xa6, 0xb5, 0xdb, 0xc5, 0x1d, 0x6a, 0xa7, 0x54,
0x58, 0x0c, 0xbe, 0x61, 0xe5, 0x96, 0x7f, 0x4a, 0x3b, 0x59, 0x32, 0x2d,
0x06, 0x44, 0x83, 0x5c, 0xad, 0xe9, 0xfe, 0x7c, 0xd7, 0x5b, 0x34, 0xa1,
0xa3, 0xad, 0x9a, 0xbf, 0xd5, 0x30, 0xf0, 0x22, 0xfc, 0x94, 0x7f, 0xd4,
0xa4, 0xca, 0x88, 0x31, 0xe7, 0xf2, 0x89, 0x2d, 0xda, 0xe6, 0x91, 0xa6,
0x27, 0x22, 0x74, 0x9f, 0xc6, 0x72, 0x4f, 0xf6, 0xa9, 0xfe, 0x7a, 0xf0,
0xa8, 0x6b, 0x6c, 0x9f, 0xe9, 0x2a, 0x9b, 0x23, 0x9e, 0xb8, 0x2b, 0x29,
0x65, 0xa7, 0x5d, 0xbd, 0x10, 0xe4, 0x56, 0x02, 0x94, 0xdd, 0xd1, 0xab,
0x9b, 0x82, 0x2d, 0x8d, 0xf6, 0xd3, 0x65, 0x63, 0x4a, 0xc4, 0x86, 0x61,
0x37, 0x9f, 0xdb, 0x4b, 0x34, 0x20, 0x0a, 0xca, 0x45, 0x6c, 0x06, 0xc4,
0x9c, 0x74, 0x4d, 0x83, 0x6a, 0x8d, 0xad, 0xc6, 0x61, 0x3a, 0x8d, 0xde,
0x6c, 0xf9, 0x8e, 0x33, 0xa2, 0xee, 0x99, 0xc7, 0xe4, 0x52, 0xb2, 0x44,
0x6f, 0x2f, 0x0f, 0x41, 0xa9, 0x1a, 0xd3, 0x96, 0x42, 0xc6, 0x49, 0x12,
0x6a, 0xf0, 0x29, 0xa9, 0x0c, 0x9c, 0x50, 0x5d, 0x1d, 0xd1, 0x42, 0x7e,
0x6f, 0x36, 0x48, 0x0f, 0x58, 0x14, 0x94, 0xc0, 0x10, 0x1e, 0xe0, 0xb2,
0xdd, 0xba, 0x57, 0x91, 0x4d, 0xd5, 0xdc, 0xa6, 0x4c, 0x68, 0x00, 0x6c,
0xb3, 0x5d, 0x32, 0x13, 0xbe, 0xa8, 0xc3, 0xfb, 0xd4, 0x19, 0x40, 0xf5,
0x6f, 0x63, 0xa1, 0x07, 0xbf, 0xa2, 0x8b, 0xfc, 0xfe, 0xf8, 0xa1, 0x33,
0x70, 0x07, 0x6d, 0xc5, 0x72, 0xa0, 0x39, 0xd6, 0xd7, 0x76, 0x6c, 0xfa,
0x1f, 0x04, 0xd6, 0x23, 0xbf, 0x66, 0x78, 0x92, 0x00, 0x11, 0x8a, 0x75,
0x67, 0x44, 0xa6, 0x7c, 0xd0, 0x14, 0xe6, 0xd0, 0x31, 0x6d, 0xdb, 0xc5,
0xb1, 0xa7, 0x99, 0xc3, 0xaf, 0x18, 0x7a, 0x26, 0x46, 0xad, 0x6d, 0x0c,
0xb6, 0xb5, 0xad, 0xc1, 0xcf, 0x60, 0x99, 0xf5, 0x9f, 0x88, 0xaf, 0x0e,
0x37, 0x15, 0xf9, 0x2b, 0x1a, 0x5f, 0xfb, 0xc9, 0xf8, 0xd4, 0xf0, 0x97,
0xd2, 0x91, 0xf4, 0x94, 0xa2, 0xd3, 0x3b, 0x8b, 0x0c, 0x22, 0xa0, 0xac,
0xb3, 0xb5, 0xdf, 0xf2, 0x27, 0x38, 0x47, 0x53, 0x5b, 0x6e, 0x8f, 0x98,
0x9e, 0xad, 0xb6, 0xf5, 0x0e, 0x17, 0x20, 0x35, 0x54, 0x6b, 0x73, 0xa6,
0x64, 0x65, 0xac, 0xb8, 0xc1, 0xd3, 0xf7, 0x07, 0x82, 0x93, 0x9d, 0xcb,
0xcc, 0xe9, 0x0c, 0x51, 0x52, 0x85, 0x8b, 0x95, 0xa6, 0xb1, 0xce, 0xdc,
0xfa, 0x00, 0x00, 0x08, 0x14, 0x1c, 0x23, 0x2a, 0x35,
};
static const u8 fips_test_mldsa65_public_key[] __initconst __maybe_unused = {
0x9f, 0x55, 0x1e, 0x7f, 0x9c, 0x08, 0xb2, 0x83, 0xfd, 0x5b, 0xa2, 0xac,
0x4f, 0x26, 0xc2, 0xf5, 0x06, 0x05, 0x96, 0x08, 0x24, 0xad, 0xec, 0xe4,
0x99, 0xcc, 0x6c, 0xbd, 0x55, 0x37, 0x15, 0x94, 0xab, 0x31, 0x9e, 0x56,
0xe5, 0xe4, 0x55, 0xec, 0x4d, 0x49, 0x5b, 0x5a, 0x7a, 0xe8, 0xc3, 0x4a,
0x08, 0x44, 0x4a, 0xc2, 0x2d, 0xe4, 0x61, 0x33, 0x90, 0x20, 0x71, 0x45,
0xa5, 0x45, 0xd0, 0x83, 0x2b, 0x32, 0x6c, 0xa7, 0x9e, 0x76, 0xcd, 0xfb,
0x58, 0x15, 0x9e, 0x74, 0x0d, 0x67, 0x57, 0xb1, 0x06, 0x5b, 0x5d, 0xd5,
0x1c, 0xbb, 0x95, 0x40, 0x1c, 0x71, 0x31, 0x03, 0xef, 0xff, 0x04, 0x6b,
0xdd, 0xa2, 0xf0, 0x32, 0x00, 0x72, 0xbc, 0x87, 0xb6, 0x2c, 0x1f, 0x90,
0x7f, 0x92, 0xa0, 0xb2, 0x04, 0xdd, 0xa9, 0xaf, 0x7f, 0x01, 0x28, 0x4c,
0xb2, 0x57, 0x2d, 0x56, 0x93, 0xd0, 0xc7, 0x54, 0x02, 0x90, 0x57, 0x70,
0x23, 0x57, 0xe8, 0xe7, 0x33, 0x32, 0x98, 0xfc, 0x9b, 0x8e, 0x6e, 0x7b,
0xaa, 0x5d, 0xb5, 0x4e, 0xe0, 0x5d, 0x97, 0xa3, 0xea, 0x43, 0x7e, 0xb3,
0xa4, 0x8c, 0xcf, 0xdc, 0xc0, 0x51, 0xa7, 0x99, 0x45, 0x3d, 0x3c, 0xa0,
0xba, 0xc5, 0xff, 0xe1, 0x89, 0xb3, 0x7d, 0xc3, 0xdc, 0xe2, 0x23, 0x81,
0xff, 0xa9, 0xc7, 0x93, 0xc6, 0x67, 0xad, 0x94, 0xcf, 0xeb, 0x91, 0x78,
0x15, 0x25, 0xf7, 0xf5, 0x06, 0x08, 0x2f, 0x0c, 0xee, 0x0b, 0x6a, 0x06,
0x59, 0xe0, 0x1f, 0x2e, 0x5a, 0x12, 0x06, 0xf5, 0xf4, 0x8e, 0x75, 0x57,
0xa9, 0x33, 0x23, 0x0f, 0xc2, 0x6f, 0x02, 0xf8, 0x68, 0x0f, 0x62, 0x02,
0x81, 0xfe, 0x03, 0x7c, 0xaf, 0xd7, 0x42, 0x5b, 0xcc, 0xe7, 0x2b, 0xea,
0x49, 0xab, 0x03, 0x6d, 0x0a, 0x02, 0xae, 0x47, 0x79, 0xce, 0xfd, 0x18,
0x76, 0x07, 0x9e, 0xa6, 0xbf, 0x7e, 0x8d, 0x73, 0xf9, 0x44, 0xeb, 0x8c,
0xc5, 0x59, 0xb7, 0x19, 0xf6, 0x73, 0x53, 0x42, 0x2a, 0x55, 0x7b, 0xb4,
0x56, 0x49, 0x08, 0x9e, 0x9a, 0x65, 0x60, 0x70, 0x1d, 0xbd, 0xc6, 0x85,
0x29, 0xde, 0xfe, 0x44, 0xae, 0xdf, 0x25, 0xfd, 0x5b, 0x74, 0x6c, 0x96,
0xe6, 0x81, 0x37, 0x80, 0xe0, 0x9e, 0xf3, 0x75, 0x63, 0xb4, 0xc9, 0x2f,
0x71, 0xe6, 0xeb, 0xdf, 0xaf, 0x7e, 0xff, 0x9e, 0xe0, 0xbf, 0xca, 0xca,
0x11, 0xed, 0xc6, 0x04, 0xd8, 0x49, 0x13, 0x2c, 0x63, 0xf1, 0xb3, 0x17,
0x74, 0xd9, 0x50, 0x3f, 0xb9, 0x29, 0x0e, 0x48, 0xa7, 0xf0, 0xdc, 0x78,
0x18, 0x0e, 0x9f, 0xb7, 0xde, 0x36, 0x79, 0x67, 0xa4, 0x23, 0x08, 0xe7,
0x62, 0xe8, 0xa4, 0xe5, 0xcf, 0xff, 0x35, 0x55, 0x36, 0x2e, 0x3a, 0xe4,
0x45, 0x6a, 0x80, 0xf2, 0xca, 0xe7, 0x40, 0x79, 0x14, 0xc4, 0x62, 0x38,
0xbb, 0xd0, 0x4e, 0x6c, 0xb5, 0x85, 0x42, 0x3f, 0x35, 0xf7, 0xd7, 0x54,
0xb8, 0x2b, 0x8b, 0xd5, 0x6f, 0x16, 0x61, 0x27, 0x23, 0xac, 0xdb, 0xea,
0x9b, 0x3b, 0x99, 0xcd, 0x79, 0xe6, 0x12, 0x09, 0x99, 0x09, 0xa4, 0xe1,
0x88, 0x25, 0x00, 0x9e, 0x60, 0x16, 0x63, 0xd7, 0x42, 0x9b, 0xcc, 0x36,
0x9a, 0x8d, 0xa3, 0x75, 0x36, 0xa1, 0xa8, 0xfc, 0xa2, 0xfe, 0x29, 0x26,
0x4c, 0x93, 0x21, 0x44, 0x6b, 0x1c, 0xba, 0xbd, 0xef, 0xff, 0x6d, 0x1f,
0x2b, 0x6c, 0x66, 0x81, 0x9a, 0x3a, 0x1d, 0x0b, 0xd7, 0x24, 0xd4, 0xb8,
0x93, 0xb5, 0x22, 0xf9, 0xd2, 0xf4, 0xa5, 0x05, 0x78, 0x38, 0xae, 0x58,
0xf6, 0x50, 0x8f, 0x47, 0x1d, 0xf3, 0xfb, 0x0d, 0x04, 0x14, 0xd1, 0xd6,
0xd8, 0x2e, 0xf2, 0xbd, 0xf5, 0x71, 0x86, 0x4c, 0xdd, 0x61, 0x24, 0x18,
0x5b, 0x54, 0xf5, 0xcd, 0x99, 0x89, 0x01, 0x8e, 0xd1, 0x19, 0x52, 0xbc,
0x45, 0xed, 0x0e, 0xec, 0x72, 0x2f, 0x5a, 0xe7, 0xdf, 0x36, 0x1c, 0x57,
0x9f, 0xb2, 0x8b, 0xf2, 0x78, 0x1b, 0x3e, 0xc5, 0x48, 0x1f, 0x27, 0x04,
0x76, 0x10, 0x44, 0xee, 0x5c, 0x68, 0x8f, 0xca, 0xd7, 0x31, 0xfc, 0x5c,
0x40, 0x03, 0x2e, 0xbd, 0x1d, 0x59, 0x13, 0x57, 0xbc, 0x33, 0xc6, 0xa1,
0xa3, 0xe5, 0x55, 0x79, 0x9b, 0x7e, 0x49, 0xbb, 0x23, 0x96, 0xc3, 0x1c,
0xfe, 0x66, 0xeb, 0x5b, 0x5f, 0xe5, 0x03, 0xc9, 0xa4, 0xac, 0x4d, 0xc4,
0x50, 0xbb, 0xd3, 0xc1, 0x91, 0x48, 0xe0, 0x93, 0x92, 0x2a, 0xdb, 0x41,
0x37, 0x98, 0xbc, 0xa2, 0x7a, 0x09, 0x92, 0x0b, 0x1c, 0xe6, 0x4b, 0x1e,
0x8e, 0x78, 0x81, 0x74, 0x7d, 0x6b, 0x71, 0xd5, 0xe7, 0x0e, 0x7b, 0xc2,
0x74, 0x5d, 0x89, 0xf1, 0xfa, 0x59, 0xaa, 0xf7, 0x86, 0x66, 0x7e, 0xc2,
0x9c, 0xf4, 0xd5, 0x8d, 0xc0, 0xb7, 0xb7, 0xa2, 0xd5, 0xcd, 0x51, 0xc3,
0x7d, 0xa9, 0x5e, 0x46, 0xba, 0x06, 0xa3, 0x4d, 0x60, 0xd6, 0x68, 0xc6,
0xf9, 0x63, 0x88, 0x17, 0x5c, 0x20, 0xe1, 0xc4, 0x0f, 0x3f, 0xc1, 0xa9,
0xa7, 0x3e, 0x39, 0xef, 0x2f, 0xaf, 0xc4, 0x69, 0x29, 0xe3, 0xd4, 0x8d,
0xe0, 0x0e, 0x88, 0xc2, 0x93, 0x43, 0xfb, 0x28, 0xcf, 0x5d, 0x85, 0x50,
0xf7, 0xeb, 0x42, 0xf5, 0x87, 0xde, 0xa5, 0x65, 0xef, 0x43, 0x0c, 0x57,
0x76, 0x09, 0xf4, 0x5f, 0xde, 0x81, 0x0a, 0xd9, 0x59, 0x41, 0xa4, 0x6a,
0xb7, 0x05, 0xc7, 0xa5, 0xfe, 0x49, 0xd5, 0x9b, 0x57, 0x13, 0x14, 0x66,
0xe2, 0xb9, 0xcc, 0x09, 0x35, 0xd4, 0xb0, 0xe0, 0xd1, 0x0d, 0x7e, 0x50,
0x48, 0x45, 0x21, 0x00, 0x67, 0xb2, 0xad, 0xa7, 0x46, 0xe2, 0x6f, 0x70,
0xe5, 0x3c, 0x88, 0x04, 0xaa, 0x21, 0xde, 0x03, 0xb6, 0x6f, 0xfe, 0x43,
0x51, 0xdc, 0x2e, 0x5c, 0x6c, 0x77, 0x8f, 0x8e, 0x9d, 0x1a, 0x5b, 0x35,
0xc5, 0xe4, 0x48, 0x82, 0x17, 0x4b, 0xf0, 0xea, 0xc9, 0x0e, 0xd2, 0x8f,
0xcd, 0xd5, 0x01, 0xbd, 0x7f, 0x0f, 0xf5, 0xae, 0x92, 0x28, 0x1e, 0x2c,
0xf4, 0xe9, 0x03, 0xf7, 0x0a, 0xeb, 0x84, 0x18, 0xa1, 0x37, 0x38, 0x8a,
0x11, 0xa2, 0x5d, 0x8c, 0xf6, 0xe4, 0x3f, 0x5b, 0x87, 0x07, 0x6b, 0xb4,
0x07, 0xe0, 0x8f, 0x30, 0xc4, 0xfa, 0x27, 0xae, 0xfc, 0x02, 0xd1, 0x21,
0x5c, 0xbc, 0x0b, 0x93, 0x6e, 0x7e, 0xf9, 0x6b, 0x80, 0x7a, 0x25, 0x84,
0x20, 0xf1, 0x6a, 0xfa, 0x75, 0xed, 0x57, 0x61, 0x62, 0xa7, 0xf6, 0x5b,
0xe1, 0xb0, 0x38, 0xc8, 0xe9, 0x6d, 0x3f, 0xef, 0x1e, 0x99, 0x0b, 0xb7,
0xc8, 0x9f, 0x76, 0x5c, 0x04, 0x1f, 0x02, 0x92, 0x00, 0xa7, 0x38, 0x3d,
0x00, 0x3b, 0xa7, 0xbc, 0x39, 0x6e, 0xab, 0xf5, 0x10, 0xa8, 0xba, 0xd6,
0x28, 0x6b, 0x0e, 0x00, 0x48, 0xf9, 0x3b, 0x5c, 0xde, 0x59, 0x93, 0x46,
0xd6, 0x61, 0x52, 0x81, 0x71, 0x0f, 0x0e, 0x61, 0xac, 0xc6, 0x7f, 0x15,
0x93, 0xa7, 0xc1, 0x16, 0xb5, 0xef, 0x85, 0xd1, 0xa7, 0x61, 0xc2, 0x85,
0x1d, 0x61, 0xc6, 0xae, 0xb3, 0x9e, 0x8d, 0x23, 0xa3, 0xc8, 0xd5, 0xf2,
0xc7, 0x1b, 0x7e, 0xef, 0xd2, 0xdf, 0x25, 0xaf, 0x4e, 0x81, 0x15, 0x59,
0xe5, 0x36, 0xb1, 0xf1, 0xd5, 0xda, 0x58, 0xd8, 0xd9, 0x0d, 0x6d, 0xc9,
0x25, 0xb5, 0xe8, 0x1d, 0x3b, 0xca, 0x2d, 0xab, 0xf2, 0xe2, 0xe9, 0x55,
0xd7, 0xf4, 0xc7, 0xd0, 0x57, 0x7a, 0x86, 0x15, 0x0a, 0x5a, 0x8b, 0xd7,
0x3f, 0x66, 0x0f, 0x80, 0xb4, 0xe0, 0x5c, 0x33, 0xed, 0xaf, 0x1b, 0x3b,
0x6d, 0x1c, 0xd9, 0x8c, 0xb5, 0x96, 0xa3, 0xfb, 0xcf, 0xcc, 0x97, 0x1c,
0xae, 0x06, 0x19, 0x41, 0x61, 0xf8, 0x97, 0x6b, 0x82, 0x5e, 0x1c, 0xbf,
0x6f, 0x43, 0x3d, 0xe5, 0x00, 0xf5, 0xfe, 0x66, 0x48, 0x26, 0x31, 0xa1,
0x72, 0x67, 0x6e, 0xd4, 0x5b, 0x6f, 0x66, 0xde, 0x70, 0x8b, 0x2b, 0xc3,
0xa2, 0x30, 0xe9, 0x55, 0xc8, 0xff, 0xf8, 0xd0, 0xdd, 0xa9, 0x21, 0x85,
0x6e, 0x6c, 0x82, 0x66, 0xcc, 0x52, 0xf0, 0x9e, 0x1e, 0xb5, 0x3a, 0xff,
0x4c, 0xf3, 0xae, 0x02, 0xc3, 0x4b, 0x76, 0x25, 0xbd, 0xb0, 0x21, 0x54,
0x61, 0xda, 0x16, 0xd3, 0x23, 0x86, 0x41, 0xa1, 0x4c, 0x59, 0x15, 0x95,
0x65, 0x85, 0xb6, 0x8e, 0xa6, 0x37, 0xc0, 0xa2, 0x71, 0x1d, 0x67, 0x44,
0x7b, 0xe5, 0x4c, 0x4f, 0xb6, 0x2c, 0x46, 0xf7, 0x29, 0xa5, 0xf2, 0xd3,
0x51, 0x19, 0x91, 0x4d, 0xa7, 0xb5, 0x05, 0xb9, 0x6e, 0x61, 0x6e, 0xf8,
0xc0, 0x01, 0xe5, 0x41, 0x0a, 0x89, 0x64, 0x77, 0xf2, 0xc8, 0x63, 0x2d,
0x9d, 0x27, 0x7f, 0x47, 0x30, 0x39, 0xdf, 0xb6, 0x6e, 0x4f, 0x00, 0x3f,
0x15, 0xc6, 0xaf, 0x62, 0xdf, 0x3f, 0x47, 0xe8, 0x42, 0x90, 0x77, 0x23,
0x7a, 0xaa, 0x99, 0x53, 0x03, 0x63, 0x60, 0x59, 0x07, 0x52, 0x3c, 0xb5,
0x67, 0x59, 0xfe, 0x08, 0xe6, 0x43, 0x0f, 0x3b, 0x08, 0x7c, 0xc7, 0x07,
0x3c, 0xfa, 0x65, 0xea, 0x69, 0x51, 0x41, 0x31, 0xb3, 0x05, 0x69, 0xba,
0x2c, 0xbf, 0x89, 0x25, 0x9e, 0xfe, 0x07, 0x13, 0x78, 0x0e, 0x16, 0x54,
0xdf, 0x23, 0xdf, 0x10, 0x69, 0x79, 0xd0, 0x33, 0xd7, 0x21, 0x8b, 0xc8,
0x2a, 0xd0, 0x74, 0x0a, 0xfa, 0xb1, 0x6f, 0xa3, 0xcb, 0x1d, 0xca, 0x4f,
0x00, 0x46, 0x6c, 0x42, 0x09, 0xe0, 0x30, 0x89, 0x08, 0x33, 0x9b, 0x7b,
0x7b, 0x0f, 0x69, 0x5c, 0x0d, 0x34, 0x91, 0xfc, 0xfe, 0x22, 0x82, 0x02,
0xcd, 0xfa, 0x97, 0xe8, 0x28, 0x1d, 0xbc, 0x13, 0x0b, 0xfd, 0x47, 0xa1,
0x7e, 0xa2, 0x86, 0x4d, 0x6f, 0x12, 0x51, 0x35, 0x7d, 0x76, 0x8a, 0x58,
0x05, 0xb6, 0x39, 0xa1, 0x2f, 0xd7, 0xda, 0xaf, 0x00, 0xa0, 0x1a, 0x94,
0xd8, 0x23, 0x34, 0x99, 0x5c, 0xaf, 0xcc, 0x15, 0x4b, 0x56, 0xb2, 0xd2,
0x81, 0x07, 0xd3, 0xf3, 0x47, 0xa2, 0x45, 0x93, 0xcb, 0xae, 0xa7, 0x6b,
0x3f, 0xf9, 0xea, 0xfc, 0x0e, 0x64, 0xf2, 0x93, 0x7f, 0x24, 0x22, 0x73,
0x86, 0xc7, 0x2d, 0x75, 0x9b, 0x41, 0x8b, 0xfb, 0x3b, 0x26, 0x2a, 0xe5,
0x0b, 0xd4, 0x00, 0xe3, 0x2c, 0x69, 0x49, 0x62, 0x6c, 0x13, 0x58, 0x6e,
0xac, 0x43, 0xe5, 0x2b, 0x3b, 0x88, 0xdc, 0xd4, 0x41, 0xe8, 0xee, 0x4e,
0xc3, 0x28, 0x91, 0x17, 0x9a, 0x5a, 0xdb, 0x80, 0x8b, 0x4d, 0x64, 0xcc,
0xbe, 0x66, 0xa4, 0x62, 0xfb, 0x13, 0x44, 0x10, 0xd9, 0xe4, 0xd5, 0xa5,
0xae, 0x9e, 0x42, 0x50, 0xfc, 0x78, 0xad, 0xfa, 0xc4, 0xd0, 0x5a, 0x60,
0x9b, 0x45, 0x2b, 0x61, 0x5c, 0x57, 0xb5, 0x92, 0x28, 0xe9, 0xf5, 0x35,
0x67, 0xc1, 0x5e, 0xa8, 0x1c, 0x99, 0x36, 0x38, 0xb8, 0x5c, 0xff, 0x3d,
0xa0, 0xfc, 0xb0, 0xbc, 0x3d, 0x2c, 0xb4, 0x36, 0x17, 0xb4, 0x6d, 0xb5,
0x39, 0x45, 0xa9, 0x2a, 0x6b, 0xa2, 0x24, 0x44, 0x30, 0xab, 0x2c, 0x82,
0x36, 0xdc, 0xd6, 0x36, 0x5d, 0x0a, 0xdc, 0xee, 0x0f, 0x2b, 0x28, 0x99,
0xdc, 0x67, 0x0d, 0xea, 0x6e, 0x42, 0xb9, 0x45, 0x7f, 0xd2, 0x96, 0x1e,
0x60, 0x42, 0xeb, 0x1e, 0x5f, 0x8e, 0xa9, 0xdc, 0xd3, 0x8a, 0xd6, 0xbd,
0x4e, 0x1f, 0x42, 0x75, 0x1d, 0xe2, 0xc6, 0x11, 0xc9, 0x80, 0x1f, 0xfe,
0x99, 0x52, 0x4d, 0x7b, 0x35, 0xf7, 0xb7, 0xc3, 0xee, 0xd6, 0x94, 0xf5,
0x74, 0xa0, 0x69, 0xcd, 0x1f, 0x2b, 0xd0, 0x87, 0xf7, 0x8c, 0x69, 0xc5,
0x96, 0x70, 0x91, 0xe8, 0x3d, 0xd2, 0xcc, 0xf1, 0x4c, 0xcd, 0xe2, 0x14,
0x00, 0x10, 0x4a, 0xd9, 0x6a, 0x5d, 0x65, 0x2c, 0x4b, 0x79, 0x0c, 0xc4,
0x78, 0x5e, 0xc8, 0xc5, 0x37, 0x74, 0x6d, 0x50, 0x5c, 0x34, 0x1f, 0xe0,
0xf4, 0xe3, 0xe1, 0x86, 0x68, 0xb1, 0xea, 0x70, 0xf0, 0xae, 0xe4, 0x59,
0xa1, 0x08, 0x7e, 0x35, 0xa3, 0x16, 0xd2, 0xb0, 0xa3, 0xd4, 0xb0, 0x74,
0x8c, 0x05, 0x79, 0x73, 0xfb, 0xe6, 0x65, 0x96, 0x15, 0x07, 0xd5, 0xaf,
0x88, 0x9e, 0x6b, 0xf0, 0xbb, 0x3f, 0xe6, 0xd1, 0x6a, 0xe7, 0xc9, 0xae,
0xd9, 0xb0, 0x16, 0x1c, 0x40, 0xeb, 0xdb, 0xc1, 0xbf, 0x83, 0xdb, 0x8a,
0x4f, 0x96, 0xca, 0xd7, 0x22, 0x06, 0x87, 0x08, 0x9d, 0x65, 0x2f, 0xd9,
0x8e, 0x95, 0x6c, 0xcc, 0xbf, 0x76, 0x2a, 0xea, 0x5c, 0x8e, 0x5b, 0x17,
0x0f, 0x75, 0x7b, 0xfa, 0xf9, 0xfb, 0xaa, 0x92, 0xc7, 0x7e, 0x63, 0x63,
0x54, 0xa4, 0xff, 0xf6, 0xc0, 0xc0, 0xf5, 0x70, 0xd8, 0xe3, 0xa4, 0x79,
0x16, 0xf0, 0x6f, 0x90, 0x5e, 0xb7, 0xab, 0x6f, 0xab, 0x75, 0x3b, 0xe1,
0x4c, 0xa8, 0x0b, 0x72, 0x5f, 0x5f, 0x11, 0x22, 0x36, 0x71, 0x20, 0xd3,
0x5b, 0x5e, 0x07, 0x06, 0x76, 0x1a, 0xcc, 0x5e, 0x7c, 0x97, 0x7d, 0xb2,
0x6b, 0xf8, 0x39, 0x89, 0x37, 0xb6, 0x6d, 0xea, 0x74, 0x57, 0x28, 0xd7,
0x0e, 0x9b, 0xeb, 0x28, 0x88, 0x90, 0xfd, 0x2d, 0x16, 0x21, 0x74, 0x26,
0xc5, 0xb8, 0x44, 0xad, 0x9f, 0x97, 0xf9, 0x65, 0x36, 0xd8, 0x00, 0x59,
0x17, 0x49, 0xf9, 0xc7, 0xb3, 0x84, 0xb9, 0xe2, 0x95, 0xe0, 0xd1, 0x7f,
0x5f, 0xaa, 0xd7, 0xfd, 0x6a, 0x6a, 0x83, 0x14, 0x46, 0x1d, 0x12, 0x8d,
0x09, 0xc3, 0xa5, 0xca, 0x72, 0xa3, 0x25, 0x65, 0xb6, 0x40, 0x25, 0x04,
0x51, 0xab, 0x22, 0xeb, 0xd7, 0x69, 0xc9, 0x22, 0x9c, 0xa0, 0x19, 0x5c,
0x1a, 0xfd, 0x41, 0x8f, 0x98, 0xc5, 0x71, 0xb8, 0x6f, 0x76, 0xae, 0xfa,
0x9b, 0x03, 0xab, 0x43, 0x81, 0x3b, 0x66, 0xae, 0xf0, 0xd2, 0xb7, 0xee,
0x9a, 0xe3, 0xae, 0x45, 0xc1, 0x86, 0xb0, 0xce, 0x9e, 0x2b, 0xec, 0xb8,
0xcf, 0xca, 0x0e, 0x8c, 0x33, 0xfa, 0xa7, 0xef, 0xf7, 0xfc, 0xa1, 0x41,
0x49, 0xd3, 0x6d, 0xb5, 0x58, 0xe4, 0x0e, 0x24, 0xd2, 0x8a, 0x74, 0xc9,
0x56, 0x2e, 0x53, 0xc7, 0x7a, 0x38, 0x0f, 0x4b, 0xd9, 0xf9, 0x2f, 0xfa,
0x7d, 0xee, 0x14, 0x18, 0xce, 0x75, 0x42, 0x6c, 0x03, 0x34, 0xce, 0x80,
0xec, 0xf2, 0x05, 0xf0, 0xdf, 0xcd, 0xf8, 0xdb, 0x26, 0x7d, 0xb6, 0x3d,
0x28, 0x24, 0x7e, 0x7e, 0x39, 0x9f, 0xa6, 0xc6, 0xeb, 0x2a, 0xc8, 0x17,
0x94, 0xa9, 0x89, 0xf5, 0xdf, 0xcb, 0x77, 0xfd, 0xc9, 0x9e, 0x68, 0x98,
0x7d, 0x04, 0x50, 0x3c, 0x64, 0x1d, 0x66, 0xb0, 0x97, 0x06, 0xb6, 0x08,
0x5b, 0xe4, 0x17, 0x44, 0xd6, 0x94, 0x39, 0x6b, 0x03, 0x2c, 0xcb, 0x5a,
0x8d, 0x86, 0x08, 0x23, 0x4f, 0x95, 0xa8, 0x1a,
};
/*
 * Random message matching the ML-DSA-65 signature and public key test vectors
 * in this file.  Consumed only by the FIPS self-test in mldsa_mod_init(),
 * hence __maybe_unused for !CONFIG_CRYPTO_FIPS builds.
 */
static const u8 fips_test_mldsa65_message[] __initconst __maybe_unused = {
	0x1a, 0x84, 0x21, 0x0d, 0x35, 0x7a, 0x88, 0xc8, 0x6a, 0x11, 0xe3,
	0x15, 0x24, 0xec, 0x0d, 0x2e, 0x76, 0xb9, 0xcf, 0x2b, 0x04, 0x25,
	0x16, 0xae, 0x62, 0x42, 0xa0, 0x20, 0x68, 0x25, 0x3e, 0xb4, 0x75,
	0xa7, 0x1d, 0x64, 0xc3, 0xd1, 0x08, 0x07, 0x67, 0xb6, 0xf7, 0x76,
	0x76, 0xf6, 0xd6, 0x62, 0x66, 0x04, 0x89, 0x0c, 0x8f, 0x07, 0xac,
	0xc8, 0x51, 0x77, 0xd9, 0x47, 0x5e, 0xb5, 0x22, 0x20,
};

View File

@@ -29,7 +29,7 @@ static const struct md5_block_state md5_iv = {
#define F4(x, y, z) (y ^ (x | ~z))
#define MD5STEP(f, w, x, y, z, in, s) \
(w += f(x, y, z) + in, w = (w << s | w >> (32 - s)) + x)
(w += f(x, y, z) + in, w = rol32(w, s) + x)
static void md5_block_generic(struct md5_block_state *state,
const u8 data[MD5_BLOCK_SIZE])

682
lib/crypto/mldsa.c Normal file
View File

@@ -0,0 +1,682 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Support for verifying ML-DSA signatures
*
* Copyright 2025 Google LLC
*/
#include <crypto/mldsa.h>
#include <crypto/sha3.h>
#include <kunit/visibility.h>
#include <linux/export.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/unaligned.h>
#include "fips-mldsa.h"
#define Q 8380417 /* The prime q = 2^23 - 2^13 + 1 */
#define QINV_MOD_2_32 58728449 /* Multiplicative inverse of q mod 2^32 */
#define N 256 /* Number of components per ring element */
#define D 13 /* Number of bits dropped from the public key vector t */
#define RHO_LEN 32 /* Length of the public random seed in bytes */
#define MAX_W1_ENCODED_LEN 192 /* Max encoded length of one element of w'_1 */
/*
* The zetas array in Montgomery form, i.e. with extra factor of 2^32.
* Reference: FIPS 204 Section 7.5 "NTT and NTT^-1"
* Generated by the following Python code:
* q=8380417; [a%q - q*(a%q > q//2) for a in [1753**(int(f'{i:08b}'[::-1], 2)) << 32 for i in range(256)]]
*/
static const s32 zetas_times_2_32[N] = {
-4186625, 25847, -2608894, -518909, 237124, -777960, -876248,
466468, 1826347, 2353451, -359251, -2091905, 3119733, -2884855,
3111497, 2680103, 2725464, 1024112, -1079900, 3585928, -549488,
-1119584, 2619752, -2108549, -2118186, -3859737, -1399561, -3277672,
1757237, -19422, 4010497, 280005, 2706023, 95776, 3077325,
3530437, -1661693, -3592148, -2537516, 3915439, -3861115, -3043716,
3574422, -2867647, 3539968, -300467, 2348700, -539299, -1699267,
-1643818, 3505694, -3821735, 3507263, -2140649, -1600420, 3699596,
811944, 531354, 954230, 3881043, 3900724, -2556880, 2071892,
-2797779, -3930395, -1528703, -3677745, -3041255, -1452451, 3475950,
2176455, -1585221, -1257611, 1939314, -4083598, -1000202, -3190144,
-3157330, -3632928, 126922, 3412210, -983419, 2147896, 2715295,
-2967645, -3693493, -411027, -2477047, -671102, -1228525, -22981,
-1308169, -381987, 1349076, 1852771, -1430430, -3343383, 264944,
508951, 3097992, 44288, -1100098, 904516, 3958618, -3724342,
-8578, 1653064, -3249728, 2389356, -210977, 759969, -1316856,
189548, -3553272, 3159746, -1851402, -2409325, -177440, 1315589,
1341330, 1285669, -1584928, -812732, -1439742, -3019102, -3881060,
-3628969, 3839961, 2091667, 3407706, 2316500, 3817976, -3342478,
2244091, -2446433, -3562462, 266997, 2434439, -1235728, 3513181,
-3520352, -3759364, -1197226, -3193378, 900702, 1859098, 909542,
819034, 495491, -1613174, -43260, -522500, -655327, -3122442,
2031748, 3207046, -3556995, -525098, -768622, -3595838, 342297,
286988, -2437823, 4108315, 3437287, -3342277, 1735879, 203044,
2842341, 2691481, -2590150, 1265009, 4055324, 1247620, 2486353,
1595974, -3767016, 1250494, 2635921, -3548272, -2994039, 1869119,
1903435, -1050970, -1333058, 1237275, -3318210, -1430225, -451100,
1312455, 3306115, -1962642, -1279661, 1917081, -2546312, -1374803,
1500165, 777191, 2235880, 3406031, -542412, -2831860, -1671176,
-1846953, -2584293, -3724270, 594136, -3776993, -2013608, 2432395,
2454455, -164721, 1957272, 3369112, 185531, -1207385, -3183426,
162844, 1616392, 3014001, 810149, 1652634, -3694233, -1799107,
-3038916, 3523897, 3866901, 269760, 2213111, -975884, 1717735,
472078, -426683, 1723600, -1803090, 1910376, -1667432, -1104333,
-260646, -3833893, -2939036, -2235985, -420899, -2286327, 183443,
-976891, 1612842, -3545687, -554416, 3919660, -48306, -1362209,
3937738, 1400424, -846154, 1976782
};
/* Reference: FIPS 204 Section 4 "Parameter Sets" */
static const struct mldsa_parameter_set {
u8 k; /* num rows in the matrix A */
u8 l; /* num columns in the matrix A */
u8 ctilde_len; /* length of commitment hash ctilde in bytes; lambda/4 */
u8 omega; /* max num of 1's in the hint vector h */
u8 tau; /* num of +-1's in challenge c */
u8 beta; /* tau times eta */
u16 pk_len; /* length of public keys in bytes */
u16 sig_len; /* length of signatures in bytes */
s32 gamma1; /* coefficient range of y */
} mldsa_parameter_sets[] = {
[MLDSA44] = {
.k = 4,
.l = 4,
.ctilde_len = 32,
.omega = 80,
.tau = 39,
.beta = 78,
.pk_len = MLDSA44_PUBLIC_KEY_SIZE,
.sig_len = MLDSA44_SIGNATURE_SIZE,
.gamma1 = 1 << 17,
},
[MLDSA65] = {
.k = 6,
.l = 5,
.ctilde_len = 48,
.omega = 55,
.tau = 49,
.beta = 196,
.pk_len = MLDSA65_PUBLIC_KEY_SIZE,
.sig_len = MLDSA65_SIGNATURE_SIZE,
.gamma1 = 1 << 19,
},
[MLDSA87] = {
.k = 8,
.l = 7,
.ctilde_len = 64,
.omega = 75,
.tau = 60,
.beta = 120,
.pk_len = MLDSA87_PUBLIC_KEY_SIZE,
.sig_len = MLDSA87_SIGNATURE_SIZE,
.gamma1 = 1 << 19,
},
};
/*
* An element of the ring R_q (normal form) or the ring T_q (NTT form). It
* consists of N integers mod q: either the polynomial coefficients of the R_q
* element or the components of the T_q element. In either case, whether they
* are fully reduced to [0, q - 1] varies in the different parts of the code.
*/
struct mldsa_ring_elem {
s32 x[N];
};
struct mldsa_verification_workspace {
/* SHAKE context for computing c, mu, and ctildeprime */
struct shake_ctx shake;
/* The fields in this union are used in their order of declaration. */
union {
/* The hash of the public key */
u8 tr[64];
/* The message representative mu */
u8 mu[64];
/* Temporary space for rej_ntt_poly() */
u8 block[SHAKE128_BLOCK_SIZE + 1];
/* Encoded element of w'_1 */
u8 w1_encoded[MAX_W1_ENCODED_LEN];
/* The commitment hash. Real length is params->ctilde_len */
u8 ctildeprime[64];
};
/* SHAKE context for generating elements of the matrix A */
struct shake_ctx a_shake;
/*
* An element of the matrix A generated from the public seed, or an
* element of the vector t_1 decoded from the public key and pre-scaled
* by 2^d. Both are in NTT form. To reduce memory usage, we generate
* or decode these elements only as needed.
*/
union {
struct mldsa_ring_elem a;
struct mldsa_ring_elem t1_scaled;
};
/* The challenge c, generated from ctilde */
struct mldsa_ring_elem c;
/* A temporary element used during calculations */
struct mldsa_ring_elem tmp;
/* The following fields are variable-length: */
/* The signer's response vector */
struct mldsa_ring_elem z[/* l */];
/* The signer's hint vector */
/* u8 h[k * N]; */
};
/*
* Compute a * b * 2^-32 mod q. a * b must be in the range [-2^31 * q, 2^31 * q
* - 1] before reduction. The return value is in the range [-q + 1, q - 1].
*
* To reduce mod q efficiently, this uses Montgomery reduction with R=2^32.
* That's where the factor of 2^-32 comes from. The caller must include a
* factor of 2^32 at some point to compensate for that.
*
* To keep the input and output ranges very close to symmetric, this
* specifically does a "signed" Montgomery reduction. That is, when computing
* d = c * q^-1 mod 2^32, this chooses a representative in [S32_MIN, S32_MAX]
* rather than [0, U32_MAX], i.e. s32 rather than u32. This matters in the
* wider multiplication d * Q when d keeps its value via sign extension.
*
* Reference: FIPS 204 Appendix A "Montgomery Multiplication". But, it doesn't
* explain it properly: it has an off-by-one error in the upper end of the input
* range, it doesn't clarify that the signed version should be used, and it
* gives an unnecessarily large output range. A better citation is perhaps the
* Dilithium reference code, which functionally matches the below code and
* merely has the (benign) off-by-one error in its documentation.
*/
static inline s32 Zq_mult(s32 a, s32 b)
{
	/*
	 * Signed Montgomery multiplication with R = 2^32: computes
	 * a * b * 2^-32 mod q.  See the comment above this function for the
	 * exact input and output ranges.
	 */
	const s64 prod = (s64)a * b;	/* the unreduced product */
	/*
	 * Signed representative of prod * q^-1 mod 2^32.  The multiply itself
	 * is done on u32, where wraparound is well-defined (signed overflow
	 * would be UB); assigning the result to s32 reinterprets it as the
	 * signed representative in [S32_MIN, S32_MAX].
	 */
	const s32 f = (u32)prod * QINV_MOD_2_32;
	/*
	 * prod - f * q is an exact multiple of 2^32, since modulo 2^32 it
	 * equals prod - prod * (q^-1 * q) = prod - prod = 0.  The arithmetic
	 * right shift by 32 therefore divides it exactly, yielding the
	 * Montgomery-reduced result prod * 2^-32 mod q.
	 */
	return (prod - (s64)f * Q) >> 32;
}
/*
* Convert @w to its number-theoretically-transformed representation in-place.
* Reference: FIPS 204 Algorithm 41, NTT
*
* To prevent intermediate overflows, all input coefficients must have absolute
* value < q. All output components have absolute value < 9*q.
*/
static void ntt(struct mldsa_ring_elem *w)
{
	int m = 0;	/* position in zetas_times_2_32; entry 0 is never read */

	/*
	 * Iterative in-place NTT: the butterfly span halves on each layer,
	 * and the zetas are consumed in increasing index order, matching
	 * FIPS 204 Algorithm 41.  Inputs must satisfy |x| < q; outputs
	 * satisfy |x| < 9*q (see the comment above this function).
	 */
	for (int len = 128; len >= 1; len >>= 1) {
		for (int off = 0; off < 256; off += 2 * len) {
			const s32 zeta = zetas_times_2_32[++m];

			for (int i = off; i < off + len; i++) {
				const s32 prod = Zq_mult(zeta, w->x[i + len]);

				w->x[i + len] = w->x[i] - prod;
				w->x[i] += prod;
			}
		}
	}
}
/*
* Convert @w from its number-theoretically-transformed representation in-place.
* Reference: FIPS 204 Algorithm 42, NTT^-1
*
* This also multiplies the coefficients by 2^32, undoing an extra factor of
* 2^-32 introduced earlier, and reduces the coefficients to [0, q - 1].
*/
static void invntt_and_mul_2_32(struct mldsa_ring_elem *w)
{
	int m = 256;	/* walks backward through zetas_times_2_32 */

	/* Fully reduce all components first to prevent intermediate overflow. */
	for (int i = 0; i < 256; i++)
		w->x[i] %= Q;

	/*
	 * Inverse butterflies: the span doubles on each layer and the
	 * negated zetas are consumed in decreasing index order, matching
	 * FIPS 204 Algorithm 42.
	 */
	for (int len = 1; len < 256; len <<= 1) {
		for (int off = 0; off < 256; off += 2 * len) {
			const s32 zeta = -zetas_times_2_32[--m];

			for (int i = off; i < off + len; i++) {
				const s32 a = w->x[i];

				w->x[i] = a + w->x[i + len];
				w->x[i + len] = Zq_mult(zeta, a - w->x[i + len]);
			}
		}
	}

	/*
	 * Scale by 2^32 * 256^-1: the 2^32 cancels the 2^-32 left over from
	 * earlier Montgomery multiplications, and 256^-1 completes NTT^-1.
	 * Since Zq_mult() itself removes another factor of 2^32, the constant
	 * used is 2^32 * 2^32 * 256^-1 mod q = 41978.  Finally, conditionally
	 * add q to map the result from [-q + 1, q - 1] into [0, q - 1].
	 */
	for (int i = 0; i < 256; i++) {
		w->x[i] = Zq_mult(w->x[i], 41978);
		w->x[i] += (w->x[i] >> 31) & Q;
	}
}
/*
* Decode an element of t_1, i.e. the high d bits of t = A*s_1 + s_2.
* Reference: FIPS 204 Algorithm 23, pkDecode.
* Also multiply it by 2^d and convert it to NTT form.
*/
static const u8 *decode_t1_elem(struct mldsa_ring_elem *out,
				const u8 *t1_encoded)
{
	static_assert(0x3ff << D < Q); /* Decoded coefficients stay < q. */

	/*
	 * Unpack four 10-bit coefficients from each 5-byte group, scaling
	 * each by 2^d as it is decoded.  The u32 read covers the first three
	 * coefficients plus the low 2 bits of the fourth; the fourth's high
	 * 8 bits come from the trailing byte.
	 */
	for (int i = 0; i < N; i += 4, t1_encoded += 5) {
		const u32 bits = get_unaligned_le32(t1_encoded);

		out->x[i + 0] = ((bits >> 0) & 0x3ff) << D;
		out->x[i + 1] = ((bits >> 10) & 0x3ff) << D;
		out->x[i + 2] = ((bits >> 20) & 0x3ff) << D;
		out->x[i + 3] = ((bits >> 30) | (t1_encoded[4] << 2)) << D;
	}
	/* The caller only ever uses t_1 * 2^d in NTT form, so convert now. */
	ntt(out);
	return t1_encoded; /* Points just past the consumed bytes. */
}
/*
* Decode the signer's response vector 'z' from the signature.
* Reference: FIPS 204 Algorithm 27, sigDecode.
*
* This also validates that the coefficients of z are in range, corresponding
* the infinity norm check at the end of Algorithm 8, ML-DSA.Verify_internal.
*
* Finally, this also converts z to NTT form.
*/
static bool decode_z(struct mldsa_ring_elem z[/* l */], int l, s32 gamma1,
		     int beta, const u8 *sig_ptr)
{
	const u8 *sig = *sig_ptr;

	for (int i = 0; i < l; i++) {
		/*
		 * Each coefficient is stored as the unsigned value
		 * gamma1 - z, using bitlen(gamma1) + 1 bits: 18 bits for
		 * ML-DSA-44 (gamma1 = 2^17), 20 bits for the others
		 * (gamma1 = 2^19).
		 */
		if (l == 4) { /* ML-DSA-44? */
			/* 18-bit coefficients: decode 4 from 9 bytes. */
			for (int j = 0; j < N; j += 4, sig += 9) {
				u64 v = get_unaligned_le64(sig);

				z[i].x[j + 0] = (v >> 0) & 0x3ffff;
				z[i].x[j + 1] = (v >> 18) & 0x3ffff;
				z[i].x[j + 2] = (v >> 36) & 0x3ffff;
				/* Top coefficient: 10 bits from v, 8 from sig[8]. */
				z[i].x[j + 3] = (v >> 54) | (sig[8] << 10);
			}
		} else {
			/* 20-bit coefficients: decode 4 from 10 bytes. */
			for (int j = 0; j < N; j += 4, sig += 10) {
				u64 v = get_unaligned_le64(sig);

				z[i].x[j + 0] = (v >> 0) & 0xfffff;
				z[i].x[j + 1] = (v >> 20) & 0xfffff;
				z[i].x[j + 2] = (v >> 40) & 0xfffff;
				/* Top coefficient: 4 bits from v, 16 from sig[8..9]. */
				z[i].x[j + 3] =
					(v >> 60) |
					(get_unaligned_le16(&sig[8]) << 4);
			}
		}
		/*
		 * Map each encoded value back to the signed coefficient
		 * z = gamma1 - encoded, and enforce |z| < gamma1 - beta
		 * (the infinity norm check of ML-DSA.Verify_internal,
		 * done here during decoding).
		 */
		for (int j = 0; j < N; j++) {
			z[i].x[j] = gamma1 - z[i].x[j];
			if (z[i].x[j] <= -(gamma1 - beta) ||
			    z[i].x[j] >= gamma1 - beta)
				return false;
		}
		/* z is only ever used in NTT form, so convert each element. */
		ntt(&z[i]);
	}
	*sig_ptr = sig; /* Return updated pointer. */
	return true;
}
/*
* Decode the signer's hint vector 'h' from the signature.
* Reference: FIPS 204 Algorithm 21, HintBitUnpack
*
* Note that there are several ways in which the hint vector can be malformed.
*/
static bool decode_hint_vector(u8 h[/* k * N */], int k, int omega, const u8 *y)
{
	int pos = 0;	/* number of hint positions consumed from y so far */

	/*
	 * y[0..omega-1] lists the positions of the 1 bits, and
	 * y[omega..omega+k-1] gives the cumulative count of 1 bits through
	 * each of the k vector elements.  Start from an all-zero hint vector
	 * and set the listed positions, rejecting any malformed encoding.
	 */
	memset(h, 0, k * N);
	for (int i = 0; i < k; i++) {
		const int cum = y[omega + i];
		int last = -1;

		/* Cumulative counts must be nondecreasing and at most omega. */
		if (cum < pos || cum > omega)
			return false;
		while (pos < cum) {
			/* Positions within one element must strictly increase. */
			if (y[pos] <= last)
				return false;
			last = y[pos];
			h[i * N + last] = 1;
			pos++;
		}
	}
	/* Unused position slots must be zero padding. */
	return mem_is_zero(&y[pos], omega - pos);
}
/*
* Expand @seed into an element of R_q @c with coefficients in {-1, 0, 1},
* exactly @tau of them nonzero. Reference: FIPS 204 Algorithm 29, SampleInBall
*/
static void sample_in_ball(struct mldsa_ring_elem *c, const u8 *seed,
			   size_t seed_len, int tau, struct shake_ctx *shake)
{
	u64 signs;
	u8 j;

	/* All randomness is derived from SHAKE256(seed). */
	shake256_init(shake);
	shake_update(shake, seed, seed_len);
	/*
	 * The first 8 squeezed bytes provide the signs of the tau nonzero
	 * coefficients, one bit per coefficient, consumed starting from the
	 * least significant bit.
	 */
	shake_squeeze(shake, (u8 *)&signs, sizeof(signs));
	le64_to_cpus(&signs);
	*c = (struct mldsa_ring_elem){};
	/*
	 * Fisher-Yates-style insertion: for each i, rejection-sample a
	 * position j in [0, i], move the value currently at j up to i, and
	 * place a fresh +1 or -1 at j.  After the loop, exactly tau
	 * coefficients are nonzero.
	 */
	for (int i = N - tau; i < N; i++, signs >>= 1) {
		do {
			/* Rejection-sample j uniformly from [0, i]. */
			shake_squeeze(shake, &j, 1);
		} while (j > i);
		c->x[i] = c->x[j];
		c->x[j] = 1 - 2 * (s32)(signs & 1); /* +1 if bit 0, else -1 */
	}
}
/*
* Expand the public seed @rho and @row_and_column into an element of T_q @out.
* Reference: FIPS 204 Algorithm 30, RejNTTPoly
*
* @shake and @block are temporary space used by the expansion. @block has
* space for one SHAKE128 block, plus an extra byte to allow reading a u32 from
* the final 3-byte group without reading out-of-bounds.
*/
static void rej_ntt_poly(struct mldsa_ring_elem *out, const u8 rho[RHO_LEN],
			 __le16 row_and_column, struct shake_ctx *shake,
			 u8 block[SHAKE128_BLOCK_SIZE + 1])
{
	/* The SHAKE128 input is the seed rho followed by the 2-byte index. */
	shake128_init(shake);
	shake_update(shake, rho, RHO_LEN);
	shake_update(shake, (u8 *)&row_and_column, sizeof(row_and_column));
	/* Squeeze whole blocks until all N components have been accepted. */
	for (int i = 0; i < N;) {
		shake_squeeze(shake, block, SHAKE128_BLOCK_SIZE);
		block[SHAKE128_BLOCK_SIZE] = 0; /* for KMSAN */
		static_assert(SHAKE128_BLOCK_SIZE % 3 == 0);
		/*
		 * Each 3-byte group yields one 23-bit candidate.  The u32
		 * read overlaps the following group (or the pad byte at the
		 * very end of the block), hence the mask to 23 bits.
		 */
		for (int j = 0; j < SHAKE128_BLOCK_SIZE && i < N; j += 3) {
			u32 x = get_unaligned_le32(&block[j]) & 0x7fffff;

			if (x < Q) /* Ignore values >= q. */
				out->x[i++] = x;
		}
	}
}
/*
* Return the HighBits of r adjusted according to hint h
* Reference: FIPS 204 Algorithm 40, UseHint
*
* This is needed because of the public key compression in ML-DSA.
*
* h is either 0 or 1, r is in [0, q - 1], and gamma2 is either (q - 1) / 88 or
* (q - 1) / 32. Except when invoked via the unit test interface, gamma2 is a
* compile-time constant, so compilers will optimize the code accordingly.
*/
static __always_inline s32 use_hint(u8 h, s32 r, const s32 gamma2)
{
	const s32 m = (Q - 1) / (2 * gamma2); /* 44 or 16, compile-time const */
	s32 r1;

	/*
	 * Handle the special case where r - (r mod+- (2 * gamma2)) == q - 1,
	 * i.e. r >= q - gamma2.  This is also exactly where the computation of
	 * r1 below would produce 'm' and would need a correction.
	 */
	if (r >= Q - gamma2)
		return h == 0 ? 0 : m - 1;
	/*
	 * Compute the (non-hint-adjusted) HighBits r1 as:
	 *
	 *	r1 = (r - (r mod+- (2 * gamma2))) / (2 * gamma2)
	 *	   = floor((r + gamma2 - 1) / (2 * gamma2))
	 *
	 * The dividend is nonnegative here (r is in [0, q - 1]), so the cast
	 * to u32 is value-preserving; it makes the division unsigned.  Note
	 * that when '2 * gamma2' is a compile-time constant, compilers
	 * optimize the division to a reciprocal multiplication and shift.
	 */
	r1 = (u32)(r + gamma2 - 1) / (2 * gamma2);
	/*
	 * Return the HighBits r1:
	 *	+ 0 if the hint is 0;
	 *	+ 1 (mod m) if the hint is 1 and the LowBits are positive;
	 *	- 1 (mod m) if the hint is 1 and the LowBits are negative or 0.
	 *
	 * r1 is in (and remains in) [0, m - 1].  Note that when 'm' is a
	 * compile-time constant, compilers optimize the '% m' accordingly.
	 */
	if (h == 0)
		return r1;
	/* 'r > r1 * (2 * gamma2)' distinguishes positive LowBits. */
	if (r > r1 * (2 * gamma2))
		return (u32)(r1 + 1) % m;
	return (u32)(r1 + m - 1) % m;
}
/* Apply use_hint() componentwise to one ring element, in place. */
static __always_inline void use_hint_elem(struct mldsa_ring_elem *w,
					  const u8 h[N], const s32 gamma2)
{
	for (int i = 0; i < N; i++)
		w->x[i] = use_hint(h[i], w->x[i], gamma2);
}
#if IS_ENABLED(CONFIG_CRYPTO_LIB_MLDSA_KUNIT_TEST)
/*
 * Allow the __always_inline function use_hint() to be unit-tested.  This
 * out-of-line wrapper is compiled only when the KUnit test is enabled and is
 * not part of the public ML-DSA API.
 */
s32 mldsa_use_hint(u8 h, s32 r, s32 gamma2)
{
	return use_hint(h, r, gamma2);
}
EXPORT_SYMBOL_IF_KUNIT(mldsa_use_hint);
#endif
/*
* Encode one element of the commitment vector w'_1 into a byte string.
* Reference: FIPS 204 Algorithm 28, w1Encode.
* Return the number of bytes used: 192 for ML-DSA-44 and 128 for the others.
*/
static size_t encode_w1(u8 out[MAX_W1_ENCODED_LEN],
			const struct mldsa_ring_elem *w1, int k)
{
	u8 *p = out;	/* write cursor into the output buffer */

	static_assert(N * 6 / 8 == MAX_W1_ENCODED_LEN);
	if (k == 4) { /* ML-DSA-44? */
		/* 6 bits per coefficient: pack 4 coefficients into 3 bytes. */
		for (int i = 0; i < N; i += 4) {
			const u32 packed = (w1->x[i + 0] << 0) |
					   (w1->x[i + 1] << 6) |
					   (w1->x[i + 2] << 12) |
					   (w1->x[i + 3] << 18);

			*p++ = packed;
			*p++ = packed >> 8;
			*p++ = packed >> 16;
		}
	} else {
		/* 4 bits per coefficient: pack 2 coefficients per byte. */
		for (int i = 0; i < N; i += 2)
			*p++ = w1->x[i] | (w1->x[i + 1] << 4);
	}
	return p - out;
}
/*
 * Verify the ML-DSA signature @sig over message @msg with public key @pk.
 * This does "pure" ML-DSA with an empty context string.
 *
 * Return: 0 if the signature is valid; -EBADMSG if a length or an encoding
 * in the signature is invalid; -EKEYREJECTED if everything is well-formed
 * but the signature does not verify; -ENOMEM on allocation failure.
 */
int mldsa_verify(enum mldsa_alg alg, const u8 *sig, size_t sig_len,
		 const u8 *msg, size_t msg_len, const u8 *pk, size_t pk_len)
{
	const struct mldsa_parameter_set *params = &mldsa_parameter_sets[alg];
	const int k = params->k, l = params->l;
	/* For now this just does pure ML-DSA with an empty context string. */
	static const u8 msg_prefix[2] = { /* dom_sep= */ 0, /* ctx_len= */ 0 };
	const u8 *ctilde; /* The signer's commitment hash */
	const u8 *t1_encoded = &pk[RHO_LEN]; /* Next encoded element of t_1 */
	u8 *h; /* The signer's hint vector, length k * N */
	size_t w1_enc_len;

	/* Validate the public key and signature lengths. */
	if (pk_len != params->pk_len || sig_len != params->sig_len)
		return -EBADMSG;

	/*
	 * Allocate the workspace, including variable-length fields.  Its size
	 * depends only on the ML-DSA parameter set, not the other inputs.
	 *
	 * For freeing it, use kfree_sensitive() rather than kfree().  This is
	 * mainly to comply with FIPS 204 Section 3.6.3 "Intermediate Values".
	 * In reality it's a bit gratuitous, as this is a public key operation.
	 */
	struct mldsa_verification_workspace *ws __free(kfree_sensitive) =
		kmalloc(sizeof(*ws) + (l * sizeof(ws->z[0])) + (k * N),
			GFP_KERNEL);
	if (!ws)
		return -ENOMEM;
	/* The hint vector h lives directly after the l elements of z. */
	h = (u8 *)&ws->z[l];

	/* Decode the signature.  Reference: FIPS 204 Algorithm 27, sigDecode */
	ctilde = sig;
	sig += params->ctilde_len;
	if (!decode_z(ws->z, l, params->gamma1, params->beta, &sig))
		return -EBADMSG;
	if (!decode_hint_vector(h, k, params->omega, sig))
		return -EBADMSG;

	/* Recreate the challenge c from the signer's commitment hash. */
	sample_in_ball(&ws->c, ctilde, params->ctilde_len, params->tau,
		       &ws->shake);
	ntt(&ws->c);

	/* Compute the message representative mu = H(H(pk) || M'). */
	shake256(pk, pk_len, ws->tr, sizeof(ws->tr));
	shake256_init(&ws->shake);
	shake_update(&ws->shake, ws->tr, sizeof(ws->tr));
	shake_update(&ws->shake, msg_prefix, sizeof(msg_prefix));
	shake_update(&ws->shake, msg, msg_len);
	shake_squeeze(&ws->shake, ws->mu, sizeof(ws->mu));

	/* Start computing ctildeprime = H(mu || w1Encode(w'_1)). */
	shake256_init(&ws->shake);
	shake_update(&ws->shake, ws->mu, sizeof(ws->mu));

	/*
	 * Compute the commitment w'_1 from A, z, c, t_1, and h.
	 *
	 * The computation is the same for each of the k rows.  Just do each
	 * row before moving on to the next, resulting in only one loop over k.
	 */
	for (int i = 0; i < k; i++) {
		/*
		 * tmp = NTT(A) * NTT(z) * 2^-32
		 * To reduce memory use, generate each element of NTT(A)
		 * on-demand.  Note that each element is used only once.
		 */
		ws->tmp = (struct mldsa_ring_elem){};
		for (int j = 0; j < l; j++) {
			rej_ntt_poly(&ws->a, pk /* rho is first field of pk */,
				     cpu_to_le16((i << 8) | j), &ws->a_shake,
				     ws->block);
			for (int n = 0; n < N; n++)
				ws->tmp.x[n] +=
					Zq_mult(ws->a.x[n], ws->z[j].x[n]);
		}
		/* All components of tmp now have abs value < l*q. */

		/* Decode the next element of t_1 (pre-scaled, NTT form). */
		t1_encoded = decode_t1_elem(&ws->t1_scaled, t1_encoded);

		/*
		 * tmp -= NTT(c) * NTT(t_1 * 2^d) * 2^-32
		 *
		 * Taking a conservative bound for the output of ntt(), the
		 * multiplicands can have absolute value up to 9*q.  That
		 * corresponds to a product with absolute value 81*q^2.  That
		 * is within the limits of Zq_mult() which needs < ~256*q^2.
		 */
		for (int j = 0; j < N; j++)
			ws->tmp.x[j] -= Zq_mult(ws->c.x[j], ws->t1_scaled.x[j]);
		/* All components of tmp now have abs value < (l+1)*q. */

		/* tmp = w'_Approx = NTT^-1(tmp) * 2^32 */
		invntt_and_mul_2_32(&ws->tmp);
		/* All coefficients of tmp are now in [0, q - 1]. */

		/*
		 * tmp = w'_1 = UseHint(h, w'_Approx)
		 * For efficiency, set gamma2 to a compile-time constant.
		 */
		if (k == 4)
			use_hint_elem(&ws->tmp, &h[i * N], (Q - 1) / 88);
		else
			use_hint_elem(&ws->tmp, &h[i * N], (Q - 1) / 32);

		/* Encode and hash the next element of w'_1. */
		w1_enc_len = encode_w1(ws->w1_encoded, &ws->tmp, k);
		shake_update(&ws->shake, ws->w1_encoded, w1_enc_len);
	}

	/* Finish computing ctildeprime. */
	shake_squeeze(&ws->shake, ws->ctildeprime, params->ctilde_len);

	/*
	 * Verify that ctilde == ctildeprime.  A non-constant-time comparison
	 * is acceptable here since all the compared values are public.
	 */
	if (memcmp(ws->ctildeprime, ctilde, params->ctilde_len) != 0)
		return -EKEYREJECTED;
	/* ||z||_infinity < gamma1 - beta was already checked in decode_z(). */
	return 0;
}
EXPORT_SYMBOL_GPL(mldsa_verify);
#ifdef CONFIG_CRYPTO_FIPS
static int __init mldsa_mod_init(void)
{
	/* The self-test runs (and can panic) only when fips_enabled is set. */
	if (fips_enabled) {
		/*
		 * FIPS cryptographic algorithm self-test.  As per the FIPS
		 * Implementation Guidance, testing any ML-DSA parameter set
		 * satisfies the test requirement for all of them, and only a
		 * positive test is required.
		 */
		int err = mldsa_verify(MLDSA65, fips_test_mldsa65_signature,
				       sizeof(fips_test_mldsa65_signature),
				       fips_test_mldsa65_message,
				       sizeof(fips_test_mldsa65_message),
				       fips_test_mldsa65_public_key,
				       sizeof(fips_test_mldsa65_public_key));

		if (err)
			panic("mldsa: FIPS self-test failed; err=%pe\n",
			      ERR_PTR(err));
	}
	return 0;
}
subsys_initcall(mldsa_mod_init);

/* Empty; exists so the module (if built as one) can be unloaded. */
static void __exit mldsa_mod_exit(void)
{
}
module_exit(mldsa_mod_exit);
#endif /* CONFIG_CRYPTO_FIPS */
MODULE_DESCRIPTION("ML-DSA signature verification");
MODULE_LICENSE("GPL");

82
lib/crypto/nh.c Normal file
View File

@@ -0,0 +1,82 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2018 Google LLC
*/
/*
* Implementation of the NH almost-universal hash function, specifically the
* variant of NH used in Adiantum. This is *not* a cryptographic hash function.
*
* Reference: section 6.3 of "Adiantum: length-preserving encryption for
* entry-level processors" (https://eprint.iacr.org/2018/720.pdf).
*/
#include <crypto/nh.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/unaligned.h>
#ifdef CONFIG_CRYPTO_LIB_NH_ARCH
#include "nh.h" /* $(SRCARCH)/nh.h */
#else
static bool nh_arch(const u32 *key, const u8 *message, size_t message_len,
__le64 hash[NH_NUM_PASSES])
{
return false;
}
#endif
/*
 * nh() - Compute the 4-pass NH hash of @message under @key.
 *
 * Dispatches to the architecture-optimized implementation when one is
 * available and claims the input; otherwise runs the generic code below.
 * The four 64-bit pass results are stored little-endian into @hash.
 * NOTE(review): the generic loop consumes NH_MESSAGE_UNIT bytes per step,
 * so @message_len is presumably a multiple of NH_MESSAGE_UNIT — callers
 * must guarantee that.
 */
void nh(const u32 *key, const u8 *message, size_t message_len,
	__le64 hash[NH_NUM_PASSES])
{
	u64 sums[4] = {};

	if (nh_arch(key, message, message_len, hash))
		return;

	static_assert(NH_PAIR_STRIDE == 2);
	static_assert(NH_NUM_PASSES == 4);
	for (; message_len; message_len -= NH_MESSAGE_UNIT) {
		const u32 m0 = get_unaligned_le32(message + 0);
		const u32 m1 = get_unaligned_le32(message + 4);
		const u32 m2 = get_unaligned_le32(message + 8);
		const u32 m3 = get_unaligned_le32(message + 12);

		/* Each pass p uses the key words key[4p .. 4p+3]. */
		for (int p = 0; p < NH_NUM_PASSES; p++) {
			const u32 *kp = &key[4 * p];

			sums[p] += (u64)(u32)(m0 + kp[0]) * (u32)(m2 + kp[2]);
			sums[p] += (u64)(u32)(m1 + kp[1]) * (u32)(m3 + kp[3]);
		}
		key += NH_MESSAGE_UNIT / sizeof(key[0]);
		message += NH_MESSAGE_UNIT;
	}
	for (int p = 0; p < NH_NUM_PASSES; p++)
		hash[p] = cpu_to_le64(sums[p]);
}
EXPORT_SYMBOL_GPL(nh);
#ifdef nh_mod_init_arch
static int __init nh_mod_init(void)
{
nh_mod_init_arch();
return 0;
}
subsys_initcall(nh_mod_init);
static void __exit nh_mod_exit(void)
{
}
module_exit(nh_mod_exit);
#endif
MODULE_DESCRIPTION("NH almost-universal hash function");
MODULE_LICENSE("GPL");

2
lib/crypto/powerpc/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
aesp8-ppc.S

View File

@@ -0,0 +1,346 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Fast AES implementation for SPE instruction set (PPC)
*
* This code makes use of the SPE SIMD instruction set as defined in
* http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
* Implementation is based on optimization guide notes from
* http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*/
#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"
#define EAD(in, bpos) \
rlwimi rT0,in,28-((bpos+3)%4)*8,20,27;
#define DAD(in, bpos) \
rlwimi rT1,in,24-((bpos+3)%4)*8,24,31;
#define LWH(out, off) \
evlwwsplat out,off(rT0); /* load word high */
#define LWL(out, off) \
lwz out,off(rT0); /* load word low */
#define LBZ(out, tab, off) \
lbz out,off(tab); /* load byte */
#define LAH(out, in, bpos, off) \
EAD(in, bpos) /* calc addr + load word high */ \
LWH(out, off)
#define LAL(out, in, bpos, off) \
EAD(in, bpos) /* calc addr + load word low */ \
LWL(out, off)
#define LAE(out, in, bpos) \
EAD(in, bpos) /* calc addr + load enc byte */ \
LBZ(out, rT0, 8)
#define LBE(out) \
LBZ(out, rT0, 8) /* load enc byte */
#define LAD(out, in, bpos) \
DAD(in, bpos) /* calc addr + load dec byte */ \
LBZ(out, rT1, 0)
#define LBD(out) \
LBZ(out, rT1, 0)
/*
* ppc_encrypt_block: The central encryption function for a single 16 bytes
* block. It does no stack handling or register saving to support fast calls
* via bl/blr. It expects that caller has pre-xored input data with first
* 4 words of encryption key into rD0-rD3. Pointer/counter registers must
* have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
* and rW0-rW3 and caller must execute a final xor on the output registers.
* All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
*
*/
_GLOBAL(ppc_encrypt_block)
	/*
	 * Issue the first table lookups ahead of entering the loop; the code
	 * interleaves address generation (EAD) with loads to hide latency.
	 */
	LAH(rW4, rD1, 2, 4)
	LAH(rW6, rD0, 3, 0)
	LAH(rW3, rD0, 1, 8)
ppc_encrypt_block_loop:
	/*
	 * Two unrolled rounds per iteration; the iteration count is in CTR
	 * (set up by the caller) and consumed by bdnz below.
	 * First round: table lookups for all state bytes, then xor with the
	 * round keys loaded from 16(rKP) and 24(rKP).
	 */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD3, 2, 4)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw rD1,16(rKP)
	EAD(rD3, 3)
	evxor rW2,rW2,rW4
	LWL(rW7, 0)
	evxor rW2,rW2,rW6
	EAD(rD2, 0)
	evxor rD1,rD1,rW2
	LWL(rW1, 12)
	evxor rD1,rD1,rW0
	evldw rD3,24(rKP)
	evmergehi rD0,rD0,rD1
	EAD(rD1, 2)
	evxor rW3,rW3,rW5
	LWH(rW4, 4)
	evxor rW3,rW3,rW7
	EAD(rD0, 3)
	evxor rD3,rD3,rW3
	LWH(rW6, 0)
	evxor rD3,rD3,rW1
	EAD(rD0, 1)
	evmergehi rD2,rD2,rD3
	LWH(rW3, 8)
	/* Second round: same pattern, round keys at 32(rKP) and 40(rKP). */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD3, 2, 4)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw rD1,32(rKP)
	EAD(rD3, 3)
	evxor rW2,rW2,rW4
	LWL(rW7, 0)
	evxor rW2,rW2,rW6
	EAD(rD2, 0)
	evxor rD1,rD1,rW2
	LWL(rW1, 12)
	evxor rD1,rD1,rW0
	evldw rD3,40(rKP)
	evmergehi rD0,rD0,rD1
	EAD(rD1, 2)
	evxor rW3,rW3,rW5
	LWH(rW4, 4)
	evxor rW3,rW3,rW7
	EAD(rD0, 3)
	evxor rD3,rD3,rW3
	LWH(rW6, 0)
	evxor rD3,rD3,rW1
	EAD(rD0, 1)
	evmergehi rD2,rD2,rD3
	LWH(rW3, 8)
	addi rKP,rKP,32		/* advance key pointer past 2 round keys */
	bdnz ppc_encrypt_block_loop
	/* Penultimate round, round keys at 16(rKP) and 24(rKP). */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAH(rW5, rD3, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw rD1,16(rKP)
	EAD(rD3, 3)
	evxor rW2,rW2,rW4
	LWL(rW7, 0)
	evxor rW2,rW2,rW6
	EAD(rD2, 0)
	evxor rD1,rD1,rW2
	LWL(rW1, 12)
	evxor rD1,rD1,rW0
	evldw rD3,24(rKP)
	evmergehi rD0,rD0,rD1
	EAD(rD1, 0)
	evxor rW3,rW3,rW5
	LBE(rW2)
	evxor rW3,rW3,rW7
	EAD(rD0, 1)
	evxor rD3,rD3,rW3
	LBE(rW6)
	evxor rD3,rD3,rW1
	EAD(rD0, 0)
	evmergehi rD2,rD2,rD3
	LBE(rW1)
	/*
	 * Final round: single-byte S-box lookups (LAE/LBE) instead of full
	 * table words, with the bytes recombined into words via rlwimi.
	 */
	LAE(rW0, rD3, 0)
	LAE(rW1, rD0, 0)
	LAE(rW4, rD2, 1)
	LAE(rW5, rD3, 1)
	LAE(rW3, rD2, 0)
	LAE(rW7, rD1, 1)
	rlwimi rW0,rW4,8,16,23
	rlwimi rW1,rW5,8,16,23
	LAE(rW4, rD1, 2)
	LAE(rW5, rD2, 2)
	rlwimi rW2,rW6,8,16,23
	rlwimi rW3,rW7,8,16,23
	LAE(rW6, rD3, 2)
	LAE(rW7, rD0, 2)
	rlwimi rW0,rW4,16,8,15
	rlwimi rW1,rW5,16,8,15
	LAE(rW4, rD0, 3)
	LAE(rW5, rD1, 3)
	rlwimi rW2,rW6,16,8,15
	/* Load the last round key words for the caller's final xor. */
	lwz rD0,32(rKP)
	rlwimi rW3,rW7,16,8,15
	lwz rD1,36(rKP)
	LAE(rW6, rD2, 3)
	LAE(rW7, rD3, 3)
	rlwimi rW0,rW4,24,0,7
	lwz rD2,40(rKP)
	rlwimi rW1,rW5,24,0,7
	lwz rD3,44(rKP)
	rlwimi rW2,rW6,24,0,7
	rlwimi rW3,rW7,24,0,7
	blr
/*
* ppc_decrypt_block: The central decryption function for a single 16 bytes
* block. It does no stack handling or register saving to support fast calls
* via bl/blr. It expects that caller has pre-xored input data with first
* 4 words of encryption key into rD0-rD3. Pointer/counter registers must
* have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
* and rW0-rW3 and caller must execute a final xor on the output registers.
* All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
*
*/
_GLOBAL(ppc_decrypt_block)
	/*
	 * Mirror image of ppc_encrypt_block: same pre-xored input contract
	 * and two-rounds-per-iteration structure, but using the decryption
	 * table address macros (DAD/LAD/LBD index via rT1) at the end.
	 */
	LAH(rW0, rD1, 0, 12)
	LAH(rW6, rD0, 3, 0)
	LAH(rW3, rD0, 1, 8)
ppc_decrypt_block_loop:
	/*
	 * First of two unrolled rounds; loop count is in CTR (set by the
	 * caller) and consumed by bdnz below.  Round keys at 16/24(rKP).
	 */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw rD1,16(rKP)
	EAD(rD0, 0)
	evxor rW4,rW4,rW6
	LWL(rW1, 12)
	evxor rW0,rW0,rW4
	EAD(rD2, 2)
	evxor rW0,rW0,rW2
	LWL(rW5, 4)
	evxor rD1,rD1,rW0
	evldw rD3,24(rKP)
	evmergehi rD0,rD0,rD1
	EAD(rD1, 0)
	evxor rW3,rW3,rW7
	LWH(rW0, 12)
	evxor rW3,rW3,rW1
	EAD(rD0, 3)
	evxor rD3,rD3,rW3
	LWH(rW6, 0)
	evxor rD3,rD3,rW5
	EAD(rD0, 1)
	evmergehi rD2,rD2,rD3
	LWH(rW3, 8)
	/* Second round: same pattern, round keys at 32(rKP) and 40(rKP). */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw rD1,32(rKP)
	EAD(rD0, 0)
	evxor rW4,rW4,rW6
	LWL(rW1, 12)
	evxor rW0,rW0,rW4
	EAD(rD2, 2)
	evxor rW0,rW0,rW2
	LWL(rW5, 4)
	evxor rD1,rD1,rW0
	evldw rD3,40(rKP)
	evmergehi rD0,rD0,rD1
	EAD(rD1, 0)
	evxor rW3,rW3,rW7
	LWH(rW0, 12)
	evxor rW3,rW3,rW1
	EAD(rD0, 3)
	evxor rD3,rD3,rW3
	LWH(rW6, 0)
	evxor rD3,rD3,rW5
	EAD(rD0, 1)
	evmergehi rD2,rD2,rD3
	LWH(rW3, 8)
	addi rKP,rKP,32		/* advance key pointer past 2 round keys */
	bdnz ppc_decrypt_block_loop
	/* Penultimate round, round keys at 16(rKP) and 24(rKP). */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw rD1,16(rKP)
	EAD(rD0, 0)
	evxor rW4,rW4,rW6
	LWL(rW1, 12)
	evxor rW0,rW0,rW4
	EAD(rD2, 2)
	evxor rW0,rW0,rW2
	LWL(rW5, 4)
	evxor rD1,rD1,rW0
	evldw rD3,24(rKP)
	evmergehi rD0,rD0,rD1
	DAD(rD1, 0)
	evxor rW3,rW3,rW7
	LBD(rW0)
	evxor rW3,rW3,rW1
	DAD(rD0, 1)
	evxor rD3,rD3,rW3
	LBD(rW6)
	evxor rD3,rD3,rW5
	DAD(rD0, 0)
	evmergehi rD2,rD2,rD3
	LBD(rW3)
	/*
	 * Final round: single-byte lookups through the decryption table
	 * (LAD/LBD), recombining the bytes into words via rlwimi.
	 */
	LAD(rW2, rD3, 0)
	LAD(rW1, rD2, 0)
	LAD(rW4, rD2, 1)
	LAD(rW5, rD3, 1)
	LAD(rW7, rD1, 1)
	rlwimi rW0,rW4,8,16,23
	rlwimi rW1,rW5,8,16,23
	LAD(rW4, rD3, 2)
	LAD(rW5, rD0, 2)
	rlwimi rW2,rW6,8,16,23
	rlwimi rW3,rW7,8,16,23
	LAD(rW6, rD1, 2)
	LAD(rW7, rD2, 2)
	rlwimi rW0,rW4,16,8,15
	rlwimi rW1,rW5,16,8,15
	LAD(rW4, rD0, 3)
	LAD(rW5, rD1, 3)
	rlwimi rW2,rW6,16,8,15
	/* Load the last round key words for the caller's final xor. */
	lwz rD0,32(rKP)
	rlwimi rW3,rW7,16,8,15
	lwz rD1,36(rKP)
	LAD(rW6, rD2, 3)
	LAD(rW7, rD3, 3)
	rlwimi rW0,rW4,24,0,7
	lwz rD2,40(rKP)
	rlwimi rW1,rW5,24,0,7
	lwz rD3,44(rKP)
	rlwimi rW2,rW6,24,0,7
	rlwimi rW3,rW7,24,0,7
	blr

View File

@@ -0,0 +1,278 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Key handling functions for PPC AES implementation
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*/
#include <asm/ppc_asm.h>
/*
 * LOAD_KEY: load one 32 bit word of key material in big-endian byte
 * order. On little-endian builds the bytes are reversed with lwbrx;
 * r0 carries the offset because lwbrx is an indexed-only load.
 */
#ifdef __BIG_ENDIAN__
#define LOAD_KEY(d, s, off) \
lwz d,off(s);
#else
#define LOAD_KEY(d, s, off) \
li r0,off; \
lwbrx d,s,r0;
#endif
/* Create a small stack frame and save the non-volatile GPRs we use. */
#define INITIALIZE_KEY \
stwu r1,-32(r1); /* create stack frame */ \
stw r14,8(r1); /* save registers */ \
stw r15,12(r1); \
stw r16,16(r1);
/*
 * Restore the saved registers, scrub every GPR that held key material
 * and tear the stack frame down again.
 */
#define FINALIZE_KEY \
lwz r14,8(r1); /* restore registers */ \
lwz r15,12(r1); \
lwz r16,16(r1); \
xor r5,r5,r5; /* clear sensitive data */ \
xor r6,r6,r6; \
xor r7,r7,r7; \
xor r8,r8,r8; \
xor r9,r9,r9; \
xor r10,r10,r10; \
xor r11,r11,r11; \
xor r12,r12,r12; \
addi r1,r1,32; /* cleanup stack */
/*
 * LS_BOX: apply the AES S-box to each of the four bytes of r in place.
 * Single-byte S-box values are read at offset 8 within each row of
 * PPC_AES_4K_ENCTAB (loaded into t2); t1 and t2 are clobbered.
 */
#define LS_BOX(r, t1, t2) \
lis t2,PPC_AES_4K_ENCTAB@h; \
ori t2,t2,PPC_AES_4K_ENCTAB@l; \
rlwimi t2,r,4,20,27; \
lbz t1,8(t2); \
rlwimi r,t1,0,24,31; \
rlwimi t2,r,28,20,27; \
lbz t1,8(t2); \
rlwimi r,t1,8,16,23; \
rlwimi t2,r,20,20,27; \
lbz t1,8(t2); \
rlwimi r,t1,16,8,15; \
rlwimi t2,r,12,20,27; \
lbz t1,8(t2); \
rlwimi r,t1,24,0,7;
/*
 * GF8_MUL: multiply all four bytes of 'in' by 2 in GF(2^8) (the AES
 * "xtime" step) in parallel: bytes with the top bit set get the 0x1b
 * reduction polynomial xored in after the shift. t1/t2 are scratch.
 */
#define GF8_MUL(out, in, t1, t2) \
lis t1,0x8080; /* multiplication in GF8 */ \
ori t1,t1,0x8080; \
and t1,t1,in; \
srwi t1,t1,7; \
mulli t1,t1,0x1b; \
lis t2,0x7f7f; \
ori t2,t2,0x7f7f; \
and t2,t2,in; \
slwi t2,t2,1; \
xor out,t1,t2;
/*
 * ppc_expand_key_128(u32 *key_enc, const u8 *key)
 *
 * Expand 128 bit key into 176 bytes encryption key. It consists of
 * key itself plus 10 rounds with 16 bytes each
 *
 * r3 = key_enc (destination schedule), r4 = key (raw key bytes)
 */
_GLOBAL(ppc_expand_key_128)
INITIALIZE_KEY
LOAD_KEY(r5,r4,0)
LOAD_KEY(r6,r4,4)
LOAD_KEY(r7,r4,8)
LOAD_KEY(r8,r4,12)
stw r5,0(r3) /* key[0..3] = input data */
stw r6,4(r3)
stw r7,8(r3)
stw r8,12(r3)
li r16,10 /* 10 expansion rounds */
lis r0,0x0100 /* RCO(1) */
/* per round: rotate + S-box the last word, add RCO, then chain xors */
ppc_expand_128_loop:
addi r3,r3,16
mr r14,r8 /* apply LS_BOX to 4th temp */
rotlwi r14,r14,8
LS_BOX(r14, r15, r4)
xor r14,r14,r0
xor r5,r5,r14 /* xor next 4 keys */
xor r6,r6,r5
xor r7,r7,r6
xor r8,r8,r7
stw r5,0(r3) /* store next 4 keys */
stw r6,4(r3)
stw r7,8(r3)
stw r8,12(r3)
GF8_MUL(r0, r0, r4, r14) /* multiply RCO by 2 in GF */
subi r16,r16,1
cmpwi r16,0
bt eq,ppc_expand_128_end
b ppc_expand_128_loop
ppc_expand_128_end:
FINALIZE_KEY
blr
/*
 * ppc_expand_key_192(u32 *key_enc, const u8 *key)
 *
 * Expand 192 bit key into 208 bytes encryption key. It consists of key
 * itself plus 12 rounds with 16 bytes each
 *
 * r3 = key_enc (destination schedule), r4 = key (raw key bytes)
 */
_GLOBAL(ppc_expand_key_192)
INITIALIZE_KEY
LOAD_KEY(r5,r4,0)
LOAD_KEY(r6,r4,4)
LOAD_KEY(r7,r4,8)
LOAD_KEY(r8,r4,12)
LOAD_KEY(r9,r4,16)
LOAD_KEY(r10,r4,20)
stw r5,0(r3)
stw r6,4(r3)
stw r7,8(r3)
stw r8,12(r3)
stw r9,16(r3)
stw r10,20(r3)
li r16,8 /* 8 expansion rounds */
lis r0,0x0100 /* RCO(1) */
/* per round: rotate + S-box the 6th word, add RCO, then chain xors */
ppc_expand_192_loop:
addi r3,r3,24
mr r14,r10 /* apply LS_BOX to 6th temp */
rotlwi r14,r14,8
LS_BOX(r14, r15, r4)
xor r14,r14,r0
xor r5,r5,r14 /* xor next 6 keys */
xor r6,r6,r5
xor r7,r7,r6
xor r8,r8,r7
xor r9,r9,r8
xor r10,r10,r9
stw r5,0(r3)
stw r6,4(r3)
stw r7,8(r3)
stw r8,12(r3)
subi r16,r16,1
cmpwi r16,0 /* last round early kick out */
bt eq,ppc_expand_192_end
stw r9,16(r3)
stw r10,20(r3)
GF8_MUL(r0, r0, r4, r14) /* multiply RCO GF8 */
b ppc_expand_192_loop
ppc_expand_192_end:
FINALIZE_KEY
blr
/*
 * ppc_expand_key_256(u32 *key_enc, const u8 *key)
 *
 * Expand 256 bit key into 240 bytes encryption key. It consists of key
 * itself plus 14 rounds with 16 bytes each
 *
 * r3 = key_enc (destination schedule), r4 = key (raw key bytes)
 */
_GLOBAL(ppc_expand_key_256)
INITIALIZE_KEY
LOAD_KEY(r5,r4,0)
LOAD_KEY(r6,r4,4)
LOAD_KEY(r7,r4,8)
LOAD_KEY(r8,r4,12)
LOAD_KEY(r9,r4,16)
LOAD_KEY(r10,r4,20)
LOAD_KEY(r11,r4,24)
LOAD_KEY(r12,r4,28)
stw r5,0(r3)
stw r6,4(r3)
stw r7,8(r3)
stw r8,12(r3)
stw r9,16(r3)
stw r10,20(r3)
stw r11,24(r3)
stw r12,28(r3)
li r16,7 /* 7 expansion rounds */
lis r0,0x0100 /* RCO(1) */
/*
 * AES-256 needs two S-box applications per round: a rotated one on the
 * 8th word for the first half, a plain one on the 4th for the second.
 */
ppc_expand_256_loop:
addi r3,r3,32
mr r14,r12 /* apply LS_BOX to 8th temp */
rotlwi r14,r14,8
LS_BOX(r14, r15, r4)
xor r14,r14,r0
xor r5,r5,r14 /* xor 4 keys */
xor r6,r6,r5
xor r7,r7,r6
xor r8,r8,r7
mr r14,r8
LS_BOX(r14, r15, r4) /* apply LS_BOX to 4th temp */
xor r9,r9,r14 /* xor 4 keys */
xor r10,r10,r9
xor r11,r11,r10
xor r12,r12,r11
stw r5,0(r3)
stw r6,4(r3)
stw r7,8(r3)
stw r8,12(r3)
subi r16,r16,1
cmpwi r16,0 /* last round early kick out */
bt eq,ppc_expand_256_end
stw r9,16(r3)
stw r10,20(r3)
stw r11,24(r3)
stw r12,28(r3)
GF8_MUL(r0, r0, r4, r14)
b ppc_expand_256_loop
ppc_expand_256_end:
FINALIZE_KEY
blr
/*
 * ppc_generate_decrypt_key: derive decryption key from encryption key
 * number of bytes to handle are calculated from length of key (16/24/32)
 *
 * r3 = key_dec (destination), r4 = key_enc (source schedule),
 * r5 = key length in bytes. First and last round keys are copied
 * swapped; the middle round keys get InvMixColumns applied (AES
 * "equivalent inverse cipher" key schedule).
 */
_GLOBAL(ppc_generate_decrypt_key)
addi r6,r5,24
slwi r6,r6,2 /* r6 = byte offset of the last round key */
lwzx r7,r4,r6 /* first/last 4 words are same */
stw r7,0(r3)
lwz r7,0(r4)
stwx r7,r3,r6
addi r6,r6,4
lwzx r7,r4,r6
stw r7,4(r3)
lwz r7,4(r4)
stwx r7,r3,r6
addi r6,r6,4
lwzx r7,r4,r6
stw r7,8(r3)
lwz r7,8(r4)
stwx r7,r3,r6
addi r6,r6,4
lwzx r7,r4,r6
stw r7,12(r3)
lwz r7,12(r4)
stwx r7,r3,r6
addi r3,r3,16
add r4,r4,r6
subi r4,r4,28 /* point r4 at last-but-one round key */
addi r5,r5,20
srwi r5,r5,2 /* r5 = number of middle round keys */
ppc_generate_decrypt_block:
li r6,4
mtctr r6 /* 4 words per round key */
ppc_generate_decrypt_word:
lwz r6,0(r4)
GF8_MUL(r7, r6, r0, r7) /* r7 = 2x, r8 = 4x, r9 = 8x */
GF8_MUL(r8, r7, r0, r8)
GF8_MUL(r9, r8, r0, r9)
/* combine xtime multiples + rotations into the InvMixColumns word */
xor r10,r9,r6
xor r11,r7,r8
xor r11,r11,r9
xor r12,r7,r10
rotrwi r12,r12,24
xor r11,r11,r12
xor r12,r8,r10
rotrwi r12,r12,16
xor r11,r11,r12
rotrwi r12,r10,8
xor r11,r11,r12
stw r11,0(r3)
addi r3,r3,4
addi r4,r4,4
bdnz ppc_generate_decrypt_word
subi r4,r4,32 /* step source back one round key (reverse order) */
subi r5,r5,1
cmpwi r5,0
bt gt,ppc_generate_decrypt_block
blr

View File

@@ -0,0 +1,625 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*/
#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"
#ifdef __BIG_ENDIAN__ /* Macros for big endian builds */
#define LOAD_DATA(reg, off) \
lwz reg,off(rSP); /* load with offset */
#define SAVE_DATA(reg, off) \
stw reg,off(rDP); /* save with offset */
#define NEXT_BLOCK \
addi rSP,rSP,16; /* increment pointers per bloc */ \
addi rDP,rDP,16;
#define LOAD_IV(reg, off) \
lwz reg,off(rIP); /* IV loading with offset */
#define SAVE_IV(reg, off) \
stw reg,off(rIP); /* IV saving with offset */
#define START_IV /* nothing to reset */
#define CBC_DEC 16 /* CBC decrement per block */
#define CTR_DEC 1 /* CTR decrement one byte */
#else /* Macros for little endian */
/*
 * LE variants byte-reverse on the fly with st/lwbrx and advance the
 * pointer themselves, so NEXT_BLOCK is a no-op and the CBC/CTR
 * decrements are larger to undo the implicit increments.
 */
#define LOAD_DATA(reg, off) \
lwbrx reg,0,rSP; /* load reversed */ \
addi rSP,rSP,4; /* and increment pointer */
#define SAVE_DATA(reg, off) \
stwbrx reg,0,rDP; /* save reversed */ \
addi rDP,rDP,4; /* and increment pointer */
#define NEXT_BLOCK /* nothing to do */
#define LOAD_IV(reg, off) \
lwbrx reg,0,rIP; /* load reversed */ \
addi rIP,rIP,4; /* and increment pointer */
#define SAVE_IV(reg, off) \
stwbrx reg,0,rIP; /* save reversed */ \
addi rIP,rIP,4; /* and increment pointer */
#define START_IV \
subi rIP,rIP,16; /* must reset pointer */
#define CBC_DEC 32 /* 2 blocks because of incs */
#define CTR_DEC 17 /* 1 block because of incs */
#endif
/* SAVE/LOAD_n_REGS: spill/restore n extra 32 bit GPRs (IV/tweak use) */
#define SAVE_0_REGS
#define LOAD_0_REGS
#define SAVE_4_REGS \
stw rI0,96(r1); /* save 32 bit registers */ \
stw rI1,100(r1); \
stw rI2,104(r1); \
stw rI3,108(r1);
#define LOAD_4_REGS \
lwz rI0,96(r1); /* restore 32 bit registers */ \
lwz rI1,100(r1); \
lwz rI2,104(r1); \
lwz rI3,108(r1);
#define SAVE_8_REGS \
SAVE_4_REGS \
stw rG0,112(r1); /* save 32 bit registers */ \
stw rG1,116(r1); \
stw rG2,120(r1); \
stw rG3,124(r1);
#define LOAD_8_REGS \
LOAD_4_REGS \
lwz rG0,112(r1); /* restore 32 bit registers */ \
lwz rG1,116(r1); \
lwz rG2,120(r1); \
lwz rG3,124(r1);
/*
 * INITIALIZE_CRYPT: build the stack frame, point rT0 at the given
 * lookup table, keep a copy of the key pointer in rKS and save the
 * non-volatile registers (full 64 bit SPE halves via evstdw).
 */
#define INITIALIZE_CRYPT(tab,nr32bitregs) \
mflr r0; \
stwu r1,-160(r1); /* create stack frame */ \
lis rT0,tab@h; /* en-/decryption table pointer */ \
stw r0,8(r1); /* save link register */ \
ori rT0,rT0,tab@l; \
evstdw r14,16(r1); \
mr rKS,rKP; \
evstdw r15,24(r1); /* We must save non volatile */ \
evstdw r16,32(r1); /* registers. Take the chance */ \
evstdw r17,40(r1); /* and save the SPE part too */ \
evstdw r18,48(r1); \
evstdw r19,56(r1); \
evstdw r20,64(r1); \
evstdw r21,72(r1); \
evstdw r22,80(r1); \
evstdw r23,88(r1); \
SAVE_##nr32bitregs##_REGS
/*
 * FINALIZE_CRYPT: restore the saved registers and zero the register
 * save area so no key/data material lingers on the stack.
 */
#define FINALIZE_CRYPT(nr32bitregs) \
lwz r0,8(r1); \
evldw r14,16(r1); /* restore SPE registers */ \
evldw r15,24(r1); \
evldw r16,32(r1); \
evldw r17,40(r1); \
evldw r18,48(r1); \
evldw r19,56(r1); \
evldw r20,64(r1); \
evldw r21,72(r1); \
evldw r22,80(r1); \
evldw r23,88(r1); \
LOAD_##nr32bitregs##_REGS \
mtlr r0; /* restore link register */ \
xor r0,r0,r0; \
stw r0,16(r1); /* delete sensitive data */ \
stw r0,24(r1); /* that we might have pushed */ \
stw r0,32(r1); /* from other context that runs */ \
stw r0,40(r1); /* the same code */ \
stw r0,48(r1); \
stw r0,56(r1); \
stw r0,64(r1); \
stw r0,72(r1); \
stw r0,80(r1); \
stw r0,88(r1); \
addi r1,r1,160; /* cleanup stack frame */
#define ENDIAN_SWAP(t0, t1, s0, s1) \
rotrwi t0,s0,8; /* swap endianness for 2 GPRs */ \
rotrwi t1,s1,8; \
rlwimi t0,s0,8,8,15; \
rlwimi t1,s1,8,8,15; \
rlwimi t0,s0,8,24,31; \
rlwimi t1,s1,8,24,31;
/*
 * GF128_MUL: multiply the 128 bit XTS tweak (d0..d3) by x. The shift
 * runs across all four words; the carry out of the top bit (tested via
 * the signed compare of d3 against -1) selects the 0x87 feedback
 * polynomial that is xored into the low word.
 */
#define GF128_MUL(d0, d1, d2, d3, t0) \
li t0,0x87; /* multiplication in GF128 */ \
cmpwi d3,-1; \
iselgt t0,0,t0; \
rlwimi d3,d2,0,0,0; /* propagate "carry" bits */ \
rotlwi d3,d3,1; \
rlwimi d2,d1,0,0,0; \
rotlwi d2,d2,1; \
rlwimi d1,d0,0,0,0; \
slwi d0,d0,1; /* shift left 128 bit */ \
rotlwi d1,d1,1; \
xor d0,d0,t0;
/*
 * START_KEY: load the first round key, xor it into the data block
 * (initial AddRoundKey) and prime CTR with the round count in rRR.
 */
#define START_KEY(d0, d1, d2, d3) \
lwz rW0,0(rKP); \
mtctr rRR; \
lwz rW1,4(rKP); \
lwz rW2,8(rKP); \
lwz rW3,12(rKP); \
xor rD0,d0,rW0; \
xor rD1,d1,rW1; \
xor rD2,d2,rW2; \
xor rD3,d3,rW3;
/*
 * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
 * u32 rounds)
 *
 * called from glue layer to encrypt a single 16 byte block
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 */
_GLOBAL(ppc_encrypt_aes)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
LOAD_DATA(rD0, 0)
LOAD_DATA(rD1, 4)
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_encrypt_block
/* xor with the final round key material left in rW0-rW3 */
xor rD0,rD0,rW0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
FINALIZE_CRYPT(0)
blr
/*
 * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
 * u32 rounds)
 *
 * called from glue layer to decrypt a single 16 byte block
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 */
_GLOBAL(ppc_decrypt_aes)
INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
LOAD_DATA(rD0, 0)
addi rT1,rT0,4096 /* rT1 -> PPC_AES_4K_DECTAB2 (4K after DECTAB) */
LOAD_DATA(rD1, 4)
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
/* xor with the final round key material left in rW0-rW3 */
xor rD0,rD0,rW0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
FINALIZE_CRYPT(0)
blr
/*
 * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
 * u32 rounds, u32 bytes);
 *
 * called from glue layer to encrypt multiple blocks via ECB
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 */
_GLOBAL(ppc_encrypt_ecb)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
ppc_encrypt_ecb_loop:
LOAD_DATA(rD0, 0)
mr rKP,rKS /* rewind key pointer for each block */
LOAD_DATA(rD1, 4)
subi rLN,rLN,16
LOAD_DATA(rD2, 8)
cmpwi rLN,15 /* CR set here, tested at loop bottom */
LOAD_DATA(rD3, 12)
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_encrypt_block
xor rD0,rD0,rW0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
NEXT_BLOCK
bt gt,ppc_encrypt_ecb_loop /* loop while > 15 bytes remain */
FINALIZE_CRYPT(0)
blr
/*
 * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
 * u32 rounds, u32 bytes);
 *
 * called from glue layer to decrypt multiple blocks via ECB
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 */
_GLOBAL(ppc_decrypt_ecb)
INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
addi rT1,rT0,4096 /* rT1 -> PPC_AES_4K_DECTAB2 (4K after DECTAB) */
ppc_decrypt_ecb_loop:
LOAD_DATA(rD0, 0)
mr rKP,rKS /* rewind key pointer for each block */
LOAD_DATA(rD1, 4)
subi rLN,rLN,16
LOAD_DATA(rD2, 8)
cmpwi rLN,15 /* CR set here, tested at loop bottom */
LOAD_DATA(rD3, 12)
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
xor rD0,rD0,rW0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
NEXT_BLOCK
bt gt,ppc_decrypt_ecb_loop /* loop while > 15 bytes remain */
FINALIZE_CRYPT(0)
blr
/*
 * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
 * u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to encrypt multiple blocks via CBC
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 */
_GLOBAL(ppc_encrypt_cbc)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
LOAD_IV(rI0, 0)
LOAD_IV(rI1, 4)
LOAD_IV(rI2, 8)
LOAD_IV(rI3, 12)
ppc_encrypt_cbc_loop:
LOAD_DATA(rD0, 0)
mr rKP,rKS /* rewind key pointer for each block */
LOAD_DATA(rD1, 4)
subi rLN,rLN,16
LOAD_DATA(rD2, 8)
cmpwi rLN,15
LOAD_DATA(rD3, 12)
xor rD0,rD0,rI0 /* CBC: chain previous ciphertext/IV in */
xor rD1,rD1,rI1
xor rD2,rD2,rI2
xor rD3,rD3,rI3
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_encrypt_block
xor rI0,rD0,rW0 /* ciphertext doubles as next IV */
SAVE_DATA(rI0, 0)
xor rI1,rD1,rW1
SAVE_DATA(rI1, 4)
xor rI2,rD2,rW2
SAVE_DATA(rI2, 8)
xor rI3,rD3,rW3
SAVE_DATA(rI3, 12)
NEXT_BLOCK
bt gt,ppc_encrypt_cbc_loop
START_IV
SAVE_IV(rI0, 0) /* hand the final IV back to the caller */
SAVE_IV(rI1, 4)
SAVE_IV(rI2, 8)
SAVE_IV(rI3, 12)
FINALIZE_CRYPT(4)
blr
/*
 * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
 * u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to decrypt multiple blocks via CBC
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 * The buffer is processed back to front so in-place decryption works:
 * the last ciphertext block is saved as the caller's next IV first,
 * and each plaintext block is chained with the preceding ciphertext.
 */
_GLOBAL(ppc_decrypt_cbc)
INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
li rT1,15
LOAD_IV(rI0, 0)
andc rLN,rLN,rT1 /* round length down to whole blocks */
LOAD_IV(rI1, 4)
subi rLN,rLN,16
LOAD_IV(rI2, 8)
add rSP,rSP,rLN /* reverse processing */
LOAD_IV(rI3, 12)
add rDP,rDP,rLN
LOAD_DATA(rD0, 0)
addi rT1,rT0,4096 /* rT1 -> PPC_AES_4K_DECTAB2 */
LOAD_DATA(rD1, 4)
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
START_IV
SAVE_IV(rD0, 0) /* last ciphertext block becomes next IV */
SAVE_IV(rD1, 4)
SAVE_IV(rD2, 8)
cmpwi rLN,16
SAVE_IV(rD3, 12)
bt lt,ppc_decrypt_cbc_end /* only one block: skip the loop */
ppc_decrypt_cbc_loop:
mr rKP,rKS
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
subi rLN,rLN,16
subi rSP,rSP,CBC_DEC
xor rW0,rD0,rW0
LOAD_DATA(rD0, 0) /* fetch preceding ciphertext block */
xor rW1,rD1,rW1
LOAD_DATA(rD1, 4)
xor rW2,rD2,rW2
LOAD_DATA(rD2, 8)
xor rW3,rD3,rW3
LOAD_DATA(rD3, 12)
xor rW0,rW0,rD0 /* chain with preceding ciphertext */
SAVE_DATA(rW0, 0)
xor rW1,rW1,rD1
SAVE_DATA(rW1, 4)
xor rW2,rW2,rD2
SAVE_DATA(rW2, 8)
xor rW3,rW3,rD3
SAVE_DATA(rW3, 12)
cmpwi rLN,15
subi rDP,rDP,CBC_DEC
bt gt,ppc_decrypt_cbc_loop
ppc_decrypt_cbc_end:
mr rKP,rKS
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
xor rW0,rW0,rD0
xor rW1,rW1,rD1
xor rW2,rW2,rD2
xor rW3,rW3,rD3
xor rW0,rW0,rI0 /* decrypt with initial IV */
SAVE_DATA(rW0, 0)
xor rW1,rW1,rI1
SAVE_DATA(rW1, 4)
xor rW2,rW2,rI2
SAVE_DATA(rW2, 8)
xor rW3,rW3,rI3
SAVE_DATA(rW3, 12)
FINALIZE_CRYPT(4)
blr
/*
 * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
 * u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to encrypt/decrypt multiple blocks
 * via CTR. Number of bytes does not need to be a multiple of
 * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 */
_GLOBAL(ppc_crypt_ctr)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
LOAD_IV(rI0, 0)
LOAD_IV(rI1, 4)
LOAD_IV(rI2, 8)
cmpwi rLN,16
LOAD_IV(rI3, 12)
START_IV
bt lt,ppc_crypt_ctr_partial /* less than one full block */
ppc_crypt_ctr_loop:
mr rKP,rKS
START_KEY(rI0, rI1, rI2, rI3) /* encrypt the counter block */
bl ppc_encrypt_block
xor rW0,rD0,rW0
xor rW1,rD1,rW1
xor rW2,rD2,rW2
xor rW3,rD3,rW3
LOAD_DATA(rD0, 0)
subi rLN,rLN,16
LOAD_DATA(rD1, 4)
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
xor rD0,rD0,rW0 /* xor keystream into the data */
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
addic rI3,rI3,1 /* increase counter */
addze rI2,rI2 /* 128 bit carry chain */
addze rI1,rI1
addze rI0,rI0
NEXT_BLOCK
cmpwi rLN,15
bt gt,ppc_crypt_ctr_loop
ppc_crypt_ctr_partial:
cmpwi rLN,0
bt eq,ppc_crypt_ctr_end
mr rKP,rKS
START_KEY(rI0, rI1, rI2, rI3)
bl ppc_encrypt_block
/* stash the keystream block in the IV buffer, then xor bytewise */
xor rW0,rD0,rW0
SAVE_IV(rW0, 0)
xor rW1,rD1,rW1
SAVE_IV(rW1, 4)
xor rW2,rD2,rW2
SAVE_IV(rW2, 8)
xor rW3,rD3,rW3
SAVE_IV(rW3, 12)
mtctr rLN
subi rIP,rIP,CTR_DEC
subi rSP,rSP,1
subi rDP,rDP,1
ppc_crypt_ctr_xorbyte:
lbzu rW4,1(rIP) /* bytewise xor for partial block */
lbzu rW5,1(rSP)
xor rW4,rW4,rW5
stbu rW4,1(rDP)
bdnz ppc_crypt_ctr_xorbyte
subf rIP,rLN,rIP /* rewind IV pointer */
addi rIP,rIP,1
addic rI3,rI3,1
addze rI2,rI2
addze rI1,rI1
addze rI0,rI0
ppc_crypt_ctr_end:
SAVE_IV(rI0, 0) /* hand the updated counter back */
SAVE_IV(rI1, 4)
SAVE_IV(rI2, 8)
SAVE_IV(rI3, 12)
FINALIZE_CRYPT(4)
blr
/*
 * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
 * u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
 *
 * called from glue layer to encrypt multiple blocks via XTS
 * If key_twk is given, the initial IV encryption will be
 * processed too. Round values are AES128 = 4, AES192 = 5,
 * AES256 = 6
 *
 */
_GLOBAL(ppc_encrypt_xts)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
LOAD_IV(rI0, 0)
LOAD_IV(rI1, 4)
LOAD_IV(rI2, 8)
cmpwi rKT,0
LOAD_IV(rI3, 12)
bt eq,ppc_encrypt_xts_notweak
mr rKP,rKT /* encrypt IV with the tweak key */
START_KEY(rI0, rI1, rI2, rI3)
bl ppc_encrypt_block
xor rI0,rD0,rW0
xor rI1,rD1,rW1
xor rI2,rD2,rW2
xor rI3,rD3,rW3
ppc_encrypt_xts_notweak:
ENDIAN_SWAP(rG0, rG1, rI0, rI1) /* keep tweak in GF128 byte order */
ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_encrypt_xts_loop:
LOAD_DATA(rD0, 0)
mr rKP,rKS
LOAD_DATA(rD1, 4)
subi rLN,rLN,16
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
xor rD0,rD0,rI0 /* pre-whiten with the tweak */
xor rD1,rD1,rI1
xor rD2,rD2,rI2
xor rD3,rD3,rI3
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_encrypt_block
xor rD0,rD0,rW0
xor rD1,rD1,rW1
xor rD2,rD2,rW2
xor rD3,rD3,rW3
xor rD0,rD0,rI0 /* post-whiten with the tweak */
SAVE_DATA(rD0, 0)
xor rD1,rD1,rI1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rI2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rI3
SAVE_DATA(rD3, 12)
GF128_MUL(rG0, rG1, rG2, rG3, rW0) /* advance tweak: multiply by x */
ENDIAN_SWAP(rI0, rI1, rG0, rG1)
ENDIAN_SWAP(rI2, rI3, rG2, rG3)
cmpwi rLN,0
NEXT_BLOCK
bt gt,ppc_encrypt_xts_loop
START_IV
SAVE_IV(rI0, 0) /* hand the next tweak back to the caller */
SAVE_IV(rI1, 4)
SAVE_IV(rI2, 8)
SAVE_IV(rI3, 12)
FINALIZE_CRYPT(8)
blr
/*
 * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
 * u32 rounds, u32 blocks, u8 *iv, u32 *key_twk);
 *
 * called from glue layer to decrypt multiple blocks via XTS
 * If key_twk is given, the initial IV encryption will be
 * processed too. Round values are AES128 = 4, AES192 = 5,
 * AES256 = 6
 *
 */
_GLOBAL(ppc_decrypt_xts)
INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
LOAD_IV(rI0, 0)
addi rT1,rT0,4096 /* rT1 -> PPC_AES_4K_DECTAB2 */
LOAD_IV(rI1, 4)
LOAD_IV(rI2, 8)
cmpwi rKT,0
LOAD_IV(rI3, 12)
bt eq,ppc_decrypt_xts_notweak
/* tweak is always ENcrypted: temporarily point rT0 back at ENCTAB */
subi rT0,rT0,4096
mr rKP,rKT
START_KEY(rI0, rI1, rI2, rI3)
bl ppc_encrypt_block
xor rI0,rD0,rW0
xor rI1,rD1,rW1
xor rI2,rD2,rW2
xor rI3,rD3,rW3
addi rT0,rT0,4096 /* restore rT0 -> DECTAB */
ppc_decrypt_xts_notweak:
ENDIAN_SWAP(rG0, rG1, rI0, rI1) /* keep tweak in GF128 byte order */
ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_decrypt_xts_loop:
LOAD_DATA(rD0, 0)
mr rKP,rKS
LOAD_DATA(rD1, 4)
subi rLN,rLN,16
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
xor rD0,rD0,rI0 /* pre-whiten with the tweak */
xor rD1,rD1,rI1
xor rD2,rD2,rI2
xor rD3,rD3,rI3
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
xor rD0,rD0,rW0
xor rD1,rD1,rW1
xor rD2,rD2,rW2
xor rD3,rD3,rW3
xor rD0,rD0,rI0 /* post-whiten with the tweak */
SAVE_DATA(rD0, 0)
xor rD1,rD1,rI1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rI2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rI3
SAVE_DATA(rD3, 12)
GF128_MUL(rG0, rG1, rG2, rG3, rW0) /* advance tweak: multiply by x */
ENDIAN_SWAP(rI0, rI1, rG0, rG1)
ENDIAN_SWAP(rI2, rI3, rG2, rG3)
cmpwi rLN,0
NEXT_BLOCK
bt gt,ppc_decrypt_xts_loop
START_IV
SAVE_IV(rI0, 0) /* hand the next tweak back to the caller */
SAVE_IV(rI1, 4)
SAVE_IV(rI2, 8)
SAVE_IV(rI3, 12)
FINALIZE_CRYPT(8)
blr

View File

@@ -0,0 +1,37 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Common registers for PPC AES implementation
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*/
#define rKS r0 /* copy of en-/decryption key pointer */
#define rDP r3 /* destination pointer */
#define rSP r4 /* source pointer */
#define rKP r5 /* pointer to en-/decryption key pointer */
#define rRR r6 /* en-/decryption rounds */
#define rLN r7 /* length of data to be processed */
#define rIP r8 /* pointer to IV (CBC/CTR/XTS modes) */
#define rKT r9 /* pointer to tweak key (XTS mode) */
#define rT0 r11 /* pointers to en-/decryption tables */
#define rT1 r10
/*
 * NOTE(review): rD0 aliases rKT (both r9). This looks safe because the
 * tweak key pointer is only read before block data is processed —
 * confirm before changing any register assignment below.
 */
#define rD0 r9 /* data */
#define rD1 r14
#define rD2 r12
#define rD3 r15
#define rW0 r16 /* working registers */
#define rW1 r17
#define rW2 r18
#define rW3 r19
#define rW4 r20
#define rW5 r21
#define rW6 r22
#define rW7 r23
#define rI0 r24 /* IV */
#define rI1 r25
#define rI2 r26
#define rI3 r27
#define rG0 r28 /* endian reversed tweak (XTS mode) */
#define rG1 r29
#define rG2 r30
#define rG3 r31

View File

@@ -0,0 +1,326 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* 4K AES tables for PPC AES implementation
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*/
/*
* These big endian AES encryption/decryption tables have been taken from
* crypto/aes_generic.c and are designed to be simply accessed by a combination
* of rlwimi/lwz instructions with a minimum of table registers (usually only
* one required). Thus they are aligned to 4K. The locality of rotated values
* is derived from the reduced offsets that are available in the SPE load
* instructions. E.g. evldw, evlwwsplat, ...
*
* For the safety-conscious it has to be noted that they might be vulnerable
* to cache timing attacks because of their size. Nevertheless in contrast to
* the generic tables they have been reduced from 16KB to 8KB + 256 bytes.
* This is a quite good tradeoff for low power devices (e.g. routers) without
* dedicated encryption hardware where we usually have no multiuser
* environment.
*
*/
/*
 * R(a,b,c,d) emits a table word followed by its three byte-rotated
 * variants, so rotated lookups need only a fixed offset into the row.
 */
#define R(a, b, c, d) \
0x##a##b##c##d, 0x##d##a##b##c, 0x##c##d##a##b, 0x##b##c##d##a
.data
.align 12 /* 4K aligned: one register can address a whole table */
.globl PPC_AES_4K_ENCTAB
PPC_AES_4K_ENCTAB:
/* encryption table, same as crypto_ft_tab in crypto/aes-generic.c */
.long R(c6, 63, 63, a5), R(f8, 7c, 7c, 84)
.long R(ee, 77, 77, 99), R(f6, 7b, 7b, 8d)
.long R(ff, f2, f2, 0d), R(d6, 6b, 6b, bd)
.long R(de, 6f, 6f, b1), R(91, c5, c5, 54)
.long R(60, 30, 30, 50), R(02, 01, 01, 03)
.long R(ce, 67, 67, a9), R(56, 2b, 2b, 7d)
.long R(e7, fe, fe, 19), R(b5, d7, d7, 62)
.long R(4d, ab, ab, e6), R(ec, 76, 76, 9a)
.long R(8f, ca, ca, 45), R(1f, 82, 82, 9d)
.long R(89, c9, c9, 40), R(fa, 7d, 7d, 87)
.long R(ef, fa, fa, 15), R(b2, 59, 59, eb)
.long R(8e, 47, 47, c9), R(fb, f0, f0, 0b)
.long R(41, ad, ad, ec), R(b3, d4, d4, 67)
.long R(5f, a2, a2, fd), R(45, af, af, ea)
.long R(23, 9c, 9c, bf), R(53, a4, a4, f7)
.long R(e4, 72, 72, 96), R(9b, c0, c0, 5b)
.long R(75, b7, b7, c2), R(e1, fd, fd, 1c)
.long R(3d, 93, 93, ae), R(4c, 26, 26, 6a)
.long R(6c, 36, 36, 5a), R(7e, 3f, 3f, 41)
.long R(f5, f7, f7, 02), R(83, cc, cc, 4f)
.long R(68, 34, 34, 5c), R(51, a5, a5, f4)
.long R(d1, e5, e5, 34), R(f9, f1, f1, 08)
.long R(e2, 71, 71, 93), R(ab, d8, d8, 73)
.long R(62, 31, 31, 53), R(2a, 15, 15, 3f)
.long R(08, 04, 04, 0c), R(95, c7, c7, 52)
.long R(46, 23, 23, 65), R(9d, c3, c3, 5e)
.long R(30, 18, 18, 28), R(37, 96, 96, a1)
.long R(0a, 05, 05, 0f), R(2f, 9a, 9a, b5)
.long R(0e, 07, 07, 09), R(24, 12, 12, 36)
.long R(1b, 80, 80, 9b), R(df, e2, e2, 3d)
.long R(cd, eb, eb, 26), R(4e, 27, 27, 69)
.long R(7f, b2, b2, cd), R(ea, 75, 75, 9f)
.long R(12, 09, 09, 1b), R(1d, 83, 83, 9e)
.long R(58, 2c, 2c, 74), R(34, 1a, 1a, 2e)
.long R(36, 1b, 1b, 2d), R(dc, 6e, 6e, b2)
.long R(b4, 5a, 5a, ee), R(5b, a0, a0, fb)
.long R(a4, 52, 52, f6), R(76, 3b, 3b, 4d)
.long R(b7, d6, d6, 61), R(7d, b3, b3, ce)
.long R(52, 29, 29, 7b), R(dd, e3, e3, 3e)
.long R(5e, 2f, 2f, 71), R(13, 84, 84, 97)
.long R(a6, 53, 53, f5), R(b9, d1, d1, 68)
.long R(00, 00, 00, 00), R(c1, ed, ed, 2c)
.long R(40, 20, 20, 60), R(e3, fc, fc, 1f)
.long R(79, b1, b1, c8), R(b6, 5b, 5b, ed)
.long R(d4, 6a, 6a, be), R(8d, cb, cb, 46)
.long R(67, be, be, d9), R(72, 39, 39, 4b)
.long R(94, 4a, 4a, de), R(98, 4c, 4c, d4)
.long R(b0, 58, 58, e8), R(85, cf, cf, 4a)
.long R(bb, d0, d0, 6b), R(c5, ef, ef, 2a)
.long R(4f, aa, aa, e5), R(ed, fb, fb, 16)
.long R(86, 43, 43, c5), R(9a, 4d, 4d, d7)
.long R(66, 33, 33, 55), R(11, 85, 85, 94)
.long R(8a, 45, 45, cf), R(e9, f9, f9, 10)
.long R(04, 02, 02, 06), R(fe, 7f, 7f, 81)
.long R(a0, 50, 50, f0), R(78, 3c, 3c, 44)
.long R(25, 9f, 9f, ba), R(4b, a8, a8, e3)
.long R(a2, 51, 51, f3), R(5d, a3, a3, fe)
.long R(80, 40, 40, c0), R(05, 8f, 8f, 8a)
.long R(3f, 92, 92, ad), R(21, 9d, 9d, bc)
.long R(70, 38, 38, 48), R(f1, f5, f5, 04)
.long R(63, bc, bc, df), R(77, b6, b6, c1)
.long R(af, da, da, 75), R(42, 21, 21, 63)
.long R(20, 10, 10, 30), R(e5, ff, ff, 1a)
.long R(fd, f3, f3, 0e), R(bf, d2, d2, 6d)
.long R(81, cd, cd, 4c), R(18, 0c, 0c, 14)
.long R(26, 13, 13, 35), R(c3, ec, ec, 2f)
.long R(be, 5f, 5f, e1), R(35, 97, 97, a2)
.long R(88, 44, 44, cc), R(2e, 17, 17, 39)
.long R(93, c4, c4, 57), R(55, a7, a7, f2)
.long R(fc, 7e, 7e, 82), R(7a, 3d, 3d, 47)
.long R(c8, 64, 64, ac), R(ba, 5d, 5d, e7)
.long R(32, 19, 19, 2b), R(e6, 73, 73, 95)
.long R(c0, 60, 60, a0), R(19, 81, 81, 98)
.long R(9e, 4f, 4f, d1), R(a3, dc, dc, 7f)
.long R(44, 22, 22, 66), R(54, 2a, 2a, 7e)
.long R(3b, 90, 90, ab), R(0b, 88, 88, 83)
.long R(8c, 46, 46, ca), R(c7, ee, ee, 29)
.long R(6b, b8, b8, d3), R(28, 14, 14, 3c)
.long R(a7, de, de, 79), R(bc, 5e, 5e, e2)
.long R(16, 0b, 0b, 1d), R(ad, db, db, 76)
.long R(db, e0, e0, 3b), R(64, 32, 32, 56)
.long R(74, 3a, 3a, 4e), R(14, 0a, 0a, 1e)
.long R(92, 49, 49, db), R(0c, 06, 06, 0a)
.long R(48, 24, 24, 6c), R(b8, 5c, 5c, e4)
.long R(9f, c2, c2, 5d), R(bd, d3, d3, 6e)
.long R(43, ac, ac, ef), R(c4, 62, 62, a6)
.long R(39, 91, 91, a8), R(31, 95, 95, a4)
.long R(d3, e4, e4, 37), R(f2, 79, 79, 8b)
.long R(d5, e7, e7, 32), R(8b, c8, c8, 43)
.long R(6e, 37, 37, 59), R(da, 6d, 6d, b7)
.long R(01, 8d, 8d, 8c), R(b1, d5, d5, 64)
.long R(9c, 4e, 4e, d2), R(49, a9, a9, e0)
.long R(d8, 6c, 6c, b4), R(ac, 56, 56, fa)
.long R(f3, f4, f4, 07), R(cf, ea, ea, 25)
.long R(ca, 65, 65, af), R(f4, 7a, 7a, 8e)
.long R(47, ae, ae, e9), R(10, 08, 08, 18)
.long R(6f, ba, ba, d5), R(f0, 78, 78, 88)
.long R(4a, 25, 25, 6f), R(5c, 2e, 2e, 72)
.long R(38, 1c, 1c, 24), R(57, a6, a6, f1)
.long R(73, b4, b4, c7), R(97, c6, c6, 51)
.long R(cb, e8, e8, 23), R(a1, dd, dd, 7c)
.long R(e8, 74, 74, 9c), R(3e, 1f, 1f, 21)
.long R(96, 4b, 4b, dd), R(61, bd, bd, dc)
.long R(0d, 8b, 8b, 86), R(0f, 8a, 8a, 85)
.long R(e0, 70, 70, 90), R(7c, 3e, 3e, 42)
.long R(71, b5, b5, c4), R(cc, 66, 66, aa)
.long R(90, 48, 48, d8), R(06, 03, 03, 05)
.long R(f7, f6, f6, 01), R(1c, 0e, 0e, 12)
.long R(c2, 61, 61, a3), R(6a, 35, 35, 5f)
.long R(ae, 57, 57, f9), R(69, b9, b9, d0)
.long R(17, 86, 86, 91), R(99, c1, c1, 58)
.long R(3a, 1d, 1d, 27), R(27, 9e, 9e, b9)
.long R(d9, e1, e1, 38), R(eb, f8, f8, 13)
.long R(2b, 98, 98, b3), R(22, 11, 11, 33)
.long R(d2, 69, 69, bb), R(a9, d9, d9, 70)
.long R(07, 8e, 8e, 89), R(33, 94, 94, a7)
.long R(2d, 9b, 9b, b6), R(3c, 1e, 1e, 22)
.long R(15, 87, 87, 92), R(c9, e9, e9, 20)
.long R(87, ce, ce, 49), R(aa, 55, 55, ff)
.long R(50, 28, 28, 78), R(a5, df, df, 7a)
.long R(03, 8c, 8c, 8f), R(59, a1, a1, f8)
.long R(09, 89, 89, 80), R(1a, 0d, 0d, 17)
.long R(65, bf, bf, da), R(d7, e6, e6, 31)
.long R(84, 42, 42, c6), R(d0, 68, 68, b8)
.long R(82, 41, 41, c3), R(29, 99, 99, b0)
.long R(5a, 2d, 2d, 77), R(1e, 0f, 0f, 11)
.long R(7b, b0, b0, cb), R(a8, 54, 54, fc)
.long R(6d, bb, bb, d6), R(2c, 16, 16, 3a)
/* starts exactly 4096 bytes after ENCTAB (code relies on rT0 +/- 4096) */
.globl PPC_AES_4K_DECTAB
PPC_AES_4K_DECTAB:
/* decryption table, same as crypto_it_tab in crypto/aes-generic.c */
.long R(51, f4, a7, 50), R(7e, 41, 65, 53)
.long R(1a, 17, a4, c3), R(3a, 27, 5e, 96)
.long R(3b, ab, 6b, cb), R(1f, 9d, 45, f1)
.long R(ac, fa, 58, ab), R(4b, e3, 03, 93)
.long R(20, 30, fa, 55), R(ad, 76, 6d, f6)
.long R(88, cc, 76, 91), R(f5, 02, 4c, 25)
.long R(4f, e5, d7, fc), R(c5, 2a, cb, d7)
.long R(26, 35, 44, 80), R(b5, 62, a3, 8f)
.long R(de, b1, 5a, 49), R(25, ba, 1b, 67)
.long R(45, ea, 0e, 98), R(5d, fe, c0, e1)
.long R(c3, 2f, 75, 02), R(81, 4c, f0, 12)
.long R(8d, 46, 97, a3), R(6b, d3, f9, c6)
.long R(03, 8f, 5f, e7), R(15, 92, 9c, 95)
.long R(bf, 6d, 7a, eb), R(95, 52, 59, da)
.long R(d4, be, 83, 2d), R(58, 74, 21, d3)
.long R(49, e0, 69, 29), R(8e, c9, c8, 44)
.long R(75, c2, 89, 6a), R(f4, 8e, 79, 78)
.long R(99, 58, 3e, 6b), R(27, b9, 71, dd)
.long R(be, e1, 4f, b6), R(f0, 88, ad, 17)
.long R(c9, 20, ac, 66), R(7d, ce, 3a, b4)
.long R(63, df, 4a, 18), R(e5, 1a, 31, 82)
.long R(97, 51, 33, 60), R(62, 53, 7f, 45)
.long R(b1, 64, 77, e0), R(bb, 6b, ae, 84)
.long R(fe, 81, a0, 1c), R(f9, 08, 2b, 94)
.long R(70, 48, 68, 58), R(8f, 45, fd, 19)
.long R(94, de, 6c, 87), R(52, 7b, f8, b7)
.long R(ab, 73, d3, 23), R(72, 4b, 02, e2)
.long R(e3, 1f, 8f, 57), R(66, 55, ab, 2a)
.long R(b2, eb, 28, 07), R(2f, b5, c2, 03)
.long R(86, c5, 7b, 9a), R(d3, 37, 08, a5)
.long R(30, 28, 87, f2), R(23, bf, a5, b2)
.long R(02, 03, 6a, ba), R(ed, 16, 82, 5c)
.long R(8a, cf, 1c, 2b), R(a7, 79, b4, 92)
.long R(f3, 07, f2, f0), R(4e, 69, e2, a1)
.long R(65, da, f4, cd), R(06, 05, be, d5)
.long R(d1, 34, 62, 1f), R(c4, a6, fe, 8a)
.long R(34, 2e, 53, 9d), R(a2, f3, 55, a0)
.long R(05, 8a, e1, 32), R(a4, f6, eb, 75)
.long R(0b, 83, ec, 39), R(40, 60, ef, aa)
.long R(5e, 71, 9f, 06), R(bd, 6e, 10, 51)
.long R(3e, 21, 8a, f9), R(96, dd, 06, 3d)
.long R(dd, 3e, 05, ae), R(4d, e6, bd, 46)
.long R(91, 54, 8d, b5), R(71, c4, 5d, 05)
.long R(04, 06, d4, 6f), R(60, 50, 15, ff)
.long R(19, 98, fb, 24), R(d6, bd, e9, 97)
.long R(89, 40, 43, cc), R(67, d9, 9e, 77)
.long R(b0, e8, 42, bd), R(07, 89, 8b, 88)
.long R(e7, 19, 5b, 38), R(79, c8, ee, db)
.long R(a1, 7c, 0a, 47), R(7c, 42, 0f, e9)
.long R(f8, 84, 1e, c9), R(00, 00, 00, 00)
.long R(09, 80, 86, 83), R(32, 2b, ed, 48)
.long R(1e, 11, 70, ac), R(6c, 5a, 72, 4e)
.long R(fd, 0e, ff, fb), R(0f, 85, 38, 56)
.long R(3d, ae, d5, 1e), R(36, 2d, 39, 27)
.long R(0a, 0f, d9, 64), R(68, 5c, a6, 21)
.long R(9b, 5b, 54, d1), R(24, 36, 2e, 3a)
.long R(0c, 0a, 67, b1), R(93, 57, e7, 0f)
.long R(b4, ee, 96, d2), R(1b, 9b, 91, 9e)
.long R(80, c0, c5, 4f), R(61, dc, 20, a2)
.long R(5a, 77, 4b, 69), R(1c, 12, 1a, 16)
.long R(e2, 93, ba, 0a), R(c0, a0, 2a, e5)
.long R(3c, 22, e0, 43), R(12, 1b, 17, 1d)
.long R(0e, 09, 0d, 0b), R(f2, 8b, c7, ad)
.long R(2d, b6, a8, b9), R(14, 1e, a9, c8)
.long R(57, f1, 19, 85), R(af, 75, 07, 4c)
.long R(ee, 99, dd, bb), R(a3, 7f, 60, fd)
.long R(f7, 01, 26, 9f), R(5c, 72, f5, bc)
.long R(44, 66, 3b, c5), R(5b, fb, 7e, 34)
.long R(8b, 43, 29, 76), R(cb, 23, c6, dc)
.long R(b6, ed, fc, 68), R(b8, e4, f1, 63)
.long R(d7, 31, dc, ca), R(42, 63, 85, 10)
.long R(13, 97, 22, 40), R(84, c6, 11, 20)
.long R(85, 4a, 24, 7d), R(d2, bb, 3d, f8)
.long R(ae, f9, 32, 11), R(c7, 29, a1, 6d)
.long R(1d, 9e, 2f, 4b), R(dc, b2, 30, f3)
.long R(0d, 86, 52, ec), R(77, c1, e3, d0)
.long R(2b, b3, 16, 6c), R(a9, 70, b9, 99)
.long R(11, 94, 48, fa), R(47, e9, 64, 22)
.long R(a8, fc, 8c, c4), R(a0, f0, 3f, 1a)
.long R(56, 7d, 2c, d8), R(22, 33, 90, ef)
.long R(87, 49, 4e, c7), R(d9, 38, d1, c1)
.long R(8c, ca, a2, fe), R(98, d4, 0b, 36)
.long R(a6, f5, 81, cf), R(a5, 7a, de, 28)
.long R(da, b7, 8e, 26), R(3f, ad, bf, a4)
.long R(2c, 3a, 9d, e4), R(50, 78, 92, 0d)
.long R(6a, 5f, cc, 9b), R(54, 7e, 46, 62)
.long R(f6, 8d, 13, c2), R(90, d8, b8, e8)
.long R(2e, 39, f7, 5e), R(82, c3, af, f5)
.long R(9f, 5d, 80, be), R(69, d0, 93, 7c)
.long R(6f, d5, 2d, a9), R(cf, 25, 12, b3)
.long R(c8, ac, 99, 3b), R(10, 18, 7d, a7)
.long R(e8, 9c, 63, 6e), R(db, 3b, bb, 7b)
.long R(cd, 26, 78, 09), R(6e, 59, 18, f4)
.long R(ec, 9a, b7, 01), R(83, 4f, 9a, a8)
.long R(e6, 95, 6e, 65), R(aa, ff, e6, 7e)
.long R(21, bc, cf, 08), R(ef, 15, e8, e6)
.long R(ba, e7, 9b, d9), R(4a, 6f, 36, ce)
.long R(ea, 9f, 09, d4), R(29, b0, 7c, d6)
.long R(31, a4, b2, af), R(2a, 3f, 23, 31)
.long R(c6, a5, 94, 30), R(35, a2, 66, c0)
.long R(74, 4e, bc, 37), R(fc, 82, ca, a6)
.long R(e0, 90, d0, b0), R(33, a7, d8, 15)
.long R(f1, 04, 98, 4a), R(41, ec, da, f7)
.long R(7f, cd, 50, 0e), R(17, 91, f6, 2f)
.long R(76, 4d, d6, 8d), R(43, ef, b0, 4d)
.long R(cc, aa, 4d, 54), R(e4, 96, 04, df)
.long R(9e, d1, b5, e3), R(4c, 6a, 88, 1b)
.long R(c1, 2c, 1f, b8), R(46, 65, 51, 7f)
.long R(9d, 5e, ea, 04), R(01, 8c, 35, 5d)
.long R(fa, 87, 74, 73), R(fb, 0b, 41, 2e)
.long R(b3, 67, 1d, 5a), R(92, db, d2, 52)
.long R(e9, 10, 56, 33), R(6d, d6, 47, 13)
.long R(9a, d7, 61, 8c), R(37, a1, 0c, 7a)
.long R(59, f8, 14, 8e), R(eb, 13, 3c, 89)
.long R(ce, a9, 27, ee), R(b7, 61, c9, 35)
.long R(e1, 1c, e5, ed), R(7a, 47, b1, 3c)
.long R(9c, d2, df, 59), R(55, f2, 73, 3f)
.long R(18, 14, ce, 79), R(73, c7, 37, bf)
.long R(53, f7, cd, ea), R(5f, fd, aa, 5b)
.long R(df, 3d, 6f, 14), R(78, 44, db, 86)
.long R(ca, af, f3, 81), R(b9, 68, c4, 3e)
.long R(38, 24, 34, 2c), R(c2, a3, 40, 5f)
.long R(16, 1d, c3, 72), R(bc, e2, 25, 0c)
.long R(28, 3c, 49, 8b), R(ff, 0d, 95, 41)
.long R(39, a8, 01, 71), R(08, 0c, b3, de)
.long R(d8, b4, e4, 9c), R(64, 56, c1, 90)
.long R(7b, cb, 84, 61), R(d5, 32, b6, 70)
.long R(48, 6c, 5c, 74), R(d0, b8, 57, 42)
/* 256 byte inverse S-box, addressed via rT1 = DECTAB + 4096 */
.globl PPC_AES_4K_DECTAB2
PPC_AES_4K_DECTAB2:
/* decryption table, same as crypto_il_tab in crypto/aes-generic.c */
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d

238
lib/crypto/powerpc/aes.h Normal file
View File

@@ -0,0 +1,238 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
* Copyright (C) 2015 International Business Machines Inc.
* Copyright 2026 Google LLC
*/
#include <asm/simd.h>
#include <asm/switch_to.h>
#include <linux/cpufeature.h>
#include <linux/jump_label.h>
#include <linux/preempt.h>
#include <linux/uaccess.h>
#ifdef CONFIG_SPE
EXPORT_SYMBOL_GPL(ppc_expand_key_128);
EXPORT_SYMBOL_GPL(ppc_expand_key_192);
EXPORT_SYMBOL_GPL(ppc_expand_key_256);
EXPORT_SYMBOL_GPL(ppc_generate_decrypt_key);
EXPORT_SYMBOL_GPL(ppc_encrypt_ecb);
EXPORT_SYMBOL_GPL(ppc_decrypt_ecb);
EXPORT_SYMBOL_GPL(ppc_encrypt_cbc);
EXPORT_SYMBOL_GPL(ppc_decrypt_cbc);
EXPORT_SYMBOL_GPL(ppc_crypt_ctr);
EXPORT_SYMBOL_GPL(ppc_encrypt_xts);
EXPORT_SYMBOL_GPL(ppc_decrypt_xts);
void ppc_encrypt_aes(u8 *out, const u8 *in, const u32 *key_enc, u32 rounds);
void ppc_decrypt_aes(u8 *out, const u8 *in, const u32 *key_dec, u32 rounds);
/*
 * Enter a region where SPE instructions may be used in kernel mode.
 * Preemption must remain disabled until the matching spe_end(), since the
 * kernel-mode SPE register state is not preserved across context switches.
 */
static void spe_begin(void)
{
	/* disable preemption and save users SPE registers if required */
	preempt_disable();
	enable_kernel_spe();
}
/* Leave the kernel-mode SPE region opened by spe_begin(). */
static void spe_end(void)
{
	disable_kernel_spe();
	/* reenable preemption */
	preempt_enable();
}
/*
 * Expand the raw AES key into the SPE-optimized encryption round key
 * schedule, and (when @inv_k is non-NULL) derive the matching decryption
 * schedule from the just-expanded encryption keys.
 */
static void aes_preparekey_arch(union aes_enckey_arch *k,
				union aes_invkey_arch *inv_k,
				const u8 *in_key, int key_len, int nrounds)
{
	switch (key_len) {
	case AES_KEYSIZE_128:
		ppc_expand_key_128(k->spe_enc_key, in_key);
		break;
	case AES_KEYSIZE_192:
		ppc_expand_key_192(k->spe_enc_key, in_key);
		break;
	default:
		ppc_expand_key_256(k->spe_enc_key, in_key);
		break;
	}
	if (inv_k)
		ppc_generate_decrypt_key(inv_k->spe_dec_key, k->spe_enc_key,
					 key_len);
}
/*
 * Encrypt one 16-byte block using the SPE assembly.
 *
 * NOTE(review): the round count is passed as nrounds / 2 - 1, mapping the
 * standard 10/12/14 rounds to 4/5/6 — presumably the SPE assembly's own
 * round-count convention (matches the historical aes-spe-glue code); the
 * assembly itself is not visible here to confirm.
 */
static void aes_encrypt_arch(const struct aes_enckey *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	spe_begin();
	ppc_encrypt_aes(out, in, key->k.spe_enc_key, key->nrounds / 2 - 1);
	spe_end();
}
/*
 * Decrypt one 16-byte block using the SPE assembly, with the same
 * nrounds / 2 - 1 round-count convention as aes_encrypt_arch().
 */
static void aes_decrypt_arch(const struct aes_key *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	spe_begin();
	ppc_decrypt_aes(out, in, key->inv_k.spe_dec_key, key->nrounds / 2 - 1);
	spe_end();
}
#else /* CONFIG_SPE */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_vec_crypto);
EXPORT_SYMBOL_GPL(aes_p8_set_encrypt_key);
EXPORT_SYMBOL_GPL(aes_p8_set_decrypt_key);
EXPORT_SYMBOL_GPL(aes_p8_encrypt);
EXPORT_SYMBOL_GPL(aes_p8_decrypt);
EXPORT_SYMBOL_GPL(aes_p8_cbc_encrypt);
EXPORT_SYMBOL_GPL(aes_p8_ctr32_encrypt_blocks);
EXPORT_SYMBOL_GPL(aes_p8_xts_encrypt);
EXPORT_SYMBOL_GPL(aes_p8_xts_decrypt);
/*
 * A key prepared by the VSX assembly has a nonzero round count; the generic
 * fallback path marks its keys with nrounds == 0 (see aes_preparekey_arch()).
 */
static inline bool is_vsx_format(const struct p8_aes_key *key)
{
	return key->nrounds != 0;
}
/*
* Convert a round key from VSX to generic format by reflecting the 16 bytes,
* and (if apply_inv_mix=true) applying InvMixColumn to each column.
*
* It would be nice if the VSX and generic key formats would be compatible. But
* that's very difficult to do, with the assembly code having been borrowed from
* OpenSSL and also targeted to POWER8 rather than POWER9.
*
* Fortunately, this conversion should only be needed in extremely rare cases,
* possibly not at all in practice. It's just included for full correctness.
*/
/*
 * Convert one round key from the VSX format to the generic format: byte-swap
 * each word, optionally apply InvMixColumns, and reverse the word order.
 * All of @in is read before @out is written, so @out may alias @in.
 */
static void rndkey_from_vsx(u32 out[4], const u32 in[4], bool apply_inv_mix)
{
	u32 w[4];

	for (int i = 0; i < 4; i++) {
		w[i] = swab32(in[i]);
		if (apply_inv_mix)
			w[i] = inv_mix_columns(w[i]);
	}
	out[0] = w[3];
	out[1] = w[2];
	out[2] = w[1];
	out[3] = w[0];
}
/*
 * Prepare the round keys, using the VSX assembly (and its key format) when
 * the CPU supports it and vector registers are currently usable; otherwise
 * fall back to the generic expansion, marking the key as generic-format by
 * setting nrounds to 0 (see is_vsx_format()).
 */
static void aes_preparekey_arch(union aes_enckey_arch *k,
				union aes_invkey_arch *inv_k,
				const u8 *in_key, int key_len, int nrounds)
{
	const int keybits = 8 * key_len;
	int ret;

	if (static_branch_likely(&have_vec_crypto) && likely(may_use_simd())) {
		/* VSX use requires preemption and page faults to be disabled. */
		preempt_disable();
		pagefault_disable();
		enable_kernel_vsx();
		ret = aes_p8_set_encrypt_key(in_key, keybits, &k->p8);
		/*
		 * aes_p8_set_encrypt_key() should never fail here, since the
		 * key length was already validated.
		 */
		WARN_ON_ONCE(ret);
		if (inv_k) {
			ret = aes_p8_set_decrypt_key(in_key, keybits,
						     &inv_k->p8);
			/* ... and likewise for aes_p8_set_decrypt_key(). */
			WARN_ON_ONCE(ret);
		}
		disable_kernel_vsx();
		pagefault_enable();
		preempt_enable();
	} else {
		aes_expandkey_generic(k->rndkeys,
				      inv_k ? inv_k->inv_rndkeys : NULL,
				      in_key, key_len);
		/* Mark the key as using the generic format. */
		k->p8.nrounds = 0;
		if (inv_k)
			inv_k->p8.nrounds = 0;
	}
}
/*
 * Encrypt one block.  Three cases: VSX key with VSX currently usable (fast
 * path), VSX key but VSX not usable (convert the schedule on the fly), or
 * generic-format key (plain generic encryption).
 */
static void aes_encrypt_arch(const struct aes_enckey *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	if (static_branch_likely(&have_vec_crypto) &&
	    likely(is_vsx_format(&key->k.p8) && may_use_simd())) {
		preempt_disable();
		pagefault_disable();
		enable_kernel_vsx();
		aes_p8_encrypt(in, out, &key->k.p8);
		disable_kernel_vsx();
		pagefault_enable();
		preempt_enable();
	} else if (unlikely(is_vsx_format(&key->k.p8))) {
		/*
		 * This handles (the hopefully extremely rare) case where a key
		 * was prepared using the VSX optimized format, then encryption
		 * is done in a context that cannot use VSX instructions.
		 */
		u32 rndkeys[AES_MAX_KEYLENGTH_U32];

		/* Convert all nrounds + 1 round keys to the generic format. */
		for (int i = 0; i < 4 * (key->nrounds + 1); i += 4)
			rndkey_from_vsx(&rndkeys[i],
					&key->k.p8.rndkeys[i], false);
		aes_encrypt_generic(rndkeys, key->nrounds, out, in);
	} else {
		aes_encrypt_generic(key->k.rndkeys, key->nrounds, out, in);
	}
}
/*
 * Decrypt one block; same three cases as aes_encrypt_arch().  For the
 * on-the-fly conversion, the first and last round keys are converted without
 * InvMixColumns and all middle ones with it, the standard construction of
 * the equivalent-inverse-cipher key schedule.
 */
static void aes_decrypt_arch(const struct aes_key *key, u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	if (static_branch_likely(&have_vec_crypto) &&
	    likely(is_vsx_format(&key->inv_k.p8) && may_use_simd())) {
		preempt_disable();
		pagefault_disable();
		enable_kernel_vsx();
		aes_p8_decrypt(in, out, &key->inv_k.p8);
		disable_kernel_vsx();
		pagefault_enable();
		preempt_enable();
	} else if (unlikely(is_vsx_format(&key->inv_k.p8))) {
		/*
		 * This handles (the hopefully extremely rare) case where a key
		 * was prepared using the VSX optimized format, then decryption
		 * is done in a context that cannot use VSX instructions.
		 */
		u32 inv_rndkeys[AES_MAX_KEYLENGTH_U32];
		int i;

		rndkey_from_vsx(&inv_rndkeys[0],
				&key->inv_k.p8.rndkeys[0], false);
		for (i = 4; i < 4 * key->nrounds; i += 4) {
			rndkey_from_vsx(&inv_rndkeys[i],
					&key->inv_k.p8.rndkeys[i], true);
		}
		rndkey_from_vsx(&inv_rndkeys[i],
				&key->inv_k.p8.rndkeys[i], false);
		aes_decrypt_generic(inv_rndkeys, key->nrounds, out, in);
	} else {
		aes_decrypt_generic(key->inv_k.inv_rndkeys, key->nrounds,
				    out, in);
	}
}
#define aes_mod_init_arch aes_mod_init_arch
/*
 * Enable the VSX code only when the CPU implements both ISA 2.07 and the
 * vector crypto facility advertised via the user feature bits.
 */
static void aes_mod_init_arch(void)
{
	if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
	    (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO))
		static_branch_enable(&have_vec_crypto);
}
#endif /* !CONFIG_SPE */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,84 @@
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector AES block cipher extension ('Zvkned')
#include <linux/linkage.h>
.text
.option arch, +zvkned
#include "../../arch/riscv/crypto/aes-macros.S"
#define RNDKEYS a0
#define KEY_LEN a1
#define OUTP a2
#define INP a3
// Load one 16-byte block from INP, en/decrypt it with the round keys that
// aes_begin already loaded into vector registers, and store it to OUTP.
.macro	__aes_crypt_zvkned	enc, keybits
	vle32.v		v16, (INP)
	aes_crypt	v16, \enc, \keybits
	vse32.v		v16, (OUTP)
	ret
.endm

// Dispatch on the key length: aes_begin branches to 128f or 192f for those
// key sizes; fall-through handles AES-256.  Each case returns directly.
.macro	aes_crypt_zvkned	enc
	aes_begin	RNDKEYS, 128f, 192f, KEY_LEN
	__aes_crypt_zvkned	\enc, 256
128:
	__aes_crypt_zvkned	\enc, 128
192:
	__aes_crypt_zvkned	\enc, 192
.endm

// void aes_encrypt_zvkned(const u32 rndkeys[], int key_len,
//			   u8 out[AES_BLOCK_SIZE], const u8 in[AES_BLOCK_SIZE]);
SYM_FUNC_START(aes_encrypt_zvkned)
	aes_crypt_zvkned	1
SYM_FUNC_END(aes_encrypt_zvkned)

// void aes_decrypt_zvkned(const u32 rndkeys[], int key_len,
//			   u8 out[AES_BLOCK_SIZE], const u8 in[AES_BLOCK_SIZE]);
SYM_FUNC_START(aes_decrypt_zvkned)
	aes_crypt_zvkned	0
SYM_FUNC_END(aes_decrypt_zvkned)

63
lib/crypto/riscv/aes.h Normal file
View File

@@ -0,0 +1,63 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2023 VRULL GmbH
* Copyright (C) 2023 SiFive, Inc.
* Copyright 2024 Google LLC
*/
#include <asm/simd.h>
#include <asm/vector.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_zvkned);
void aes_encrypt_zvkned(const u32 rndkeys[], int key_len,
u8 out[AES_BLOCK_SIZE], const u8 in[AES_BLOCK_SIZE]);
void aes_decrypt_zvkned(const u32 rndkeys[], int key_len,
u8 out[AES_BLOCK_SIZE], const u8 in[AES_BLOCK_SIZE]);
static void aes_preparekey_arch(union aes_enckey_arch *k,
union aes_invkey_arch *inv_k,
const u8 *in_key, int key_len, int nrounds)
{
aes_expandkey_generic(k->rndkeys, inv_k ? inv_k->inv_rndkeys : NULL,
in_key, key_len);
}
/*
 * Encrypt one block with the Zvkned assembly when the vector unit is usable
 * in this context; otherwise use the generic C implementation.
 */
static void aes_encrypt_arch(const struct aes_enckey *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	if (static_branch_likely(&have_zvkned) && likely(may_use_simd())) {
		kernel_vector_begin();
		aes_encrypt_zvkned(key->k.rndkeys, key->len, out, in);
		kernel_vector_end();
	} else {
		aes_encrypt_generic(key->k.rndkeys, key->nrounds, out, in);
	}
}
/* Decrypt one block; see aes_encrypt_arch() for the dispatch logic. */
static void aes_decrypt_arch(const struct aes_key *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	/*
	 * Note that the Zvkned code uses the standard round keys, while the
	 * fallback uses the inverse round keys.  Thus both must be present.
	 */
	if (static_branch_likely(&have_zvkned) && likely(may_use_simd())) {
		kernel_vector_begin();
		aes_decrypt_zvkned(key->k.rndkeys, key->len, out, in);
		kernel_vector_end();
	} else {
		aes_decrypt_generic(key->inv_k.inv_rndkeys, key->nrounds,
				    out, in);
	}
}
#define aes_mod_init_arch aes_mod_init_arch
/*
 * The Zvkned code requires the extension itself plus VLEN >= 128, which is
 * what the assembly was written for.
 */
static void aes_mod_init_arch(void)
{
	if (riscv_isa_extension_available(NULL, ZVKNED) &&
	    riscv_vector_vlen() >= 128)
		static_branch_enable(&have_zvkned);
}

106
lib/crypto/s390/aes.h Normal file
View File

@@ -0,0 +1,106 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* AES optimized using the CP Assist for Cryptographic Functions (CPACF)
*
* Copyright 2026 Google LLC
*/
#include <asm/cpacf.h>
#include <linux/cpufeature.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_cpacf_aes128);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_cpacf_aes192);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_cpacf_aes256);
/*
* When the CPU supports CPACF AES for the requested key length, we need only
* save a copy of the raw AES key, as that's what the CPACF instructions need.
*
* When unsupported, fall back to the generic key expansion and en/decryption.
*/
static void aes_preparekey_arch(union aes_enckey_arch *k,
union aes_invkey_arch *inv_k,
const u8 *in_key, int key_len, int nrounds)
{
if (key_len == AES_KEYSIZE_128) {
if (static_branch_likely(&have_cpacf_aes128)) {
memcpy(k->raw_key, in_key, AES_KEYSIZE_128);
return;
}
} else if (key_len == AES_KEYSIZE_192) {
if (static_branch_likely(&have_cpacf_aes192)) {
memcpy(k->raw_key, in_key, AES_KEYSIZE_192);
return;
}
} else {
if (static_branch_likely(&have_cpacf_aes256)) {
memcpy(k->raw_key, in_key, AES_KEYSIZE_256);
return;
}
}
aes_expandkey_generic(k->rndkeys, inv_k ? inv_k->inv_rndkeys : NULL,
in_key, key_len);
}
/*
 * Try to en/decrypt one block with the CPACF KM instruction.  @decrypt is
 * either 0 or CPACF_DECRYPT and is OR-ed into the KM function code.
 * Returns false when CPACF doesn't support this key length, in which case
 * the caller must use the generic fallback.
 */
static inline bool aes_crypt_s390(const struct aes_enckey *key,
				  u8 out[AES_BLOCK_SIZE],
				  const u8 in[AES_BLOCK_SIZE], int decrypt)
{
	if (key->len == AES_KEYSIZE_128) {
		if (static_branch_likely(&have_cpacf_aes128)) {
			cpacf_km(CPACF_KM_AES_128 | decrypt,
				 (void *)key->k.raw_key, out, in,
				 AES_BLOCK_SIZE);
			return true;
		}
	} else if (key->len == AES_KEYSIZE_192) {
		if (static_branch_likely(&have_cpacf_aes192)) {
			cpacf_km(CPACF_KM_AES_192 | decrypt,
				 (void *)key->k.raw_key, out, in,
				 AES_BLOCK_SIZE);
			return true;
		}
	} else {
		if (static_branch_likely(&have_cpacf_aes256)) {
			cpacf_km(CPACF_KM_AES_256 | decrypt,
				 (void *)key->k.raw_key, out, in,
				 AES_BLOCK_SIZE);
			return true;
		}
	}
	return false;
}
/* Encrypt one block via CPACF, or the generic code if CPACF is unavailable. */
static void aes_encrypt_arch(const struct aes_enckey *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	if (likely(aes_crypt_s390(key, out, in, 0)))
		return;
	aes_encrypt_generic(key->k.rndkeys, key->nrounds, out, in);
}
/*
 * Decrypt one block via CPACF, or the generic code if CPACF is unavailable.
 *
 * NOTE(review): the cast assumes struct aes_key begins with a layout
 * compatible with struct aes_enckey (len, nrounds, k) — confirm against the
 * struct definitions in the common header.
 */
static void aes_decrypt_arch(const struct aes_key *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	if (likely(aes_crypt_s390((const struct aes_enckey *)key, out, in,
				  CPACF_DECRYPT)))
		return;
	aes_decrypt_generic(key->inv_k.inv_rndkeys, key->nrounds, out, in);
}
#define aes_mod_init_arch aes_mod_init_arch
/*
 * Query the CPACF KM facility once at init and enable a static key per
 * supported AES key length.
 */
static void aes_mod_init_arch(void)
{
	if (cpu_have_feature(S390_CPU_FEATURE_MSA)) {
		cpacf_mask_t km_functions;

		cpacf_query(CPACF_KM, &km_functions);
		if (cpacf_test_func(&km_functions, CPACF_KM_AES_128))
			static_branch_enable(&have_cpacf_aes128);
		if (cpacf_test_func(&km_functions, CPACF_KM_AES_192))
			static_branch_enable(&have_cpacf_aes192);
		if (cpacf_test_func(&km_functions, CPACF_KM_AES_256))
			static_branch_enable(&have_cpacf_aes256);
	}
}

149
lib/crypto/sparc/aes.h Normal file
View File

@@ -0,0 +1,149 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* AES accelerated using the sparc64 aes opcodes
*
* Copyright (C) 2008, Intel Corp.
* Copyright (c) 2010, Intel Corporation.
* Copyright 2026 Google LLC
*/
#include <asm/fpumacro.h>
#include <asm/opcodes.h>
#include <asm/pstate.h>
#include <asm/elf.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_aes_opcodes);
EXPORT_SYMBOL_GPL(aes_sparc64_key_expand);
EXPORT_SYMBOL_GPL(aes_sparc64_load_encrypt_keys_128);
EXPORT_SYMBOL_GPL(aes_sparc64_load_encrypt_keys_192);
EXPORT_SYMBOL_GPL(aes_sparc64_load_encrypt_keys_256);
EXPORT_SYMBOL_GPL(aes_sparc64_load_decrypt_keys_128);
EXPORT_SYMBOL_GPL(aes_sparc64_load_decrypt_keys_192);
EXPORT_SYMBOL_GPL(aes_sparc64_load_decrypt_keys_256);
EXPORT_SYMBOL_GPL(aes_sparc64_ecb_encrypt_128);
EXPORT_SYMBOL_GPL(aes_sparc64_ecb_encrypt_192);
EXPORT_SYMBOL_GPL(aes_sparc64_ecb_encrypt_256);
EXPORT_SYMBOL_GPL(aes_sparc64_ecb_decrypt_128);
EXPORT_SYMBOL_GPL(aes_sparc64_ecb_decrypt_192);
EXPORT_SYMBOL_GPL(aes_sparc64_ecb_decrypt_256);
EXPORT_SYMBOL_GPL(aes_sparc64_cbc_encrypt_128);
EXPORT_SYMBOL_GPL(aes_sparc64_cbc_encrypt_192);
EXPORT_SYMBOL_GPL(aes_sparc64_cbc_encrypt_256);
EXPORT_SYMBOL_GPL(aes_sparc64_cbc_decrypt_128);
EXPORT_SYMBOL_GPL(aes_sparc64_cbc_decrypt_192);
EXPORT_SYMBOL_GPL(aes_sparc64_cbc_decrypt_256);
EXPORT_SYMBOL_GPL(aes_sparc64_ctr_crypt_128);
EXPORT_SYMBOL_GPL(aes_sparc64_ctr_crypt_192);
EXPORT_SYMBOL_GPL(aes_sparc64_ctr_crypt_256);
void aes_sparc64_encrypt_128(const u64 *key, const u32 *input, u32 *output);
void aes_sparc64_encrypt_192(const u64 *key, const u32 *input, u32 *output);
void aes_sparc64_encrypt_256(const u64 *key, const u32 *input, u32 *output);
void aes_sparc64_decrypt_128(const u64 *key, const u32 *input, u32 *output);
void aes_sparc64_decrypt_192(const u64 *key, const u32 *input, u32 *output);
void aes_sparc64_decrypt_256(const u64 *key, const u32 *input, u32 *output);
/*
 * Expand the key with the sparc64 AES opcodes when available.  The assembly
 * requires 4-byte-aligned input, so bounce through a stack buffer (zeroized
 * afterwards, since it holds key material) when the caller's key is
 * misaligned.
 */
static void aes_preparekey_arch(union aes_enckey_arch *k,
				union aes_invkey_arch *inv_k,
				const u8 *in_key, int key_len, int nrounds)
{
	if (static_branch_likely(&have_aes_opcodes)) {
		u32 aligned_key[AES_MAX_KEY_SIZE / 4];

		if (IS_ALIGNED((uintptr_t)in_key, 4)) {
			aes_sparc64_key_expand((const u32 *)in_key,
					       k->sparc_rndkeys, key_len);
		} else {
			memcpy(aligned_key, in_key, key_len);
			aes_sparc64_key_expand(aligned_key,
					       k->sparc_rndkeys, key_len);
			memzero_explicit(aligned_key, key_len);
		}
		/*
		 * Note that nothing needs to be written to inv_k (if it's
		 * non-NULL) here, since the SPARC64 assembly code uses
		 * k->sparc_rndkeys for both encryption and decryption.
		 */
	} else {
		aes_expandkey_generic(k->rndkeys,
				      inv_k ? inv_k->inv_rndkeys : NULL,
				      in_key, key_len);
	}
}
/* Dispatch to the key-length-specific sparc64 encryption routine. */
static void aes_sparc64_encrypt(const struct aes_enckey *key,
				const u32 *input, u32 *output)
{
	switch (key->len) {
	case AES_KEYSIZE_128:
		aes_sparc64_encrypt_128(key->k.sparc_rndkeys, input, output);
		break;
	case AES_KEYSIZE_192:
		aes_sparc64_encrypt_192(key->k.sparc_rndkeys, input, output);
		break;
	default:
		aes_sparc64_encrypt_256(key->k.sparc_rndkeys, input, output);
		break;
	}
}
/*
 * Encrypt one block.  The sparc64 assembly requires 4-byte-aligned in/out
 * pointers, so misaligned callers go through a stack bounce buffer.
 */
static void aes_encrypt_arch(const struct aes_enckey *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	u32 bounce_buf[AES_BLOCK_SIZE / 4];

	if (static_branch_likely(&have_aes_opcodes)) {
		if (IS_ALIGNED((uintptr_t)in | (uintptr_t)out, 4)) {
			aes_sparc64_encrypt(key, (const u32 *)in, (u32 *)out);
		} else {
			memcpy(bounce_buf, in, AES_BLOCK_SIZE);
			aes_sparc64_encrypt(key, bounce_buf, bounce_buf);
			memcpy(out, bounce_buf, AES_BLOCK_SIZE);
		}
	} else {
		aes_encrypt_generic(key->k.rndkeys, key->nrounds, out, in);
	}
}
/* Dispatch to the key-length-specific sparc64 decryption routine. */
static void aes_sparc64_decrypt(const struct aes_key *key,
				const u32 *input, u32 *output)
{
	switch (key->len) {
	case AES_KEYSIZE_128:
		aes_sparc64_decrypt_128(key->k.sparc_rndkeys, input, output);
		break;
	case AES_KEYSIZE_192:
		aes_sparc64_decrypt_192(key->k.sparc_rndkeys, input, output);
		break;
	default:
		aes_sparc64_decrypt_256(key->k.sparc_rndkeys, input, output);
		break;
	}
}
/*
 * Decrypt one block; same alignment handling as aes_encrypt_arch().
 */
static void aes_decrypt_arch(const struct aes_key *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	u32 bounce_buf[AES_BLOCK_SIZE / 4];

	if (static_branch_likely(&have_aes_opcodes)) {
		if (IS_ALIGNED((uintptr_t)in | (uintptr_t)out, 4)) {
			aes_sparc64_decrypt(key, (const u32 *)in, (u32 *)out);
		} else {
			memcpy(bounce_buf, in, AES_BLOCK_SIZE);
			aes_sparc64_decrypt(key, bounce_buf, bounce_buf);
			memcpy(out, bounce_buf, AES_BLOCK_SIZE);
		}
	} else {
		aes_decrypt_generic(key->inv_k.inv_rndkeys, key->nrounds,
				    out, in);
	}
}
#define aes_mod_init_arch aes_mod_init_arch
/*
 * The AES opcodes require both the HWCAP_SPARC_CRYPTO capability and the
 * AES bit in the crypto feature register (%asr26).
 */
static void aes_mod_init_arch(void)
{
	unsigned long cfr;

	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
		return;
	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
	if (!(cfr & CFR_AES))
		return;
	static_branch_enable(&have_aes_opcodes);
}

1543
lib/crypto/sparc/aes_asm.S Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -38,6 +38,23 @@ config CRYPTO_LIB_MD5_KUNIT_TEST
KUnit tests for the MD5 cryptographic hash function and its
corresponding HMAC.
# Tests (and optional benchmark) for the ML-DSA signature verification code.
config CRYPTO_LIB_MLDSA_KUNIT_TEST
	tristate "KUnit tests for ML-DSA" if !KUNIT_ALL_TESTS
	depends on KUNIT
	default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS
	select CRYPTO_LIB_BENCHMARK_VISIBLE
	select CRYPTO_LIB_MLDSA
	help
	  KUnit tests for the ML-DSA digital signature algorithm.
# Tests for the NH hash function used by Adiantum.
config CRYPTO_LIB_NH_KUNIT_TEST
	tristate "KUnit tests for NH" if !KUNIT_ALL_TESTS
	depends on KUNIT
	default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS
	select CRYPTO_LIB_NH
	help
	  KUnit tests for the NH almost-universal hash function.
config CRYPTO_LIB_POLY1305_KUNIT_TEST
tristate "KUnit tests for Poly1305" if !KUNIT_ALL_TESTS
depends on KUNIT

View File

@@ -4,6 +4,8 @@ obj-$(CONFIG_CRYPTO_LIB_BLAKE2B_KUNIT_TEST) += blake2b_kunit.o
# One KUnit test module per library algorithm, gated by its own Kconfig option.
obj-$(CONFIG_CRYPTO_LIB_BLAKE2S_KUNIT_TEST) += blake2s_kunit.o
obj-$(CONFIG_CRYPTO_LIB_CURVE25519_KUNIT_TEST) += curve25519_kunit.o
obj-$(CONFIG_CRYPTO_LIB_MD5_KUNIT_TEST) += md5_kunit.o
obj-$(CONFIG_CRYPTO_LIB_MLDSA_KUNIT_TEST) += mldsa_kunit.o
obj-$(CONFIG_CRYPTO_LIB_NH_KUNIT_TEST) += nh_kunit.o
obj-$(CONFIG_CRYPTO_LIB_POLY1305_KUNIT_TEST) += poly1305_kunit.o
obj-$(CONFIG_CRYPTO_LIB_POLYVAL_KUNIT_TEST) += polyval_kunit.o
obj-$(CONFIG_CRYPTO_LIB_SHA1_KUNIT_TEST) += sha1_kunit.o

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,438 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* KUnit tests and benchmark for ML-DSA
*
* Copyright 2025 Google LLC
*/
#include <crypto/mldsa.h>
#include <kunit/test.h>
#include <linux/random.h>
#include <linux/unaligned.h>
#define Q 8380417 /* The prime q = 2^23 - 2^13 + 1 */
/* ML-DSA parameters that the tests use */
static const struct {
int sig_len;
int pk_len;
int k;
int lambda;
int gamma1;
int beta;
int omega;
} params[] = {
[MLDSA44] = {
.sig_len = MLDSA44_SIGNATURE_SIZE,
.pk_len = MLDSA44_PUBLIC_KEY_SIZE,
.k = 4,
.lambda = 128,
.gamma1 = 1 << 17,
.beta = 78,
.omega = 80,
},
[MLDSA65] = {
.sig_len = MLDSA65_SIGNATURE_SIZE,
.pk_len = MLDSA65_PUBLIC_KEY_SIZE,
.k = 6,
.lambda = 192,
.gamma1 = 1 << 19,
.beta = 196,
.omega = 55,
},
[MLDSA87] = {
.sig_len = MLDSA87_SIGNATURE_SIZE,
.pk_len = MLDSA87_PUBLIC_KEY_SIZE,
.k = 8,
.lambda = 256,
.gamma1 = 1 << 19,
.beta = 120,
.omega = 75,
},
};
#include "mldsa-testvecs.h"
/* Verify the test vector's (unmodified) signature and assert it is valid. */
static void do_mldsa_and_assert_success(struct kunit *test,
					const struct mldsa_testvector *tv)
{
	int err = mldsa_verify(tv->alg, tv->sig, tv->sig_len, tv->msg,
			       tv->msg_len, tv->pk, tv->pk_len);

	KUNIT_ASSERT_EQ(test, err, 0);
}
/*
 * Duplicate @len bytes of @src into a test-managed (auto-freed) buffer,
 * aborting the test on allocation failure.
 */
static u8 *kunit_kmemdup_or_fail(struct kunit *test, const u8 *src, size_t len)
{
	u8 *dst = kunit_kmalloc(test, len, GFP_KERNEL);

	KUNIT_ASSERT_NOT_NULL(test, dst);
	return memcpy(dst, src, len);
}
/*
* Test that changing coefficients in a valid signature's z vector results in
* the following behavior from mldsa_verify():
*
* * -EBADMSG if a coefficient is changed to have an out-of-range value, i.e.
* absolute value >= gamma1 - beta, corresponding to the verifier detecting
* the out-of-range coefficient and rejecting the signature as malformed
*
* * -EKEYREJECTED if a coefficient is changed to a different in-range value,
* i.e. absolute value < gamma1 - beta, corresponding to the verifier
* continuing to the "real" signature check and that check failing
*/
static void test_mldsa_z_range(struct kunit *test,
			       const struct mldsa_testvector *tv)
{
	u8 *sig = kunit_kmemdup_or_fail(test, tv->sig, tv->sig_len);
	const int lambda = params[tv->alg].lambda;
	const s32 gamma1 = params[tv->alg].gamma1;
	const int beta = params[tv->alg].beta;
	/*
	 * We just modify the first coefficient.  The coefficient is gamma1
	 * minus either the first 18 or 20 bits of the u32, depending on gamma1.
	 *
	 * The layout of ML-DSA signatures is ctilde || z || h.  ctilde is
	 * lambda / 4 bytes, so z starts at &sig[lambda / 4].
	 */
	u8 *z_ptr = &sig[lambda / 4];
	const u32 z_data = get_unaligned_le32(z_ptr);
	/* 2*gamma1 - 1: selects the 18 or 20 coefficient bits of the u32 */
	const u32 mask = (gamma1 << 1) - 1;
	/* These are the four boundaries of the out-of-range values. */
	const s32 out_of_range_coeffs[] = {
		-gamma1 + 1,
		-(gamma1 - beta),
		gamma1,
		gamma1 - beta,
	};
	/*
	 * These are the two boundaries of the valid range, along with 0.  We
	 * assume that none of these matches the original coefficient.
	 */
	const s32 in_range_coeffs[] = {
		-(gamma1 - beta - 1),
		0,
		gamma1 - beta - 1,
	};

	/* Initially the signature is valid. */
	do_mldsa_and_assert_success(test, tv);

	/* Test some out-of-range coefficients: expect rejection as malformed. */
	for (int i = 0; i < ARRAY_SIZE(out_of_range_coeffs); i++) {
		const s32 c = out_of_range_coeffs[i];

		/* Encode coefficient c as gamma1 - c in the low mask bits. */
		put_unaligned_le32((z_data & ~mask) | (mask & (gamma1 - c)),
				   z_ptr);
		KUNIT_ASSERT_EQ(test, -EBADMSG,
				mldsa_verify(tv->alg, sig, tv->sig_len, tv->msg,
					     tv->msg_len, tv->pk, tv->pk_len));
	}

	/* Test some in-range coefficients: expect the real check to fail. */
	for (int i = 0; i < ARRAY_SIZE(in_range_coeffs); i++) {
		const s32 c = in_range_coeffs[i];

		put_unaligned_le32((z_data & ~mask) | (mask & (gamma1 - c)),
				   z_ptr);
		KUNIT_ASSERT_EQ(test, -EKEYREJECTED,
				mldsa_verify(tv->alg, sig, tv->sig_len, tv->msg,
					     tv->msg_len, tv->pk, tv->pk_len));
	}
}
/* Test that mldsa_verify() rejects malformed hint vectors with -EBADMSG. */
static void test_mldsa_bad_hints(struct kunit *test,
				 const struct mldsa_testvector *tv)
{
	const int omega = params[tv->alg].omega;
	const int k = params[tv->alg].k;
	u8 *sig = kunit_kmemdup_or_fail(test, tv->sig, tv->sig_len);
	/*
	 * Pointer to the encoded hint vector in the signature.  The encoding
	 * is omega hint index bytes followed by k cumulative-count bytes, so
	 * hintvec[omega + i] is the cumulative hint count through element i.
	 */
	u8 *hintvec = &sig[tv->sig_len - omega - k];
	u8 h;

	/* Initially the signature is valid. */
	do_mldsa_and_assert_success(test, tv);

	/* Cumulative hint count exceeds omega */
	memcpy(sig, tv->sig, tv->sig_len);
	hintvec[omega + k - 1] = omega + 1;
	KUNIT_ASSERT_EQ(test, -EBADMSG,
			mldsa_verify(tv->alg, sig, tv->sig_len, tv->msg,
				     tv->msg_len, tv->pk, tv->pk_len));

	/* Cumulative hint count decreases */
	memcpy(sig, tv->sig, tv->sig_len);
	KUNIT_ASSERT_GE(test, hintvec[omega + k - 2], 1);
	hintvec[omega + k - 1] = hintvec[omega + k - 2] - 1;
	KUNIT_ASSERT_EQ(test, -EBADMSG,
			mldsa_verify(tv->alg, sig, tv->sig_len, tv->msg,
				     tv->msg_len, tv->pk, tv->pk_len));

	/*
	 * Hint indices out of order.  To test this, swap hintvec[0] and
	 * hintvec[1].  This assumes that the original valid signature had at
	 * least two nonzero hints in the first element (asserted below).
	 */
	memcpy(sig, tv->sig, tv->sig_len);
	KUNIT_ASSERT_GE(test, hintvec[omega], 2);
	h = hintvec[0];
	hintvec[0] = hintvec[1];
	hintvec[1] = h;
	KUNIT_ASSERT_EQ(test, -EBADMSG,
			mldsa_verify(tv->alg, sig, tv->sig_len, tv->msg,
				     tv->msg_len, tv->pk, tv->pk_len));

	/*
	 * Extra hint indices given.  For this test to work, the original valid
	 * signature must have fewer than omega nonzero hints (asserted below).
	 */
	memcpy(sig, tv->sig, tv->sig_len);
	KUNIT_ASSERT_LT(test, hintvec[omega + k - 1], omega);
	hintvec[omega - 1] = 0xff;
	KUNIT_ASSERT_EQ(test, -EBADMSG,
			mldsa_verify(tv->alg, sig, tv->sig_len, tv->msg,
				     tv->msg_len, tv->pk, tv->pk_len));
}
/*
 * Test that flipping any single random bit of the signature, message, or
 * public key makes verification fail, and that undoing each flip restores a
 * valid signature.
 */
static void test_mldsa_mutation(struct kunit *test,
				const struct mldsa_testvector *tv)
{
	const int sig_len = tv->sig_len;
	const int msg_len = tv->msg_len;
	const int pk_len = tv->pk_len;
	const int num_iter = 200;
	u8 *sig = kunit_kmemdup_or_fail(test, tv->sig, sig_len);
	u8 *msg = kunit_kmemdup_or_fail(test, tv->msg, msg_len);
	u8 *pk = kunit_kmemdup_or_fail(test, tv->pk, pk_len);

	/* Initially the signature is valid. */
	do_mldsa_and_assert_success(test, tv);

	/* Changing any bit in the signature should invalidate the signature */
	for (int i = 0; i < num_iter; i++) {
		size_t pos = get_random_u32_below(sig_len);
		u8 b = 1 << get_random_u32_below(8);

		sig[pos] ^= b;
		KUNIT_ASSERT_NE(test, 0,
				mldsa_verify(tv->alg, sig, sig_len, msg,
					     msg_len, pk, pk_len));
		sig[pos] ^= b;	/* undo the flip */
	}

	/* Changing any bit in the message should invalidate the signature */
	for (int i = 0; i < num_iter; i++) {
		size_t pos = get_random_u32_below(msg_len);
		u8 b = 1 << get_random_u32_below(8);

		msg[pos] ^= b;
		KUNIT_ASSERT_NE(test, 0,
				mldsa_verify(tv->alg, sig, sig_len, msg,
					     msg_len, pk, pk_len));
		msg[pos] ^= b;	/* undo the flip */
	}

	/* Changing any bit in the public key should invalidate the signature */
	for (int i = 0; i < num_iter; i++) {
		size_t pos = get_random_u32_below(pk_len);
		u8 b = 1 << get_random_u32_below(8);

		pk[pos] ^= b;
		KUNIT_ASSERT_NE(test, 0,
				mldsa_verify(tv->alg, sig, sig_len, msg,
					     msg_len, pk, pk_len));
		pk[pos] ^= b;	/* undo the flip */
	}

	/* All changes should have been undone. */
	KUNIT_ASSERT_EQ(test, 0,
			mldsa_verify(tv->alg, sig, sig_len, msg, msg_len, pk,
				     pk_len));
}
/*
 * Main test routine for one parameter set: valid signature, wrong-length
 * inputs, then the more targeted z-range / bad-hint / mutation sub-tests.
 */
static void test_mldsa(struct kunit *test, const struct mldsa_testvector *tv)
{
	/* Valid signature */
	KUNIT_ASSERT_EQ(test, tv->sig_len, params[tv->alg].sig_len);
	KUNIT_ASSERT_EQ(test, tv->pk_len, params[tv->alg].pk_len);
	do_mldsa_and_assert_success(test, tv);

	/* Signature too short */
	KUNIT_ASSERT_EQ(test, -EBADMSG,
			mldsa_verify(tv->alg, tv->sig, tv->sig_len - 1, tv->msg,
				     tv->msg_len, tv->pk, tv->pk_len));

	/* Signature too long */
	KUNIT_ASSERT_EQ(test, -EBADMSG,
			mldsa_verify(tv->alg, tv->sig, tv->sig_len + 1, tv->msg,
				     tv->msg_len, tv->pk, tv->pk_len));

	/* Public key too short */
	KUNIT_ASSERT_EQ(test, -EBADMSG,
			mldsa_verify(tv->alg, tv->sig, tv->sig_len, tv->msg,
				     tv->msg_len, tv->pk, tv->pk_len - 1));

	/* Public key too long */
	KUNIT_ASSERT_EQ(test, -EBADMSG,
			mldsa_verify(tv->alg, tv->sig, tv->sig_len, tv->msg,
				     tv->msg_len, tv->pk, tv->pk_len + 1));

	/*
	 * Message too short.  Error is EKEYREJECTED because it gets rejected by
	 * the "real" signature check rather than the well-formedness checks.
	 */
	KUNIT_ASSERT_EQ(test, -EKEYREJECTED,
			mldsa_verify(tv->alg, tv->sig, tv->sig_len, tv->msg,
				     tv->msg_len - 1, tv->pk, tv->pk_len));

	/*
	 * Can't simply try (tv->msg, tv->msg_len + 1) too, as tv->msg would be
	 * accessed out of bounds.  However, ML-DSA just hashes the message and
	 * doesn't handle different message lengths differently anyway.
	 */

	/* Test the validity checks on the z vector. */
	test_mldsa_z_range(test, tv);

	/* Test the validity checks on the hint vector. */
	test_mldsa_bad_hints(test, tv);

	/* Test randomly mutating the inputs. */
	test_mldsa_mutation(test, tv);
}
/* Per-parameter-set wrappers so each appears as its own KUnit test case. */
static void test_mldsa44(struct kunit *test)
{
	test_mldsa(test, &mldsa44_testvector);
}

static void test_mldsa65(struct kunit *test)
{
	test_mldsa(test, &mldsa65_testvector);
}

static void test_mldsa87(struct kunit *test)
{
	test_mldsa(test, &mldsa87_testvector);
}
/* Reduce @a modulo @m into the canonical range [0, m). */
static s32 mod(s32 a, s32 m)
{
	const s32 r = a % m;

	return r < 0 ? r + m : r;
}
/* Reduce @a modulo @m into the symmetric range used by FIPS 204 (mod±). */
static s32 symmetric_mod(s32 a, s32 m)
{
	const s32 r = mod(a, m);

	return r > m / 2 ? r - m : r;
}
/* Mechanical, inefficient translation of FIPS 204 Algorithm 36, Decompose */
static void decompose_ref(s32 r, s32 gamma2, s32 *r0, s32 *r1)
{
s32 rplus = mod(r, Q);
*r0 = symmetric_mod(rplus, 2 * gamma2);
if (rplus - *r0 == Q - 1) {
*r1 = 0;
*r0 = *r0 - 1;
} else {
*r1 = (rplus - *r0) / (2 * gamma2);
}
}
/* Mechanical, inefficient translation of FIPS 204 Algorithm 40, UseHint */
static s32 use_hint_ref(u8 h, s32 r, s32 gamma2)
{
s32 m = (Q - 1) / (2 * gamma2);
s32 r0, r1;
decompose_ref(r, gamma2, &r0, &r1);
if (h == 1 && r0 > 0)
return mod(r1 + 1, m);
if (h == 1 && r0 <= 0)
return mod(r1 - 1, m);
return r1;
}
/*
* Test that for all possible inputs, mldsa_use_hint() gives the same output as
* a mechanical translation of the pseudocode from FIPS 204.
*/
static void test_mldsa_use_hint(struct kunit *test)
{
	/* Exercise both gamma2 values used by the ML-DSA parameter sets. */
	for (int i = 0; i < 2; i++) {
		const s32 gamma2 = (Q - 1) / (i == 0 ? 88 : 32);

		/* Try every hint bit and every residue r in [0, Q). */
		for (u8 h = 0; h < 2; h++) {
			for (s32 r = 0; r < Q; r++) {
				KUNIT_ASSERT_EQ(test,
						mldsa_use_hint(h, r, gamma2),
						use_hint_ref(h, r, gamma2));
			}
		}
	}
}
/*
 * Benchmark signature verification for one parameter set and report the
 * throughput in operations per second.  Skipped unless the benchmark option
 * is enabled.
 */
static void benchmark_mldsa(struct kunit *test,
			    const struct mldsa_testvector *tv)
{
	const int warmup_niter = 200;
	const int benchmark_niter = 200;
	u64 t0, t1;

	if (!IS_ENABLED(CONFIG_CRYPTO_LIB_BENCHMARK))
		kunit_skip(test, "not enabled");

	for (int i = 0; i < warmup_niter; i++)
		do_mldsa_and_assert_success(test, tv);
	t0 = ktime_get_ns();
	for (int i = 0; i < benchmark_niter; i++)
		do_mldsa_and_assert_success(test, tv);
	t1 = ktime_get_ns();
	/* ?: 1 guards against a zero elapsed time. */
	kunit_info(test, "%llu ops/s",
		   div64_u64((u64)benchmark_niter * NSEC_PER_SEC,
			     t1 - t0 ?: 1));
}
/* Per-parameter-set benchmark wrappers, one KUnit case each. */
static void benchmark_mldsa44(struct kunit *test)
{
	benchmark_mldsa(test, &mldsa44_testvector);
}

static void benchmark_mldsa65(struct kunit *test)
{
	benchmark_mldsa(test, &mldsa65_testvector);
}

static void benchmark_mldsa87(struct kunit *test)
{
	benchmark_mldsa(test, &mldsa87_testvector);
}
/* Test case table: correctness tests first, then the benchmarks. */
static struct kunit_case mldsa_kunit_cases[] = {
	KUNIT_CASE(test_mldsa44),
	KUNIT_CASE(test_mldsa65),
	KUNIT_CASE(test_mldsa87),
	KUNIT_CASE(test_mldsa_use_hint),
	KUNIT_CASE(benchmark_mldsa44),
	KUNIT_CASE(benchmark_mldsa65),
	KUNIT_CASE(benchmark_mldsa87),
	{},
};

static struct kunit_suite mldsa_kunit_suite = {
	.name = "mldsa",
	.test_cases = mldsa_kunit_cases,
};
kunit_test_suite(mldsa_kunit_suite);

MODULE_DESCRIPTION("KUnit tests and benchmark for ML-DSA");
MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING");
MODULE_LICENSE("GPL");

View File

@@ -0,0 +1,298 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* This file was generated by: ./scripts/crypto/gen-hash-testvecs.py nh */
static const u8 nh_test_key[NH_KEY_BYTES] = {
0x04, 0x59, 0x66, 0x92, 0x81, 0xd7, 0xe9, 0x25,
0x68, 0xfa, 0xb0, 0xca, 0x9f, 0xea, 0x98, 0xca,
0xcd, 0xbf, 0x6d, 0xa5, 0x0c, 0x22, 0xc3, 0x57,
0xdc, 0x35, 0x05, 0xdd, 0x5b, 0xb0, 0xce, 0xf6,
0xb2, 0x4c, 0x77, 0x2e, 0xd2, 0x63, 0xf0, 0x17,
0x60, 0xd8, 0xd3, 0xd9, 0xed, 0x34, 0xb6, 0xed,
0x6a, 0x11, 0xc0, 0x25, 0xda, 0xba, 0x7e, 0xef,
0x49, 0x13, 0xf7, 0xd9, 0xfc, 0xb6, 0xfd, 0x58,
0xe9, 0x5f, 0xc5, 0xc4, 0x69, 0x89, 0xba, 0xa6,
0x2b, 0x58, 0x8d, 0x36, 0x6c, 0xb9, 0x90, 0x1e,
0x64, 0xc7, 0x44, 0x84, 0x03, 0x70, 0x30, 0x47,
0xdd, 0x58, 0xf4, 0x87, 0x61, 0xfd, 0x9c, 0x6b,
0x51, 0x1b, 0x39, 0x1d, 0x6d, 0x50, 0xae, 0x19,
0x71, 0x03, 0xc7, 0xa7, 0x42, 0x82, 0x8f, 0xa5,
0x63, 0x6a, 0xe2, 0x8a, 0xad, 0x4b, 0x40, 0xa7,
0x3f, 0x8b, 0xe4, 0xae, 0xb2, 0x8a, 0x14, 0x78,
0x91, 0x07, 0xba, 0x02, 0x08, 0xc1, 0x34, 0xb8,
0xda, 0x61, 0x67, 0xf6, 0x98, 0x97, 0x1a, 0xcb,
0x0f, 0x82, 0x80, 0xff, 0x02, 0x54, 0x16, 0x57,
0x18, 0x35, 0xaf, 0x16, 0x17, 0x68, 0xcc, 0xc7,
0x52, 0xac, 0x31, 0x39, 0x60, 0xe4, 0xb4, 0xcb,
0x0e, 0xf9, 0x57, 0xe9, 0x96, 0xff, 0x99, 0xd6,
0x10, 0x96, 0x09, 0xab, 0x28, 0x92, 0x1b, 0x9f,
0x10, 0xde, 0x3e, 0x87, 0xb8, 0x9d, 0x2d, 0xa0,
0x3c, 0x91, 0x85, 0x8c, 0x9e, 0xc0, 0x97, 0x9a,
0xb4, 0x54, 0x7f, 0x4a, 0x63, 0xc2, 0x75, 0x0f,
0x0d, 0x2f, 0x62, 0x56, 0x48, 0x0e, 0xb6, 0xc7,
0xcf, 0x0d, 0x78, 0xca, 0xbd, 0x31, 0x9e, 0x4c,
0xf7, 0x3f, 0x9e, 0xc2, 0xea, 0x5e, 0x44, 0x6d,
0x76, 0xf9, 0xc5, 0xe0, 0x29, 0xea, 0x15, 0xbf,
0xaf, 0xd4, 0x75, 0xc8, 0x89, 0xcf, 0x4f, 0x17,
0xfd, 0x4a, 0x45, 0xa5, 0x4d, 0x2d, 0x87, 0x11,
0x2b, 0x3e, 0x64, 0xa2, 0x6b, 0xc5, 0x23, 0x8c,
0xfa, 0x71, 0x13, 0x72, 0x0e, 0x7c, 0xe1, 0x2c,
0x9f, 0x0e, 0x29, 0xc9, 0x15, 0xde, 0x4e, 0xd7,
0x42, 0x1f, 0x8e, 0xe1, 0x91, 0x99, 0x50, 0x38,
0x7f, 0x15, 0xc0, 0xf6, 0x4b, 0xfd, 0x9d, 0x40,
0xe9, 0x44, 0x51, 0xca, 0x3b, 0x83, 0x41, 0x9f,
0x82, 0x64, 0x66, 0x22, 0x12, 0x43, 0x1c, 0x4f,
0x45, 0x11, 0x3a, 0x46, 0xb1, 0x7c, 0x62, 0x0a,
0x9d, 0x4c, 0x99, 0x85, 0xb0, 0x10, 0x19, 0xcf,
0xeb, 0xf9, 0x65, 0xaf, 0xd8, 0x05, 0x9e, 0x61,
0x03, 0x5f, 0x15, 0x99, 0xa9, 0x05, 0x20, 0xc8,
0xaf, 0xab, 0x31, 0x9d, 0xd5, 0xdf, 0x24, 0xce,
0x2b, 0x6d, 0xd7, 0x17, 0xc3, 0x04, 0xff, 0x82,
0xa7, 0x18, 0x39, 0xe9, 0x0d, 0x0a, 0x5f, 0xb9,
0xc9, 0x86, 0x1d, 0xf8, 0x02, 0x2d, 0xc3, 0x88,
0x28, 0x73, 0x5c, 0xac, 0x25, 0xc9, 0xfe, 0xcb,
0xd2, 0xfd, 0x63, 0x74, 0xac, 0xe1, 0xb8, 0xa2,
0xc6, 0x2b, 0xb5, 0x40, 0x01, 0x9b, 0xed, 0xee,
0x7b, 0x63, 0x66, 0x05, 0x45, 0xc2, 0x6c, 0xd8,
0x58, 0xf1, 0xa1, 0x3d, 0xc8, 0x43, 0x59, 0x4b,
0x39, 0x87, 0x24, 0x64, 0x92, 0xb0, 0xab, 0x75,
0xf1, 0xb7, 0xbf, 0x7c, 0xde, 0xc0, 0xaf, 0x4a,
0xc2, 0x7b, 0xd9, 0x8a, 0x99, 0xcd, 0x83, 0x01,
0xe6, 0xae, 0xeb, 0x16, 0xe7, 0x54, 0x9c, 0x95,
0x0a, 0x91, 0x02, 0xaf, 0x9f, 0x79, 0x40, 0x45,
0xce, 0x47, 0x41, 0x65, 0xca, 0x80, 0x0d, 0x14,
0x46, 0x58, 0x5d, 0x4d, 0x28, 0x55, 0x70, 0x49,
0x7c, 0x32, 0x1f, 0x01, 0xaa, 0x05, 0x2f, 0xf1,
0xeb, 0xa3, 0xe6, 0x1d, 0xf9, 0x43, 0xe0, 0x58,
0x05, 0x61, 0x22, 0xc3, 0xee, 0xe4, 0x6f, 0x94,
0xaf, 0x82, 0xda, 0x18, 0x18, 0x63, 0x9c, 0xfa,
0xc0, 0x04, 0x27, 0xc5, 0x39, 0x5e, 0x7a, 0xa6,
0x85, 0x46, 0xb7, 0x76, 0xc9, 0x16, 0xf2, 0xf8,
0x40, 0x8d, 0x4b, 0x5e, 0x72, 0xf3, 0x3e, 0x12,
0xa4, 0x80, 0x39, 0xb2, 0x92, 0xfe, 0x6e, 0x5b,
0x5b, 0xad, 0xea, 0x29, 0xbc, 0x66, 0xe6, 0xfe,
0x80, 0x02, 0x5d, 0x83, 0x37, 0xfc, 0xde, 0x6c,
0x25, 0x54, 0xa2, 0xff, 0x7d, 0xb6, 0xe1, 0xd6,
0xcf, 0xdb, 0x60, 0xe3, 0xbe, 0x2f, 0x4e, 0xb4,
0xf5, 0xb4, 0x51, 0xf7, 0x5a, 0x25, 0xda, 0x40,
0x84, 0x5e, 0xc0, 0x0a, 0x6b, 0xfa, 0x0c, 0xfb,
0x5e, 0x3e, 0x12, 0x6c, 0x39, 0x35, 0xc0, 0x28,
0xd6, 0x1b, 0x3a, 0x72, 0xc3, 0xfe, 0xa5, 0x4c,
0x35, 0xa2, 0x42, 0xf6, 0x3d, 0xa5, 0xbf, 0xb5,
0x39, 0xe3, 0xc9, 0xd5, 0x8c, 0x1b, 0xe5, 0xef,
0x91, 0xd2, 0x80, 0x6f, 0xcc, 0x77, 0x44, 0x50,
0x62, 0xc7, 0xac, 0x29, 0xcb, 0x72, 0xda, 0x6d,
0xc5, 0xfe, 0xa7, 0xee, 0x8b, 0xeb, 0xfc, 0xa3,
0x46, 0x18, 0x5f, 0xaa, 0xc3, 0x65, 0xd0, 0x8f,
0x67, 0x98, 0xd6, 0xce, 0x5f, 0x84, 0xd4, 0x96,
0x1b, 0x67, 0xa0, 0xcf, 0xfc, 0x94, 0x55, 0x5e,
0x4b, 0x51, 0x68, 0xa7, 0x6d, 0x02, 0xf9, 0x53,
0x54, 0x86, 0x6b, 0x53, 0x39, 0xe0, 0x36, 0x23,
0x87, 0x1a, 0xfb, 0x53, 0x1a, 0x65, 0xd8, 0x42,
0xa8, 0x85, 0xfd, 0x2c, 0x7f, 0x6b, 0x7f, 0x67,
0x70, 0x23, 0x6c, 0xe9, 0x0b, 0xf0, 0x1e, 0x0d,
0x0b, 0xb4, 0xd4, 0x96, 0x14, 0x95, 0x7e, 0xf3,
0x9b, 0xdd, 0xd7, 0xc4, 0x24, 0x22, 0xb9, 0x9d,
0xb3, 0xa6, 0xac, 0x09, 0x7c, 0x00, 0xbf, 0xd0,
0xdc, 0xfb, 0x9b, 0x7c, 0x8c, 0xbd, 0xd4, 0x1a,
0x13, 0x2b, 0x82, 0x3d, 0x7c, 0x8c, 0x10, 0x47,
0x49, 0x6c, 0x53, 0xeb, 0xa7, 0xc2, 0xde, 0xed,
0xe2, 0x55, 0x93, 0x2c, 0x1a, 0x5a, 0x7d, 0xe1,
0x37, 0x62, 0xdd, 0x29, 0x1a, 0x72, 0x82, 0xc0,
0x14, 0x73, 0x5d, 0x0e, 0x9b, 0xcc, 0x54, 0x68,
0x3a, 0x4d, 0x56, 0x8f, 0xc9, 0x4e, 0xaf, 0x7b,
0xde, 0x17, 0x9c, 0x5e, 0x83, 0x82, 0x22, 0xe3,
0x28, 0xdf, 0x1b, 0xb6, 0xdb, 0x17, 0x90, 0x48,
0xb5, 0x13, 0x4e, 0xd3, 0x97, 0x5e, 0xb3, 0x9c,
0x16, 0x08, 0xc8, 0x77, 0xb3, 0xcd, 0x94, 0x90,
0x4f, 0x77, 0xaf, 0x67, 0xdd, 0x80, 0x15, 0x1c,
0x59, 0xfb, 0x3c, 0xec, 0xf8, 0xb3, 0x67, 0xfb,
0xa0, 0x94, 0x3c, 0x53, 0x99, 0x49, 0x94, 0x2c,
0x85, 0x26, 0x92, 0x6d, 0x8d, 0x48, 0xf6, 0x72,
0xdd, 0xfb, 0xb2, 0x10, 0x51, 0x5b, 0xbe, 0xd5,
0x70, 0x3d, 0x28, 0x94, 0x98, 0x4f, 0x6e, 0x20,
0x7b, 0x7d, 0x0f, 0x56, 0xc9, 0x96, 0x5f, 0x60,
0x2e, 0x2f, 0x9b, 0x38, 0x7f, 0xc7, 0x3c, 0x6b,
0x2f, 0x2b, 0x8f, 0x1f, 0x07, 0x1c, 0x85, 0x57,
0x16, 0x2e, 0xc7, 0x74, 0xe5, 0xf2, 0x0d, 0xfe,
0xef, 0x57, 0xb0, 0xa4, 0x4f, 0x4c, 0x7d, 0x81,
0xbb, 0xaa, 0xcb, 0xa0, 0xb0, 0x51, 0xcf, 0xc2,
0xee, 0x90, 0x2e, 0x5e, 0x27, 0xca, 0xd3, 0xe8,
0xf3, 0x55, 0x02, 0x56, 0x06, 0xa5, 0xad, 0xdf,
0xa3, 0xa9, 0x06, 0x05, 0x53, 0x74, 0x55, 0xd5,
0xd2, 0x20, 0x0a, 0x6d, 0x4a, 0xef, 0x16, 0xbf,
0xc3, 0xb2, 0x75, 0x93, 0xd8, 0x6e, 0x0f, 0xd2,
0xae, 0x3b, 0xc0, 0x00, 0x22, 0x6f, 0xb5, 0x0a,
0x41, 0xfc, 0xf9, 0x41, 0xfc, 0x16, 0x4f, 0xa6,
0x1c, 0x18, 0x41, 0x67, 0x73, 0xa8, 0x79, 0xa9,
0x54, 0x18, 0x4e, 0x88, 0x44, 0x0f, 0xa1, 0x5b,
0xf0, 0x68, 0xea, 0x3c, 0x62, 0x59, 0x8d, 0xc7,
0x6f, 0xd7, 0x72, 0x20, 0x74, 0x39, 0xd4, 0x3a,
0x41, 0x1b, 0x58, 0x57, 0x54, 0x85, 0x60, 0xca,
0x49, 0x4b, 0xa1, 0x04, 0x91, 0xb6, 0xf2, 0xcd,
0x62, 0x63, 0x67, 0xd1, 0xee, 0x6b, 0x9e, 0x5d,
0xd6, 0xc4, 0x58, 0x6b, 0xe1, 0xe6, 0x4a, 0xdb,
0xe8, 0xb1, 0x35, 0x03, 0x15, 0x8d, 0x34, 0x69,
0x4c, 0xd2, 0x54, 0xce, 0xe8, 0x6a, 0x69, 0x6f,
0xaa, 0xb5, 0x1f, 0x86, 0xed, 0xac, 0x4f, 0x16,
0x1e, 0x48, 0x93, 0xe8, 0x6c, 0x24, 0x1c, 0xd0,
0xbb, 0x61, 0xc2, 0x34, 0xdd, 0xc9, 0x5c, 0xce,
};
static const u8 nh_test_msg[NH_MESSAGE_BYTES] = {
0x99, 0x57, 0x61, 0x41, 0xad, 0x08, 0x7e, 0x17,
0xd4, 0xef, 0x0b, 0x23, 0xff, 0x0b, 0x96, 0x0a,
0x6c, 0x98, 0xac, 0x78, 0x5e, 0xb6, 0xb2, 0x67,
0x0f, 0x48, 0xf4, 0xa1, 0xe5, 0x1e, 0xfe, 0x83,
0xe4, 0x56, 0x2a, 0x03, 0x64, 0xff, 0x7a, 0xf3,
0x03, 0xfe, 0xa7, 0x86, 0xdc, 0x35, 0x79, 0x13,
0xf8, 0xe1, 0x59, 0x19, 0x04, 0x43, 0x24, 0x82,
0x44, 0x82, 0x41, 0x2b, 0xc7, 0xcf, 0xf5, 0xa4,
0xdc, 0xca, 0xf5, 0x34, 0xc4, 0x23, 0x3c, 0x1f,
0xa8, 0x84, 0x1f, 0x2a, 0xcd, 0xae, 0x9d, 0x5e,
0x05, 0xe2, 0xfb, 0x0c, 0x68, 0x81, 0x90, 0x11,
0x44, 0xf6, 0xdd, 0x5b, 0x51, 0xd3, 0xe0, 0xab,
0x29, 0x3a, 0xa9, 0x9c, 0xf6, 0x7e, 0x2d, 0xe3,
0x6c, 0x09, 0x59, 0xd7, 0xfa, 0x7f, 0x6a, 0x33,
0x3b, 0x23, 0x7b, 0x1b, 0xb2, 0x79, 0x5f, 0x5c,
0xb6, 0x2d, 0xb0, 0xf8, 0xab, 0x33, 0x28, 0xe0,
0x72, 0x2e, 0x2f, 0x03, 0x22, 0x16, 0xb4, 0x87,
0xf7, 0x14, 0x3f, 0x55, 0x8a, 0xb0, 0x47, 0xdb,
0x42, 0x2d, 0xc0, 0x0c, 0x0a, 0x33, 0xf8, 0xab,
0x44, 0xae, 0xa3, 0xc9, 0xfc, 0xf6, 0x34, 0x8c,
0x60, 0x30, 0x6d, 0x31, 0x70, 0xf3, 0x39, 0x53,
0xf1, 0x2d, 0xb9, 0x6c, 0xa6, 0x48, 0x9c, 0x9c,
0xc2, 0x88, 0xb3, 0xa9, 0x98, 0xb6, 0xc3, 0x47,
0x94, 0x02, 0x9d, 0x98, 0x6e, 0x25, 0x6c, 0xf5,
0x9b, 0xc6, 0x4d, 0xee, 0x07, 0x1e, 0x25, 0x8f,
0x01, 0xde, 0xad, 0xe5, 0x77, 0x4f, 0xd1, 0xc0,
0x62, 0xbb, 0x3a, 0xb9, 0x83, 0x0b, 0x29, 0x76,
0x4f, 0xb1, 0x86, 0x2c, 0x27, 0xc7, 0x38, 0x65,
0xcb, 0x78, 0xb7, 0x02, 0x10, 0x9e, 0xde, 0x83,
0xd1, 0xac, 0x05, 0x86, 0x23, 0xce, 0x4f, 0x8d,
0xcc, 0x4e, 0x3f, 0x04, 0xf4, 0x39, 0x91, 0x81,
0x1c, 0x42, 0x47, 0x4d, 0x50, 0xe5, 0x01, 0x22,
0x98, 0xcf, 0x91, 0x36, 0xb3, 0x7c, 0xcf, 0x78,
0x07, 0x22, 0xa9, 0x18, 0xd2, 0xcd, 0x7d, 0x4d,
0xa6, 0xcb, 0xaa, 0x52, 0x13, 0x49, 0x64, 0xb0,
0xa5, 0x3d, 0xc7, 0xc3, 0x10, 0x87, 0x2e, 0x76,
0xa9, 0x52, 0xc5, 0x50, 0x18, 0xc0, 0x5d, 0xb4,
0x4c, 0xc6, 0x7f, 0x64, 0xae, 0x53, 0xc3, 0x46,
0x99, 0xb7, 0x61, 0x6b, 0x08, 0x43, 0x08, 0x4c,
0x90, 0x2c, 0xee, 0x56, 0x91, 0xb4, 0x28, 0xa8,
0xa8, 0x8b, 0x3b, 0x1a, 0x67, 0x71, 0xf2, 0x81,
0x48, 0x20, 0x71, 0x30, 0xdd, 0x69, 0x8a, 0xc2,
0x4c, 0x9d, 0x4e, 0x17, 0xfb, 0x2e, 0xe7, 0x9b,
0x86, 0x94, 0xa5, 0xce, 0xf9, 0x74, 0x56, 0xff,
0x3b, 0xff, 0xd9, 0x5a, 0xc8, 0x98, 0xf5, 0x25,
0xa2, 0xb9, 0x66, 0x46, 0x89, 0x17, 0x39, 0x08,
0x69, 0x03, 0x59, 0x1e, 0x13, 0x12, 0x68, 0xe7,
0x2f, 0x00, 0xd3, 0xf3, 0x71, 0xd1, 0x20, 0xc5,
0x0b, 0x38, 0x89, 0xda, 0x62, 0x3c, 0xce, 0xea,
0x04, 0x19, 0x47, 0x6d, 0xd8, 0x64, 0x38, 0x60,
0x96, 0x71, 0x68, 0x48, 0x79, 0xf8, 0xf4, 0x76,
0x33, 0xf6, 0x60, 0x8d, 0x21, 0xd0, 0xee, 0x41,
0xc0, 0xbe, 0x33, 0x61, 0x5e, 0x66, 0xe6, 0x16,
0x14, 0xc7, 0xfb, 0x6c, 0xf3, 0x58, 0xef, 0x12,
0x7c, 0x70, 0x65, 0x5d, 0x55, 0xe8, 0xf2, 0x92,
0x3a, 0xfe, 0x34, 0x64, 0x31, 0x7c, 0x29, 0xbb,
0x01, 0x18, 0xbd, 0xb6, 0xe4, 0x1e, 0xa4, 0xf3,
0x7b, 0x4c, 0x6a, 0x0d, 0x01, 0xfc, 0xc7, 0x66,
0xc3, 0x88, 0x37, 0x25, 0xcf, 0xe9, 0xca, 0x82,
0xeb, 0xa1, 0x38, 0x40, 0xc9, 0xdb, 0x38, 0x7b,
0x78, 0xcf, 0x11, 0xa3, 0x1c, 0x6b, 0x70, 0xc8,
0xe1, 0x2f, 0x7c, 0x17, 0x2c, 0x58, 0x28, 0xa4,
0x13, 0x40, 0xc7, 0x69, 0x0f, 0x04, 0xe5, 0x8e,
0xf0, 0x67, 0x53, 0xea, 0x10, 0xf5, 0x83, 0xc9,
0xcb, 0x6b, 0x16, 0xef, 0x2e, 0x55, 0xb3, 0xdd,
0xed, 0xf9, 0x1a, 0x52, 0x9a, 0x73, 0x78, 0x14,
0x14, 0x21, 0xfc, 0xef, 0x3c, 0x40, 0xa9, 0xfe,
0xef, 0xd7, 0x6e, 0x28, 0x2f, 0xd3, 0x73, 0xed,
0xa3, 0x73, 0xb5, 0x62, 0x41, 0xe6, 0xd4, 0x79,
0x49, 0x31, 0x2b, 0x86, 0x74, 0x56, 0x21, 0xfe,
0x6d, 0xb2, 0xbe, 0x81, 0x80, 0xa6, 0x81, 0x19,
0x90, 0x79, 0x6f, 0xc4, 0x4e, 0x7d, 0x6f, 0x2f,
0xa8, 0x6f, 0xd5, 0xc4, 0x7e, 0x23, 0x3b, 0xe6,
0x9b, 0x60, 0x97, 0x7b, 0xe2, 0x08, 0x8a, 0xaa,
0xc7, 0x7c, 0xf6, 0xe5, 0x01, 0x3e, 0xd2, 0x29,
0x7d, 0xd7, 0x40, 0x84, 0x95, 0xfa, 0xdf, 0xd8,
0x81, 0xe9, 0x5e, 0xdd, 0x0d, 0x17, 0x51, 0x6b,
0x8c, 0x0e, 0x47, 0xf9, 0x0c, 0x92, 0x1b, 0x60,
0xca, 0x06, 0x8a, 0xe5, 0xe8, 0x0f, 0x06, 0x75,
0x5d, 0x76, 0xc9, 0x32, 0x2c, 0x52, 0x2c, 0x2e,
0xd8, 0x66, 0x38, 0x75, 0x16, 0xc7, 0x7d, 0x51,
0xc4, 0xc2, 0x22, 0xc8, 0x19, 0xfc, 0x3d, 0x69,
0x1e, 0xd9, 0x64, 0x47, 0x5d, 0x21, 0x84, 0x46,
0xd7, 0xe1, 0xf0, 0x95, 0x3a, 0x8f, 0xbd, 0x7a,
0x53, 0x71, 0x4c, 0x54, 0xc1, 0x3e, 0x27, 0xde,
0xeb, 0x04, 0x11, 0xb0, 0x33, 0x4d, 0x57, 0x0b,
0x6b, 0x7d, 0x6c, 0xd5, 0x87, 0x7e, 0xb4, 0xe2,
0x94, 0x9e, 0x9f, 0x74, 0xe8, 0xb7, 0xfa, 0x05,
0x9b, 0x8f, 0x81, 0x43, 0x35, 0x82, 0xb8, 0x5b,
0xa8, 0x5e, 0xfa, 0x7a, 0x80, 0x8d, 0xd2, 0x90,
0x58, 0x79, 0x89, 0x56, 0x90, 0x2b, 0xff, 0x92,
0x3c, 0x35, 0xbe, 0x99, 0x5f, 0xd2, 0x4b, 0x15,
0x58, 0x4b, 0xbf, 0x08, 0x9b, 0x9b, 0x97, 0x10,
0xa4, 0x55, 0xc7, 0xec, 0x29, 0xc5, 0x14, 0x3e,
0x8f, 0x56, 0xa3, 0x92, 0x9e, 0x33, 0xcc, 0x9e,
0x77, 0x2f, 0x33, 0xcb, 0xc4, 0xe9, 0x19, 0xf4,
0x32, 0x2b, 0xef, 0x6c, 0x1c, 0x92, 0x2c, 0x45,
0x88, 0x74, 0x5f, 0xcf, 0x56, 0xfd, 0x87, 0x5f,
0xb6, 0x9b, 0xa2, 0x51, 0xda, 0x9b, 0x83, 0x4f,
0xec, 0x14, 0xe8, 0xd2, 0x42, 0x03, 0xcb, 0xe8,
0xd0, 0xb7, 0xf8, 0x38, 0xde, 0x6f, 0xdf, 0x43,
0xfa, 0x41, 0xab, 0xec, 0x2e, 0x3c, 0x93, 0x39,
0x76, 0xd1, 0x6f, 0x5b, 0x6c, 0x6e, 0x8d, 0xeb,
0x45, 0x6b, 0xc5, 0x76, 0x00, 0x29, 0xca, 0x3b,
0xdb, 0x78, 0xc2, 0x32, 0x09, 0x39, 0x19, 0x50,
0xa2, 0x44, 0x92, 0x09, 0xdb, 0x8b, 0x9e, 0x16,
0x76, 0x7f, 0xf1, 0x78, 0x7b, 0xb2, 0x51, 0xbc,
0x28, 0xbd, 0xb0, 0x7f, 0x25, 0x63, 0x7d, 0x34,
0xfb, 0xf6, 0x36, 0x24, 0xc7, 0xf9, 0x41, 0xb6,
0x2a, 0x06, 0xfc, 0xf0, 0x83, 0xf2, 0x12, 0x3d,
0x60, 0x2e, 0x10, 0x70, 0x31, 0x6f, 0x37, 0x08,
0x3e, 0x91, 0x93, 0xb5, 0xda, 0xb8, 0x4c, 0x1b,
0xd8, 0xb8, 0x3b, 0xd5, 0x3e, 0xb6, 0xc0, 0xbb,
0x38, 0x0f, 0xd2, 0x68, 0x4f, 0x78, 0x56, 0xf6,
0xda, 0x65, 0xb4, 0x0b, 0xb4, 0xaf, 0xa8, 0x19,
0x2f, 0x70, 0x55, 0xe0, 0x47, 0x31, 0x9f, 0x37,
0x1a, 0x47, 0xb9, 0x0c, 0x97, 0x79, 0xfc, 0xa9,
0x76, 0xe6, 0xfa, 0x38, 0x67, 0x25, 0xd3, 0x89,
0x8d, 0xad, 0xc6, 0x11, 0x2d, 0x77, 0x0b, 0x35,
0xa2, 0xe2, 0xdf, 0xc8, 0x94, 0xd5, 0xdf, 0xd2,
0x69, 0x2a, 0x99, 0x93, 0xfa, 0x4a, 0x5f, 0xc7,
0x8a, 0x14, 0x5f, 0x2a, 0xf3, 0x02, 0xf0, 0x3e,
0x21, 0x8e, 0x2e, 0x4b, 0xc4, 0xd2, 0xc8, 0xa6,
0x41, 0x6e, 0x17, 0x36, 0xe9, 0xad, 0x73, 0x33,
0x6c, 0xea, 0xc2, 0x31, 0x8f, 0x30, 0x51, 0x5c,
0x1c, 0x20, 0xe6, 0x05, 0x1a, 0x17, 0x15, 0x5d,
0x3e, 0x8f, 0xd2, 0x7f, 0xa1, 0xc5, 0x47, 0xb3,
0xb2, 0x9c, 0xe8, 0xf0, 0x6d, 0xc1, 0xc3, 0xa2,
};
/* Expected nh() output for the first 16 bytes of nh_test_msg. */
static const u8 nh_test_val16[NH_HASH_BYTES] = {
	0x30, 0x77, 0x55, 0x7c, 0x45, 0xd8, 0xce, 0xf7,
	0x2a, 0xb5, 0x14, 0x8c, 0x35, 0x7e, 0xaa, 0x00,
	0x50, 0xbc, 0x50, 0x7c, 0xd3, 0x20, 0x7c, 0x9c,
	0xb4, 0xf1, 0x91, 0x26, 0x81, 0x03, 0xa5, 0x68,
};
/* Expected nh() output for the first 96 bytes of nh_test_msg. */
static const u8 nh_test_val96[NH_HASH_BYTES] = {
	0xd2, 0x19, 0xca, 0xa5, 0x6c, 0x0c, 0xdf, 0x2f,
	0x69, 0xfa, 0x75, 0xc1, 0x63, 0xdb, 0xfa, 0x4d,
	0x45, 0x2b, 0xb8, 0xdb, 0xac, 0xee, 0x61, 0xc6,
	0x7a, 0x83, 0xb6, 0x0f, 0x32, 0x82, 0xe4, 0xd0,
};
/* Expected nh() output for the first 256 bytes of nh_test_msg. */
static const u8 nh_test_val256[NH_HASH_BYTES] = {
	0x33, 0x8f, 0xb4, 0x96, 0xf1, 0xb6, 0xf1, 0xb5,
	0x05, 0x19, 0xbb, 0x6b, 0xda, 0xd9, 0x95, 0x75,
	0x96, 0x3f, 0x8b, 0x42, 0xb6, 0xcd, 0xb7, 0xb7,
	0xe7, 0x97, 0xb5, 0xa9, 0x0b, 0xd7, 0xdd, 0x33,
};
/* Expected nh() output for the full 1024-byte nh_test_msg. */
static const u8 nh_test_val1024[NH_HASH_BYTES] = {
	0x32, 0x3d, 0x51, 0xe1, 0x77, 0xb6, 0xac, 0x06,
	0x84, 0x67, 0xb7, 0xf2, 0x24, 0xe7, 0xec, 0xfd,
	0x96, 0x64, 0xff, 0x55, 0xc7, 0x1b, 0xf9, 0xdc,
	0xa3, 0xc7, 0x32, 0x06, 0x79, 0xcf, 0xca, 0xb6,
};

View File

@@ -0,0 +1,43 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright 2025 Google LLC
*/
#include <crypto/nh.h>
#include <kunit/test.h>
#include "nh-testvecs.h"
static void test_nh(struct kunit *test)
{
u32 *key = kunit_kmalloc(test, NH_KEY_BYTES, GFP_KERNEL);
__le64 hash[NH_NUM_PASSES];
KUNIT_ASSERT_NOT_NULL(test, key);
memcpy(key, nh_test_key, NH_KEY_BYTES);
le32_to_cpu_array(key, NH_KEY_WORDS);
nh(key, nh_test_msg, 16, hash);
KUNIT_ASSERT_MEMEQ(test, hash, nh_test_val16, NH_HASH_BYTES);
nh(key, nh_test_msg, 96, hash);
KUNIT_ASSERT_MEMEQ(test, hash, nh_test_val96, NH_HASH_BYTES);
nh(key, nh_test_msg, 256, hash);
KUNIT_ASSERT_MEMEQ(test, hash, nh_test_val256, NH_HASH_BYTES);
nh(key, nh_test_msg, 1024, hash);
KUNIT_ASSERT_MEMEQ(test, hash, nh_test_val1024, NH_HASH_BYTES);
}
/* The single NH correctness test, registered with KUnit. */
static struct kunit_case nh_test_cases[] = {
	KUNIT_CASE(test_nh),
	{},	/* sentinel */
};
static struct kunit_suite nh_test_suite = {
	.name = "nh",
	.test_cases = nh_test_cases,
};
kunit_test_suite(nh_test_suite);
MODULE_DESCRIPTION("KUnit tests for NH");
MODULE_LICENSE("GPL");

261
lib/crypto/x86/aes-aesni.S Normal file
View File

@@ -0,0 +1,261 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
//
// AES block cipher using AES-NI instructions
//
// Copyright 2026 Google LLC
//
// The code in this file supports 32-bit and 64-bit CPUs, and it doesn't require
// AVX. It does use up to SSE4.1, which all CPUs with AES-NI have.
#include <linux/linkage.h>
.section .rodata
#ifdef __x86_64__
#define RODATA(label) label(%rip)
#else
#define RODATA(label) label
#endif
// A mask for pshufb that extracts the last dword, rotates it right by 8
// bits, and copies the result to all four dwords.
.p2align 4
.Lmask:
.byte 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12
// The AES round constants, used during key expansion
.Lrcon:
.long 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36
.text
// Transform four dwords [a0, a1, a2, a3] in \a into their "prefix XOR"
// [a0, a0^a1, a0^a1^a2, a0^a1^a2^a3], computed log-step style with two
// shift+XOR rounds.  \tmp is a temporary xmm register and is clobbered.
//
// Note: this could be done in four instructions, shufps + pxor + shufps + pxor,
// if the temporary register were zero-initialized ahead of time.  We instead do
// it in an easier-to-understand way that doesn't require zero-initialization
// and avoids the unusual shufps instruction.  movdqa is usually "free" anyway.
.macro _prefix_sum a, tmp
	movdqa		\a, \tmp	// \tmp = [a0, a1, a2, a3]
	pslldq		$4, \a		// \a = [0, a0, a1, a2]
	pxor		\tmp, \a	// \a = [a0, a0^a1, a1^a2, a2^a3]
	movdqa		\a, \tmp	// save the partial prefix XOR
	pslldq		$8, \a		// \a = [0, 0, a0, a0^a1]
	pxor		\tmp, \a	// \a = [a0, a0^a1, a0^a1^a2, a0^a1^a2^a3]
.endm
// Generate the next AES round key and store it at (RNDKEYS), also leaving it
// in \a.  \a is the round key whose prefix XOR feeds the new key; \b is the
// round key whose last dword gets rotated and substituted.  For AES-128 the
// call sites pass \a == \b.  Clobbers %xmm2 and %xmm3; expects MASK and RCON
// to be loaded.
.macro _gen_round_key a, b
	// Compute four copies of rcon[i] ^ SubBytes(ror32(w, 8)), where w is
	// the last dword of the previous round key (given in \b).
	//
	// 'aesenclast src, dst' does dst = src XOR SubBytes(ShiftRows(dst)).
	// It is used here solely for the SubBytes and the XOR.  The ShiftRows
	// is a no-op because all four columns are the same here.
	//
	// Don't use the 'aeskeygenassist' instruction, since:
	//   - On most Intel CPUs it is microcoded, making it have a much higher
	//     latency and use more execution ports than 'aesenclast'.
	//   - It cannot be used in a loop, since it requires an immediate.
	//   - It doesn't do much more than 'aesenclast' in the first place.
	movdqa		\b, %xmm2
	pshufb		MASK, %xmm2	// broadcast last dword, rotated right 8
	aesenclast	RCON, %xmm2
	// XOR in the prefix sum of the four dwords of \a, which is the
	// previous round key (AES-128) or the first round key in the previous
	// pair of round keys (AES-256).  The result is the next round key.
	_prefix_sum	\a, tmp=%xmm3
	pxor		%xmm2, \a
	// Store the next round key to memory.  Also leave it in \a.
	movdqu		\a, (RNDKEYS)
.endm
// Expand the raw AES key at (IN_KEY) into the standard round keys at
// (RNDKEYS) and, if INV_RNDKEYS is non-NULL, also into the Equivalent
// Inverse Cipher round keys at (INV_RNDKEYS).  \is_aes128 selects the
// AES-128 schedule (11 round keys) or the AES-256 schedule (15 round keys).
.macro _aes_expandkey_aesni is_aes128
#ifdef __x86_64__
	// Arguments
	.set	RNDKEYS, %rdi
	.set	INV_RNDKEYS, %rsi
	.set	IN_KEY, %rdx
	// Other local variables
	.set	RCON_PTR, %rcx
	.set	COUNTER, %eax
#else
	// Arguments, assuming -mregparm=3
	.set	RNDKEYS, %eax
	.set	INV_RNDKEYS, %edx
	.set	IN_KEY, %ecx
	// Other local variables
	.set	RCON_PTR, %ebx
	.set	COUNTER, %esi
#endif
	.set	RCON, %xmm6
	.set	MASK, %xmm7

#ifdef __i386__
	// %ebx and %esi are callee-saved on i386.
	push	%ebx
	push	%esi
#endif
.if \is_aes128
	// AES-128: the first round key is simply a copy of the raw key.
	movdqu	(IN_KEY), %xmm0
	movdqu	%xmm0, (RNDKEYS)
.else
	// AES-256: the first two round keys are simply a copy of the raw key.
	movdqu	(IN_KEY), %xmm0
	movdqu	%xmm0, (RNDKEYS)
	movdqu	16(IN_KEY), %xmm1
	movdqu	%xmm1, 16(RNDKEYS)
	add	$32, RNDKEYS
.endif

	// Generate the remaining round keys.
	movdqa	RODATA(.Lmask), MASK
.if \is_aes128
	lea	RODATA(.Lrcon), RCON_PTR
	mov	$10, COUNTER
.Lgen_next_aes128_round_key:
	add	$16, RNDKEYS
	// Broadcast the next round constant to all four dwords of RCON.
	movd	(RCON_PTR), RCON
	pshufd	$0x00, RCON, RCON
	add	$4, RCON_PTR
	_gen_round_key	%xmm0, %xmm0
	dec	COUNTER
	jnz	.Lgen_next_aes128_round_key
.else
	// AES-256: only the first 7 round constants are needed, so instead of
	// loading each one from memory, just start by loading [1, 1, 1, 1] and
	// then generate the rest by doubling.
	pshufd	$0x00, RODATA(.Lrcon), RCON
	pxor	%xmm5, %xmm5		// All-zeroes
	mov	$7, COUNTER
.Lgen_next_aes256_round_key_pair:
	// Generate the next AES-256 round key: either the first of a pair of
	// two, or the last one.
	_gen_round_key	%xmm0, %xmm1
	dec	COUNTER
	jz	.Lgen_aes256_round_keys_done
	// Generate the second AES-256 round key of the pair.  Compared to the
	// first, there's no rotation and no XOR of a round constant.
	pshufd	$0xff, %xmm0, %xmm2	// Get four copies of last dword
	aesenclast	%xmm5, %xmm2	// Just does SubBytes
	_prefix_sum	%xmm1, tmp=%xmm3
	pxor	%xmm2, %xmm1
	movdqu	%xmm1, 16(RNDKEYS)
	add	$32, RNDKEYS
	paddd	RCON, RCON		// RCON <<= 1
	jmp	.Lgen_next_aes256_round_key_pair
.Lgen_aes256_round_keys_done:
.endif

	// If INV_RNDKEYS is non-NULL, write the round keys for the Equivalent
	// Inverse Cipher to it.  To do that, reverse the standard round keys,
	// and apply aesimc (InvMixColumn) to each except the first and last.
	// RNDKEYS currently points at the last standard round key.
	test	INV_RNDKEYS, INV_RNDKEYS
	jz	.Ldone\@
	movdqu	(RNDKEYS), %xmm0	// Last standard round key
	movdqu	%xmm0, (INV_RNDKEYS)	// => First inverse round key
.if \is_aes128
	mov	$9, COUNTER
.else
	mov	$13, COUNTER
.endif
.Lgen_next_inv_round_key\@:
	sub	$16, RNDKEYS
	add	$16, INV_RNDKEYS
	movdqu	(RNDKEYS), %xmm0
	aesimc	%xmm0, %xmm0
	movdqu	%xmm0, (INV_RNDKEYS)
	dec	COUNTER
	jnz	.Lgen_next_inv_round_key\@
	movdqu	-16(RNDKEYS), %xmm0	// First standard round key
	movdqu	%xmm0, 16(INV_RNDKEYS)	// => Last inverse round key
.Ldone\@:
#ifdef __i386__
	pop	%esi
	pop	%ebx
#endif
	RET
.endm
// void aes128_expandkey_aesni(u32 rndkeys[], u32 *inv_rndkeys,
//			       const u8 in_key[AES_KEYSIZE_128]);
//
// inv_rndkeys may be NULL if decryption round keys are not needed.
SYM_FUNC_START(aes128_expandkey_aesni)
	_aes_expandkey_aesni	1
SYM_FUNC_END(aes128_expandkey_aesni)

// void aes256_expandkey_aesni(u32 rndkeys[], u32 *inv_rndkeys,
//			       const u8 in_key[AES_KEYSIZE_256]);
//
// inv_rndkeys may be NULL if decryption round keys are not needed.
SYM_FUNC_START(aes256_expandkey_aesni)
	_aes_expandkey_aesni	0
SYM_FUNC_END(aes256_expandkey_aesni)
// En/decrypt the 16-byte block at (IN) into (OUT), using the round keys at
// (RNDKEYS) and NROUNDS rounds.  \enc selects aesenc (1) or aesdec (0); for
// decryption the keys must be the Equivalent Inverse Cipher round keys.
.macro _aes_crypt_aesni enc
#ifdef __x86_64__
	.set	RNDKEYS, %rdi
	.set	NROUNDS, %esi
	.set	OUT, %rdx
	.set	IN, %rcx
#else
	// Assuming -mregparm=3
	.set	RNDKEYS, %eax
	.set	NROUNDS, %edx
	.set	OUT, %ecx
	.set	IN, %ebx	// Passed on stack
#endif

#ifdef __i386__
	push	%ebx
	// The fourth argument is at 8(%esp): 4 bytes for the return address
	// plus 4 for the just-pushed %ebx.
	mov	8(%esp), %ebx
#endif
	// Zero-th round: just XOR in the first round key.
	movdqu	(IN), %xmm0
	movdqu	(RNDKEYS), %xmm1
	pxor	%xmm1, %xmm0

	// Normal rounds
	add	$16, RNDKEYS
	dec	NROUNDS
.Lnext_round\@:
	movdqu	(RNDKEYS), %xmm1
.if \enc
	aesenc	%xmm1, %xmm0
.else
	aesdec	%xmm1, %xmm0
.endif
	add	$16, RNDKEYS
	dec	NROUNDS
	jne	.Lnext_round\@

	// Last round (aesenclast/aesdeclast omit (Inv)MixColumns)
	movdqu	(RNDKEYS), %xmm1
.if \enc
	aesenclast	%xmm1, %xmm0
.else
	aesdeclast	%xmm1, %xmm0
.endif
	movdqu	%xmm0, (OUT)
#ifdef __i386__
	pop	%ebx
#endif
	RET
.endm
// void aes_encrypt_aesni(const u32 rndkeys[], int nrounds,
//			  u8 out[AES_BLOCK_SIZE], const u8 in[AES_BLOCK_SIZE]);
//
// rndkeys must be the standard round keys from *_expandkey_aesni().
SYM_FUNC_START(aes_encrypt_aesni)
	_aes_crypt_aesni	1
SYM_FUNC_END(aes_encrypt_aesni)

// void aes_decrypt_aesni(const u32 inv_rndkeys[], int nrounds,
//			  u8 out[AES_BLOCK_SIZE], const u8 in[AES_BLOCK_SIZE]);
//
// inv_rndkeys must be the Equivalent Inverse Cipher round keys from
// *_expandkey_aesni().
SYM_FUNC_START(aes_decrypt_aesni)
	_aes_crypt_aesni	0
SYM_FUNC_END(aes_decrypt_aesni)

85
lib/crypto/x86/aes.h Normal file
View File

@@ -0,0 +1,85 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* AES block cipher using AES-NI instructions
*
* Copyright 2026 Google LLC
*/
#include <asm/fpu/api.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_aes);
void aes128_expandkey_aesni(u32 rndkeys[], u32 *inv_rndkeys,
const u8 in_key[AES_KEYSIZE_128]);
void aes256_expandkey_aesni(u32 rndkeys[], u32 *inv_rndkeys,
const u8 in_key[AES_KEYSIZE_256]);
void aes_encrypt_aesni(const u32 rndkeys[], int nrounds,
u8 out[AES_BLOCK_SIZE], const u8 in[AES_BLOCK_SIZE]);
void aes_decrypt_aesni(const u32 inv_rndkeys[], int nrounds,
u8 out[AES_BLOCK_SIZE], const u8 in[AES_BLOCK_SIZE]);
/*
 * Expand an AES key, writing the standard round keys to @k->rndkeys and,
 * when @inv_k is non-NULL, the Equivalent Inverse Cipher round keys to
 * @inv_k->inv_rndkeys.  The expanded key format is identical whether the
 * AES-NI or the generic path is taken.
 *
 * The AES-NI path handles AES-128 and AES-256 when the instructions are
 * available and the FPU is currently usable; AES-192 (almost never used)
 * and all other cases fall back to the generic code.  Key expansion is
 * rarely performance-critical, but the AES-NI path is still somewhat
 * faster and has execution time independent of the key material.
 */
static void aes_preparekey_arch(union aes_enckey_arch *k,
				union aes_invkey_arch *inv_k,
				const u8 *in_key, int key_len, int nrounds)
{
	u32 *inv_rndkeys = inv_k ? inv_k->inv_rndkeys : NULL;

	if (!static_branch_likely(&have_aes) || key_len == AES_KEYSIZE_192 ||
	    !irq_fpu_usable()) {
		aes_expandkey_generic(k->rndkeys, inv_rndkeys, in_key,
				      key_len);
		return;
	}

	kernel_fpu_begin();
	if (key_len == AES_KEYSIZE_128)
		aes128_expandkey_aesni(k->rndkeys, inv_rndkeys, in_key);
	else
		aes256_expandkey_aesni(k->rndkeys, inv_rndkeys, in_key);
	kernel_fpu_end();
}
/* Encrypt one block, preferring AES-NI when the FPU is currently usable. */
static void aes_encrypt_arch(const struct aes_enckey *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	if (!static_branch_likely(&have_aes) || !irq_fpu_usable()) {
		aes_encrypt_generic(key->k.rndkeys, key->nrounds, out, in);
		return;
	}
	kernel_fpu_begin();
	aes_encrypt_aesni(key->k.rndkeys, key->nrounds, out, in);
	kernel_fpu_end();
}
/* Decrypt one block, preferring AES-NI when the FPU is currently usable. */
static void aes_decrypt_arch(const struct aes_key *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	if (!static_branch_likely(&have_aes) || !irq_fpu_usable()) {
		aes_decrypt_generic(key->inv_k.inv_rndkeys, key->nrounds,
				    out, in);
		return;
	}
	kernel_fpu_begin();
	aes_decrypt_aesni(key->inv_k.inv_rndkeys, key->nrounds, out, in);
	kernel_fpu_end();
}
#define aes_mod_init_arch aes_mod_init_arch
/* Enable the AES-NI fast paths if the CPU supports the AES instructions. */
static void aes_mod_init_arch(void)
{
	if (boot_cpu_has(X86_FEATURE_AES))
		static_branch_enable(&have_aes);
}

158
lib/crypto/x86/nh-avx2.S Normal file
View File

@@ -0,0 +1,158 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* NH - ε-almost-universal hash function, x86_64 AVX2 accelerated
*
* Copyright 2018 Google LLC
*
* Author: Eric Biggers <ebiggers@google.com>
*/
#include <linux/linkage.h>
#define PASS0_SUMS %ymm0
#define PASS1_SUMS %ymm1
#define PASS2_SUMS %ymm2
#define PASS3_SUMS %ymm3
#define K0 %ymm4
#define K0_XMM %xmm4
#define K1 %ymm5
#define K1_XMM %xmm5
#define K2 %ymm6
#define K2_XMM %xmm6
#define K3 %ymm7
#define K3_XMM %xmm7
#define T0 %ymm8
#define T1 %ymm9
#define T2 %ymm10
#define T2_XMM %xmm10
#define T3 %ymm11
#define T3_XMM %xmm11
#define T4 %ymm12
#define T5 %ymm13
#define T6 %ymm14
#define T7 %ymm15
#define KEY %rdi
#define MESSAGE %rsi
#define MESSAGE_LEN %rdx
#define HASH %rcx
// Process the 32 message bytes in T3 (two 16-byte NH strides at once, one per
// 128-bit lane) through all four NH passes.  \k0-\k3 hold the key words each
// pass pairs with those bytes.  Clobbers T0-T7 (including T3).
.macro _nh_2xstride k0, k1, k2, k3
	// Add message words to key words
	vpaddd		\k0, T3, T0
	vpaddd		\k1, T3, T1
	vpaddd		\k2, T3, T2
	vpaddd		\k3, T3, T3

	// Multiply 32x32 => 64 and accumulate.  The vpshufd pairs place dwords
	// (0,1) and (2,3) of each sum vector in the even dword positions that
	// vpmuludq reads, so each product is (m_i + k_i) * (m_{i+2} + k_{i+2}).
	vpshufd		$0x10, T0, T4
	vpshufd		$0x32, T0, T0
	vpshufd		$0x10, T1, T5
	vpshufd		$0x32, T1, T1
	vpshufd		$0x10, T2, T6
	vpshufd		$0x32, T2, T2
	vpshufd		$0x10, T3, T7
	vpshufd		$0x32, T3, T3
	vpmuludq	T4, T0, T0
	vpmuludq	T5, T1, T1
	vpmuludq	T6, T2, T2
	vpmuludq	T7, T3, T3
	vpaddq		T0, PASS0_SUMS, PASS0_SUMS
	vpaddq		T1, PASS1_SUMS, PASS1_SUMS
	vpaddq		T2, PASS2_SUMS, PASS2_SUMS
	vpaddq		T3, PASS3_SUMS, PASS3_SUMS
.endm
/*
 * void nh_avx2(const u32 *key, const u8 *message, size_t message_len,
 *		__le64 hash[NH_NUM_PASSES])
 *
 * It's guaranteed that message_len % 16 == 0.
 */
SYM_FUNC_START(nh_avx2)
	vmovdqu		0x00(KEY), K0
	vmovdqu		0x10(KEY), K1
	add		$0x20, KEY
	vpxor		PASS0_SUMS, PASS0_SUMS, PASS0_SUMS
	vpxor		PASS1_SUMS, PASS1_SUMS, PASS1_SUMS
	vpxor		PASS2_SUMS, PASS2_SUMS, PASS2_SUMS
	vpxor		PASS3_SUMS, PASS3_SUMS, PASS3_SUMS

	sub		$0x40, MESSAGE_LEN
	jl		.Lloop4_done	// fewer than 4 strides in total
.Lloop4:
	// Main loop: 4 strides (64 message bytes) per iteration, two at a
	// time.  The key registers are rotated so each stride sees the right
	// 16-byte window of the key.
	vmovdqu		(MESSAGE), T3
	vmovdqu		0x00(KEY), K2
	vmovdqu		0x10(KEY), K3
	_nh_2xstride	K0, K1, K2, K3

	vmovdqu		0x20(MESSAGE), T3
	vmovdqu		0x20(KEY), K0
	vmovdqu		0x30(KEY), K1
	_nh_2xstride	K2, K3, K0, K1

	add		$0x40, MESSAGE
	add		$0x40, KEY
	sub		$0x40, MESSAGE_LEN
	jge		.Lloop4

.Lloop4_done:
	// Recover the remaining byte count (0, 0x10, 0x20, or 0x30).
	and		$0x3f, MESSAGE_LEN
	jz		.Ldone
	cmp		$0x20, MESSAGE_LEN
	jl		.Llast

	// 2 or 3 strides remain; do 2 more.
	vmovdqu		(MESSAGE), T3
	vmovdqu		0x00(KEY), K2
	vmovdqu		0x10(KEY), K3
	_nh_2xstride	K0, K1, K2, K3
	add		$0x20, MESSAGE
	add		$0x20, KEY
	sub		$0x20, MESSAGE_LEN
	jz		.Ldone
	vmovdqa		K2, K0
	vmovdqa		K3, K1
.Llast:
	// Last stride.  Zero the high 128 bits of the message and keys so they
	// don't affect the result when processing them like 2 strides.  (The
	// VEX-encoded xmm moves zero-extend, clearing bits 255:128.)
	vmovdqu		(MESSAGE), T3_XMM
	vmovdqa		K0_XMM, K0_XMM
	vmovdqa		K1_XMM, K1_XMM
	vmovdqu		0x00(KEY), K2_XMM
	vmovdqu		0x10(KEY), K3_XMM
	_nh_2xstride	K0, K1, K2, K3
.Ldone:
	// Sum the accumulators for each pass, then store the sums to 'hash'
	//	PASS0_SUMS is (0A 0B 0C 0D)
	//	PASS1_SUMS is (1A 1B 1C 1D)
	//	PASS2_SUMS is (2A 2B 2C 2D)
	//	PASS3_SUMS is (3A 3B 3C 3D)
	// We need the horizontal sums:
	//	(0A + 0B + 0C + 0D,
	//	 1A + 1B + 1C + 1D,
	//	 2A + 2B + 2C + 2D,
	//	 3A + 3B + 3C + 3D)
	//
	vpunpcklqdq	PASS1_SUMS, PASS0_SUMS, T0	// T0 = (0A 1A 0C 1C)
	vpunpckhqdq	PASS1_SUMS, PASS0_SUMS, T1	// T1 = (0B 1B 0D 1D)
	vpunpcklqdq	PASS3_SUMS, PASS2_SUMS, T2	// T2 = (2A 3A 2C 3C)
	vpunpckhqdq	PASS3_SUMS, PASS2_SUMS, T3	// T3 = (2B 3B 2D 3D)
	vinserti128	$0x1, T2_XMM, T0, T4		// T4 = (0A 1A 2A 3A)
	vinserti128	$0x1, T3_XMM, T1, T5		// T5 = (0B 1B 2B 3B)
	vperm2i128	$0x31, T2, T0, T0		// T0 = (0C 1C 2C 3C)
	vperm2i128	$0x31, T3, T1, T1		// T1 = (0D 1D 2D 3D)
	vpaddq		T5, T4, T4
	vpaddq		T1, T0, T0
	vpaddq		T4, T0, T0
	vmovdqu		T0, (HASH)
	vzeroupper	// Avoid AVX-SSE transition penalties in callers
	RET
SYM_FUNC_END(nh_avx2)

123
lib/crypto/x86/nh-sse2.S Normal file
View File

@@ -0,0 +1,123 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* NH - ε-almost-universal hash function, x86_64 SSE2 accelerated
*
* Copyright 2018 Google LLC
*
* Author: Eric Biggers <ebiggers@google.com>
*/
#include <linux/linkage.h>
#define PASS0_SUMS %xmm0
#define PASS1_SUMS %xmm1
#define PASS2_SUMS %xmm2
#define PASS3_SUMS %xmm3
#define K0 %xmm4
#define K1 %xmm5
#define K2 %xmm6
#define K3 %xmm7
#define T0 %xmm8
#define T1 %xmm9
#define T2 %xmm10
#define T3 %xmm11
#define T4 %xmm12
#define T5 %xmm13
#define T6 %xmm14
#define T7 %xmm15
#define KEY %rdi
#define MESSAGE %rsi
#define MESSAGE_LEN %rdx
#define HASH %rcx
// Process one 16-byte message stride at \offset(MESSAGE) through all four NH
// passes, loading the next key stride into \k3 and accumulating into the
// PASSn_SUMS registers.  Clobbers T1-T7 and \k0 (whose old key value is no
// longer needed by the time this runs — see the rotation at the call sites).
.macro _nh_stride k0, k1, k2, k3, offset
	// Load next message stride
	movdqu		\offset(MESSAGE), T1
	// Load next key stride
	movdqu		\offset(KEY), \k3
	// Add message words to key words
	movdqa		T1, T2
	movdqa		T1, T3
	paddd		T1, \k0		// reuse k0 to avoid a move
	paddd		\k1, T1
	paddd		\k2, T2
	paddd		\k3, T3
	// Multiply 32x32 => 64 and accumulate.  The pshufd pairs place dwords
	// (0,1) and (2,3) of each sum in the even positions read by pmuludq,
	// so each product is (m_i + k_i) * (m_{i+2} + k_{i+2}).
	pshufd		$0x10, \k0, T4
	pshufd		$0x32, \k0, \k0
	pshufd		$0x10, T1, T5
	pshufd		$0x32, T1, T1
	pshufd		$0x10, T2, T6
	pshufd		$0x32, T2, T2
	pshufd		$0x10, T3, T7
	pshufd		$0x32, T3, T3
	pmuludq		T4, \k0
	pmuludq		T5, T1
	pmuludq		T6, T2
	pmuludq		T7, T3
	paddq		\k0, PASS0_SUMS
	paddq		T1, PASS1_SUMS
	paddq		T2, PASS2_SUMS
	paddq		T3, PASS3_SUMS
.endm
/*
 * void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
 *		__le64 hash[NH_NUM_PASSES])
 *
 * It's guaranteed that message_len % 16 == 0.
 */
SYM_FUNC_START(nh_sse2)
	movdqu		0x00(KEY), K0
	movdqu		0x10(KEY), K1
	movdqu		0x20(KEY), K2
	add		$0x30, KEY
	pxor		PASS0_SUMS, PASS0_SUMS
	pxor		PASS1_SUMS, PASS1_SUMS
	pxor		PASS2_SUMS, PASS2_SUMS
	pxor		PASS3_SUMS, PASS3_SUMS

	sub		$0x40, MESSAGE_LEN
	jl		.Lloop4_done	// fewer than 4 strides in total
.Lloop4:
	// Main loop: 4 strides (64 message bytes) per iteration.  The key
	// registers are rotated so each stride sees the right key window.
	_nh_stride	K0, K1, K2, K3, 0x00
	_nh_stride	K1, K2, K3, K0, 0x10
	_nh_stride	K2, K3, K0, K1, 0x20
	_nh_stride	K3, K0, K1, K2, 0x30
	add		$0x40, KEY
	add		$0x40, MESSAGE
	sub		$0x40, MESSAGE_LEN
	jge		.Lloop4

.Lloop4_done:
	// Recover the remaining byte count (0, 0x10, 0x20, or 0x30).
	and		$0x3f, MESSAGE_LEN
	jz		.Ldone

	// 1 to 3 strides remain.
	_nh_stride	K0, K1, K2, K3, 0x00
	sub		$0x10, MESSAGE_LEN
	jz		.Ldone
	_nh_stride	K1, K2, K3, K0, 0x10
	sub		$0x10, MESSAGE_LEN
	jz		.Ldone
	_nh_stride	K2, K3, K0, K1, 0x20
.Ldone:
	// Sum the accumulators for each pass, then store the sums to 'hash'
	movdqa		PASS0_SUMS, T0
	movdqa		PASS2_SUMS, T1
	punpcklqdq	PASS1_SUMS, T0		// => (PASS0_SUM_A PASS1_SUM_A)
	punpcklqdq	PASS3_SUMS, T1		// => (PASS2_SUM_A PASS3_SUM_A)
	punpckhqdq	PASS1_SUMS, PASS0_SUMS	// => (PASS0_SUM_B PASS1_SUM_B)
	punpckhqdq	PASS3_SUMS, PASS2_SUMS	// => (PASS2_SUM_B PASS3_SUM_B)
	paddq		PASS0_SUMS, T0
	paddq		PASS2_SUMS, T1
	movdqu		T0, 0x00(HASH)
	movdqu		T1, 0x10(HASH)
	RET
SYM_FUNC_END(nh_sse2)

45
lib/crypto/x86/nh.h Normal file
View File

@@ -0,0 +1,45 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* x86_64 accelerated implementation of NH
*
* Copyright 2018 Google LLC
*/
#include <asm/fpu/api.h>
#include <linux/static_call.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sse2);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_avx2);
asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
__le64 hash[NH_NUM_PASSES]);
asmlinkage void nh_avx2(const u32 *key, const u8 *message, size_t message_len,
__le64 hash[NH_NUM_PASSES]);
/*
 * Hash @message (@message_len bytes, a multiple of 16) with NH using the
 * vectorized x86_64 code, preferring AVX2 over SSE2.  Returns false when the
 * caller must use the portable fallback instead: message shorter than 64
 * bytes, no SSE2, or the FPU not currently usable.
 */
static bool nh_arch(const u32 *key, const u8 *message, size_t message_len,
		    __le64 hash[NH_NUM_PASSES])
{
	if (message_len < 64 || !static_branch_likely(&have_sse2) ||
	    !irq_fpu_usable())
		return false;

	kernel_fpu_begin();
	if (static_branch_likely(&have_avx2))
		nh_avx2(key, message, message_len, hash);
	else
		nh_sse2(key, message, message_len, hash);
	kernel_fpu_end();
	return true;
}
#define nh_mod_init_arch nh_mod_init_arch
/*
 * Select NH implementations at init time: SSE2 is the baseline; AVX2
 * additionally requires OS support for the SSE and YMM xstate components.
 */
static void nh_mod_init_arch(void)
{
	if (boot_cpu_has(X86_FEATURE_XMM2)) {
		static_branch_enable(&have_sse2);
		if (boot_cpu_has(X86_FEATURE_AVX2) &&
		    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
				      NULL))
			static_branch_enable(&have_avx2);
	}
}