diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst index 26efca09aef3..13ef311dace8 100644 --- a/Documentation/arch/arm64/booting.rst +++ b/Documentation/arch/arm64/booting.rst @@ -556,6 +556,18 @@ Before jumping into the kernel, the following conditions must be met: - MDCR_EL3.TPM (bit 6) must be initialized to 0b0 + For CPUs with support for 64-byte loads and stores without status (FEAT_LS64): + + - If the kernel is entered at EL1 and EL2 is present: + + - HCRX_EL2.EnALS (bit 1) must be initialised to 0b1. + + For CPUs with support for 64-byte stores with status (FEAT_LS64_V): + + - If the kernel is entered at EL1 and EL2 is present: + + - HCRX_EL2.EnASR (bit 2) must be initialised to 0b1. + The requirements described above for CPU mode, caches, MMUs, architected timers, coherency and system registers apply to all CPUs. All CPUs must enter the kernel in the same exception level. Where the values documented diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst index a15df4956849..97315ae6c0da 100644 --- a/Documentation/arch/arm64/elf_hwcaps.rst +++ b/Documentation/arch/arm64/elf_hwcaps.rst @@ -444,6 +444,13 @@ HWCAP3_MTE_STORE_ONLY HWCAP3_LSFE Functionality implied by ID_AA64ISAR3_EL1.LSFE == 0b0001 +HWCAP3_LS64 + Functionality implied by ID_AA64ISAR1_EL1.LS64 == 0b0001. Note that + the function of instruction ld64b/st64b requires support by CPU, system + and target (device) memory location and HWCAP3_LS64 implies the support + of CPU. User should only use ld64b/st64b on supported target (device) + memory location, otherwise fallback to the non-atomic alternatives. + 4. 
Unused AT_HWCAP bits ----------------------- diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index a7ec57060f64..4c300caad901 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -212,6 +212,7 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | GIC-700 | #2941627 | ARM64_ERRATUM_2941627 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | SI L1 | #4311569 | ARM64_ERRATUM_4311569 | +----------------+-----------------+-----------------+-----------------------------+ | Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 01a3abef8abb..bfa0ab343081 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1303,12 +1303,13 @@ userspace, for example because of missing instruction syndrome decode information or because there is no device mapped at the accessed IPA, then userspace can ask the kernel to inject an external abort using the address from the exiting fault on the VCPU. It is a programming error to set -ext_dabt_pending after an exit which was not either KVM_EXIT_MMIO or -KVM_EXIT_ARM_NISV. This feature is only available if the system supports -KVM_CAP_ARM_INJECT_EXT_DABT. This is a helper which provides commonality in -how userspace reports accesses for the above cases to guests, across different -userspace implementations. Nevertheless, userspace can still emulate all Arm -exceptions by manipulating individual registers using the KVM_SET_ONE_REG API. +ext_dabt_pending after an exit which was not either KVM_EXIT_MMIO, +KVM_EXIT_ARM_NISV, or KVM_EXIT_ARM_LDST64B. This feature is only available if +the system supports KVM_CAP_ARM_INJECT_EXT_DABT. 
This is a helper which +provides commonality in how userspace reports accesses for the above cases to +guests, across different userspace implementations. Nevertheless, userspace +can still emulate all Arm exceptions by manipulating individual registers +using the KVM_SET_ONE_REG API. See KVM_GET_VCPU_EVENTS for the data structure. @@ -7050,12 +7051,14 @@ in send_page or recv a buffer to recv_page). :: - /* KVM_EXIT_ARM_NISV */ + /* KVM_EXIT_ARM_NISV / KVM_EXIT_ARM_LDST64B */ struct { __u64 esr_iss; __u64 fault_ipa; } arm_nisv; +- KVM_EXIT_ARM_NISV: + Used on arm64 systems. If a guest accesses memory not in a memslot, KVM will typically return to userspace and ask it to do MMIO emulation on its behalf. However, for certain classes of instructions, no instruction decode @@ -7089,6 +7092,32 @@ Note that although KVM_CAP_ARM_NISV_TO_USER will be reported if queried outside of a protected VM context, the feature will not be exposed if queried on a protected VM file descriptor. +- KVM_EXIT_ARM_LDST64B: + +Used on arm64 systems. When a guest using a LD64B, ST64B, ST64BV, ST64BV0, +outside of a memslot, KVM will return to userspace with KVM_EXIT_ARM_LDST64B, +exposing the relevant ESR_EL2 information and faulting IPA, similarly to +KVM_EXIT_ARM_NISV. + +Userspace is supposed to fully emulate the instructions, which includes: + + - fetch of the operands for a store, including ACCDATA_EL1 in the case + of a ST64BV0 instruction + - deal with the endianness if the guest is big-endian + - emulate the access, including the delivery of an exception if the + access didn't succeed + - provide a return value in the case of ST64BV/ST64BV0 + - return the data in the case of a load + - increment PC if the instruction was successfully executed + +Note that there is no expectation of performance for this emulation, as it +involves a large number of interaction with the guest state. 
It is, however, +expected that the instruction's semantics are preserved, specially the +single-copy atomicity property of the 64 byte access. + +This exit reason must be handled if userspace sets ID_AA64ISAR1_EL1.LS64 to a +non-zero value, indicating that FEAT_LS64* is enabled. + :: /* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */ diff --git a/MAINTAINERS b/MAINTAINERS index f367ae094908..5d371f76daf1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -336,6 +336,8 @@ ACPI FOR ARM64 (ACPI/arm64) M: Lorenzo Pieralisi M: Hanjun Guo M: Sudeep Holla +M: Catalin Marinas +M: Will Deacon L: linux-acpi@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index 18b102a30741..574bbcc55382 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -92,8 +92,6 @@ static inline void syscall_set_nr(struct task_struct *task, (nr & __NR_SYSCALL_MASK); } -#define SYSCALL_MAX_ARGS 7 - static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args) diff --git a/arch/arm64/Kbuild b/arch/arm64/Kbuild index 5bfbf7d79c99..d876bc0e5421 100644 --- a/arch/arm64/Kbuild +++ b/arch/arm64/Kbuild @@ -1,4 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only + +# Branch profiling isn't noinstr-safe +subdir-ccflags-$(CONFIG_TRACE_BRANCH_PROFILING) += -DDISABLE_BRANCH_PROFILING + obj-y += kernel/ mm/ net/ obj-$(CONFIG_KVM) += kvm/ obj-$(CONFIG_XEN) += xen/ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 93173f0a09c7..1d5e86068bd6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1155,6 +1155,25 @@ config ARM64_ERRATUM_3194386 If unsure, say Y. +config ARM64_ERRATUM_4311569 + bool "SI L1: 4311569: workaround for premature CMO completion erratum" + default y + help + This option adds the workaround for ARM SI L1 erratum 4311569. 
+ + The erratum of SI L1 can cause an early response to a combined write + and cache maintenance operation (WR+CMO) before the operation is fully + completed to the Point of Serialization (POS). + This can result in a non-I/O coherent agent observing stale data, + potentially leading to system instability or incorrect behavior. + + Enabling this option implements a software workaround by inserting a + second loop of Cache Maintenance Operation (CMO) immediately following the + end of function to do CMOs. This ensures that the data is correctly serialized + before the buffer is handed off to a non-coherent agent. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -1680,7 +1699,6 @@ config MITIGATE_SPECTRE_BRANCH_HISTORY config ARM64_SW_TTBR0_PAN bool "Emulate Privileged Access Never using TTBR0_EL1 switching" depends on !KCSAN - select ARM64_PAN help Enabling this option prevents the kernel from accessing user-space memory directly by pointing TTBR0_EL1 to a reserved @@ -1859,36 +1877,6 @@ config ARM64_HW_AFDBM to work on pre-ARMv8.1 hardware and the performance impact is minimal. If unsure, say Y. -config ARM64_PAN - bool "Enable support for Privileged Access Never (PAN)" - default y - help - Privileged Access Never (PAN; part of the ARMv8.1 Extensions) - prevents the kernel or hypervisor from accessing user-space (EL0) - memory directly. - - Choosing this option will cause any unprotected (not using - copy_to_user et al) memory access to fail with a permission fault. - - The feature is detected at runtime, and will remain as a 'nop' - instruction if the cpu does not implement the feature. - -config ARM64_LSE_ATOMICS - bool - default ARM64_USE_LSE_ATOMICS - -config ARM64_USE_LSE_ATOMICS - bool "Atomic instructions" - default y - help - As part of the Large System Extensions, ARMv8.1 introduces new - atomic instructions that are designed specifically to scale in - very large systems. 
- - Say Y here to make use of these instructions for the in-kernel - atomic routines. This incurs a small overhead on CPUs that do - not support these instructions. - endmenu # "ARMv8.1 architectural features" menu "ARMv8.2 architectural features" @@ -2125,7 +2113,6 @@ config ARM64_MTE depends on ARM64_AS_HAS_MTE && ARM64_TAGGED_ADDR_ABI depends on AS_HAS_ARMV8_5 # Required for tag checking in the uaccess routines - select ARM64_PAN select ARCH_HAS_SUBPAGE_FAULTS select ARCH_USES_HIGH_VMA_FLAGS select ARCH_USES_PG_ARCH_2 @@ -2157,7 +2144,6 @@ menu "ARMv8.7 architectural features" config ARM64_EPAN bool "Enable support for Enhanced Privileged Access Never (EPAN)" default y - depends on ARM64_PAN help Enhanced Privileged Access Never (EPAN) allows Privileged Access Never to be used with Execute-only mappings. diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index f0ca7196f6fa..d3d46e5f7188 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -381,6 +381,9 @@ alternative_endif .macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup sub \tmp, \linesz, #1 bic \start, \start, \tmp +alternative_if ARM64_WORKAROUND_4311569 + mov \tmp, \start +alternative_else_nop_endif .Ldcache_op\@: .ifc \op, cvau __dcache_op_workaround_clean_cache \op, \start @@ -402,6 +405,13 @@ alternative_endif add \start, \start, \linesz cmp \start, \end b.lo .Ldcache_op\@ +alternative_if ARM64_WORKAROUND_4311569 + .ifnc \op, cvau + mov \start, \tmp + mov \tmp, xzr + cbnz \start, .Ldcache_op\@ + .endif +alternative_else_nop_endif dsb \domain _cond_uaccess_extable .Ldcache_op\@, \fixup diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 2c8029472ad4..177c691914f8 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -19,8 +19,6 @@ cpucap_is_possible(const unsigned int cap) "cap must be < ARM64_NCAPS"); switch (cap) { - case ARM64_HAS_PAN: - 
return IS_ENABLED(CONFIG_ARM64_PAN); case ARM64_HAS_EPAN: return IS_ENABLED(CONFIG_ARM64_EPAN); case ARM64_SVE: diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index cacd20df1786..9393220de069 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -83,9 +83,19 @@ /* Enable GCS if supported */ mrs_s x1, SYS_ID_AA64PFR1_EL1 ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 - cbz x1, .Lset_hcrx_\@ + cbz x1, .Lskip_gcs_hcrx_\@ orr x0, x0, #HCRX_EL2_GCSEn +.Lskip_gcs_hcrx_\@: + /* Enable LS64, LS64_V if supported */ + mrs_s x1, SYS_ID_AA64ISAR1_EL1 + ubfx x1, x1, #ID_AA64ISAR1_EL1_LS64_SHIFT, #4 + cbz x1, .Lset_hcrx_\@ + orr x0, x0, #HCRX_EL2_EnALS + cmp x1, #ID_AA64ISAR1_EL1_LS64_LS64_V + b.lt .Lset_hcrx_\@ + orr x0, x0, #HCRX_EL2_EnASR + .Lset_hcrx_\@: msr_s SYS_HCRX_EL2, x0 .Lskip_hcrx_\@: diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 4975a92cbd17..7e86d400864e 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -124,6 +124,7 @@ #define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n)) #define ESR_ELx_FSC_SECC (0x18) #define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n)) +#define ESR_ELx_FSC_EXCL_ATOMIC (0x35) #define ESR_ELx_FSC_ADDRSZ (0x00) /* @@ -488,6 +489,13 @@ static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) (esr == ESR_ELx_FSC_ACCESS_L(0)); } +static inline bool esr_fsc_is_excl_atomic_fault(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return esr == ESR_ELx_FSC_EXCL_ATOMIC; +} + static inline bool esr_fsc_is_addr_sz_fault(unsigned long esr) { esr &= ESR_ELx_FSC; diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 1f63814ae6c4..72ea4bda79f3 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -179,6 +179,7 @@ #define KERNEL_HWCAP_MTE_FAR __khwcap3_feature(MTE_FAR) #define KERNEL_HWCAP_MTE_STORE_ONLY __khwcap3_feature(MTE_STORE_ONLY) #define KERNEL_HWCAP_LSFE 
__khwcap3_feature(LSFE) +#define KERNEL_HWCAP_LS64 __khwcap3_feature(LS64) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index e1d30ba99d01..f463a654a2bb 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -671,7 +671,6 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, enum aarch64_insn_register Rn, enum aarch64_insn_register Rd, u8 lsb); -#ifdef CONFIG_ARM64_LSE_ATOMICS u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, enum aarch64_insn_register address, enum aarch64_insn_register value, @@ -683,28 +682,6 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, enum aarch64_insn_register value, enum aarch64_insn_size_type size, enum aarch64_insn_mem_order_type order); -#else -static inline -u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, - enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size, - enum aarch64_insn_mem_atomic_op op, - enum aarch64_insn_mem_order_type order) -{ - return AARCH64_BREAK_FAULT; -} - -static inline -u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, - enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size, - enum aarch64_insn_mem_order_type order) -{ - return AARCH64_BREAK_FAULT; -} -#endif u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type); u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type); u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result, diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 55d34192a8de..4818e5f0cf68 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -47,6 +47,7 @@ void kvm_skip_instr32(struct kvm_vcpu *vcpu); void kvm_inject_undefined(struct kvm_vcpu *vcpu); int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 
esr); int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr); +int kvm_inject_dabt_excl_atomic(struct kvm_vcpu *vcpu, u64 addr); void kvm_inject_size_fault(struct kvm_vcpu *vcpu); static inline int kvm_inject_sea_dabt(struct kvm_vcpu *vcpu, u64 addr) @@ -678,6 +679,12 @@ static inline void vcpu_set_hcrx(struct kvm_vcpu *vcpu) if (kvm_has_sctlr2(kvm)) vcpu->arch.hcrx_el2 |= HCRX_EL2_SCTLR2En; + + if (kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64)) + vcpu->arch.hcrx_el2 |= HCRX_EL2_EnALS; + + if (kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_V)) + vcpu->arch.hcrx_el2 |= HCRX_EL2_EnASR; } } #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 3129a5819d0e..1e77c45bb0a8 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -4,8 +4,6 @@ #include -#ifdef CONFIG_ARM64_LSE_ATOMICS - #define __LSE_PREAMBLE ".arch_extension lse\n" #include @@ -27,11 +25,4 @@ #define ARM64_LSE_ATOMIC_INSN(llsc, lse) \ ALTERNATIVE(llsc, __LSE_PREAMBLE lse, ARM64_HAS_LSE_ATOMICS) -#else /* CONFIG_ARM64_LSE_ATOMICS */ - -#define __lse_ll_sc_body(op, ...) __ll_sc_##op(__VA_ARGS__) - -#define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc - -#endif /* CONFIG_ARM64_LSE_ATOMICS */ #endif /* __ASM_LSE_H */ diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h index 78beceec10cd..fc0fb42b0b64 100644 --- a/arch/arm64/include/asm/rwonce.h +++ b/arch/arm64/include/asm/rwonce.h @@ -58,7 +58,7 @@ default: \ atomic = 0; \ } \ - atomic ? (typeof(*__x))__u.__val : (*(volatile typeof(__x))__x);\ + atomic ? 
(typeof(*__x))__u.__val : (*(volatile typeof(*__x) *)__x);\ }) #endif /* !BUILD_VDSO */ diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 712daa90e643..5e4c7fc44f73 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -77,23 +77,29 @@ static inline void syscall_set_nr(struct task_struct *task, } } -#define SYSCALL_MAX_ARGS 6 - static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args) { args[0] = regs->orig_x0; - args++; - - memcpy(args, ®s->regs[1], 5 * sizeof(args[0])); + args[1] = regs->regs[1]; + args[2] = regs->regs[2]; + args[3] = regs->regs[3]; + args[4] = regs->regs[4]; + args[5] = regs->regs[5]; } static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, const unsigned long *args) { - memcpy(®s->regs[0], args, 6 * sizeof(args[0])); + regs->regs[0] = args[0]; + regs->regs[1] = args[1]; + regs->regs[2] = args[2]; + regs->regs[3] = args[3]; + regs->regs[4] = args[4]; + regs->regs[5] = args[5]; + /* * Also copy the first argument into orig_x0 * so that syscall_get_arguments() would return it diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index a803b887b0b4..24fcd6adaa33 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -106,12 +106,6 @@ void arch_setup_new_exec(void); #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_TSC_SIGSEGV (1 << TIF_TSC_SIGSEGV) -#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ - _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ - _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \ - _TIF_NOTIFY_SIGNAL | _TIF_SIGPENDING | \ - _TIF_PATCH_PENDING) - #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ _TIF_SYSCALL_EMU) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 
6490930deef8..9810106a3f66 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -124,14 +124,12 @@ static inline bool uaccess_ttbr0_enable(void) static inline void __uaccess_disable_hw_pan(void) { - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, - CONFIG_ARM64_PAN)); + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN)); } static inline void __uaccess_enable_hw_pan(void) { - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, - CONFIG_ARM64_PAN)); + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN)); } static inline void uaccess_disable_privileged(void) diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 575564ecdb0b..06f83ca8de56 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -146,5 +146,6 @@ #define HWCAP3_MTE_FAR (1UL << 0) #define HWCAP3_MTE_STORE_ONLY (1UL << 1) #define HWCAP3_LSFE (1UL << 2) +#define HWCAP3_LS64 (1UL << 3) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 8cb3b575a031..5c0ab6bfd44a 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -141,6 +141,30 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, return (ctr_real != sys) && (ctr_raw != sys); } +#ifdef CONFIG_ARM64_ERRATUM_4311569 +static DEFINE_STATIC_KEY_FALSE(arm_si_l1_workaround_4311569); +static int __init early_arm_si_l1_workaround_4311569_cfg(char *arg) +{ + static_branch_enable(&arm_si_l1_workaround_4311569); + pr_info("Enabling cache maintenance workaround for ARM SI-L1 erratum 4311569\n"); + + return 0; +} +early_param("arm_si_l1_workaround_4311569", early_arm_si_l1_workaround_4311569_cfg); + +/* + * We have some earlier use cases to call cache maintenance operation functions, for example, + * dcache_inval_poc() and dcache_clean_poc() in head.S, before making decision to turn on this + * workaround. 
Since the scope of this workaround is limited to non-coherent DMA agents, its + * safe to have the workaround off by default. + */ +static bool +need_arm_si_l1_workaround_4311569(const struct arm64_cpu_capabilities *entry, int scope) +{ + return static_branch_unlikely(&arm_si_l1_workaround_4311569); +} +#endif + static void cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap) { @@ -870,6 +894,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_RANGE_LIST(erratum_spec_ssbs_list), }, #endif +#ifdef CONFIG_ARM64_ERRATUM_4311569 + { + .capability = ARM64_WORKAROUND_4311569, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = need_arm_si_l1_workaround_4311569, + }, +#endif #ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD { .desc = "ARM errata 2966298, 3117295", diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f0e66cb27d17..6044d463d3fb 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -240,6 +240,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_LS64_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_XS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0), @@ -2164,7 +2165,6 @@ static bool has_bbml2_noabort(const struct arm64_cpu_capabilities *caps, int sco return cpu_supports_bbml2_noabort(); } -#ifdef CONFIG_ARM64_PAN static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) { /* @@ -2176,7 +2176,6 @@ static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); set_pstate_pan(1); } -#endif /* CONFIG_ARM64_PAN */ #ifdef CONFIG_ARM64_RAS_EXTN static void 
cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) @@ -2260,6 +2259,16 @@ static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_E0PD */ +static void cpu_enable_ls64(struct arm64_cpu_capabilities const *cap) +{ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_EnALS, SCTLR_EL1_EnALS); +} + +static void cpu_enable_ls64_v(struct arm64_cpu_capabilities const *cap) +{ + /* Set (not clear) SCTLR_EL1.EnASR so EL0 ST64BV does not trap */ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_EnASR, SCTLR_EL1_EnASR); +} + #ifdef CONFIG_ARM64_PSEUDO_NMI static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry, int scope) @@ -2541,7 +2550,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, ECV, CNTPOFF) }, -#ifdef CONFIG_ARM64_PAN { .desc = "Privileged Access Never", .capability = ARM64_HAS_PAN, @@ -2550,7 +2558,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_pan, ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, PAN, IMP) }, -#endif /* CONFIG_ARM64_PAN */ #ifdef CONFIG_ARM64_EPAN { .desc = "Enhanced Privileged Access Never", @@ -2560,7 +2567,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, PAN, PAN3) }, #endif /* CONFIG_ARM64_EPAN */ -#ifdef CONFIG_ARM64_LSE_ATOMICS { .desc = "LSE atomic instructions", .capability = ARM64_HAS_LSE_ATOMICS, @@ -2568,7 +2574,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, ATOMIC, IMP) }, -#endif /* CONFIG_ARM64_LSE_ATOMICS */ { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, @@ -3148,6 +3153,22 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, XNX, IMP) }, + { + .desc = "LS64", + .capability = ARM64_HAS_LS64, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .cpu_enable =
cpu_enable_ls64, + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LS64, LS64) + }, + { + .desc = "LS64_V", + .capability = ARM64_HAS_LS64_V, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .cpu_enable = cpu_enable_ls64_v, + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LS64, LS64_V) + }, {}, }; @@ -3267,6 +3288,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_EBF16), HWCAP_CAP(ID_AA64ISAR1_EL1, DGH, IMP, CAP_HWCAP, KERNEL_HWCAP_DGH), HWCAP_CAP(ID_AA64ISAR1_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_I8MM), + HWCAP_CAP(ID_AA64ISAR1_EL1, LS64, LS64, CAP_HWCAP, KERNEL_HWCAP_LS64), HWCAP_CAP(ID_AA64ISAR2_EL1, LUT, IMP, CAP_HWCAP, KERNEL_HWCAP_LUT), HWCAP_CAP(ID_AA64ISAR3_EL1, FAMINMAX, IMP, CAP_HWCAP, KERNEL_HWCAP_FAMINMAX), HWCAP_CAP(ID_AA64ISAR3_EL1, LSFE, IMP, CAP_HWCAP, KERNEL_HWCAP_LSFE), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index c44e6d94f5de..6149bc91251d 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -81,6 +81,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_PACA] = "paca", [KERNEL_HWCAP_PACG] = "pacg", [KERNEL_HWCAP_GCS] = "gcs", + [KERNEL_HWCAP_LS64] = "ls64", [KERNEL_HWCAP_DCPODP] = "dcpodp", [KERNEL_HWCAP_SVE2] = "sve2", [KERNEL_HWCAP_SVEAES] = "sveaes", diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index 532d72ea42ee..b70f4df15a1a 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -41,7 +41,7 @@ static void *image_load(struct kimage *image, struct arm64_image_header *h; u64 flags, value; bool be_image, be_kernel; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; unsigned long text_offset, kernel_segment_number; struct kexec_segment *kernel_segment; int ret; diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index 941668800aea..c451ca17a656 100644 --- a/arch/arm64/kernel/probes/uprobes.c 
+++ b/arch/arm64/kernel/probes/uprobes.c @@ -103,10 +103,7 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t) * insn itself is trapped, then detect the case with the help of * invalid fault code which is being set in arch_uprobe_pre_xol */ - if (t->thread.fault_code != UPROBE_INV_FAULT_CODE) - return true; - - return false; + return t->thread.fault_code != UPROBE_INV_FAULT_CODE; } bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 80a580e019c5..b3801f532b10 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -887,6 +887,7 @@ static u8 spectre_bhb_loop_affected(void) MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), {}, }; static const struct midr_range spectre_bhb_k24_list[] = { diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 6c5ff6807d4c..ba5eab23fd90 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1484,6 +1484,9 @@ static int poe_get(struct task_struct *target, if (!system_supports_poe()) return -EINVAL; + if (target == current) + current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0); + return membuf_write(&to, &target->thread.por_el0, sizeof(target->thread.por_el0)); } @@ -2341,9 +2344,10 @@ enum ptrace_syscall_dir { PTRACE_SYSCALL_EXIT, }; -static void report_syscall(struct pt_regs *regs, enum ptrace_syscall_dir dir) +static __always_inline unsigned long ptrace_save_reg(struct pt_regs *regs, + enum ptrace_syscall_dir dir, + int *regno) { - int regno; unsigned long saved_reg; /* @@ -2362,15 +2366,34 @@ static void report_syscall(struct pt_regs *regs, enum ptrace_syscall_dir dir) * - Syscall stops behave differently to seccomp and pseudo-step traps * (the latter do not nobble any registers). */ - regno = (is_compat_task() ? 
12 : 7); - saved_reg = regs->regs[regno]; - regs->regs[regno] = dir; + *regno = (is_compat_task() ? 12 : 7); + saved_reg = regs->regs[*regno]; + regs->regs[*regno] = dir; - if (dir == PTRACE_SYSCALL_ENTER) { - if (ptrace_report_syscall_entry(regs)) - forget_syscall(regs); - regs->regs[regno] = saved_reg; - } else if (!test_thread_flag(TIF_SINGLESTEP)) { + return saved_reg; +} + +static int report_syscall_entry(struct pt_regs *regs) +{ + unsigned long saved_reg; + int regno, ret; + + saved_reg = ptrace_save_reg(regs, PTRACE_SYSCALL_ENTER, ®no); + ret = ptrace_report_syscall_entry(regs); + if (ret) + forget_syscall(regs); + regs->regs[regno] = saved_reg; + + return ret; +} + +static void report_syscall_exit(struct pt_regs *regs) +{ + unsigned long saved_reg; + int regno; + + saved_reg = ptrace_save_reg(regs, PTRACE_SYSCALL_EXIT, ®no); + if (!test_thread_flag(TIF_SINGLESTEP)) { ptrace_report_syscall_exit(regs, 0); regs->regs[regno] = saved_reg; } else { @@ -2388,10 +2411,11 @@ static void report_syscall(struct pt_regs *regs, enum ptrace_syscall_dir dir) int syscall_trace_enter(struct pt_regs *regs) { unsigned long flags = read_thread_flags(); + int ret; if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) { - report_syscall(regs, PTRACE_SYSCALL_ENTER); - if (flags & _TIF_SYSCALL_EMU) + ret = report_syscall_entry(regs); + if (ret || (flags & _TIF_SYSCALL_EMU)) return NO_SYSCALL; } @@ -2418,7 +2442,7 @@ void syscall_trace_exit(struct pt_regs *regs) trace_sys_exit(regs, syscall_get_return_value(current, regs)); if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP)) - report_syscall(regs, PTRACE_SYSCALL_EXIT); + report_syscall_exit(regs); rseq_syscall(regs); } diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 5d24dc53799b..3fe1faab0362 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -272,7 +272,7 @@ static void amu_fie_setup(const struct cpumask *cpus) cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus); - 
topology_set_scale_freq_source(&amu_sfd, amu_fie_cpus); + topology_set_scale_freq_source(&amu_sfd, cpus); pr_debug("CPUs[%*pbl]: counters will be used for FIE.", cpumask_pr_args(cpus)); @@ -284,7 +284,7 @@ static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val, struct cpufreq_policy *policy = data; if (val == CPUFREQ_CREATE_POLICY) - amu_fie_setup(policy->related_cpus); + amu_fie_setup(policy->cpus); /* * We don't need to handle CPUFREQ_REMOVE_POLICY event as the AMU @@ -303,10 +303,70 @@ static struct notifier_block init_amu_fie_notifier = { .notifier_call = init_amu_fie_callback, }; +static int cpuhp_topology_online(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_policy(cpu); + + /* Those are cheap checks */ + + /* + * Skip this CPU if: + * - it has no cpufreq policy assigned yet, + * - no policy exists that spans CPUs with AMU counters, or + * - it was already handled. + */ + if (unlikely(!policy) || !cpumask_available(amu_fie_cpus) || + cpumask_test_cpu(cpu, amu_fie_cpus)) + return 0; + + /* + * Only proceed if all already-online CPUs in this policy + * support AMU counters. + */ + if (unlikely(!cpumask_subset(policy->cpus, amu_fie_cpus))) + return 0; + + /* + * If the new online CPU cannot pass this check, all the CPUs related to + * the same policy should be clear from amu_fie_cpus mask, otherwise they + * may use different source of the freq scale. 
+ */ + if (!freq_counters_valid(cpu)) { + topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_ARCH, + policy->related_cpus); + cpumask_andnot(amu_fie_cpus, amu_fie_cpus, policy->related_cpus); + return 0; + } + + cpumask_set_cpu(cpu, amu_fie_cpus); + + topology_set_scale_freq_source(&amu_sfd, cpumask_of(cpu)); + + pr_debug("CPU[%u]: counter will be used for FIE.", cpu); + + return 0; +} + static int __init init_amu_fie(void) { - return cpufreq_register_notifier(&init_amu_fie_notifier, + int ret; + + ret = cpufreq_register_notifier(&init_amu_fie_notifier, CPUFREQ_POLICY_NOTIFIER); + if (ret) + return ret; + + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "arm64/topology:online", + cpuhp_topology_online, + NULL); + if (ret < 0) { + cpufreq_unregister_notifier(&init_amu_fie_notifier, + CPUFREQ_POLICY_NOTIFIER); + return ret; + } + + return 0; } core_initcall(init_amu_fie); diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 808d26bed182..885bd5bb2f41 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1704,7 +1704,6 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level) } } -#ifdef CONFIG_ARM64_LSE_ATOMICS static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new) { u64 tmp = old; @@ -1729,12 +1728,6 @@ static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new) return ret; } -#else -static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new) -{ - return -EINVAL; -} -#endif static int __llsc_swap_desc(u64 __user *ptep, u64 old, u64 new) { diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index d1ccddf9e87d..11a10d8f5beb 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -126,9 +126,7 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) add x1, x1, #VCPU_CONTEXT - alternative_cb ARM64_ALWAYS_SYSTEM, kvm_pan_patch_el2_entry - nop - alternative_cb_end + ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN) // Store the guest regs x2 and x3 stp x2, x3, [x1, 
#CPU_XREG_OFFSET(2)] diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index dfcd66c65517..6cc7ad84d7d8 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -253,6 +253,40 @@ int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr) return 1; } +static int kvm_inject_nested_excl_atomic(struct kvm_vcpu *vcpu, u64 addr) +{ + u64 esr = FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_DABT_LOW) | + FIELD_PREP(ESR_ELx_FSC, ESR_ELx_FSC_EXCL_ATOMIC) | + ESR_ELx_IL; + + vcpu_write_sys_reg(vcpu, addr, FAR_EL2); + return kvm_inject_nested_sync(vcpu, esr); +} + +/** + * kvm_inject_dabt_excl_atomic - inject a data abort for unsupported exclusive + * or atomic access + * @vcpu: The VCPU to receive the data abort + * @addr: The address to report in the DFAR + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +int kvm_inject_dabt_excl_atomic(struct kvm_vcpu *vcpu, u64 addr) +{ + u64 esr; + + if (is_nested_ctxt(vcpu) && (vcpu_read_sys_reg(vcpu, HCR_EL2) & HCR_VM)) + return kvm_inject_nested_excl_atomic(vcpu, addr); + + __kvm_inject_sea(vcpu, false, addr); + esr = vcpu_read_sys_reg(vcpu, exception_esr_elx(vcpu)); + esr &= ~ESR_ELx_FSC; + esr |= ESR_ELx_FSC_EXCL_ATOMIC; + vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu)); + return 1; +} + void kvm_inject_size_fault(struct kvm_vcpu *vcpu) { unsigned long addr, esr; diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 54f9358c9e0e..e2285ed8c91d 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -159,6 +159,9 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) bool is_write; int len; u8 data_buf[8]; + u64 esr; + + esr = kvm_vcpu_get_esr(vcpu); /* * No valid syndrome? Ask userspace for help if it has @@ -168,7 +171,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) * though, so directly deliver an exception to the guest. 
*/ if (!kvm_vcpu_dabt_isvalid(vcpu)) { - trace_kvm_mmio_nisv(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu), + trace_kvm_mmio_nisv(*vcpu_pc(vcpu), esr, kvm_vcpu_get_hfar(vcpu), fault_ipa); if (vcpu_is_protected(vcpu)) @@ -185,6 +188,28 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) return -ENOSYS; } + /* + * When (DFSC == 0b00xxxx || DFSC == 0b10101x) && DFSC != 0b0000xx + * ESR_EL2[12:11] describe the Load/Store Type. This allows us to + * punt the LD64B/ST64B/ST64BV/ST64BV0 instructions to userspace, + * which will have to provide a full emulation of these 4 + * instructions. No, we don't expect this to be fast. + * + * We rely on traps being set if the corresponding features are not + * enabled, so if we get here, userspace has promised us to handle + * it already. + */ + switch (kvm_vcpu_trap_get_fault(vcpu)) { + case 0b000100 ... 0b001111: + case 0b101010 ... 0b101011: + if (FIELD_GET(GENMASK(12, 11), esr)) { + run->exit_reason = KVM_EXIT_ARM_LDST64B; + run->arm_nisv.esr_iss = esr & ~(u64)ESR_ELx_FSC; + run->arm_nisv.fault_ipa = fault_ipa; + return 0; + } + } + /* + * Prepare MMIO operation. First decode the syndrome data we get + * from the CPU. Then try if some in-kernel emulation feels diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 2caa97f87890..2ada3624d631 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1843,6 +1843,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return ret; } + /* + * Guest performs atomic/exclusive operations on memory with unsupported + * attributes (e.g. ld64b/st64b on normal memory when no FEAT_LS64WB) + * and triggers the exception here. Since the memslot is valid, inject + * the fault back to the guest. 
+ */ + if (esr_fsc_is_excl_atomic_fault(kvm_vcpu_get_esr(vcpu))) { + kvm_inject_dabt_excl_atomic(vcpu, kvm_vcpu_get_hfar(vcpu)); + return 1; + } + if (nested) adjust_nested_fault_perms(nested, &prot, &writable); @@ -2080,7 +2091,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) /* Check the stage-2 fault is trans. fault or write fault */ if (!esr_fsc_is_translation_fault(esr) && !esr_fsc_is_permission_fault(esr) && - !esr_fsc_is_access_flag_fault(esr)) { + !esr_fsc_is_access_flag_fault(esr) && + !esr_fsc_is_excl_atomic_fault(esr)) { kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", kvm_vcpu_trap_get_class(vcpu), (unsigned long)kvm_vcpu_trap_get_fault(vcpu), diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index 4e298baddc2e..cc5b40917d0d 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -611,7 +611,6 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, state); } -#ifdef CONFIG_ARM64_LSE_ATOMICS static u32 aarch64_insn_encode_ldst_order(enum aarch64_insn_mem_order_type type, u32 insn) { @@ -755,7 +754,6 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn, value); } -#endif u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, enum aarch64_insn_register src, diff --git a/arch/arm64/mm/gcs.c b/arch/arm64/mm/gcs.c index 6e93f78de79b..04a23a497f20 100644 --- a/arch/arm64/mm/gcs.c +++ b/arch/arm64/mm/gcs.c @@ -199,8 +199,8 @@ int arch_set_shadow_stack_status(struct task_struct *task, unsigned long arg) size = gcs_size(0); gcs = alloc_gcs(0, size); - if (!gcs) - return -ENOMEM; + if (IS_ERR_VALUE(gcs)) + return gcs; task->thread.gcspr_el0 = gcs + size - sizeof(u64); task->thread.gcs_base = gcs; diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 10e246f11271..b12cbed9b5ad 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -24,7 +24,8 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t 
size, return NULL; /* Don't allow RAM to be mapped. */ - if (WARN_ON(pfn_is_map_memory(__phys_to_pfn(phys_addr)))) + if (WARN_ONCE(pfn_is_map_memory(__phys_to_pfn(phys_addr)), + "ioremap attempted on RAM pfn\n")) return NULL; /* diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 5d907ce3b6d3..22866b49be37 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -48,14 +48,14 @@ #define TCR_KASAN_SW_FLAGS 0 #endif -#ifdef CONFIG_KASAN_HW_TAGS -#define TCR_MTE_FLAGS TCR_EL1_TCMA1 | TCR_EL1_TBI1 | TCR_EL1_TBID1 -#elif defined(CONFIG_ARM64_MTE) +#ifdef CONFIG_ARM64_MTE /* * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on - * TBI being enabled at EL1. + * TBI being enabled at EL1. TCMA1 is needed to treat accesses with the + * match-all tag (0xF) as Tag Unchecked, irrespective of the SCTLR_EL1.TCF + * setting. */ -#define TCR_MTE_FLAGS TCR_EL1_TBI1 | TCR_EL1_TBID1 +#define TCR_MTE_FLAGS TCR_EL1_TCMA1 | TCR_EL1_TBI1 | TCR_EL1_TBID1 #else #define TCR_MTE_FLAGS 0 #endif diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index b6eb7a465ad2..5ce82edc508e 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -776,7 +776,6 @@ static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx) return 0; } -#ifdef CONFIG_ARM64_LSE_ATOMICS static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) { const u8 code = insn->code; @@ -843,12 +842,6 @@ static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) return 0; } -#else -static inline int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) -{ - return -EINVAL; -} -#endif static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) { diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 0fac75f01534..7261553b644b 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -46,6 +46,8 @@ HAS_HCX HAS_LDAPR HAS_LPA2 
HAS_LSE_ATOMICS +HAS_LS64 +HAS_LS64_V HAS_MOPS HAS_NESTED_VIRT HAS_BBML2_NOABORT @@ -103,6 +105,7 @@ WORKAROUND_2077057 WORKAROUND_2457168 WORKAROUND_2645198 WORKAROUND_2658417 +WORKAROUND_4311569 WORKAROUND_AMPERE_AC03_CPU_38 WORKAROUND_AMPERE_AC04_CPU_23 WORKAROUND_TRBE_OVERWRITE_FILL_MODE diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h index 7db3b489c8ad..bab7cdd96cbe 100644 --- a/arch/xtensa/include/asm/syscall.h +++ b/arch/xtensa/include/asm/syscall.h @@ -61,7 +61,6 @@ static inline void syscall_set_return_value(struct task_struct *task, regs->areg[2] = (long) error ? error : val; } -#define SYSCALL_MAX_ARGS 6 #define XTENSA_SYSCALL_ARGUMENT_REGS {6, 3, 4, 5, 8, 9} static inline void syscall_get_arguments(struct task_struct *task, diff --git a/drivers/acpi/arm64/agdi.c b/drivers/acpi/arm64/agdi.c index e0df3daa4abf..feb4b2cb4618 100644 --- a/drivers/acpi/arm64/agdi.c +++ b/drivers/acpi/arm64/agdi.c @@ -16,7 +16,11 @@ #include "init.h" struct agdi_data { + unsigned char flags; /* AGDI Signaling Mode */ int sdei_event; + unsigned int gsiv; + bool use_nmi; + int irq; }; static int agdi_sdei_handler(u32 sdei_event, struct pt_regs *regs, void *arg) @@ -48,6 +52,57 @@ static int agdi_sdei_probe(struct platform_device *pdev, return 0; } +static irqreturn_t agdi_interrupt_handler_nmi(int irq, void *dev_id) +{ + nmi_panic(NULL, "Arm Generic Diagnostic Dump and Reset NMI Interrupt event issued\n"); + return IRQ_HANDLED; +} + +static irqreturn_t agdi_interrupt_handler_irq(int irq, void *dev_id) +{ + panic("Arm Generic Diagnostic Dump and Reset Interrupt event issued\n"); + return IRQ_HANDLED; +} + +static int agdi_interrupt_probe(struct platform_device *pdev, + struct agdi_data *adata) +{ + unsigned long irq_flags; + int ret; + int irq; + + irq = acpi_register_gsi(NULL, adata->gsiv, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_HIGH); + if (irq < 0) { + dev_err(&pdev->dev, "cannot register GSI#%d (%d)\n", adata->gsiv, irq); + return irq; + } + + 
irq_flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_AUTOEN | + IRQF_NO_THREAD; + /* try NMI first */ + ret = request_nmi(irq, &agdi_interrupt_handler_nmi, irq_flags, + "agdi_interrupt_nmi", NULL); + if (!ret) { + enable_nmi(irq); + adata->irq = irq; + adata->use_nmi = true; + return 0; + } + + /* Then try normal interrupt */ + ret = request_irq(irq, &agdi_interrupt_handler_irq, + irq_flags, "agdi_interrupt_irq", NULL); + if (ret) { + dev_err(&pdev->dev, "cannot register IRQ %d\n", ret); + acpi_unregister_gsi(adata->gsiv); + return ret; + } + enable_irq(irq); + adata->irq = irq; + + return 0; +} + static int agdi_probe(struct platform_device *pdev) { struct agdi_data *adata = dev_get_platdata(&pdev->dev); @@ -55,12 +110,15 @@ static int agdi_probe(struct platform_device *pdev) if (!adata) return -EINVAL; - return agdi_sdei_probe(pdev, adata); + if (adata->flags & ACPI_AGDI_SIGNALING_MODE) + return agdi_interrupt_probe(pdev, adata); + else + return agdi_sdei_probe(pdev, adata); } -static void agdi_remove(struct platform_device *pdev) +static void agdi_sdei_remove(struct platform_device *pdev, + struct agdi_data *adata) { - struct agdi_data *adata = dev_get_platdata(&pdev->dev); int err, i; err = sdei_event_disable(adata->sdei_event); @@ -83,6 +141,30 @@ static void agdi_remove(struct platform_device *pdev) adata->sdei_event, ERR_PTR(err)); } +static void agdi_interrupt_remove(struct platform_device *pdev, + struct agdi_data *adata) +{ + if (adata->irq == -1) + return; + + if (adata->use_nmi) + free_nmi(adata->irq, NULL); + else + free_irq(adata->irq, NULL); + + acpi_unregister_gsi(adata->gsiv); +} + +static void agdi_remove(struct platform_device *pdev) +{ + struct agdi_data *adata = dev_get_platdata(&pdev->dev); + + if (adata->flags & ACPI_AGDI_SIGNALING_MODE) + agdi_interrupt_remove(pdev, adata); + else + agdi_sdei_remove(pdev, adata); +} + static struct platform_driver agdi_driver = { .driver = { .name = "agdi", @@ -94,7 +176,7 @@ static struct platform_driver 
agdi_driver = { void __init acpi_agdi_init(void) { struct acpi_table_agdi *agdi_table; - struct agdi_data pdata; + struct agdi_data pdata = { 0 }; struct platform_device *pdev; acpi_status status; @@ -103,12 +185,13 @@ void __init acpi_agdi_init(void) if (ACPI_FAILURE(status)) return; - if (agdi_table->flags & ACPI_AGDI_SIGNALING_MODE) { - pr_warn("Interrupt signaling is not supported"); - goto err_put_table; - } + if (agdi_table->flags & ACPI_AGDI_SIGNALING_MODE) + pdata.gsiv = agdi_table->gsiv; + else + pdata.sdei_event = agdi_table->sdei_event; - pdata.sdei_event = agdi_table->sdei_event; + pdata.irq = -1; + pdata.flags = agdi_table->flags; pdev = platform_device_register_data(NULL, "agdi", 0, &pdata, sizeof(pdata)); if (IS_ERR(pdev)) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 84ec92bff642..c0ef6ea9c111 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -34,7 +34,14 @@ EXPORT_PER_CPU_SYMBOL_GPL(capacity_freq_ref); static bool supports_scale_freq_counters(const struct cpumask *cpus) { - return cpumask_subset(cpus, &scale_freq_counters_mask); + int i; + + for_each_cpu(i, cpus) { + if (cpumask_test_cpu(i, &scale_freq_counters_mask)) + return true; + } + + return false; } bool topology_scale_freq_invariant(void) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 50dde2980f1b..a7a69f4d7675 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -198,6 +198,12 @@ struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu) } EXPORT_SYMBOL_GPL(cpufreq_cpu_get_raw); +struct cpufreq_policy *cpufreq_cpu_policy(unsigned int cpu) +{ + return per_cpu(cpufreq_cpu_data, cpu); +} +EXPORT_SYMBOL_GPL(cpufreq_cpu_policy); + unsigned int cpufreq_generic_get(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 23245352a3fc..4fbafc4b7984 100644 --- a/drivers/perf/arm-cmn.c +++ 
b/drivers/perf/arm-cmn.c @@ -210,6 +210,7 @@ enum cmn_model { enum cmn_part { PART_CMN600 = 0x434, PART_CMN650 = 0x436, + PART_CMN600AE = 0x438, PART_CMN700 = 0x43c, PART_CI700 = 0x43a, PART_CMN_S3 = 0x43e, @@ -2266,6 +2267,9 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) reg = readq_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_01); part = FIELD_GET(CMN_CFGM_PID0_PART_0, reg); part |= FIELD_GET(CMN_CFGM_PID1_PART_1, reg) << 8; + /* 600AE is close enough that it's not really worth more complexity */ + if (part == PART_CMN600AE) + part = PART_CMN600; if (cmn->part && cmn->part != part) dev_warn(cmn->dev, "Firmware binding mismatch: expected part number 0x%x, found 0x%x\n", @@ -2418,6 +2422,15 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn); dn->portid_bits = xp->portid_bits; dn->deviceid_bits = xp->deviceid_bits; + /* + * Logical IDs are assigned from 0 per node type, so as + * soon as we see one bigger than expected, we can assume + * there are more than we can cope with. 
+ */ + if (dn->logid > CMN_MAX_NODES_PER_EVENT) { + dev_err(cmn->dev, "Node ID invalid for supported CMN versions: %d\n", dn->logid); + return -ENODEV; + } switch (dn->type) { case CMN_TYPE_DTC: @@ -2467,7 +2480,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) break; /* Something has gone horribly wrong */ default: - dev_err(cmn->dev, "invalid device node type: 0x%x\n", dn->type); + dev_err(cmn->dev, "Device node type invalid for supported CMN versions: 0x%x\n", dn->type); return -ENODEV; } } @@ -2495,6 +2508,10 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) cmn->mesh_x = cmn->num_xps; cmn->mesh_y = cmn->num_xps / cmn->mesh_x; + if (max(cmn->mesh_x, cmn->mesh_y) > CMN_MAX_DIMENSION) { + dev_err(cmn->dev, "Mesh size invalid for supported CMN versions: %dx%d\n", cmn->mesh_x, cmn->mesh_y); + return -ENODEV; + } /* 1x1 config plays havoc with XP event encodings */ if (cmn->num_xps == 1) dev_warn(cmn->dev, "1x1 config not fully supported, translate XP events manually\n"); diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c index cb4fb59fe04b..32b0dd7c693b 100644 --- a/drivers/perf/arm_dsu_pmu.c +++ b/drivers/perf/arm_dsu_pmu.c @@ -66,13 +66,6 @@ */ #define DSU_PMU_IDX_CYCLE_COUNTER 31 -/* All event counters are 32bit, with a 64bit Cycle counter */ -#define DSU_PMU_COUNTER_WIDTH(idx) \ - (((idx) == DSU_PMU_IDX_CYCLE_COUNTER) ? 64 : 32) - -#define DSU_PMU_COUNTER_MASK(idx) \ - GENMASK_ULL((DSU_PMU_COUNTER_WIDTH((idx)) - 1), 0) - #define DSU_EXT_ATTR(_name, _func, _config) \ (&((struct dev_ext_attribute[]) { \ { \ @@ -107,6 +100,8 @@ struct dsu_hw_events { * @num_counters : Number of event counters implemented by the PMU, * excluding the cycle counter. * @irq : Interrupt line for counter overflow. + * @has_32b_pmevcntr : Are the non-cycle counters only 32-bit? + * @has_pmccntr : Do we even have a dedicated cycle counter? 
* @cpmceid_bitmap : Bitmap for the availability of architected common * events (event_code < 0x40). */ @@ -120,6 +115,8 @@ struct dsu_pmu { struct hlist_node cpuhp_node; s8 num_counters; int irq; + bool has_32b_pmevcntr; + bool has_pmccntr; DECLARE_BITMAP(cpmceid_bitmap, DSU_PMU_MAX_COMMON_EVENTS); }; @@ -286,10 +283,9 @@ static int dsu_pmu_get_event_idx(struct dsu_hw_events *hw_events, struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu); unsigned long *used_mask = hw_events->used_mask; - if (evtype == DSU_PMU_EVT_CYCLES) { - if (test_and_set_bit(DSU_PMU_IDX_CYCLE_COUNTER, used_mask)) - return -EAGAIN; - return DSU_PMU_IDX_CYCLE_COUNTER; + if (evtype == DSU_PMU_EVT_CYCLES && dsu_pmu->has_pmccntr) { + if (!test_and_set_bit(DSU_PMU_IDX_CYCLE_COUNTER, used_mask)) + return DSU_PMU_IDX_CYCLE_COUNTER; } idx = find_first_zero_bit(used_mask, dsu_pmu->num_counters); @@ -328,6 +324,11 @@ static inline void dsu_pmu_set_event(struct dsu_pmu *dsu_pmu, raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags); } +static u64 dsu_pmu_counter_mask(struct hw_perf_event *hw) +{ + return (hw->flags && hw->idx != DSU_PMU_IDX_CYCLE_COUNTER) ? 
U32_MAX : U64_MAX; +} + static void dsu_pmu_event_update(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -339,7 +340,7 @@ static void dsu_pmu_event_update(struct perf_event *event) new_count = dsu_pmu_read_counter(event); } while (local64_cmpxchg(&hwc->prev_count, prev_count, new_count) != prev_count); - delta = (new_count - prev_count) & DSU_PMU_COUNTER_MASK(hwc->idx); + delta = (new_count - prev_count) & dsu_pmu_counter_mask(hwc); local64_add(delta, &event->count); } @@ -362,8 +363,7 @@ static inline u32 dsu_pmu_get_reset_overflow(void) */ static void dsu_pmu_set_event_period(struct perf_event *event) { - int idx = event->hw.idx; - u64 val = DSU_PMU_COUNTER_MASK(idx) >> 1; + u64 val = dsu_pmu_counter_mask(&event->hw) >> 1; local64_set(&event->hw.prev_count, val); dsu_pmu_write_counter(event, val); @@ -564,6 +564,7 @@ static int dsu_pmu_event_init(struct perf_event *event) return -EINVAL; event->hw.config_base = event->attr.config; + event->hw.flags = dsu_pmu->has_32b_pmevcntr; return 0; } @@ -664,6 +665,14 @@ static void dsu_pmu_probe_pmu(struct dsu_pmu *dsu_pmu) cpmceid[1] = __dsu_pmu_read_pmceid(1); bitmap_from_arr32(dsu_pmu->cpmceid_bitmap, cpmceid, DSU_PMU_MAX_COMMON_EVENTS); + /* Newer DSUs have 64-bit counters */ + __dsu_pmu_write_counter(0, U64_MAX); + if (__dsu_pmu_read_counter(0) != U64_MAX) + dsu_pmu->has_32b_pmevcntr = true; + /* On even newer DSUs, PMCCNTR is RAZ/WI */ + __dsu_pmu_write_pmccntr(U64_MAX); + if (__dsu_pmu_read_pmccntr() == U64_MAX) + dsu_pmu->has_pmccntr = true; } static void dsu_pmu_set_active_cpu(int cpu, struct dsu_pmu *dsu_pmu) diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index 4801115f2b54..5410fb7428d0 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -106,6 +106,8 @@ struct arm_spe_pmu { /* Keep track of our dynamic hotplug state */ static enum cpuhp_state arm_spe_pmu_online; +static void arm_spe_pmu_stop(struct perf_event *event, int flags); + enum 
arm_spe_pmu_buf_fault_action { SPE_PMU_BUF_FAULT_ACT_SPURIOUS, SPE_PMU_BUF_FAULT_ACT_FATAL, @@ -607,8 +609,8 @@ static u64 arm_spe_pmu_next_off(struct perf_output_handle *handle) return limit; } -static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle, - struct perf_event *event) +static int arm_spe_perf_aux_output_begin(struct perf_output_handle *handle, + struct perf_event *event) { u64 base, limit; struct arm_spe_pmu_buf *buf; @@ -622,7 +624,6 @@ static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle, /* Start a new aux session */ buf = perf_aux_output_begin(handle, event); if (!buf) { - event->hw.state |= PERF_HES_STOPPED; /* * We still need to clear the limit pointer, since the * profiler might only be disabled by virtue of a fault. @@ -642,6 +643,7 @@ static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle, out_write_limit: write_sysreg_s(limit, SYS_PMBLIMITR_EL1); + return (limit & PMBLIMITR_EL1_E) ? 0 : -EIO; } static void arm_spe_perf_aux_output_end(struct perf_output_handle *handle) @@ -781,7 +783,10 @@ static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev) * when we get to it. 
*/ if (!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)) { - arm_spe_perf_aux_output_begin(handle, event); + if (arm_spe_perf_aux_output_begin(handle, event)) { + arm_spe_pmu_stop(event, PERF_EF_UPDATE); + break; + } isb(); } break; @@ -880,9 +885,10 @@ static void arm_spe_pmu_start(struct perf_event *event, int flags) struct perf_output_handle *handle = this_cpu_ptr(spe_pmu->handle); hwc->state = 0; - arm_spe_perf_aux_output_begin(handle, event); - if (hwc->state) + if (arm_spe_perf_aux_output_begin(handle, event)) { + arm_spe_pmu_stop(event, 0); return; + } reg = arm_spe_event_to_pmsfcr(event); write_sysreg_s(reg, SYS_PMSFCR_EL1); diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c index d094030220bf..68a54d97d2a8 100644 --- a/drivers/perf/cxl_pmu.c +++ b/drivers/perf/cxl_pmu.c @@ -877,7 +877,7 @@ static int cxl_pmu_probe(struct device *dev) if (!irq_name) return -ENOMEM; - rc = devm_request_irq(dev, irq, cxl_pmu_irq, IRQF_SHARED | IRQF_ONESHOT, + rc = devm_request_irq(dev, irq, cxl_pmu_irq, IRQF_SHARED | IRQF_NO_THREAD, irq_name, info); if (rc) return rc; diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 7dd282da67ce..9dcc22fd48ef 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -1244,7 +1244,7 @@ static int riscv_pm_pmu_notify(struct notifier_block *b, unsigned long cmd, { struct riscv_pmu *rvpmu = container_of(b, struct riscv_pmu, riscv_pm_nb); struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); - int enabled = bitmap_weight(cpuc->used_hw_ctrs, RISCV_MAX_COUNTERS); + bool enabled = !bitmap_empty(cpuc->used_hw_ctrs, RISCV_MAX_COUNTERS); struct perf_event *event; int idx; diff --git a/drivers/perf/starfive_starlink_pmu.c b/drivers/perf/starfive_starlink_pmu.c index 5e5a672b4229..964897c2baa9 100644 --- a/drivers/perf/starfive_starlink_pmu.c +++ b/drivers/perf/starfive_starlink_pmu.c @@ -450,8 +450,7 @@ static int starlink_pmu_pm_notify(struct notifier_block *b, starlink_pmu_pm_nb); 
struct starlink_hw_events *hw_events = this_cpu_ptr(starlink_pmu->hw_events); - int enabled = bitmap_weight(hw_events->used_mask, - STARLINK_PMU_MAX_COUNTERS); + bool enabled = !bitmap_empty(hw_events->used_mask, STARLINK_PMU_MAX_COUNTERS); struct perf_event *event; int idx; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 0465d1e6f72a..cc894fc38971 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -203,6 +203,7 @@ struct cpufreq_freqs { #ifdef CONFIG_CPU_FREQ struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu); +struct cpufreq_policy *cpufreq_cpu_policy(unsigned int cpu); struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu); void cpufreq_cpu_put(struct cpufreq_policy *policy); #else @@ -210,6 +211,10 @@ static inline struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu) { return NULL; } +static inline struct cpufreq_policy *cpufreq_cpu_policy(unsigned int cpu) +{ + return NULL; +} static inline struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) { return NULL; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index dddb781b0507..88cca0e22ece 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -180,6 +180,7 @@ struct kvm_xen_exit { #define KVM_EXIT_MEMORY_FAULT 39 #define KVM_EXIT_TDX 40 #define KVM_EXIT_ARM_SEA 41 +#define KVM_EXIT_ARM_LDST64B 42 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. 
*/ @@ -402,7 +403,7 @@ struct kvm_run { } eoi; /* KVM_EXIT_HYPERV */ struct kvm_hyperv_exit hyperv; - /* KVM_EXIT_ARM_NISV */ + /* KVM_EXIT_ARM_NISV / KVM_EXIT_ARM_LDST64B */ struct { __u64 esr_iss; __u64 fault_ipa; diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index c4c72ee2ef55..e456f3b62fa1 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -30,13 +30,15 @@ all: @for DIR in $(ARM64_SUBTARGETS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir -p $$BUILD_TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@ \ + $(if $(FORCE_TARGETS),|| exit); \ done install: all @for DIR in $(ARM64_SUBTARGETS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ - make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@ \ + $(if $(FORCE_TARGETS),|| exit); \ done run_tests: all diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index c41640f18e4e..9d2df1f3e6bb 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -595,6 +597,45 @@ static void lrcpc3_sigill(void) : "=r" (data0), "=r" (data1) : "r" (src) :); } +static void ignore_signal(int sig, siginfo_t *info, void *context) +{ + ucontext_t *uc = context; + + uc->uc_mcontext.pc += 4; +} + +static void ls64_sigill(void) +{ + struct sigaction ign, old; + char src[64] __aligned(64) = { 1 }; + + /* + * LS64 requires target memory to be Device/Non-cacheable (if + * FEAT_LS64WB not supported) and the completer supports these + * instructions, otherwise we'll receive a SIGBUS. Since we are only + * testing the ABI here, so just ignore the SIGBUS and see if we can + * execute the instructions without receiving a SIGILL. Restore the + * handler of SIGBUS after this test. 
+ */ + ign.sa_sigaction = ignore_signal; + ign.sa_flags = SA_SIGINFO | SA_RESTART; + sigemptyset(&ign.sa_mask); + sigaction(SIGBUS, &ign, &old); + + register void *xn asm ("x8") = src; + register u64 xt_1 asm ("x0"); + + /* LD64B x0, [x8] */ + asm volatile(".inst 0xf83fd100" : "=r" (xt_1) : "r" (xn) + : "x1", "x2", "x3", "x4", "x5", "x6", "x7"); + + /* ST64B x0, [x8] */ + asm volatile(".inst 0xf83f9100" : : "r" (xt_1), "r" (xn) + : "x1", "x2", "x3", "x4", "x5", "x6", "x7"); + + sigaction(SIGBUS, &old, NULL); +} + static const struct hwcap_data { const char *name; unsigned long at_hwcap; @@ -1134,6 +1175,14 @@ static const struct hwcap_data { .hwcap_bit = HWCAP3_MTE_STORE_ONLY, .cpuinfo = "mtestoreonly", }, + { + .name = "LS64", + .at_hwcap = AT_HWCAP3, + .hwcap_bit = HWCAP3_LS64, + .cpuinfo = "ls64", + .sigill_fn = ls64_sigill, + .sigill_reliable = true, + }, }; typedef void (*sighandler_fn)(int, siginfo_t *, void *); diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c index 1703543fb7c7..ce4550fb7224 100644 --- a/tools/testing/selftests/arm64/abi/tpidr2.c +++ b/tools/testing/selftests/arm64/abi/tpidr2.c @@ -128,8 +128,7 @@ static int sys_clone(unsigned long clone_flags, unsigned long newsp, int *parent_tidptr, unsigned long tls, int *child_tidptr) { - return my_syscall5(__NR_clone, clone_flags, newsp, parent_tidptr, tls, - child_tidptr); + return syscall(__NR_clone, clone_flags, newsp, parent_tidptr, tls, child_tidptr); } #define __STACK_SIZE (8 * 1024 * 1024) diff --git a/tools/testing/selftests/arm64/fp/fp-pidbench.S b/tools/testing/selftests/arm64/fp/fp-pidbench.S index 73830f6bc99b..881dfa3b342e 100644 --- a/tools/testing/selftests/arm64/fp/fp-pidbench.S +++ b/tools/testing/selftests/arm64/fp/fp-pidbench.S @@ -33,7 +33,7 @@ function _start puts "Iterations per test: " mov x20, #10000 - lsl x20, x20, #8 + lsl x20, x20, #12 mov x0, x20 bl putdec puts "\n" @@ -63,6 +63,10 @@ function _start puts "SVE used per 
syscall: " test_loop "rdvl x0, #8" + // Test non-SVE execution after SVE + puts "No SVE after SVE: " + test_loop + // And we're done out: mov x0, #0 diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c index 250977abc398..ae4cce6afe2b 100644 --- a/tools/testing/selftests/arm64/gcs/basic-gcs.c +++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c @@ -22,7 +22,7 @@ static size_t page_size = 65536; static __attribute__((noinline)) void valid_gcs_function(void) { /* Do something the compiler can't optimise out */ - my_syscall1(__NR_prctl, PR_SVE_GET_VL); + syscall(__NR_prctl, PR_SVE_GET_VL); } static inline int gcs_set_status(unsigned long mode) @@ -36,12 +36,10 @@ static inline int gcs_set_status(unsigned long mode) * other 3 values passed in registers to the syscall are zero * since the kernel validates them. */ - ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, mode, - 0, 0, 0); + ret = syscall(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, mode, 0, 0, 0); if (ret == 0) { - ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, - &new_mode, 0, 0, 0); + ret = syscall(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &new_mode, 0, 0, 0); if (ret == 0) { if (new_mode != mode) { ksft_print_msg("Mode set to %lx not %lx\n", @@ -49,7 +47,7 @@ static inline int gcs_set_status(unsigned long mode) ret = -EINVAL; } } else { - ksft_print_msg("Failed to validate mode: %d\n", ret); + ksft_print_msg("Failed to validate mode: %d\n", errno); } if (enabling != chkfeat_gcs()) { @@ -69,10 +67,9 @@ static bool read_status(void) unsigned long state; int ret; - ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, - &state, 0, 0, 0); + ret = syscall(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &state, 0, 0, 0); if (ret != 0) { - ksft_print_msg("Failed to read state: %d\n", ret); + ksft_print_msg("Failed to read state: %d\n", errno); return false; } @@ -188,9 +185,8 @@ static bool map_guarded_stack(void) int elem; bool pass = true; - buf = 
(void *)my_syscall3(__NR_map_shadow_stack, 0, page_size, - SHADOW_STACK_SET_MARKER | - SHADOW_STACK_SET_TOKEN); + buf = (void *)syscall(__NR_map_shadow_stack, 0, page_size, + SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN); if (buf == MAP_FAILED) { ksft_print_msg("Failed to map %lu byte GCS: %d\n", page_size, errno); @@ -257,8 +253,7 @@ static bool test_fork(void) valid_gcs_function(); get_gcspr(); - ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, - &child_mode, 0, 0, 0); + ret = syscall(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &child_mode, 0, 0, 0); if (ret == 0 && !(child_mode & PR_SHADOW_STACK_ENABLE)) { ksft_print_msg("GCS not enabled in child\n"); ret = -EINVAL; @@ -321,8 +316,7 @@ static bool test_vfork(void) valid_gcs_function(); get_gcspr(); - ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, - &child_mode, 0, 0, 0); + ret = syscall(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &child_mode, 0, 0, 0); if (ret == 0 && !(child_mode & PR_SHADOW_STACK_ENABLE)) { ksft_print_msg("GCS not enabled in child\n"); ret = EXIT_FAILURE; @@ -390,17 +384,15 @@ int main(void) if (!(getauxval(AT_HWCAP) & HWCAP_GCS)) ksft_exit_skip("SKIP GCS not supported\n"); - ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, - &gcs_mode, 0, 0, 0); + ret = syscall(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &gcs_mode, 0, 0, 0); if (ret != 0) - ksft_exit_fail_msg("Failed to read GCS state: %d\n", ret); + ksft_exit_fail_msg("Failed to read GCS state: %d\n", errno); if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) { gcs_mode = PR_SHADOW_STACK_ENABLE; - ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, - gcs_mode, 0, 0, 0); + ret = syscall(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, gcs_mode, 0, 0, 0); if (ret != 0) - ksft_exit_fail_msg("Failed to enable GCS: %d\n", ret); + ksft_exit_fail_msg("Failed to enable GCS: %d\n", errno); } ksft_set_plan(ARRAY_SIZE(tests)); @@ -410,9 +402,9 @@ int main(void) } /* One last test: disable GCS, we can do this one time */ - ret = 
my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0); + ret = syscall(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0); if (ret != 0) - ksft_print_msg("Failed to disable GCS: %d\n", ret); + ksft_print_msg("Failed to disable GCS: %d\n", errno); ksft_finished(); diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore index 052d0f9f92b3..f6937f890039 100644 --- a/tools/testing/selftests/arm64/mte/.gitignore +++ b/tools/testing/selftests/arm64/mte/.gitignore @@ -6,3 +6,4 @@ check_mmap_options check_prctl check_ksm_options check_user_mem +check_hugetlb_options