mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-04 20:19:47 +08:00
um: Track userspace children dying in SECCOMP mode
When in seccomp mode, we would hang forever on the futex if a child has died unexpectedly. In contrast, ptrace mode will notice it and kill the corresponding thread when it fails to run it.

Fix this issue using a new IRQ that is fired after a SIGCHLD and keeping an (internal) list of all MMs. In the IRQ handler, find the affected MM and set its PID to -1 as well as the futex variable to FUTEX_IN_KERN.

This, together with futex returning -EINTR after the signal is sufficient to implement a race-free detection of a child dying.

Note that this also enables IRQ handling while starting a userspace process. This should be safe and SECCOMP requires the IRQ in case the process does not come up properly.

Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Link: https://patch.msgid.link/20250602130052.545733-5-benjamin@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
This commit is contained in:
parent
b1e1bd2e69
commit
8420e08fe3
@ -13,17 +13,18 @@
|
|||||||
#define TELNETD_IRQ 8
|
#define TELNETD_IRQ 8
|
||||||
#define XTERM_IRQ 9
|
#define XTERM_IRQ 9
|
||||||
#define RANDOM_IRQ 10
|
#define RANDOM_IRQ 10
|
||||||
|
#define SIGCHLD_IRQ 11
|
||||||
|
|
||||||
#ifdef CONFIG_UML_NET_VECTOR
|
#ifdef CONFIG_UML_NET_VECTOR
|
||||||
|
|
||||||
#define VECTOR_BASE_IRQ (RANDOM_IRQ + 1)
|
#define VECTOR_BASE_IRQ (SIGCHLD_IRQ + 1)
|
||||||
#define VECTOR_IRQ_SPACE 8
|
#define VECTOR_IRQ_SPACE 8
|
||||||
|
|
||||||
#define UM_FIRST_DYN_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
|
#define UM_FIRST_DYN_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define UM_FIRST_DYN_IRQ (RANDOM_IRQ + 1)
|
#define UM_FIRST_DYN_IRQ (SIGCHLD_IRQ + 1)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -6,11 +6,14 @@
|
|||||||
#ifndef __ARCH_UM_MMU_H
|
#ifndef __ARCH_UM_MMU_H
|
||||||
#define __ARCH_UM_MMU_H
|
#define __ARCH_UM_MMU_H
|
||||||
|
|
||||||
|
#include "linux/types.h"
|
||||||
#include <mm_id.h>
|
#include <mm_id.h>
|
||||||
|
|
||||||
typedef struct mm_context {
|
typedef struct mm_context {
|
||||||
struct mm_id id;
|
struct mm_id id;
|
||||||
|
|
||||||
|
struct list_head list;
|
||||||
|
|
||||||
/* Address range in need of a TLB sync */
|
/* Address range in need of a TLB sync */
|
||||||
unsigned long sync_tlb_range_from;
|
unsigned long sync_tlb_range_from;
|
||||||
unsigned long sync_tlb_range_to;
|
unsigned long sync_tlb_range_to;
|
||||||
|
@ -17,6 +17,8 @@ enum um_irq_type {
|
|||||||
struct siginfo;
|
struct siginfo;
|
||||||
extern void sigio_handler(int sig, struct siginfo *unused_si,
|
extern void sigio_handler(int sig, struct siginfo *unused_si,
|
||||||
struct uml_pt_regs *regs, void *mc);
|
struct uml_pt_regs *regs, void *mc);
|
||||||
|
extern void sigchld_handler(int sig, struct siginfo *unused_si,
|
||||||
|
struct uml_pt_regs *regs, void *mc);
|
||||||
void sigio_run_timetravel_handlers(void);
|
void sigio_run_timetravel_handlers(void);
|
||||||
extern void free_irq_by_fd(int fd);
|
extern void free_irq_by_fd(int fd);
|
||||||
extern void deactivate_fd(int fd, int irqnum);
|
extern void deactivate_fd(int fd, int irqnum);
|
||||||
|
@ -197,6 +197,7 @@ extern int create_mem_file(unsigned long long len);
|
|||||||
extern void report_enomem(void);
|
extern void report_enomem(void);
|
||||||
|
|
||||||
/* process.c */
|
/* process.c */
|
||||||
|
pid_t os_reap_child(void);
|
||||||
extern void os_alarm_process(int pid);
|
extern void os_alarm_process(int pid);
|
||||||
extern void os_kill_process(int pid, int reap_child);
|
extern void os_kill_process(int pid, int reap_child);
|
||||||
extern void os_kill_ptraced_process(int pid, int reap_child);
|
extern void os_kill_ptraced_process(int pid, int reap_child);
|
||||||
|
@ -14,4 +14,6 @@ struct mm_id {
|
|||||||
|
|
||||||
void __switch_mm(struct mm_id *mm_idp);
|
void __switch_mm(struct mm_id *mm_idp);
|
||||||
|
|
||||||
|
void notify_mm_kill(int pid);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
|
|
||||||
#include <sysdep/ptrace.h>
|
#include <sysdep/ptrace.h>
|
||||||
|
|
||||||
|
extern int using_seccomp;
|
||||||
extern int userspace_pid[];
|
extern int userspace_pid[];
|
||||||
|
|
||||||
extern void new_thread_handler(void);
|
extern void new_thread_handler(void);
|
||||||
|
@ -690,3 +690,9 @@ void __init init_IRQ(void)
|
|||||||
/* Initialize EPOLL Loop */
|
/* Initialize EPOLL Loop */
|
||||||
os_setup_epoll();
|
os_setup_epoll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern void sigchld_handler(int sig, struct siginfo *unused_si,
|
||||||
|
struct uml_pt_regs *regs, void *mc)
|
||||||
|
{
|
||||||
|
do_IRQ(SIGCHLD_IRQ, regs);
|
||||||
|
}
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
#include <linux/sched/signal.h>
|
#include <linux/sched/signal.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
|
|
||||||
|
#include <shared/irq_kern.h>
|
||||||
#include <asm/pgalloc.h>
|
#include <asm/pgalloc.h>
|
||||||
#include <asm/sections.h>
|
#include <asm/sections.h>
|
||||||
#include <asm/mmu_context.h>
|
#include <asm/mmu_context.h>
|
||||||
@ -19,6 +20,9 @@
|
|||||||
/* Ensure the stub_data struct covers the allocated area */
|
/* Ensure the stub_data struct covers the allocated area */
|
||||||
static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
|
static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
|
||||||
|
|
||||||
|
spinlock_t mm_list_lock;
|
||||||
|
struct list_head mm_list;
|
||||||
|
|
||||||
int init_new_context(struct task_struct *task, struct mm_struct *mm)
|
int init_new_context(struct task_struct *task, struct mm_struct *mm)
|
||||||
{
|
{
|
||||||
struct mm_id *new_id = &mm->context.id;
|
struct mm_id *new_id = &mm->context.id;
|
||||||
@ -31,10 +35,12 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
|
|||||||
|
|
||||||
new_id->stack = stack;
|
new_id->stack = stack;
|
||||||
|
|
||||||
block_signals_trace();
|
scoped_guard(spinlock_irqsave, &mm_list_lock) {
|
||||||
new_id->pid = start_userspace(stack);
|
/* Insert into list, used for lookups when the child dies */
|
||||||
unblock_signals_trace();
|
list_add(&mm->context.list, &mm_list);
|
||||||
|
}
|
||||||
|
|
||||||
|
new_id->pid = start_userspace(stack);
|
||||||
if (new_id->pid < 0) {
|
if (new_id->pid < 0) {
|
||||||
ret = new_id->pid;
|
ret = new_id->pid;
|
||||||
goto out_free;
|
goto out_free;
|
||||||
@ -60,13 +66,79 @@ void destroy_context(struct mm_struct *mm)
|
|||||||
* zero, resulting in a kill(0), which will result in the
|
* zero, resulting in a kill(0), which will result in the
|
||||||
* whole UML suddenly dying. Also, cover negative and
|
* whole UML suddenly dying. Also, cover negative and
|
||||||
* 1 cases, since they shouldn't happen either.
|
* 1 cases, since they shouldn't happen either.
|
||||||
|
*
|
||||||
|
* Negative cases happen if the child died unexpectedly.
|
||||||
*/
|
*/
|
||||||
if (mmu->id.pid < 2) {
|
if (mmu->id.pid >= 0 && mmu->id.pid < 2) {
|
||||||
printk(KERN_ERR "corrupt mm_context - pid = %d\n",
|
printk(KERN_ERR "corrupt mm_context - pid = %d\n",
|
||||||
mmu->id.pid);
|
mmu->id.pid);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
os_kill_ptraced_process(mmu->id.pid, 1);
|
|
||||||
|
if (mmu->id.pid > 0) {
|
||||||
|
os_kill_ptraced_process(mmu->id.pid, 1);
|
||||||
|
mmu->id.pid = -1;
|
||||||
|
}
|
||||||
|
|
||||||
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
|
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
|
||||||
|
|
||||||
|
guard(spinlock_irqsave)(&mm_list_lock);
|
||||||
|
|
||||||
|
list_del(&mm->context.list);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static irqreturn_t mm_sigchld_irq(int irq, void* dev)
|
||||||
|
{
|
||||||
|
struct mm_context *mm_context;
|
||||||
|
pid_t pid;
|
||||||
|
|
||||||
|
guard(spinlock)(&mm_list_lock);
|
||||||
|
|
||||||
|
while ((pid = os_reap_child()) > 0) {
|
||||||
|
/*
|
||||||
|
* A child died, check if we have an MM with the PID. This is
|
||||||
|
* only relevant in SECCOMP mode (as ptrace will fail anyway).
|
||||||
|
*
|
||||||
|
* See wait_stub_done_seccomp for more details.
|
||||||
|
*/
|
||||||
|
list_for_each_entry(mm_context, &mm_list, list) {
|
||||||
|
if (mm_context->id.pid == pid) {
|
||||||
|
struct stub_data *stub_data;
|
||||||
|
printk("Unexpectedly lost MM child! Affected tasks will segfault.");
|
||||||
|
|
||||||
|
/* Marks the MM as dead */
|
||||||
|
mm_context->id.pid = -1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* NOTE: If SMP is implemented, a futex_wake
|
||||||
|
* needs to be added here.
|
||||||
|
*/
|
||||||
|
stub_data = (void *)mm_context->id.stack;
|
||||||
|
stub_data->futex = FUTEX_IN_KERN;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* NOTE: Currently executing syscalls by
|
||||||
|
* affected tasks may finish normally.
|
||||||
|
*/
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return IRQ_HANDLED;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int __init init_child_tracking(void)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
|
||||||
|
spin_lock_init(&mm_list_lock);
|
||||||
|
INIT_LIST_HEAD(&mm_list);
|
||||||
|
|
||||||
|
err = request_irq(SIGCHLD_IRQ, mm_sigchld_irq, 0, "SIGCHLD", NULL);
|
||||||
|
if (err < 0)
|
||||||
|
panic("Failed to register SIGCHLD IRQ: %d", err);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
early_initcall(init_child_tracking)
|
||||||
|
@ -18,17 +18,29 @@
|
|||||||
#include <init.h>
|
#include <init.h>
|
||||||
#include <longjmp.h>
|
#include <longjmp.h>
|
||||||
#include <os.h>
|
#include <os.h>
|
||||||
|
#include <skas/skas.h>
|
||||||
|
|
||||||
void os_alarm_process(int pid)
|
void os_alarm_process(int pid)
|
||||||
{
|
{
|
||||||
|
if (pid <= 0)
|
||||||
|
return;
|
||||||
|
|
||||||
kill(pid, SIGALRM);
|
kill(pid, SIGALRM);
|
||||||
}
|
}
|
||||||
|
|
||||||
void os_kill_process(int pid, int reap_child)
|
void os_kill_process(int pid, int reap_child)
|
||||||
{
|
{
|
||||||
|
if (pid <= 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Block signals until child is reaped */
|
||||||
|
block_signals();
|
||||||
|
|
||||||
kill(pid, SIGKILL);
|
kill(pid, SIGKILL);
|
||||||
if (reap_child)
|
if (reap_child)
|
||||||
CATCH_EINTR(waitpid(pid, NULL, __WALL));
|
CATCH_EINTR(waitpid(pid, NULL, __WALL));
|
||||||
|
|
||||||
|
unblock_signals();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Kill off a ptraced child by all means available. kill it normally first,
|
/* Kill off a ptraced child by all means available. kill it normally first,
|
||||||
@ -38,11 +50,27 @@ void os_kill_process(int pid, int reap_child)
|
|||||||
|
|
||||||
void os_kill_ptraced_process(int pid, int reap_child)
|
void os_kill_ptraced_process(int pid, int reap_child)
|
||||||
{
|
{
|
||||||
|
if (pid <= 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Block signals until child is reaped */
|
||||||
|
block_signals();
|
||||||
|
|
||||||
kill(pid, SIGKILL);
|
kill(pid, SIGKILL);
|
||||||
ptrace(PTRACE_KILL, pid);
|
ptrace(PTRACE_KILL, pid);
|
||||||
ptrace(PTRACE_CONT, pid);
|
ptrace(PTRACE_CONT, pid);
|
||||||
if (reap_child)
|
if (reap_child)
|
||||||
CATCH_EINTR(waitpid(pid, NULL, __WALL));
|
CATCH_EINTR(waitpid(pid, NULL, __WALL));
|
||||||
|
|
||||||
|
unblock_signals();
|
||||||
|
}
|
||||||
|
|
||||||
|
pid_t os_reap_child(void)
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
|
||||||
|
/* Try to reap a child */
|
||||||
|
return waitpid(-1, &status, WNOHANG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Don't use the glibc version, which caches the result in TLS. It misses some
|
/* Don't use the glibc version, which caches the result in TLS. It misses some
|
||||||
@ -151,6 +179,9 @@ void init_new_thread_signals(void)
|
|||||||
set_handler(SIGBUS);
|
set_handler(SIGBUS);
|
||||||
signal(SIGHUP, SIG_IGN);
|
signal(SIGHUP, SIG_IGN);
|
||||||
set_handler(SIGIO);
|
set_handler(SIGIO);
|
||||||
|
/* We (currently) only use the child reaper IRQ in seccomp mode */
|
||||||
|
if (using_seccomp)
|
||||||
|
set_handler(SIGCHLD);
|
||||||
signal(SIGWINCH, SIG_IGN);
|
signal(SIGWINCH, SIG_IGN);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,6 +29,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *, void *mc) =
|
|||||||
[SIGBUS] = relay_signal,
|
[SIGBUS] = relay_signal,
|
||||||
[SIGSEGV] = segv_handler,
|
[SIGSEGV] = segv_handler,
|
||||||
[SIGIO] = sigio_handler,
|
[SIGIO] = sigio_handler,
|
||||||
|
[SIGCHLD] = sigchld_handler,
|
||||||
};
|
};
|
||||||
|
|
||||||
static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
|
static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
|
||||||
@ -44,7 +45,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* enable signals if sig isn't IRQ signal */
|
/* enable signals if sig isn't IRQ signal */
|
||||||
if ((sig != SIGIO) && (sig != SIGWINCH))
|
if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGCHLD))
|
||||||
unblock_signals_trace();
|
unblock_signals_trace();
|
||||||
|
|
||||||
(*sig_info[sig])(sig, si, &r, mc);
|
(*sig_info[sig])(sig, si, &r, mc);
|
||||||
@ -64,6 +65,9 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
|
|||||||
#define SIGALRM_BIT 1
|
#define SIGALRM_BIT 1
|
||||||
#define SIGALRM_MASK (1 << SIGALRM_BIT)
|
#define SIGALRM_MASK (1 << SIGALRM_BIT)
|
||||||
|
|
||||||
|
#define SIGCHLD_BIT 2
|
||||||
|
#define SIGCHLD_MASK (1 << SIGCHLD_BIT)
|
||||||
|
|
||||||
int signals_enabled;
|
int signals_enabled;
|
||||||
#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
|
#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
|
||||||
static int signals_blocked, signals_blocked_pending;
|
static int signals_blocked, signals_blocked_pending;
|
||||||
@ -102,6 +106,11 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!enabled && (sig == SIGCHLD)) {
|
||||||
|
signals_pending |= SIGCHLD_MASK;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
block_signals_trace();
|
block_signals_trace();
|
||||||
|
|
||||||
sig_handler_common(sig, si, mc);
|
sig_handler_common(sig, si, mc);
|
||||||
@ -181,6 +190,8 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
|
|||||||
|
|
||||||
[SIGIO] = sig_handler,
|
[SIGIO] = sig_handler,
|
||||||
[SIGWINCH] = sig_handler,
|
[SIGWINCH] = sig_handler,
|
||||||
|
/* SIGCHLD is only actually registered in seccomp mode. */
|
||||||
|
[SIGCHLD] = sig_handler,
|
||||||
[SIGALRM] = timer_alarm_handler,
|
[SIGALRM] = timer_alarm_handler,
|
||||||
|
|
||||||
[SIGUSR1] = sigusr1_handler,
|
[SIGUSR1] = sigusr1_handler,
|
||||||
@ -309,6 +320,12 @@ void unblock_signals(void)
|
|||||||
if (save_pending & SIGIO_MASK)
|
if (save_pending & SIGIO_MASK)
|
||||||
sig_handler_common(SIGIO, NULL, NULL);
|
sig_handler_common(SIGIO, NULL, NULL);
|
||||||
|
|
||||||
|
if (save_pending & SIGCHLD_MASK) {
|
||||||
|
struct uml_pt_regs regs = {};
|
||||||
|
|
||||||
|
sigchld_handler(SIGCHLD, NULL, &regs, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
/* Do not reenter the handler */
|
/* Do not reenter the handler */
|
||||||
|
|
||||||
if ((save_pending & SIGALRM_MASK) && (!(signals_active & SIGALRM_MASK)))
|
if ((save_pending & SIGALRM_MASK) && (!(signals_active & SIGALRM_MASK)))
|
||||||
|
@ -309,6 +309,7 @@ static int __init init_stub_exe_fd(void)
|
|||||||
}
|
}
|
||||||
__initcall(init_stub_exe_fd);
|
__initcall(init_stub_exe_fd);
|
||||||
|
|
||||||
|
int using_seccomp;
|
||||||
int userspace_pid[NR_CPUS];
|
int userspace_pid[NR_CPUS];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
Reference in New Issue
Block a user