2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00

um: Implement kernel side of SECCOMP based process handling

This adds the kernel side of the seccomp based process handling.

Co-authored-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Link: https://patch.msgid.link/20250602130052.545733-6-benjamin@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
This commit is contained in:
Benjamin Berg 2025-06-02 15:00:50 +02:00 committed by Johannes Berg
parent 8420e08fe3
commit 406d17c6c3
10 changed files with 459 additions and 138 deletions

View File

@ -16,3 +16,5 @@ DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC);
DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES); DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES);
DEFINE(UM_SECCOMP_ARCH_NATIVE, SECCOMP_ARCH_NATIVE);

View File

@ -286,7 +286,7 @@ int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len);
/* skas/process.c */ /* skas/process.c */
extern int is_skas_winch(int pid, int fd, void *data); extern int is_skas_winch(int pid, int fd, void *data);
extern int start_userspace(unsigned long stub_stack); extern int start_userspace(struct mm_id *mm_id);
extern void userspace(struct uml_pt_regs *regs); extern void userspace(struct uml_pt_regs *regs);
extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)); extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void));
extern void switch_threads(jmp_buf *me, jmp_buf *you); extern void switch_threads(jmp_buf *me, jmp_buf *you);

View File

@ -17,6 +17,8 @@
#define FUTEX_IN_KERN 1 #define FUTEX_IN_KERN 1
struct stub_init_data { struct stub_init_data {
int seccomp;
unsigned long stub_start; unsigned long stub_start;
int stub_code_fd; int stub_code_fd;
@ -24,7 +26,8 @@ struct stub_init_data {
int stub_data_fd; int stub_data_fd;
unsigned long stub_data_offset; unsigned long stub_data_offset;
unsigned long segv_handler; unsigned long signal_handler;
unsigned long signal_restorer;
}; };
#define STUB_NEXT_SYSCALL(s) \ #define STUB_NEXT_SYSCALL(s) \

View File

@ -40,11 +40,9 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
list_add(&mm->context.list, &mm_list); list_add(&mm->context.list, &mm_list);
} }
new_id->pid = start_userspace(stack); ret = start_userspace(new_id);
if (new_id->pid < 0) { if (ret < 0)
ret = new_id->pid;
goto out_free; goto out_free;
}
/* Ensure the new MM is clean and nothing unwanted is mapped */ /* Ensure the new MM is clean and nothing unwanted is mapped */
unmap(new_id, 0, STUB_START); unmap(new_id, 0, STUB_START);

View File

@ -3,6 +3,9 @@
#include <asm/unistd.h> #include <asm/unistd.h>
#include <sysdep/stub.h> #include <sysdep/stub.h>
#include <stub-data.h> #include <stub-data.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <generated/asm-offsets.h>
void _start(void); void _start(void);
@ -25,8 +28,6 @@ noinline static void real_init(void)
} sa = { } sa = {
/* Need to set SA_RESTORER (but the handler never returns) */ /* Need to set SA_RESTORER (but the handler never returns) */
.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000, .sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000,
/* no need to mask any signals */
.sa_mask = 0,
}; };
/* set a nice name */ /* set a nice name */
@ -35,6 +36,9 @@ noinline static void real_init(void)
/* Make sure this process dies if the kernel dies */ /* Make sure this process dies if the kernel dies */
stub_syscall2(__NR_prctl, PR_SET_PDEATHSIG, SIGKILL); stub_syscall2(__NR_prctl, PR_SET_PDEATHSIG, SIGKILL);
/* Needed in SECCOMP mode (and safe to do anyway) */
stub_syscall5(__NR_prctl, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
/* read information from STDIN and close it */ /* read information from STDIN and close it */
res = stub_syscall3(__NR_read, 0, res = stub_syscall3(__NR_read, 0,
(unsigned long)&init_data, sizeof(init_data)); (unsigned long)&init_data, sizeof(init_data));
@ -63,18 +67,133 @@ noinline static void real_init(void)
stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE; stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE;
stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0); stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0);
/* register SIGSEGV handler */ /* register signal handlers */
sa.sa_handler_ = (void *) init_data.segv_handler; sa.sa_handler_ = (void *) init_data.signal_handler;
res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, (unsigned long)&sa, 0, sa.sa_restorer = (void *) init_data.signal_restorer;
sizeof(sa.sa_mask)); if (!init_data.seccomp) {
if (res != 0) /* In ptrace mode, the SIGSEGV handler never returns */
stub_syscall1(__NR_exit, 13); sa.sa_mask = 0;
stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
if (res != 0)
stub_syscall1(__NR_exit, 13);
} else {
/* SECCOMP mode uses rt_sigreturn, need to mask all signals */
sa.sa_mask = ~0ULL;
stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP); res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
if (res != 0)
stub_syscall1(__NR_exit, 14);
stub_syscall1(__NR_exit, 14); res = stub_syscall4(__NR_rt_sigaction, SIGSYS,
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
if (res != 0)
stub_syscall1(__NR_exit, 15);
res = stub_syscall4(__NR_rt_sigaction, SIGALRM,
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
if (res != 0)
stub_syscall1(__NR_exit, 16);
res = stub_syscall4(__NR_rt_sigaction, SIGTRAP,
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
if (res != 0)
stub_syscall1(__NR_exit, 17);
res = stub_syscall4(__NR_rt_sigaction, SIGILL,
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
if (res != 0)
stub_syscall1(__NR_exit, 18);
res = stub_syscall4(__NR_rt_sigaction, SIGFPE,
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
if (res != 0)
stub_syscall1(__NR_exit, 19);
}
/*
* If in seccomp mode, install the SECCOMP filter and trigger a syscall.
* Otherwise set PTRACE_TRACEME and do a SIGSTOP.
*/
if (init_data.seccomp) {
struct sock_filter filter[] = {
#if __BITS_PER_LONG > 32
/* [0] Load upper 32bit of instruction pointer from seccomp_data */
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
(offsetof(struct seccomp_data, instruction_pointer) + 4)),
/* [1] Jump forward 3 instructions if the upper address is not identical */
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) >> 32, 0, 3),
#endif
/* [2] Load lower 32bit of instruction pointer from seccomp_data */
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
(offsetof(struct seccomp_data, instruction_pointer))),
/* [3] Mask out lower bits */
BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xfffff000),
/* [4] Jump to [6] if the lower bits are on the expected page (otherwise fall through to the trap at [5]) */
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) & 0xfffff000, 1, 0),
/* [5] Trap call, allow */
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
/* [6,7] Check architecture */
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
offsetof(struct seccomp_data, arch)),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,
UM_SECCOMP_ARCH_NATIVE, 1, 0),
/* [8] Kill (for architecture check) */
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
/* [9] Load syscall number */
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
offsetof(struct seccomp_data, nr)),
/* [10-14] Check against permitted syscalls */
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex,
5, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STUB_MMAP_NR,
4, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap,
3, 0),
#ifdef __i386__
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_set_thread_area,
2, 0),
#else
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_arch_prctl,
2, 0),
#endif
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn,
1, 0),
/* [15] Not one of the permitted syscalls */
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
/* [16] Permitted call for the stub */
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog = {
.len = sizeof(filter) / sizeof(filter[0]),
.filter = filter,
};
if (stub_syscall3(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
SECCOMP_FILTER_FLAG_TSYNC,
(unsigned long)&prog) != 0)
stub_syscall1(__NR_exit, 20);
/* Fall through, the exit syscall will cause SIGSYS */
} else {
stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP);
}
stub_syscall1(__NR_exit, 30);
__builtin_unreachable(); __builtin_unreachable();
} }

View File

@ -2,6 +2,9 @@
#ifndef __UM_OS_LINUX_INTERNAL_H #ifndef __UM_OS_LINUX_INTERNAL_H
#define __UM_OS_LINUX_INTERNAL_H #define __UM_OS_LINUX_INTERNAL_H
#include <mm_id.h>
#include <stub-data.h>
/* /*
* elf_aux.c * elf_aux.c
*/ */
@ -16,5 +19,5 @@ void check_tmpexec(void);
* skas/process.c * skas/process.c
*/ */
void wait_stub_done(int pid); void wait_stub_done(int pid);
void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys);
#endif /* __UM_OS_LINUX_INTERNAL_H */ #endif /* __UM_OS_LINUX_INTERNAL_H */

View File

@ -80,27 +80,32 @@ static inline long do_syscall_stub(struct mm_id *mm_idp)
int n, i; int n, i;
int err, pid = mm_idp->pid; int err, pid = mm_idp->pid;
n = ptrace_setregs(pid, syscall_regs);
if (n < 0) {
printk(UM_KERN_ERR "Registers - \n");
for (i = 0; i < MAX_REG_NR; i++)
printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
panic("%s : PTRACE_SETREGS failed, errno = %d\n",
__func__, -n);
}
/* Inform process how much we have filled in. */ /* Inform process how much we have filled in. */
proc_data->syscall_data_len = mm_idp->syscall_data_len; proc_data->syscall_data_len = mm_idp->syscall_data_len;
err = ptrace(PTRACE_CONT, pid, 0, 0); if (using_seccomp) {
if (err) proc_data->restart_wait = 1;
panic("Failed to continue stub, pid = %d, errno = %d\n", pid, wait_stub_done_seccomp(mm_idp, 0, 1);
errno); } else {
n = ptrace_setregs(pid, syscall_regs);
if (n < 0) {
printk(UM_KERN_ERR "Registers -\n");
for (i = 0; i < MAX_REG_NR; i++)
printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
panic("%s : PTRACE_SETREGS failed, errno = %d\n",
__func__, -n);
}
wait_stub_done(pid); err = ptrace(PTRACE_CONT, pid, 0, 0);
if (err)
panic("Failed to continue stub, pid = %d, errno = %d\n",
pid, errno);
wait_stub_done(pid);
}
/* /*
* proc_data->err will be non-zero if there was an (unexpected) error. * proc_data->err will be negative if there was an (unexpected) error.
* In that case, syscall_data_len points to the last executed syscall, * In that case, syscall_data_len points to the last executed syscall,
* otherwise it will be zero (but we do not need to rely on that). * otherwise it will be zero (but we do not need to rely on that).
*/ */

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* /*
* Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
* Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
* Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/ */
@ -25,8 +26,11 @@
#include <registers.h> #include <registers.h>
#include <skas.h> #include <skas.h>
#include <sysdep/stub.h> #include <sysdep/stub.h>
#include <sysdep/mcontext.h>
#include <linux/futex.h>
#include <linux/threads.h> #include <linux/threads.h>
#include <timetravel.h> #include <timetravel.h>
#include <asm-generic/rwonce.h>
#include "../internal.h" #include "../internal.h"
int is_skas_winch(int pid, int fd, void *data) int is_skas_winch(int pid, int fd, void *data)
@ -142,6 +146,73 @@ bad_wait:
fatal_sigsegv(); fatal_sigsegv();
} }
/*
 * Wait for the SECCOMP-mode stub of @mm_idp to hand control back.
 *
 * The stub and this code synchronise through data->futex, which lives in
 * the stub data page at mm_idp->stack.
 *
 * @mm_idp:      address space whose stub should be waited for
 * @running:     non-zero if the stub is already executing; when zero the
 *               stub is first released by setting the futex to
 *               FUTEX_IN_CHILD and issuing a FUTEX_WAKE
 * @wait_sigsys: non-zero if the stub is expected to stop with SIGSYS;
 *               SIGALRM stops seen before that are skipped by resuming
 *               the stub again
 *
 * On failure a message is logged and, if @mm_idp is the current MM,
 * fatal_sigsegv() is called. Otherwise the function simply returns; there
 * is no return value, so the failure is only visible in the log.
 */
void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys)
{
	struct stub_data *data = (void *)mm_idp->stack;
	int ret;

	do {
		if (!running) {
			/* Hand control to the child and wake it up */
			data->signal = 0;
			data->futex = FUTEX_IN_CHILD;
			CATCH_EINTR(syscall(__NR_futex, &data->futex,
					    FUTEX_WAKE, 1, NULL, NULL, 0));
		}

		do {
			/*
			 * We need to check whether the child is still alive
			 * before and after the FUTEX_WAIT call. Before, in
			 * case it just died but we still updated data->futex
			 * to FUTEX_IN_CHILD. And after, in case it died while
			 * we were waiting (and SIGCHLD woke us up, see the
			 * IRQ handler in mmu.c).
			 *
			 * Either way, if PID is negative, then we have no
			 * choice but to kill the task.
			 */
			if (__READ_ONCE(mm_idp->pid) < 0)
				goto out_kill;

			ret = syscall(__NR_futex, &data->futex,
				      FUTEX_WAIT, FUTEX_IN_CHILD,
				      NULL, NULL, 0);
			/* EINTR and EAGAIN are tolerated; the loop re-checks the futex */
			if (ret < 0 && errno != EINTR && errno != EAGAIN) {
				printk(UM_KERN_ERR "%s : FUTEX_WAIT failed, errno = %d\n",
				       __func__, errno);
				goto out_kill;
			}
		} while (data->futex == FUTEX_IN_CHILD);

		if (__READ_ONCE(mm_idp->pid) < 0)
			goto out_kill;

		/* The child has stopped; a further iteration must wake it again */
		running = 0;

		/* We may receive a SIGALRM before SIGSYS, iterate again. */
	} while (wait_sigsys && data->signal == SIGALRM);

	/* The offset is written by the child; reject it if an mcontext_t would not fit */
	if (data->mctx_offset > sizeof(data->sigstack) - sizeof(mcontext_t)) {
		printk(UM_KERN_ERR "%s : invalid mcontext offset\n", __func__);
		goto out_kill;
	}

	if (wait_sigsys && data->signal != SIGSYS) {
		printk(UM_KERN_ERR "%s : expected SIGSYS but got %d\n",
		       __func__, data->signal);
		goto out_kill;
	}

	return;

out_kill:
	printk(UM_KERN_ERR "%s : failed to wait for stub, pid = %d, errno = %d\n",
	       __func__, mm_idp->pid, errno);
	/* This is not true inside start_userspace */
	if (current_mm_id() == mm_idp)
		fatal_sigsegv();
}
extern unsigned long current_stub_stack(void); extern unsigned long current_stub_stack(void);
static void get_skas_faultinfo(int pid, struct faultinfo *fi) static void get_skas_faultinfo(int pid, struct faultinfo *fi)
@ -185,14 +256,26 @@ static int userspace_tramp(void *stack)
int pipe_fds[2]; int pipe_fds[2];
unsigned long long offset; unsigned long long offset;
struct stub_init_data init_data = { struct stub_init_data init_data = {
.seccomp = using_seccomp,
.stub_start = STUB_START, .stub_start = STUB_START,
.segv_handler = STUB_CODE +
(unsigned long) stub_segv_handler -
(unsigned long) __syscall_stub_start,
}; };
struct iomem_region *iomem; struct iomem_region *iomem;
int ret; int ret;
if (using_seccomp) {
init_data.signal_handler = STUB_CODE +
(unsigned long) stub_signal_interrupt -
(unsigned long) __syscall_stub_start;
init_data.signal_restorer = STUB_CODE +
(unsigned long) stub_signal_restorer -
(unsigned long) __syscall_stub_start;
} else {
init_data.signal_handler = STUB_CODE +
(unsigned long) stub_segv_handler -
(unsigned long) __syscall_stub_start;
init_data.signal_restorer = 0;
}
init_data.stub_code_fd = phys_mapping(uml_to_phys(__syscall_stub_start), init_data.stub_code_fd = phys_mapping(uml_to_phys(__syscall_stub_start),
&offset); &offset);
init_data.stub_code_offset = MMAP_OFFSET(offset); init_data.stub_code_offset = MMAP_OFFSET(offset);
@ -323,8 +406,9 @@ int userspace_pid[NR_CPUS];
* when negative: an error number. * when negative: an error number.
* FIXME: can PIDs become negative?! * FIXME: can PIDs become negative?!
*/ */
int start_userspace(unsigned long stub_stack) int start_userspace(struct mm_id *mm_id)
{ {
struct stub_data *proc_data = (void *)mm_id->stack;
void *stack; void *stack;
unsigned long sp; unsigned long sp;
int pid, status, n, err; int pid, status, n, err;
@ -343,10 +427,13 @@ int start_userspace(unsigned long stub_stack)
/* set stack pointer to the end of the stack page, so it can grow downwards */ /* set stack pointer to the end of the stack page, so it can grow downwards */
sp = (unsigned long)stack + UM_KERN_PAGE_SIZE; sp = (unsigned long)stack + UM_KERN_PAGE_SIZE;
if (using_seccomp)
proc_data->futex = FUTEX_IN_CHILD;
/* clone into new userspace process */ /* clone into new userspace process */
pid = clone(userspace_tramp, (void *) sp, pid = clone(userspace_tramp, (void *) sp,
CLONE_VFORK | CLONE_VM | SIGCHLD, CLONE_VFORK | CLONE_VM | SIGCHLD,
(void *)stub_stack); (void *)mm_id->stack);
if (pid < 0) { if (pid < 0) {
err = -errno; err = -errno;
printk(UM_KERN_ERR "%s : clone failed, errno = %d\n", printk(UM_KERN_ERR "%s : clone failed, errno = %d\n",
@ -354,29 +441,34 @@ int start_userspace(unsigned long stub_stack)
return err; return err;
} }
do { if (using_seccomp) {
CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); wait_stub_done_seccomp(mm_id, 1, 1);
if (n < 0) { } else {
do {
CATCH_EINTR(n = waitpid(pid, &status,
WUNTRACED | __WALL));
if (n < 0) {
err = -errno;
printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
__func__, errno);
goto out_kill;
}
} while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
err = -EINVAL;
printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
__func__, status);
goto out_kill;
}
if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
(void *) PTRACE_O_TRACESYSGOOD) < 0) {
err = -errno; err = -errno;
printk(UM_KERN_ERR "%s : wait failed, errno = %d\n", printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
__func__, errno); __func__, errno);
goto out_kill; goto out_kill;
} }
} while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
err = -EINVAL;
printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
__func__, status);
goto out_kill;
}
if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
(void *) PTRACE_O_TRACESYSGOOD) < 0) {
err = -errno;
printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
__func__, errno);
goto out_kill;
} }
if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) { if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) {
@ -386,6 +478,8 @@ int start_userspace(unsigned long stub_stack)
goto out_kill; goto out_kill;
} }
mm_id->pid = pid;
return pid; return pid;
out_kill: out_kill:
@ -399,7 +493,9 @@ extern unsigned long tt_extra_sched_jiffies;
void userspace(struct uml_pt_regs *regs) void userspace(struct uml_pt_regs *regs)
{ {
int err, status, op, pid = userspace_pid[0]; int err, status, op, pid = userspace_pid[0];
siginfo_t si; siginfo_t si_ptrace;
siginfo_t *si;
int sig;
/* Handle any immediate reschedules or signals */ /* Handle any immediate reschedules or signals */
interrupt_end(); interrupt_end();
@ -432,104 +528,182 @@ void userspace(struct uml_pt_regs *regs)
current_mm_sync(); current_mm_sync();
/* Flush out any pending syscalls */ if (using_seccomp) {
err = syscall_stub_flush(current_mm_id()); struct mm_id *mm_id = current_mm_id();
if (err) { struct stub_data *proc_data = (void *) mm_id->stack;
if (err == -ENOMEM) int ret;
report_enomem();
printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d", ret = set_stub_state(regs, proc_data, singlestepping());
__func__, -err); if (ret) {
fatal_sigsegv(); printk(UM_KERN_ERR "%s - failed to set regs: %d",
} __func__, ret);
fatal_sigsegv();
}
/* /* Must have been reset by the syscall caller */
* This can legitimately fail if the process loads a if (proc_data->restart_wait != 0)
* bogus value into a segment register. It will panic("Programming error: Flag to only run syscalls in child was not cleared!");
* segfault and PTRACE_GETREGS will read that value
* out of the process. However, PTRACE_SETREGS will
* fail. In this case, there is nothing to do but
* just kill the process.
*/
if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
__func__, errno);
fatal_sigsegv();
}
if (put_fp_registers(pid, regs->fp)) { /* Mark pending syscalls for flushing */
printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n", proc_data->syscall_data_len = mm_id->syscall_data_len;
__func__, errno); mm_id->syscall_data_len = 0;
fatal_sigsegv();
}
if (singlestepping()) proc_data->signal = 0;
op = PTRACE_SYSEMU_SINGLESTEP; proc_data->futex = FUTEX_IN_CHILD;
else CATCH_EINTR(syscall(__NR_futex, &proc_data->futex,
op = PTRACE_SYSEMU; FUTEX_WAKE, 1, NULL, NULL, 0));
do {
ret = syscall(__NR_futex, &proc_data->futex,
FUTEX_WAIT, FUTEX_IN_CHILD, NULL, NULL, 0);
} while ((ret == -1 && errno == EINTR) ||
proc_data->futex == FUTEX_IN_CHILD);
if (ptrace(op, pid, 0, 0)) { sig = proc_data->signal;
printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
__func__, op, errno);
fatal_sigsegv();
}
CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); if (sig == SIGTRAP && proc_data->err != 0) {
if (err < 0) { printk(UM_KERN_ERR "%s - Error flushing stub syscalls",
printk(UM_KERN_ERR "%s - wait failed, errno = %d\n", __func__);
__func__, errno); syscall_stub_dump_error(mm_id);
fatal_sigsegv(); fatal_sigsegv();
} }
regs->is_user = 1; ret = get_stub_state(regs, proc_data, NULL);
if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) { if (ret) {
printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n", printk(UM_KERN_ERR "%s - failed to get regs: %d",
__func__, errno); __func__, ret);
fatal_sigsegv(); fatal_sigsegv();
} }
if (get_fp_registers(pid, regs->fp)) { if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si))
printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n", panic("%s - Invalid siginfo offset from child",
__func__, errno); __func__);
fatal_sigsegv(); si = (void *)&proc_data->sigstack[proc_data->si_offset];
regs->is_user = 1;
/* Fill in ORIG_RAX and extract fault information */
PT_SYSCALL_NR(regs->gp) = si->si_syscall;
if (sig == SIGSEGV) {
mcontext_t *mcontext = (void *)&proc_data->sigstack[proc_data->mctx_offset];
GET_FAULTINFO_FROM_MC(regs->faultinfo, mcontext);
}
} else {
/* Flush out any pending syscalls */
err = syscall_stub_flush(current_mm_id());
if (err) {
if (err == -ENOMEM)
report_enomem();
printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d",
__func__, -err);
fatal_sigsegv();
}
/*
* This can legitimately fail if the process loads a
* bogus value into a segment register. It will
* segfault and PTRACE_GETREGS will read that value
* out of the process. However, PTRACE_SETREGS will
* fail. In this case, there is nothing to do but
* just kill the process.
*/
if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
__func__, errno);
fatal_sigsegv();
}
if (put_fp_registers(pid, regs->fp)) {
printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n",
__func__, errno);
fatal_sigsegv();
}
if (singlestepping())
op = PTRACE_SYSEMU_SINGLESTEP;
else
op = PTRACE_SYSEMU;
if (ptrace(op, pid, 0, 0)) {
printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
__func__, op, errno);
fatal_sigsegv();
}
CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
if (err < 0) {
printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
__func__, errno);
fatal_sigsegv();
}
regs->is_user = 1;
if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n",
__func__, errno);
fatal_sigsegv();
}
if (get_fp_registers(pid, regs->fp)) {
printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n",
__func__, errno);
fatal_sigsegv();
}
if (WIFSTOPPED(status)) {
sig = WSTOPSIG(status);
/*
* These signal handlers need the si argument
* and SIGSEGV needs the faultinfo.
* The SIGIO and SIGALRM handlers which constitute
* the majority of invocations, do not use it.
*/
switch (sig) {
case SIGSEGV:
get_skas_faultinfo(pid,
&regs->faultinfo);
fallthrough;
case SIGTRAP:
case SIGILL:
case SIGBUS:
case SIGFPE:
case SIGWINCH:
ptrace(PTRACE_GETSIGINFO, pid, 0,
(struct siginfo *)&si_ptrace);
si = &si_ptrace;
break;
default:
si = NULL;
break;
}
} else {
sig = 0;
}
} }
UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
if (WIFSTOPPED(status)) { if (sig) {
int sig = WSTOPSIG(status);
/* These signal handlers need the si argument.
* The SIGIO and SIGALARM handlers which constitute the
* majority of invocations, do not use it.
*/
switch (sig) { switch (sig) {
case SIGSEGV: case SIGSEGV:
case SIGTRAP: if (using_seccomp || PTRACE_FULL_FAULTINFO)
case SIGILL: (*sig_info[SIGSEGV])(SIGSEGV,
case SIGBUS: (struct siginfo *)si,
case SIGFPE:
case SIGWINCH:
ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si);
break;
}
switch (sig) {
case SIGSEGV:
get_skas_faultinfo(pid, &regs->faultinfo);
if (PTRACE_FULL_FAULTINFO)
(*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si,
regs, NULL); regs, NULL);
else else
segv(regs->faultinfo, 0, 1, NULL, NULL); segv(regs->faultinfo, 0, 1, NULL, NULL);
break;
case SIGSYS:
handle_syscall(regs);
break; break;
case SIGTRAP + 0x80: case SIGTRAP + 0x80:
handle_trap(pid, regs); handle_trap(pid, regs);
break; break;
case SIGTRAP: case SIGTRAP:
relay_signal(SIGTRAP, (struct siginfo *)&si, regs, NULL); relay_signal(SIGTRAP, (struct siginfo *)si, regs, NULL);
break; break;
case SIGALRM: case SIGALRM:
break; break;
@ -539,7 +713,7 @@ void userspace(struct uml_pt_regs *regs)
case SIGFPE: case SIGFPE:
case SIGWINCH: case SIGWINCH:
block_signals_trace(); block_signals_trace();
(*sig_info[sig])(sig, (struct siginfo *)&si, regs, NULL); (*sig_info[sig])(sig, (struct siginfo *)si, regs, NULL);
unblock_signals_trace(); unblock_signals_trace();
break; break;
default: default:

View File

@ -4,7 +4,9 @@
#include <linux/elf.h> #include <linux/elf.h>
#include <linux/crypto.h> #include <linux/crypto.h>
#include <linux/kbuild.h> #include <linux/kbuild.h>
#include <linux/audit.h>
#include <asm/mman.h> #include <asm/mman.h>
#include <asm/seccomp.h>
/* workaround for a warning with -Wmissing-prototypes */ /* workaround for a warning with -Wmissing-prototypes */
void foo(void); void foo(void);

View File

@ -12,6 +12,7 @@
#include <skas.h> #include <skas.h>
#include <sysdep/tls.h> #include <sysdep/tls.h>
#include <asm/desc.h> #include <asm/desc.h>
#include <stub-data.h>
/* /*
* If needed we can detect when it's uninitialized. * If needed we can detect when it's uninitialized.
@ -21,13 +22,27 @@
static int host_supports_tls = -1; static int host_supports_tls = -1;
int host_gdt_entry_tls_min; int host_gdt_entry_tls_min;
static int do_set_thread_area(struct user_desc *info) static int do_set_thread_area(struct task_struct* task, struct user_desc *info)
{ {
int ret; int ret;
u32 cpu; u32 cpu;
if (info->entry_number < host_gdt_entry_tls_min ||
info->entry_number >= host_gdt_entry_tls_min + GDT_ENTRY_TLS_ENTRIES)
return -EINVAL;
if (using_seccomp) {
int idx = info->entry_number - host_gdt_entry_tls_min;
struct stub_data *data = (void *)task->mm->context.id.stack;
data->arch_data.tls[idx] = *info;
data->arch_data.sync |= BIT(idx);
return 0;
}
cpu = get_cpu(); cpu = get_cpu();
ret = os_set_thread_area(info, userspace_pid[cpu]); ret = os_set_thread_area(info, task->mm->context.id.pid);
put_cpu(); put_cpu();
if (ret) if (ret)
@ -97,7 +112,7 @@ static int load_TLS(int flags, struct task_struct *to)
if (!(flags & O_FORCE) && curr->flushed) if (!(flags & O_FORCE) && curr->flushed)
continue; continue;
ret = do_set_thread_area(&curr->tls); ret = do_set_thread_area(current, &curr->tls);
if (ret) if (ret)
goto out; goto out;
@ -275,7 +290,7 @@ SYSCALL_DEFINE1(set_thread_area, struct user_desc __user *, user_desc)
return -EFAULT; return -EFAULT;
} }
ret = do_set_thread_area(&info); ret = do_set_thread_area(current, &info);
if (ret) if (ret)
return ret; return ret;
return set_tls_entry(current, &info, idx, 1); return set_tls_entry(current, &info, idx, 1);