Skip to content

Commit 406d17c

Browse files
benzea authored and jmberg committed
um: Implement kernel side of SECCOMP based process handling
This adds the kernel side of the seccomp based process handling.

Co-authored-by: Johannes Berg <[email protected]>
Signed-off-by: Benjamin Berg <[email protected]>
Signed-off-by: Benjamin Berg <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Johannes Berg <[email protected]>
1 parent 8420e08 commit 406d17c

File tree

10 files changed

+459
-138
lines changed

10 files changed

+459
-138
lines changed

arch/um/include/shared/common-offsets.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,5 @@ DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
1616
DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC);
1717

1818
DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES);
19+
20+
DEFINE(UM_SECCOMP_ARCH_NATIVE, SECCOMP_ARCH_NATIVE);

arch/um/include/shared/os.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len);
286286

287287
/* skas/process.c */
288288
extern int is_skas_winch(int pid, int fd, void *data);
289-
extern int start_userspace(unsigned long stub_stack);
289+
extern int start_userspace(struct mm_id *mm_id);
290290
extern void userspace(struct uml_pt_regs *regs);
291291
extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void));
292292
extern void switch_threads(jmp_buf *me, jmp_buf *you);

arch/um/include/shared/skas/stub-data.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,17 @@
1717
#define FUTEX_IN_KERN 1
1818

1919
struct stub_init_data {
20+
int seccomp;
21+
2022
unsigned long stub_start;
2123

2224
int stub_code_fd;
2325
unsigned long stub_code_offset;
2426
int stub_data_fd;
2527
unsigned long stub_data_offset;
2628

27-
unsigned long segv_handler;
29+
unsigned long signal_handler;
30+
unsigned long signal_restorer;
2831
};
2932

3033
#define STUB_NEXT_SYSCALL(s) \

arch/um/kernel/skas/mmu.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,9 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
4040
list_add(&mm->context.list, &mm_list);
4141
}
4242

43-
new_id->pid = start_userspace(stack);
44-
if (new_id->pid < 0) {
45-
ret = new_id->pid;
43+
ret = start_userspace(new_id);
44+
if (ret < 0)
4645
goto out_free;
47-
}
4846

4947
/* Ensure the new MM is clean and nothing unwanted is mapped */
5048
unmap(new_id, 0, STUB_START);

arch/um/kernel/skas/stub_exe.c

Lines changed: 130 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
#include <asm/unistd.h>
44
#include <sysdep/stub.h>
55
#include <stub-data.h>
6+
#include <linux/filter.h>
7+
#include <linux/seccomp.h>
8+
#include <generated/asm-offsets.h>
69

710
void _start(void);
811

@@ -25,8 +28,6 @@ noinline static void real_init(void)
2528
} sa = {
2629
/* Need to set SA_RESTORER (but the handler never returns) */
2730
.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000,
28-
/* no need to mask any signals */
29-
.sa_mask = 0,
3031
};
3132

3233
/* set a nice name */
@@ -35,6 +36,9 @@ noinline static void real_init(void)
3536
/* Make sure this process dies if the kernel dies */
3637
stub_syscall2(__NR_prctl, PR_SET_PDEATHSIG, SIGKILL);
3738

39+
/* Needed in SECCOMP mode (and safe to do anyway) */
40+
stub_syscall5(__NR_prctl, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
41+
3842
/* read information from STDIN and close it */
3943
res = stub_syscall3(__NR_read, 0,
4044
(unsigned long)&init_data, sizeof(init_data));
@@ -63,18 +67,133 @@ noinline static void real_init(void)
6367
stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE;
6468
stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0);
6569

66-
/* register SIGSEGV handler */
67-
sa.sa_handler_ = (void *) init_data.segv_handler;
68-
res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, (unsigned long)&sa, 0,
69-
sizeof(sa.sa_mask));
70-
if (res != 0)
71-
stub_syscall1(__NR_exit, 13);
70+
/* register signal handlers */
71+
sa.sa_handler_ = (void *) init_data.signal_handler;
72+
sa.sa_restorer = (void *) init_data.signal_restorer;
73+
if (!init_data.seccomp) {
74+
/* In ptrace mode, the SIGSEGV handler never returns */
75+
sa.sa_mask = 0;
76+
77+
res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
78+
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
79+
if (res != 0)
80+
stub_syscall1(__NR_exit, 13);
81+
} else {
82+
/* SECCOMP mode uses rt_sigreturn, need to mask all signals */
83+
sa.sa_mask = ~0ULL;
84+
85+
res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
86+
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
87+
if (res != 0)
88+
stub_syscall1(__NR_exit, 14);
89+
90+
res = stub_syscall4(__NR_rt_sigaction, SIGSYS,
91+
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
92+
if (res != 0)
93+
stub_syscall1(__NR_exit, 15);
94+
95+
res = stub_syscall4(__NR_rt_sigaction, SIGALRM,
96+
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
97+
if (res != 0)
98+
stub_syscall1(__NR_exit, 16);
99+
100+
res = stub_syscall4(__NR_rt_sigaction, SIGTRAP,
101+
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
102+
if (res != 0)
103+
stub_syscall1(__NR_exit, 17);
104+
105+
res = stub_syscall4(__NR_rt_sigaction, SIGILL,
106+
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
107+
if (res != 0)
108+
stub_syscall1(__NR_exit, 18);
109+
110+
res = stub_syscall4(__NR_rt_sigaction, SIGFPE,
111+
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
112+
if (res != 0)
113+
stub_syscall1(__NR_exit, 19);
114+
}
115+
116+
/*
117+
* If in seccomp mode, install the SECCOMP filter and trigger a syscall.
118+
* Otherwise set PTRACE_TRACEME and do a SIGSTOP.
119+
*/
120+
if (init_data.seccomp) {
121+
struct sock_filter filter[] = {
122+
#if __BITS_PER_LONG > 32
123+
/* [0] Load upper 32bit of instruction pointer from seccomp_data */
124+
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
125+
(offsetof(struct seccomp_data, instruction_pointer) + 4)),
126+
127+
/* [1] Jump forward 3 instructions if the upper address is not identical */
128+
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) >> 32, 0, 3),
129+
#endif
130+
/* [2] Load lower 32bit of instruction pointer from seccomp_data */
131+
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
132+
(offsetof(struct seccomp_data, instruction_pointer))),
133+
134+
/* [3] Mask out lower bits */
135+
BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xfffff000),
136+
137+
/* [4] Jump to [6] if the lower bits are on the expected page, otherwise fall through to the trap at [5] */
138+
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) & 0xfffff000, 1, 0),
139+
140+
/* [5] Trap call, allow */
141+
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
142+
143+
/* [6,7] Check architecture */
144+
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
145+
offsetof(struct seccomp_data, arch)),
146+
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,
147+
UM_SECCOMP_ARCH_NATIVE, 1, 0),
148+
149+
/* [8] Kill (for architecture check) */
150+
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
151+
152+
/* [9] Load syscall number */
153+
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
154+
offsetof(struct seccomp_data, nr)),
155+
156+
/* [10-14] Check against permitted syscalls */
157+
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex,
158+
5, 0),
159+
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STUB_MMAP_NR,
160+
4, 0),
161+
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap,
162+
3, 0),
163+
#ifdef __i386__
164+
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_set_thread_area,
165+
2, 0),
166+
#else
167+
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_arch_prctl,
168+
2, 0),
169+
#endif
170+
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn,
171+
1, 0),
172+
173+
/* [15] Not one of the permitted syscalls */
174+
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
175+
176+
/* [16] Permitted call for the stub */
177+
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
178+
};
179+
struct sock_fprog prog = {
180+
.len = sizeof(filter) / sizeof(filter[0]),
181+
.filter = filter,
182+
};
183+
184+
if (stub_syscall3(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
185+
SECCOMP_FILTER_FLAG_TSYNC,
186+
(unsigned long)&prog) != 0)
187+
stub_syscall1(__NR_exit, 20);
72188

73-
stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
189+
/* Fall through, the exit syscall will cause SIGSYS */
190+
} else {
191+
stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
74192

75-
stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP);
193+
stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP);
194+
}
76195

77-
stub_syscall1(__NR_exit, 14);
196+
stub_syscall1(__NR_exit, 30);
78197

79198
__builtin_unreachable();
80199
}

arch/um/os-Linux/internal.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
#ifndef __UM_OS_LINUX_INTERNAL_H
33
#define __UM_OS_LINUX_INTERNAL_H
44

5+
#include <mm_id.h>
6+
#include <stub-data.h>
7+
58
/*
69
* elf_aux.c
710
*/
@@ -16,5 +19,5 @@ void check_tmpexec(void);
1619
* skas/process.c
1720
*/
1821
void wait_stub_done(int pid);
19-
22+
void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys);
2023
#endif /* __UM_OS_LINUX_INTERNAL_H */

arch/um/os-Linux/skas/mem.c

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -80,27 +80,32 @@ static inline long do_syscall_stub(struct mm_id *mm_idp)
8080
int n, i;
8181
int err, pid = mm_idp->pid;
8282

83-
n = ptrace_setregs(pid, syscall_regs);
84-
if (n < 0) {
85-
printk(UM_KERN_ERR "Registers - \n");
86-
for (i = 0; i < MAX_REG_NR; i++)
87-
printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
88-
panic("%s : PTRACE_SETREGS failed, errno = %d\n",
89-
__func__, -n);
90-
}
91-
9283
/* Inform process how much we have filled in. */
9384
proc_data->syscall_data_len = mm_idp->syscall_data_len;
9485

95-
err = ptrace(PTRACE_CONT, pid, 0, 0);
96-
if (err)
97-
panic("Failed to continue stub, pid = %d, errno = %d\n", pid,
98-
errno);
99-
100-
wait_stub_done(pid);
86+
if (using_seccomp) {
87+
proc_data->restart_wait = 1;
88+
wait_stub_done_seccomp(mm_idp, 0, 1);
89+
} else {
90+
n = ptrace_setregs(pid, syscall_regs);
91+
if (n < 0) {
92+
printk(UM_KERN_ERR "Registers -\n");
93+
for (i = 0; i < MAX_REG_NR; i++)
94+
printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
95+
panic("%s : PTRACE_SETREGS failed, errno = %d\n",
96+
__func__, -n);
97+
}
98+
99+
err = ptrace(PTRACE_CONT, pid, 0, 0);
100+
if (err)
101+
panic("Failed to continue stub, pid = %d, errno = %d\n",
102+
pid, errno);
103+
104+
wait_stub_done(pid);
105+
}
101106

102107
/*
103-
* proc_data->err will be non-zero if there was an (unexpected) error.
108+
* proc_data->err will be negative if there was an (unexpected) error.
104109
* In that case, syscall_data_len points to the last executed syscall,
105110
* otherwise it will be zero (but we do not need to rely on that).
106111
*/

0 commit comments

Comments
 (0)