Skip to content

Commit 15bbeec

Browse files
committed
Merge tag 'core-entry-2023-04-24' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core entry/ptrace update from Thomas Gleixner: "Provide a ptrace set/get interface for syscall user dispatch. The main purpose is to enable checkpoint/restore (CRIU) to handle processes which utilize syscall user dispatch correctly" * tag 'core-entry-2023-04-24' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: selftest, ptrace: Add selftest for syscall user dispatch config api ptrace: Provide set/get interface for syscall user dispatch syscall_user_dispatch: Untag selector address before access_ok() syscall_user_dispatch: Split up set_syscall_user_dispatch()
2 parents 29e95a4 + 8c8fa60 commit 15bbeec

File tree

8 files changed

+200
-10
lines changed

8 files changed

+200
-10
lines changed

Documentation/admin-guide/syscall-user-dispatch.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ thread-wide, without the need to invoke the kernel directly. selector
7373
can be set to SYSCALL_DISPATCH_FILTER_ALLOW or SYSCALL_DISPATCH_FILTER_BLOCK.
7474
Any other value should terminate the program with a SIGSYS.
7575

76+
Additionally, a task's syscall user dispatch configuration can be peeked
77+
and poked via the PTRACE_(GET|SET)_SYSCALL_USER_DISPATCH_CONFIG ptrace
78+
requests. This is useful for checkpoint/restart software.
79+
7680
Security Notes
7781
--------------
7882

include/linux/syscall_user_dispatch.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
2222
#define clear_syscall_work_syscall_user_dispatch(tsk) \
2323
clear_task_syscall_work(tsk, SYSCALL_USER_DISPATCH)
2424

25+
int syscall_user_dispatch_get_config(struct task_struct *task, unsigned long size,
26+
void __user *data);
27+
28+
int syscall_user_dispatch_set_config(struct task_struct *task, unsigned long size,
29+
void __user *data);
30+
2531
#else
2632
struct syscall_user_dispatch {};
2733

@@ -35,6 +41,18 @@ static inline void clear_syscall_work_syscall_user_dispatch(struct task_struct *
3541
{
3642
}
3743

44+
static inline int syscall_user_dispatch_get_config(struct task_struct *task,
45+
unsigned long size, void __user *data)
46+
{
47+
return -EINVAL;
48+
}
49+
50+
static inline int syscall_user_dispatch_set_config(struct task_struct *task,
51+
unsigned long size, void __user *data)
52+
{
53+
return -EINVAL;
54+
}
55+
3856
#endif /* CONFIG_GENERIC_ENTRY */
3957

4058
#endif /* _SYSCALL_USER_DISPATCH_H */

include/uapi/linux/ptrace.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,36 @@ struct ptrace_rseq_configuration {
112112
__u32 pad;
113113
};
114114

115+
#define PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG 0x4210
116+
#define PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG 0x4211
117+
118+
/*
119+
* struct ptrace_sud_config - Per-task configuration for Syscall User Dispatch
120+
* @mode: One of PR_SYS_DISPATCH_ON or PR_SYS_DISPATCH_OFF
121+
* @selector: Tracee's user virtual address of SUD selector
122+
* @offset: SUD exclusion area (virtual address)
123+
* @len: Length of SUD exclusion area
124+
*
125+
* Used to get/set the syscall user dispatch configuration for a tracee.
126+
* Selector is optional (may be NULL), and if invalid will produce
127+
* a SIGSEGV in the tracee upon first access.
128+
*
129+
* If mode is PR_SYS_DISPATCH_ON, syscall dispatch will be enabled. If
130+
* PR_SYS_DISPATCH_OFF, syscall dispatch will be disabled and all other
131+
* parameters must be 0. The value in *selector (if not NULL) also determines
132+
* whether syscall dispatch will occur.
133+
*
134+
* The Syscall User Dispatch Exclusion area described by offset/len is the
135+
* virtual address space from which syscalls will not produce a user
136+
* dispatch.
137+
*/
138+
struct ptrace_sud_config {
139+
__u64 mode;
140+
__u64 selector;
141+
__u64 offset;
142+
__u64 len;
143+
};
144+
115145
/*
116146
* These values are stored in task->ptrace_message
117147
* by ptrace_stop to describe the current syscall-stop.

kernel/entry/syscall_user_dispatch.c

Lines changed: 65 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
*/
55
#include <linux/sched.h>
66
#include <linux/prctl.h>
7+
#include <linux/ptrace.h>
78
#include <linux/syscall_user_dispatch.h>
89
#include <linux/uaccess.h>
910
#include <linux/signal.h>
@@ -68,8 +69,9 @@ bool syscall_user_dispatch(struct pt_regs *regs)
6869
return true;
6970
}
7071

71-
int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
72-
unsigned long len, char __user *selector)
72+
static int task_set_syscall_user_dispatch(struct task_struct *task, unsigned long mode,
73+
unsigned long offset, unsigned long len,
74+
char __user *selector)
7375
{
7476
switch (mode) {
7577
case PR_SYS_DISPATCH_OFF:
@@ -86,23 +88,77 @@ int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
8688
if (offset && offset + len <= offset)
8789
return -EINVAL;
8890

89-
if (selector && !access_ok(selector, sizeof(*selector)))
91+
/*
92+
* access_ok() will clear memory tags for tagged addresses
93+
* if current has memory tagging enabled.
94+
95+
* To enable a tracer to set a tracee's selector, the
96+
* selector address must be untagged for access_ok(),
97+
* otherwise an untagged tracer will always fail to set a
98+
* tagged tracee's selector.
99+
*/
100+
if (selector && !access_ok(untagged_addr(selector), sizeof(*selector)))
90101
return -EFAULT;
91102

92103
break;
93104
default:
94105
return -EINVAL;
95106
}
96107

97-
current->syscall_dispatch.selector = selector;
98-
current->syscall_dispatch.offset = offset;
99-
current->syscall_dispatch.len = len;
100-
current->syscall_dispatch.on_dispatch = false;
108+
task->syscall_dispatch.selector = selector;
109+
task->syscall_dispatch.offset = offset;
110+
task->syscall_dispatch.len = len;
111+
task->syscall_dispatch.on_dispatch = false;
101112

102113
if (mode == PR_SYS_DISPATCH_ON)
103-
set_syscall_work(SYSCALL_USER_DISPATCH);
114+
set_task_syscall_work(task, SYSCALL_USER_DISPATCH);
115+
else
116+
clear_task_syscall_work(task, SYSCALL_USER_DISPATCH);
117+
118+
return 0;
119+
}
120+
121+
int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
122+
unsigned long len, char __user *selector)
123+
{
124+
return task_set_syscall_user_dispatch(current, mode, offset, len, selector);
125+
}
126+
127+
int syscall_user_dispatch_get_config(struct task_struct *task, unsigned long size,
128+
void __user *data)
129+
{
130+
struct syscall_user_dispatch *sd = &task->syscall_dispatch;
131+
struct ptrace_sud_config cfg;
132+
133+
if (size != sizeof(cfg))
134+
return -EINVAL;
135+
136+
if (test_task_syscall_work(task, SYSCALL_USER_DISPATCH))
137+
cfg.mode = PR_SYS_DISPATCH_ON;
104138
else
105-
clear_syscall_work(SYSCALL_USER_DISPATCH);
139+
cfg.mode = PR_SYS_DISPATCH_OFF;
140+
141+
cfg.offset = sd->offset;
142+
cfg.len = sd->len;
143+
cfg.selector = (__u64)(uintptr_t)sd->selector;
144+
145+
if (copy_to_user(data, &cfg, sizeof(cfg)))
146+
return -EFAULT;
106147

107148
return 0;
108149
}
150+
151+
int syscall_user_dispatch_set_config(struct task_struct *task, unsigned long size,
152+
void __user *data)
153+
{
154+
struct ptrace_sud_config cfg;
155+
156+
if (size != sizeof(cfg))
157+
return -EINVAL;
158+
159+
if (copy_from_user(&cfg, data, sizeof(cfg)))
160+
return -EFAULT;
161+
162+
return task_set_syscall_user_dispatch(task, cfg.mode, cfg.offset, cfg.len,
163+
(char __user *)(uintptr_t)cfg.selector);
164+
}

kernel/ptrace.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include <linux/compat.h>
3333
#include <linux/sched/signal.h>
3434
#include <linux/minmax.h>
35+
#include <linux/syscall_user_dispatch.h>
3536

3637
#include <asm/syscall.h> /* for syscall_get_* */
3738

@@ -1259,6 +1260,14 @@ int ptrace_request(struct task_struct *child, long request,
12591260
break;
12601261
#endif
12611262

1263+
case PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG:
1264+
ret = syscall_user_dispatch_set_config(child, addr, datavp);
1265+
break;
1266+
1267+
case PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG:
1268+
ret = syscall_user_dispatch_get_config(child, addr, datavp);
1269+
break;
1270+
12621271
default:
12631272
break;
12641273
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# SPDX-License-Identifier: GPL-2.0-only
22
get_syscall_info
3+
get_set_sud
34
peeksiginfo
45
vmaccess
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# SPDX-License-Identifier: GPL-2.0-only
22
CFLAGS += -std=c99 -pthread -Wall $(KHDR_INCLUDES)
33

4-
TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess
4+
TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess get_set_sud
55

66
include ../lib.mk
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
#define _GNU_SOURCE
3+
#include "../kselftest_harness.h"
4+
#include <stdio.h>
5+
#include <string.h>
6+
#include <errno.h>
7+
#include <sys/wait.h>
8+
#include <sys/syscall.h>
9+
#include <sys/prctl.h>
10+
11+
#include "linux/ptrace.h"
12+
13+
static int sys_ptrace(int request, pid_t pid, void *addr, void *data)
14+
{
15+
return syscall(SYS_ptrace, request, pid, addr, data);
16+
}
17+
18+
TEST(get_set_sud)
19+
{
20+
struct ptrace_sud_config config;
21+
pid_t child;
22+
int ret = 0;
23+
int status;
24+
25+
child = fork();
26+
ASSERT_GE(child, 0);
27+
if (child == 0) {
28+
ASSERT_EQ(0, sys_ptrace(PTRACE_TRACEME, 0, 0, 0)) {
29+
TH_LOG("PTRACE_TRACEME: %m");
30+
}
31+
kill(getpid(), SIGSTOP);
32+
_exit(1);
33+
}
34+
35+
waitpid(child, &status, 0);
36+
37+
memset(&config, 0xff, sizeof(config));
38+
config.mode = PR_SYS_DISPATCH_ON;
39+
40+
ret = sys_ptrace(PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG, child,
41+
(void *)sizeof(config), &config);
42+
43+
ASSERT_EQ(ret, 0);
44+
ASSERT_EQ(config.mode, PR_SYS_DISPATCH_OFF);
45+
ASSERT_EQ(config.selector, 0);
46+
ASSERT_EQ(config.offset, 0);
47+
ASSERT_EQ(config.len, 0);
48+
49+
config.mode = PR_SYS_DISPATCH_ON;
50+
config.selector = 0;
51+
config.offset = 0x400000;
52+
config.len = 0x1000;
53+
54+
ret = sys_ptrace(PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG, child,
55+
(void *)sizeof(config), &config);
56+
57+
ASSERT_EQ(ret, 0);
58+
59+
memset(&config, 1, sizeof(config));
60+
ret = sys_ptrace(PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG, child,
61+
(void *)sizeof(config), &config);
62+
63+
ASSERT_EQ(ret, 0);
64+
ASSERT_EQ(config.mode, PR_SYS_DISPATCH_ON);
65+
ASSERT_EQ(config.selector, 0);
66+
ASSERT_EQ(config.offset, 0x400000);
67+
ASSERT_EQ(config.len, 0x1000);
68+
69+
kill(child, SIGKILL);
70+
}
71+
72+
TEST_HARNESS_MAIN

0 commit comments

Comments
 (0)