Skip to content

Commit c47fa8a

Browse files
committed
fix: aptos thread with a ghost file
- added test to reproduce the issue - fixed the issue - added way to run criu tests on container
1 parent 710e9a7 commit c47fa8a

File tree

6 files changed

+200
-1
lines changed

6 files changed

+200
-1
lines changed

Makefile

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,14 @@ docker-test:
421421
./test/zdtm.py run -a --keep-going --ignore-taint
422422
.PHONY: docker-test
423423

424+
#
425+
# CastAI custom targets
426+
# castai-test: build the image described by test/Dockerfile (tagged
# "criu-test") and run it once in a throw-away (--rm) container.
# The container is privileged, shares the host cgroup namespace and
# gets the host's /lib/modules mounted at the same path.
# The arguments after the image name ("run -a -p 4 --keep-going
# --ignore-taint") are passed to the image's entrypoint, which
# test/Dockerfile sets to ./test/zdtm.py.
castai-test:
427+
docker build -t criu-test -f test/Dockerfile .
428+
docker run --rm --privileged --cgroupns=host -v /lib/modules:/lib/modules \
429+
criu-test run -a -p 4 --keep-going --ignore-taint
430+
.PHONY: castai-test
431+
424432
help:
425433
@echo ' Targets:'
426434
@echo ' all - Build all [*] targets'
@@ -439,6 +447,7 @@ help:
439447
@echo ' test - Run zdtm test-suite'
440448
@echo ' gcov - Make code coverage report'
441449
@echo ' unittest - Run unit tests'
450+
@echo ' castai-test - Build and run all ZDTM tests in Docker'
442451
@echo ' lint - Run code linters'
443452
@echo ' indent - Indent C code'
444453
@echo ' amdgpu_plugin - Make AMD GPU plugin'

criu/files-reg.c

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1351,6 +1351,48 @@ static int check_path_remap(struct fd_link *link, const struct fd_parms *parms,
13511351
pr_info("Dumping dead process remap of %d\n", pid);
13521352
return dump_dead_process_remap(pid, id);
13531353
}
1354+
1355+
/*
1356+
* The process is alive, but the path may reference a
1357+
* dead thread: /proc/<pid>/task/<tid>/... If the
1358+
* thread <tid> has exited, the task/<tid> directory
1359+
* no longer exists and the file can't be opened on
1360+
* restore. Handle this by creating a TASK_HELPER
1361+
* process with vPID=<tid> and rewriting the path to
1362+
* /proc/<tid>/task/<tid>/... so it resolves through
1363+
* the helper's /proc entry.
1364+
*/
1365+
if (*end == '/' && !strncmp(end, "/task/", 6)) {
1366+
pid_t tid;
1367+
char *tend;
1368+
char task_path[PATH_MAX];
1369+
1370+
tid = strtol(end + 6, &tend, 10);
1371+
if (tid != 0 && (*tend == '/' || *tend == '\0')) {
1372+
/*
1373+
* Check if /proc/<pid>/task/<tid>
1374+
* exists by temporarily truncating
1375+
* the path at the character after
1376+
* the tid.
1377+
*/
1378+
char saved = *tend;
1379+
*tend = '\0';
1380+
ret = faccessat(mntns_root, rpath, F_OK, 0);
1381+
*tend = saved;
1382+
1383+
if (ret) {
1384+
snprintf(task_path, sizeof(task_path),
1385+
"%.*s%d/task/%d%s",
1386+
(int)(start - rpath) + 1,
1387+
rpath, tid, tid, tend);
1388+
pr_info("Dumping dead thread remap %d/%d\n",
1389+
pid, tid);
1390+
strcpy(link->name, task_path);
1391+
link->len = strlen(link->name);
1392+
return dump_dead_process_remap(tid, id);
1393+
}
1394+
}
1395+
}
13541396
}
13551397

13561398
return 0;
@@ -1661,7 +1703,7 @@ static int get_build_id(const int fd, const struct stat *fd_status, unsigned cha
16611703
*/
16621704
mapped_size = min_t(size_t, fd_status->st_size, BUILD_ID_MAP_SIZE);
16631705
start_addr = mmap(0, mapped_size, PROT_READ, MAP_PRIVATE | MAP_FILE, fd, 0);
1664-
if ((void*)start_addr == MAP_FAILED) {
1706+
if ((void *)start_addr == MAP_FAILED) {
16651707
pr_warn("Couldn't mmap file with fd %d\n", fd);
16661708
return -1;
16671709
}

test/Dockerfile

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Image that builds CRIU and its ZDTM tests from the build context and
# uses the ZDTM runner as its entrypoint.
FROM ubuntu:24.04

# DEBIAN_FRONTEND has to prefix `apt-get install`: a VAR=value prefix only
# applies to the single command it precedes, so placing it before
# `apt-get update` leaves the install step (the one that can prompt)
# running with the default interactive frontend.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    libnet-dev \
    libnl-route-3-dev \
    gcc \
    bsdmainutils \
    build-essential \
    git-core \
    iptables \
    libaio-dev \
    libbsd-dev \
    libcap-dev \
    libnl-3-dev \
    libprotobuf-c-dev \
    libprotobuf-dev \
    libselinux-dev \
    libpcre2-dev \
    iproute2 \
    kmod \
    pkg-config \
    protobuf-c-compiler \
    protobuf-compiler \
    python3-minimal \
    python3-protobuf \
    uuid-dev \
    python3-yaml

# Copy the whole source tree and build CRIU plus the ZDTM test binaries at
# image build time, starting from a clean tree (mrproper).
COPY . /criu
WORKDIR /criu
RUN make mrproper && make -j $(nproc) && make -C test/zdtm -j $(nproc)

# Arguments given to `docker run <image> ...` are passed to zdtm.py.
ENTRYPOINT ["./test/zdtm.py"]

test/zdtm/static/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ TST_NOFILE := \
190190
unhashed_proc \
191191
cow00 \
192192
child_opened_proc \
193+
proc_task_comm \
193194
posix_timers \
194195
sigpending \
195196
sigaltstack \
@@ -606,6 +607,7 @@ socket_aio: LDLIBS += -lrt -pthread
606607
uptime_grow: LDLIBS += -lrt -pthread
607608
unlink_largefile: CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE
608609
inotify_system_nodel: CFLAGS += -DNO_DEL
610+
proc_task_comm: LDLIBS += -pthread
609611
pthread00: LDLIBS += -pthread
610612
pthread00-pac: CFLAGS += ${PAC_CFLAGS}
611613
pthread00-pac: LDLIBS += -pthread

test/zdtm/static/proc_task_comm.c

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/*
2+
* Test that CRIU can checkpoint/restore a process that has an open fd
3+
* referencing /proc/self/task/<tid>/comm where the thread <tid> has
4+
* already exited. This reproduces a bug seen with Rust applications
5+
* using Prometheus metrics libraries that hold open fds to per-thread
6+
* /proc/self/task/<tid>/stat files.
7+
*/
8+
9+
#include <stdio.h>
10+
#include <stdlib.h>
11+
#include <unistd.h>
12+
#include <fcntl.h>
13+
#include <string.h>
14+
#include <errno.h>
15+
#include <pthread.h>
16+
#include <syscall.h>
17+
18+
#include "zdtmtst.h"
19+
20+
const char *test_doc = "Check C/R of fd referencing /proc/self/task/<dead_tid>/comm";
21+
const char *test_author = "CRIU developers";
22+
23+
static int thread_fd = -1;
24+
static pid_t thread_tid;
25+
26+
static void *thread_fn(void *arg)
27+
{
28+
char path[128];
29+
30+
thread_tid = syscall(__NR_gettid);
31+
32+
/*
33+
* Open our own /proc/self/task/<tid>/comm. The kernel resolves
34+
* /proc/self to /proc/<pid>, so the stored path will be
35+
* /proc/<pid>/task/<tid>/comm with numeric PID and TID.
36+
*/
37+
snprintf(path, sizeof(path), "/proc/self/task/%d/comm", thread_tid);
38+
thread_fd = open(path, O_RDONLY);
39+
if (thread_fd < 0) {
40+
pr_perror("Failed to open %s in thread", path);
41+
return (void *)1;
42+
}
43+
44+
test_msg("Thread %d opened %s as fd %d\n", thread_tid, path, thread_fd);
45+
return NULL;
46+
}
47+
48+
int main(int argc, char **argv)
49+
{
50+
pthread_t th;
51+
void *retval;
52+
int ret;
53+
54+
test_init(argc, argv);
55+
56+
/* Create a thread that opens its own /proc/self/task/<tid>/comm */
57+
ret = pthread_create(&th, NULL, thread_fn, NULL);
58+
if (ret) {
59+
pr_perror("pthread_create failed");
60+
return 1;
61+
}
62+
63+
/* Wait for the thread to finish — it has opened the fd and exited */
64+
ret = pthread_join(th, &retval);
65+
if (ret) {
66+
pr_perror("pthread_join failed");
67+
return 1;
68+
}
69+
70+
if (retval != NULL) {
71+
fail("Thread failed to open proc file");
72+
return 1;
73+
}
74+
75+
if (thread_fd < 0) {
76+
fail("Thread did not produce a valid fd");
77+
return 1;
78+
}
79+
80+
/*
81+
* At this point:
82+
* - thread_fd points to /proc/<pid>/task/<dead_tid>/comm
83+
* - The thread with <dead_tid> has exited
84+
* - The fd is still valid (held open by the process)
85+
*
86+
* CRIU will dump this fd with the path stored as-is. On restore,
87+
* the dead thread's TID won't exist, so openat() will fail with
88+
* ENOENT unless CRIU handles this case.
89+
*/
90+
test_msg("Thread %d has exited, fd %d still open\n",
91+
thread_tid, thread_fd);
92+
93+
test_daemon();
94+
test_waitsig();
95+
96+
/*
97+
* After restore, verify the fd is still valid.
98+
* We only check F_GETFD — the thread is dead, so the file content
99+
* may not be meaningful, but the fd must be valid.
100+
*/
101+
ret = fcntl(thread_fd, F_GETFD);
102+
if (ret < 0) {
103+
fail("fd %d is not valid after restore: %s",
104+
thread_fd, strerror(errno));
105+
close(thread_fd);
106+
return 1;
107+
}
108+
109+
close(thread_fd);
110+
pass();
111+
return 0;
112+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{'flavor': 'ns uns'}

0 commit comments

Comments
 (0)