Skip to content

Commit a27acde

Browse files
authored
Merge pull request #35 from castai/filipe/LIVE-600-aptos
fix: aptos thread with a ghost file
2 parents 710e9a7 + fa36672 commit a27acde

File tree

10 files changed

+559
-7
lines changed

10 files changed

+559
-7
lines changed

Makefile

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,24 @@ docker-test:
421421
./test/zdtm.py run -a --keep-going --ignore-taint
422422
.PHONY: docker-test
423423

424+
#
425+
# CastAI custom targets
426+
#
427+
# castai-test-build: build the Docker image with CRIU compiled.
428+
# Only needed when CRIU source (not test) code changes.
429+
# castai-test: run the full ZDTM test suite inside Docker.
430+
# Volume-mounts test/ so test code changes take effect immediately
431+
# without rebuilding the image (zdtm.py compiles tests on the fly).
432+
castai-test-build:
433+
docker build -t criu-test -f test/Dockerfile .
434+
.PHONY: castai-test-build
435+
436+
castai-test: castai-test-build
437+
docker run --rm --privileged --cgroupns=host -v /lib/modules:/lib/modules \
438+
-v $(CURDIR)/test:/criu/test \
439+
criu-test run -a -p 4 --keep-going --ignore-taint
440+
.PHONY: castai-test
441+
424442
help:
425443
@echo ' Targets:'
426444
@echo ' all - Build all [*] targets'
@@ -439,6 +457,7 @@ help:
439457
@echo ' test - Run zdtm test-suite'
440458
@echo ' gcov - Make code coverage report'
441459
@echo ' unittest - Run unit tests'
460+
@echo ' castai-test - Build and run all ZDTM tests in Docker'
442461
@echo ' lint - Run code linters'
443462
@echo ' indent - Indent C code'
444463
@echo ' amdgpu_plugin - Make AMD GPU plugin'

criu/cr-restore.c

Lines changed: 77 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -272,16 +272,19 @@ static int crtools_prepare_shared(void)
272272
*/
273273

274274
static struct collect_image_info *cinfos[] = {
275-
&file_locks_cinfo, &pipe_data_cinfo, &fifo_data_cinfo, &sk_queues_cinfo,
275+
&file_locks_cinfo,
276+
&pipe_data_cinfo,
277+
&fifo_data_cinfo,
278+
&sk_queues_cinfo,
276279
#ifdef CONFIG_HAS_LIBBPF
277280
&bpfmap_data_cinfo,
278281
#endif
279282
};
280283

281284
static struct collect_image_info *cinfos_files[] = {
282-
&unix_sk_cinfo, &fifo_cinfo, &pipe_cinfo, &nsfile_cinfo, &packet_sk_cinfo,
283-
&netlink_sk_cinfo, &eventfd_cinfo, &epoll_cinfo, &epoll_tfd_cinfo, &signalfd_cinfo,
284-
&tunfile_cinfo, &timerfd_cinfo, &inotify_cinfo, &inotify_mark_cinfo, &fanotify_cinfo,
285+
&unix_sk_cinfo, &fifo_cinfo, &pipe_cinfo, &nsfile_cinfo, &packet_sk_cinfo,
286+
&netlink_sk_cinfo, &eventfd_cinfo, &epoll_cinfo, &epoll_tfd_cinfo, &signalfd_cinfo,
287+
&tunfile_cinfo, &timerfd_cinfo, &inotify_cinfo, &inotify_mark_cinfo, &fanotify_cinfo,
285288
&fanotify_mark_cinfo, &ext_file_cinfo, &memfd_cinfo, &pidfd_cinfo
286289
};
287290

@@ -500,6 +503,65 @@ static int collect_inotify_fds(struct task_restore_args *ta)
500503
return 0;
501504
}
502505

506+
static int collect_deferred_proc_fds(struct task_restore_args *ta)
507+
{
508+
struct list_head *list = &rsti(current)->fds;
509+
struct fdt *fdt = rsti(current)->fdt;
510+
struct fdinfo_list_entry *fle;
511+
512+
/* Only the fdt owner restores fds */
513+
if (fdt && fdt->pid != vpid(current))
514+
return 0;
515+
516+
ta->deferred_fds = (struct deferred_proc_fd *)rst_mem_align_cpos(RM_PRIVATE);
517+
ta->deferred_fds_n = 0;
518+
519+
list_for_each_entry(fle, list, ps_list) {
520+
struct file_desc *d = fle->desc;
521+
struct reg_file_info *rfi;
522+
struct deferred_proc_fd *df;
523+
char *orig_path;
524+
525+
if (d->ops->type != FD_TYPES__REG)
526+
continue;
527+
528+
rfi = container_of(d, struct reg_file_info, d);
529+
if (!rfi->deferred_thread_fd)
530+
continue;
531+
532+
df = rst_mem_alloc(sizeof(*df), RM_PRIVATE);
533+
if (!df)
534+
return -1;
535+
536+
orig_path = rfi->orig_path;
537+
538+
/*
539+
* orig_path is "proc/<pid>/task/<tid>/...", strip "proc/"
540+
* prefix to get a path relative to /proc for
541+
* sys_openat(proc_fd, ...).
542+
*/
543+
if (strncmp(orig_path, "proc/", 5) == 0)
544+
orig_path += 5;
545+
546+
if (strlen(orig_path) >= sizeof(df->path)) {
547+
pr_err("Deferred proc path too long: %s\n", orig_path);
548+
return -1;
549+
}
550+
551+
df->target_fd = fle->fe->fd;
552+
df->flags = rfi->rfe->flags;
553+
df->pos = rfi->rfe->pos;
554+
strncpy(df->path, orig_path, sizeof(df->path) - 1);
555+
df->path[sizeof(df->path) - 1] = '\0';
556+
557+
ta->deferred_fds_n++;
558+
pr_info("Collected deferred proc fd %d -> %s\n",
559+
df->target_fd, df->path);
560+
}
561+
562+
return 0;
563+
}
564+
503565
static int open_core(int pid, CoreEntry **pcore)
504566
{
505567
int ret;
@@ -676,6 +738,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
676738
if (collect_inotify_fds(ta) < 0)
677739
return -1;
678740

741+
if (collect_deferred_proc_fds(ta) < 0)
742+
return -1;
743+
679744
if (prepare_proc_misc(pid, core->tc, ta))
680745
return -1;
681746

@@ -1709,7 +1774,10 @@ static int restore_task_with_children(void *_arg)
17091774
}
17101775

17111776
int __attribute((weak)) arch_ptrace_restore(int pid, struct pstree_item *item);
1712-
int arch_ptrace_restore(int pid, struct pstree_item *item) { return 0; }
1777+
int arch_ptrace_restore(int pid, struct pstree_item *item)
1778+
{
1779+
return 0;
1780+
}
17131781

17141782
static int attach_to_tasks(bool root_seized)
17151783
{
@@ -3133,7 +3201,9 @@ static void *restorer_munmap_addr(CoreEntry *core, void *restorer_blob)
31333201
}
31343202

31353203
void arch_rsti_init(struct pstree_item *p) __attribute__((weak));
3136-
void arch_rsti_init(struct pstree_item *p) {}
3204+
void arch_rsti_init(struct pstree_item *p)
3205+
{
3206+
}
31373207

31383208
static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, unsigned long alen, CoreEntry *core)
31393209
{
@@ -3329,6 +3399,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
33293399
RST_MEM_FIXUP_PPTR(task_args->zombies);
33303400
RST_MEM_FIXUP_PPTR(task_args->vma_ios);
33313401
RST_MEM_FIXUP_PPTR(task_args->inotify_fds);
3402+
RST_MEM_FIXUP_PPTR(task_args->deferred_fds);
33323403

33333404
task_args->compatible_mode = core_is_compat(core);
33343405
/*

criu/files-reg.c

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2552,8 +2552,28 @@ int collect_filemap(struct vma_area *vma)
25522552

25532553
static int open_fe_fd(struct file_desc *fd, int *new_fd)
25542554
{
2555+
struct reg_file_info *rfi;
25552556
int tmp;
25562557

2558+
rfi = container_of(fd, struct reg_file_info, d);
2559+
2560+
if (rfi->deferred_thread_fd) {
2561+
/*
2562+
* This is a live-thread /proc/<pid>/task/<tid>/... fd.
2563+
* The thread doesn't exist yet (created later by the
2564+
* restorer via clone()), so open /dev/null as a
2565+
* placeholder. The restorer will reopen the correct
2566+
* path after threads are created.
2567+
*/
2568+
tmp = open("/dev/null", rfi->rfe->flags & O_ACCMODE);
2569+
if (tmp < 0) {
2570+
pr_perror("Can't open /dev/null for deferred proc fd");
2571+
return -1;
2572+
}
2573+
*new_fd = tmp;
2574+
return 0;
2575+
}
2576+
25572577
tmp = open_path(fd, do_open_reg, NULL);
25582578
if (tmp < 0)
25592579
return -1;
@@ -2596,6 +2616,121 @@ struct file_desc *try_collect_special_file(u32 id, int optional)
25962616
return fdesc;
25972617
}
25982618

2619+
/*
2620+
* On restore, fix up paths like proc/<pid>/task/<tid>/... where
2621+
* tid != pid. These paths can't be opened at prepare_fds() time
2622+
* because non-leader threads don't exist yet -- they are created
2623+
* later by the restorer blob via clone(). Two cases:
2624+
*
2625+
* 1) Dead thread (tid not in pstree): create a TASK_HELPER process
2626+
* with vPID=tid and rewrite the path to proc/<tid>/task/<tid>/...
2627+
* so it resolves through the helper's /proc entry.
2628+
*
2629+
* 2) Live thread (tid in pstree as TASK_THREAD): rewrite the path
2630+
* to proc/<pid>/task/<pid>/... so it points to the thread-group
2631+
* leader, which does exist at prepare_fds() time.
2632+
*/
2633+
static int fixup_thread_proc_path(struct reg_file_info *rfi)
2634+
{
2635+
char *path = rfi->path;
2636+
char *task_str, *tid_str, *tid_end;
2637+
pid_t pid, tid;
2638+
char *new_path;
2639+
struct pid *tid_node;
2640+
2641+
/*
2642+
* rfi->path looks like "proc/<pid>/task/<tid>/..." (leading
2643+
* slash already stripped). We only care about procfs paths.
2644+
*/
2645+
if (strncmp(path, "proc/", 5))
2646+
return 0;
2647+
2648+
/* Parse the pid: "proc/<pid>/task/..." */
2649+
pid = strtol(path + 5, &task_str, 10);
2650+
if (pid == 0 || *task_str != '/')
2651+
return 0;
2652+
2653+
/* Check for "/task/<tid>" */
2654+
if (strncmp(task_str, "/task/", 6))
2655+
return 0;
2656+
2657+
tid_str = task_str + 6;
2658+
tid = strtol(tid_str, &tid_end, 10);
2659+
if (tid == 0 || (*tid_end != '/' && *tid_end != '\0'))
2660+
return 0;
2661+
2662+
/* If pid == tid the path already refers to the leader */
2663+
if (pid == tid)
2664+
return 0;
2665+
2666+
tid_node = pstree_pid_by_virt(tid);
2667+
2668+
/*
2669+
* If the TID belongs to a process (not a thread), the path
2670+
* will resolve on restore without any fixup.
2671+
*/
2672+
if (tid_node && tid_node->state != TASK_THREAD)
2673+
return 0;
2674+
2675+
if (!tid_node) {
2676+
/*
2677+
* Dead thread: tid is not in the process tree.
2678+
* Create a TASK_HELPER with vPID=tid so that
2679+
* /proc/<tid>/task/<tid>/... resolves on restore.
2680+
*/
2681+
struct pstree_item *helper;
2682+
2683+
helper = lookup_create_item(tid);
2684+
if (!helper)
2685+
return -1;
2686+
2687+
if (helper->pid->state == TASK_UNDEF) {
2688+
helper->sid = root_item->sid;
2689+
helper->pgid = root_item->pgid;
2690+
helper->pid->ns[0].virt = tid;
2691+
helper->parent = root_item;
2692+
helper->ids = root_item->ids;
2693+
if (init_pstree_helper(helper)) {
2694+
pr_err("Can't init helper for dead thread %d\n", tid);
2695+
return -1;
2696+
}
2697+
list_add_tail(&helper->sibling, &root_item->children);
2698+
pr_info("Added a helper for restoring dead thread /proc/%d/task/%d\n",
2699+
pid, tid);
2700+
}
2701+
2702+
/*
2703+
* "proc/" + tid + "/task/" + tid + tid_end + '\0'
2704+
* Use PATH_MAX as a safe upper bound.
2705+
*/
2706+
new_path = xmalloc(PATH_MAX);
2707+
if (!new_path)
2708+
return -1;
2709+
2710+
snprintf(new_path, PATH_MAX, "proc/%d/task/%d%s",
2711+
tid, tid, tid_end);
2712+
pr_info("Rewrote dead thread path: %s -> %s\n",
2713+
rfi->path, new_path);
2714+
} else {
2715+
/*
2716+
* Live thread: the thread exists in the pstree but
2717+
* won't be created until the restorer blob runs
2718+
* clone(), which is after prepare_fds(). Mark this
2719+
* file as deferred -- open_fe_fd() will open /dev/null
2720+
* as a placeholder, and the restorer will reopen the
2721+
* correct path after threads exist.
2722+
*/
2723+
rfi->deferred_thread_fd = true;
2724+
rfi->orig_path = rfi->path;
2725+
pr_info("Deferred live thread proc path: %s\n", rfi->path);
2726+
return 0;
2727+
}
2728+
2729+
rfi->path = new_path;
2730+
2731+
return 0;
2732+
}
2733+
25992734
static int collect_one_regfile(void *o, ProtobufCMessage *base, struct cr_img *i)
26002735
{
26012736
struct reg_file_info *rfi = o;
@@ -2609,6 +2744,11 @@ static int collect_one_regfile(void *o, ProtobufCMessage *base, struct cr_img *i
26092744
rfi->path = rfi->rfe->name + 1;
26102745
rfi->remap = NULL;
26112746
rfi->size_mode_checked = false;
2747+
rfi->deferred_thread_fd = false;
2748+
rfi->orig_path = NULL;
2749+
2750+
if (fixup_thread_proc_path(rfi))
2751+
return -1;
26122752

26132753
pr_info("Collected [%s] ID %#x\n", rfi->path, rfi->rfe->id);
26142754
return file_desc_add(&rfi->d, rfi->rfe->id, &reg_desc_ops);

criu/include/files-reg.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@ struct reg_file_info {
2424
struct file_remap *remap;
2525
bool size_mode_checked;
2626
bool is_dir;
27+
bool deferred_thread_fd; /* placeholder for live thread proc fd */
2728
char *path;
29+
char *orig_path; /* original proc path for deferred reopen */
2830
};
2931

3032
extern int open_reg_by_id(u32 id);

criu/include/restorer.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,13 @@ struct restore_vma_io {
141141

142142
#define RIO_SIZE(niovs) (sizeof(struct restore_vma_io) + (niovs) * sizeof(struct iovec))
143143

144+
/*
 * Description of one /proc/<pid>/task/<tid>/... descriptor whose real
 * open is postponed until the threads exist (see the deferred_fds array
 * in struct task_restore_args).
 */
struct deferred_proc_fd {
	/* Descriptor number the reopened file has to end up on (dup2 target). */
	int target_fd;

	/* Flags to open the file with (O_RDONLY etc). */
	int flags;

	/* File position to restore after the reopen. */
	off_t pos;

	/*
	 * NUL-terminated path relative to /proc,
	 * e.g. "1/task/7/stat".
	 */
	char path[128];
};
150+
144151
struct task_restore_args {
145152
struct thread_restore_args *t; /* thread group leader */
146153

@@ -196,6 +203,9 @@ struct task_restore_args {
196203
int *inotify_fds; /* fds to cleanup inotify events at CR_STATE_RESTORE_SIGCHLD stage */
197204
unsigned int inotify_fds_n;
198205

206+
struct deferred_proc_fd *deferred_fds; /* live-thread proc fds to reopen after clone() */
207+
unsigned int deferred_fds_n;
208+
199209
/* * * * * * * * * * * * * * * * * * * * */
200210

201211
unsigned long task_size;

0 commit comments

Comments
 (0)