|
| 1 | +/* |
| 2 | + * Test that CRIU can checkpoint/restore a process that has an open fd |
| 3 | + * referencing /proc/self/task/<tid>/comm where the thread <tid> has |
| 4 | + * already exited. This reproduces a bug seen with Rust applications |
| 5 | + * using Prometheus metrics libraries that hold open fds to per-thread |
| 6 | + * /proc/self/task/<tid>/stat files. |
| 7 | + */ |
| 8 | + |
| 9 | +#include <stdio.h> |
| 10 | +#include <stdlib.h> |
| 11 | +#include <unistd.h> |
| 12 | +#include <fcntl.h> |
| 13 | +#include <string.h> |
| 14 | +#include <errno.h> |
| 15 | +#include <pthread.h> |
| 16 | +#include <syscall.h> |
| 17 | + |
| 18 | +#include "zdtmtst.h" |
| 19 | + |
/*
 * State shared between the helper thread and main(). Both are written by
 * thread_fn() and read by main() only after the thread has been joined.
 */
static pid_t thread_tid;   /* TID of the helper thread, as returned by gettid */
static int thread_fd = -1; /* fd opened by the helper thread; -1 until opened */

/*
 * Description and author strings for this test. They are non-static, so
 * they are visible outside this file (presumably read by the test harness
 * declared in zdtmtst.h -- confirm there).
 */
const char *test_author = "CRIU developers";
const char *test_doc = "Check C/R of fd referencing /proc/self/task/<dead_tid>/comm";
| 25 | + |
| 26 | +static void *thread_fn(void *arg) |
| 27 | +{ |
| 28 | + char path[128]; |
| 29 | + |
| 30 | + thread_tid = syscall(__NR_gettid); |
| 31 | + |
| 32 | + /* |
| 33 | + * Open our own /proc/self/task/<tid>/comm. The kernel resolves |
| 34 | + * /proc/self to /proc/<pid>, so the stored path will be |
| 35 | + * /proc/<pid>/task/<tid>/comm with numeric PID and TID. |
| 36 | + */ |
| 37 | + snprintf(path, sizeof(path), "/proc/self/task/%d/comm", thread_tid); |
| 38 | + thread_fd = open(path, O_RDONLY); |
| 39 | + if (thread_fd < 0) { |
| 40 | + pr_perror("Failed to open %s in thread", path); |
| 41 | + return (void *)1; |
| 42 | + } |
| 43 | + |
| 44 | + test_msg("Thread %d opened %s as fd %d\n", thread_tid, path, thread_fd); |
| 45 | + return NULL; |
| 46 | +} |
| 47 | + |
| 48 | +int main(int argc, char **argv) |
| 49 | +{ |
| 50 | + pthread_t th; |
| 51 | + void *retval; |
| 52 | + int ret; |
| 53 | + |
| 54 | + test_init(argc, argv); |
| 55 | + |
| 56 | + /* Create a thread that opens its own /proc/self/task/<tid>/comm */ |
| 57 | + ret = pthread_create(&th, NULL, thread_fn, NULL); |
| 58 | + if (ret) { |
| 59 | + pr_perror("pthread_create failed"); |
| 60 | + return 1; |
| 61 | + } |
| 62 | + |
| 63 | + /* Wait for the thread to finish — it has opened the fd and exited */ |
| 64 | + ret = pthread_join(th, &retval); |
| 65 | + if (ret) { |
| 66 | + pr_perror("pthread_join failed"); |
| 67 | + return 1; |
| 68 | + } |
| 69 | + |
| 70 | + if (retval != NULL) { |
| 71 | + fail("Thread failed to open proc file"); |
| 72 | + return 1; |
| 73 | + } |
| 74 | + |
| 75 | + if (thread_fd < 0) { |
| 76 | + fail("Thread did not produce a valid fd"); |
| 77 | + return 1; |
| 78 | + } |
| 79 | + |
| 80 | + /* |
| 81 | + * At this point: |
| 82 | + * - thread_fd points to /proc/<pid>/task/<dead_tid>/comm |
| 83 | + * - The thread with <dead_tid> has exited |
| 84 | + * - The fd is still valid (held open by the process) |
| 85 | + * |
| 86 | + * CRIU will dump this fd with the path stored as-is. On restore, |
| 87 | + * the dead thread's TID won't exist, so openat() will fail with |
| 88 | + * ENOENT unless CRIU handles this case. |
| 89 | + */ |
| 90 | + test_msg("Thread %d has exited, fd %d still open\n", |
| 91 | + thread_tid, thread_fd); |
| 92 | + |
| 93 | + test_daemon(); |
| 94 | + test_waitsig(); |
| 95 | + |
| 96 | + /* |
| 97 | + * After restore, verify the fd is still valid. |
| 98 | + * We only check F_GETFD — the thread is dead, so the file content |
| 99 | + * may not be meaningful, but the fd must be valid. |
| 100 | + */ |
| 101 | + ret = fcntl(thread_fd, F_GETFD); |
| 102 | + if (ret < 0) { |
| 103 | + fail("fd %d is not valid after restore: %s", |
| 104 | + thread_fd, strerror(errno)); |
| 105 | + close(thread_fd); |
| 106 | + return 1; |
| 107 | + } |
| 108 | + |
| 109 | + close(thread_fd); |
| 110 | + pass(); |
| 111 | + return 0; |
| 112 | +} |
0 commit comments