|
4 | 4 | #include "strvec.h"
|
5 | 5 | #include "trace2.h"
|
6 | 6 |
|
7 |
| -static void get_ancestry_names(struct strvec *names) |
| 7 | +/* |
| 8 | + * We need more complex parsing in stat_parent_pid() and |
| 9 | + * parse_proc_stat() below than a dumb fscanf(). That's because while |
| 10 | + * the statcomm field is surrounded by parentheses, the process itself |
| 11 | + * is free to insert any arbitrary byte sequence its its name. That |
| 12 | + * can include newlines, spaces, closing parentheses etc. |
| 13 | + * |
| 14 | + * See do_task_stat() in fs/proc/array.c in linux.git, this is in |
| 15 | + * contrast with the escaped version of the name found in |
| 16 | + * /proc/%d/status. |
| 17 | + * |
| 18 | + * So instead of using fscanf() we'll read N bytes from it, look for |
| 19 | + * the first "(", and then the last ")", anything in-between is our |
| 20 | + * process name. |
| 21 | + * |
| 22 | + * How much N do we need? On Linux /proc/sys/kernel/pid_max is 2^15 by |
| 23 | + * default, but it can be raised set to values of up to 2^22. So |
| 24 | + * that's 7 digits for a PID. We have 2 PIDs in the first four fields |
| 25 | + * we're interested in, so 2 * 7 = 14. |
| 26 | + * |
| 27 | + * We then have 3 spaces between those four values, and we'd like to |
| 28 | + * get to the space between the 4th and the 5th (the "pgrp" field) to |
| 29 | + * make sure we read the entire "ppid" field. So that brings us up to |
| 30 | + * 14 + 3 + 1 = 18. Add the two parentheses around the "comm" value |
| 31 | + * and it's 20. The "state" value itself is then one character (now at |
| 32 | + * 21). |
| 33 | + * |
| 34 | + * Finally the maximum length of the "comm" name itself is 15 |
| 35 | + * characters, e.g. a setting of "123456789abcdefg" will be truncated |
| 36 | + * to "123456789abcdef". See PR_SET_NAME in prctl(2). So all in all |
| 37 | + * we'd need to read 21 + 15 = 36 bytes. |
| 38 | + * |
| 39 | + * Let's just read 2^6 (64) instead for good measure. If PID_MAX ever |
| 40 | + * grows past 2^22 we'll be future-proof. We'll then anchor at the |
| 41 | + * last ")" we find to locate the parent PID. |
| 42 | + */ |
| 43 | +#define STAT_PARENT_PID_READ_N 64 |
| 44 | + |
| 45 | +static int parse_proc_stat(struct strbuf *sb, struct strbuf *name, |
| 46 | + int *statppid) |
8 | 47 | {
|
| 48 | + const char *comm_lhs = strchr(sb->buf, '('); |
| 49 | + const char *comm_rhs = strrchr(sb->buf, ')'); |
| 50 | + const char *ppid_lhs, *ppid_rhs; |
| 51 | + char *p; |
| 52 | + pid_t ppid; |
| 53 | + |
| 54 | + if (!comm_lhs || !comm_rhs) |
| 55 | + goto bad_kernel; |
| 56 | + |
| 57 | + /* |
| 58 | + * We're at the ")", that's followed by " X ", where X is a |
| 59 | + * single "state" character. So advance by 4 bytes. |
| 60 | + */ |
| 61 | + ppid_lhs = comm_rhs + 4; |
| 62 | + |
| 63 | + /* |
| 64 | + * Read until the space between the "ppid" and "pgrp" fields |
| 65 | + * to make sure we're anchored after the untruncated "ppid" |
| 66 | + * field.. |
| 67 | + */ |
| 68 | + ppid_rhs = strchr(ppid_lhs, ' '); |
| 69 | + if (!ppid_rhs) |
| 70 | + goto bad_kernel; |
| 71 | + |
| 72 | + ppid = strtol(ppid_lhs, &p, 10); |
| 73 | + if (ppid_rhs == p) { |
| 74 | + const char *comm = comm_lhs + 1; |
| 75 | + size_t commlen = comm_rhs - comm; |
| 76 | + |
| 77 | + strbuf_add(name, comm, commlen); |
| 78 | + *statppid = ppid; |
| 79 | + |
| 80 | + return 0; |
| 81 | + } |
| 82 | + |
| 83 | +bad_kernel: |
9 | 84 | /*
|
10 |
| - * NEEDSWORK: We could gather the entire pstree into an array to match |
11 |
| - * functionality with compat/win32/trace2_win32_process_info.c. |
12 |
| - * To do so, we may want to examine /proc/<pid>/stat. For now, just |
13 |
| - * gather the immediate parent name which is readily accessible from |
14 |
| - * /proc/$(getppid())/comm. |
| 85 | + * We were able to read our STAT_PARENT_PID_READ_N bytes from |
| 86 | + * /proc/%d/stat, but the content is bad. Broken kernel? |
| 87 | + * Should not happen, but handle it gracefully. |
15 | 88 | */
|
| 89 | + return -1; |
| 90 | +} |
| 91 | + |
| 92 | +static int stat_parent_pid(pid_t pid, struct strbuf *name, int *statppid) |
| 93 | +{ |
16 | 94 | struct strbuf procfs_path = STRBUF_INIT;
|
17 |
| - struct strbuf name = STRBUF_INIT; |
| 95 | + struct strbuf sb = STRBUF_INIT; |
| 96 | + FILE *fp; |
| 97 | + int ret = -1; |
18 | 98 |
|
19 | 99 | /* try to use procfs if it's present. */
|
20 |
| - strbuf_addf(&procfs_path, "/proc/%d/comm", getppid()); |
21 |
| - if (strbuf_read_file(&name, procfs_path.buf, 0)) { |
22 |
| - strbuf_release(&procfs_path); |
23 |
| - strbuf_trim_trailing_newline(&name); |
24 |
| - strvec_push(names, strbuf_detach(&name, NULL)); |
25 |
| - } |
| 100 | + strbuf_addf(&procfs_path, "/proc/%d/stat", pid); |
| 101 | + fp = fopen(procfs_path.buf, "r"); |
| 102 | + if (!fp) |
| 103 | + goto cleanup; |
| 104 | + |
| 105 | + /* |
| 106 | + * We could be more strict here and assert that we read at |
| 107 | + * least STAT_PARENT_PID_READ_N. My reading of procfs(5) is |
| 108 | + * that on any modern kernel (at least since 2.6.0 released in |
| 109 | + * 2003) even if all the mandatory numeric fields were zero'd |
| 110 | + * out we'd get at least 100 bytes, but let's just check that |
| 111 | + * we got anything at all and trust the parse_proc_stat() |
| 112 | + * function to handle its "Bad Kernel?" error checking. |
| 113 | + */ |
| 114 | + if (!strbuf_fread(&sb, STAT_PARENT_PID_READ_N, fp)) |
| 115 | + goto cleanup; |
| 116 | + if (parse_proc_stat(&sb, name, statppid) < 0) |
| 117 | + goto cleanup; |
| 118 | + |
| 119 | + ret = 0; |
| 120 | +cleanup: |
| 121 | + if (fp) |
| 122 | + fclose(fp); |
| 123 | + strbuf_release(&procfs_path); |
| 124 | + strbuf_release(&sb); |
| 125 | + |
| 126 | + return ret; |
| 127 | +} |
| 128 | + |
| 129 | +static void push_ancestry_name(struct strvec *names, pid_t pid) |
| 130 | +{ |
| 131 | + struct strbuf name = STRBUF_INIT; |
| 132 | + int ppid; |
| 133 | + |
| 134 | + if (stat_parent_pid(pid, &name, &ppid) < 0) |
| 135 | + goto cleanup; |
| 136 | + |
| 137 | + strvec_push(names, name.buf); |
| 138 | + |
| 139 | + /* |
| 140 | + * Both errors and reaching the end of the process chain are |
| 141 | + * reported as fields of 0 by proc(5) |
| 142 | + */ |
| 143 | + if (ppid) |
| 144 | + push_ancestry_name(names, ppid); |
| 145 | +cleanup: |
| 146 | + strbuf_release(&name); |
26 | 147 |
|
27 | 148 | return;
|
28 |
| - /* NEEDSWORK: add non-procfs-linux implementations here */ |
29 | 149 | }
|
30 | 150 |
|
31 | 151 | void trace2_collect_process_info(enum trace2_process_info_reason reason)
|
32 | 152 | {
|
33 |
| - if (!trace2_is_enabled()) |
34 |
| - return; |
| 153 | + struct strvec names = STRVEC_INIT; |
35 | 154 |
|
36 |
| - /* someday we may want to write something extra here, but not today */ |
37 |
| - if (reason == TRACE2_PROCESS_INFO_EXIT) |
| 155 | + if (!trace2_is_enabled()) |
38 | 156 | return;
|
39 | 157 |
|
40 |
| - if (reason == TRACE2_PROCESS_INFO_STARTUP) { |
| 158 | + switch (reason) { |
| 159 | + case TRACE2_PROCESS_INFO_EXIT: |
41 | 160 | /*
|
42 |
| - * NEEDSWORK: we could do the entire ptree in an array instead, |
43 |
| - * see compat/win32/trace2_win32_process_info.c. |
| 161 | + * The Windows version of this calls its |
| 162 | + * get_peak_memory_info() here. We may want to insert |
| 163 | + * similar process-end statistics here in the future. |
44 | 164 | */
|
45 |
| - struct strvec names = STRVEC_INIT; |
46 |
| - |
47 |
| - get_ancestry_names(&names); |
| 165 | + break; |
| 166 | + case TRACE2_PROCESS_INFO_STARTUP: |
| 167 | + push_ancestry_name(&names, getppid()); |
48 | 168 |
|
49 | 169 | if (names.nr)
|
50 | 170 | trace2_cmd_ancestry(names.v);
|
51 | 171 | strvec_clear(&names);
|
| 172 | + break; |
52 | 173 | }
|
53 | 174 |
|
54 | 175 | return;
|
|
0 commit comments