Skip to content

Commit 04c496c

Browse files
committed
[CommandLine][Linux] Don't read the command line from /proc/self/cmdline.
Instead of reading from `/proc/self/cmdline`, take advantage of the fact that the initial stack layout is ABI specified, and that we already have a pointer into it (`environ`). This lets us walk up the stack until we find `argc`, at which point we also know where `argv` is. We do this from a static initializer because a `setenv()` or `putenv()` can change `environ` (if you add a new environment variable), and it's even permissible to just outright change `environ` yourself too. It seems reasonable to suggest to people that they shouldn't be doing those things from a static initializer, and as long as they don't, they won't run before we've had a chance to find `argv`. Just in case someone _does_ do this, we also check that `environ` points into the stack. If it doesn't, they won't get any arguments, so if that happens, that's a clue that they're messing with `environ` too early. This works around a problem (#69658) with Docker Desktop 4.25.0 and Rosetta, wherein we end up with an extra argument visible in `/proc/self/cmdline`, and also avoids allocating memory for the command line arguments. rdar://117963394
1 parent 1e09b22 commit 04c496c

File tree

1 file changed

+145
-1
lines changed

1 file changed

+145
-1
lines changed

stdlib/public/CommandLineSupport/CommandLine.cpp

Lines changed: 145 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,151 @@ static char **swift::getUnsafeArgvArgc(int *outArgLen) {
172172

173173
template <typename F>
174174
static void swift::enumerateUnsafeArgv(const F& body) { }
175-
#elif defined(__linux__) || defined(__CYGWIN__)
175+
#elif defined(__linux__)
176+
// On Linux, there is no easy way to get the argument vector pointer outside
177+
// of the main() function. However, the ABI specifications dictate the layout
178+
// of the process's initial stack, which looks something like:
179+
//
180+
// stack top ----> ┌────────────────────────┐
181+
// │ Unspecified │
182+
// ┊ ┊
183+
// ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
184+
// │ Information block │
185+
// │ (argument strings, │
186+
// │ environment strings, │
187+
// │ auxiliary information) │
188+
// ┊ ┊
189+
// ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
190+
// │ Unspecified │
191+
// ┊ ┊
192+
// ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
193+
// │ NULL │
194+
// ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
195+
// │ Auxiliary Vector │
196+
// ┊ ┊
197+
// ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
198+
// │ NULL │
199+
// ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
200+
// │ Environment Pointers │
201+
// ┊ ┊
202+
// environ ------> ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
203+
// │ NULL │
204+
// ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
205+
// │ Argument Pointers │
206+
// ┊ ┊
207+
// ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
208+
// │ Argument Count │
209+
// ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
210+
// ┊ ┊
211+
//
212+
// See https://gitlab.com/x86-psABIs/x86-64-ABI,
213+
// https://gitlab.com/x86-psABIs/i386-ABI
214+
//
215+
// The upshot is that if we can get hold of `environ` before anything has
216+
// had a chance to change it, we can find the `argv` array and also the
217+
// argument count, `argc`, by walking back up the stack.
218+
//
219+
// (Note that Linux uses this same layout for all platforms, not just x86-based
220+
// ones. It also has a fixed layout for the data at the top of the stack, but
221+
// we don't need to take advantage of that here and can stick to things that
222+
// are defined in the ABI specs.)
223+
224+
// `environ` is the anchor used below: at process start it points at the
// environment pointer array, which sits directly above argv's NULL terminator
// on the initial stack.
extern char **environ;

namespace {

/// Recovers `argc` and `argv` from the initial process stack.
///
/// The constructor looks up the memory mapping that holds the current stack,
/// checks that `environ` still points into it, and walks down from `environ`
/// to the argument count. On any failure `argv` stays `nullptr` and `argc`
/// stays 0.
struct ArgvGrabber {
  /// First element of the argument vector, or `nullptr` if it was not found.
  char **argv;
  /// Number of arguments; 0 if the argument vector was not found.
  int argc;

  ArgvGrabber();

private:
  /// Half-open address range `[base, top)`; `base == nullptr` means
  /// "not found".
  struct stack {
    void *base;
    void *top;

    stack() : base(nullptr), top(nullptr) {}
    stack(void *b, void *t) : base(b), top(t) {}
  };

  stack findStack();
  void findArgv(stack s);
};

/// Find the stack by looking at /proc/self/maps.
///
/// Returns the address range of the mapping that contains a local variable of
/// this function, extended upwards over any mappings that follow it without a
/// gap. Returns an empty `stack` if the file cannot be opened or no mapping
/// matches.
ArgvGrabber::stack ArgvGrabber::findStack(void) {
  FILE *maps = fopen("/proc/self/maps", "r");
  if (!maps)
    return stack();

  char line[256];
  // `line` is on the stack, so the mapping that contains it is the stack.
  //
  // Note that we can't look for [stack], because Rosetta and qemu
  // set up a separate stack for the emulated code.
  const uintptr_t probe = (uintptr_t)(void *)line;

  void *base = nullptr, *top = nullptr;
  bool found = false;
  bool atLineStart = true;

  while (fgets(line, sizeof(line), maps)) {
    // A record longer than the buffer arrives in several pieces; only the
    // first piece starts with the "lo-hi" address range, so remember whether
    // this read began a record and whether it finished one.
    const bool beginsRecord = atLineStart;
    atLineStart = false;
    for (const char *p = line; *p != '\0'; ++p) {
      if (*p == '\n') {
        atLineStart = true;
        break;
      }
    }
    if (!beginsRecord)
      continue;

    void *lo = nullptr, *hi = nullptr;
    if (sscanf(line, "%p-%p", &lo, &hi) != 2)
      continue;

    if (!found) {
      if (probe >= (uintptr_t)lo && probe < (uintptr_t)hi) {
        base = lo;
        top = hi;
        found = true;
      }
    } else if (lo == top) {
      // NOTE(review): the stack can apparently be reported as more than one
      // contiguous mapping; treat directly adjacent ranges as one region so
      // that `environ` (which lives above `line`) is still recognised.
      top = hi;
    } else {
      // Records are listed in ascending address order, so the first gap ends
      // the contiguous region.
      break;
    }
  }

  fclose(maps);

  if (!found)
    return stack();

  return stack(base, top);
}

/// Find argv by walking backwards from environ.
///
/// Does nothing (leaving `argv`/`argc` at their defaults) when `region` is
/// empty, when `environ` does not point into `region`, or when the word found
/// below the argument pointers does not equal the number of pointers walked.
void ArgvGrabber::findArgv(ArgvGrabber::stack region) {
  if (!region.base)
    return;

  // Compare addresses as integers; relational comparison of unrelated
  // pointers is not guaranteed to be meaningful.
  const uintptr_t lowest = (uintptr_t)region.base;
  const uintptr_t limit = (uintptr_t)region.top;

  // Check that environ points to the stack
  char **envp = environ;
  const uintptr_t envAddr = (uintptr_t)(void *)envp;
  if (envAddr < lowest || envAddr >= limit)
    return;

  // The slot just below envp is the NULL that terminates argv.
  char **const terminator = envp - 1;
  char **ptr = terminator;

  // Keep going back while we're seeing pointers (values greater than envp),
  // never stepping below the bottom of the region.
  while ((uintptr_t)(void *)(ptr - 1) > lowest) {
    --ptr;

    // The first thing less than envp must be the argc value
    const uintptr_t word = (uintptr_t)(void *)*ptr;
    if (word >= envAddr)
      continue;

    // Everything between this slot and the terminator is argv, so a genuine
    // argc must equal that number of slots. Anything else means the layout is
    // not what we expect; report no arguments rather than garbage.
    char **const candidate = ptr + 1;
    const uintptr_t slots = (uintptr_t)(terminator - candidate);
    if (word != slots)
      return;

    argc = (int)(intptr_t)word;
    argv = candidate;
    return;
  }
}

/// Captures argc/argv; intended to run as a static initializer, before
/// anything has had a chance to change `environ`.
ArgvGrabber::ArgvGrabber() : argv(nullptr), argc(0) {
  findArgv(findStack());
}

// The single instance whose constructor performs the capture at static
// initialization time.
ArgvGrabber argvGrabber;

} // namespace
311+
312+
static char **swift::getUnsafeArgvArgc(int *outArgLen) {
313+
*outArgLen = argvGrabber.argc;
314+
return argvGrabber.argv;
315+
}
316+
317+
template <typename F>
318+
static void swift::enumerateUnsafeArgv(const F& body) { }
319+
#elif defined(__CYGWIN__)
176320
static char **swift::getUnsafeArgvArgc(int *outArgLen) {
  // Cygwin: no argument vector is returned from here, and `outArgLen` is not
  // written.
  (void)outArgLen;
  return nullptr;
}

0 commit comments

Comments
 (0)