From 1affc20487885156452914de07aab6385cd94aa7 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Mon, 10 Feb 2025 14:20:37 +0000 Subject: [PATCH 01/11] i#1973: musl: Scan stack for kernel arguments when used as library Musl doesn't pass any arguments to constructors. To obtain environment variables for initialization of DynamoRIO, we scan the stack and search for specific patterns in the auxiliary vector passed by the kernel, then walk back towards the top and find the environment variables. We choose to match AT_EXECFN, which has been passed unconditionally by the kernel since 2012. Its value should be a valid address, providing an extra check. This makes using DynamoRIO as a shared library possible on musl and thus fixes several testcases. The logic could be enabled as a fallback on older Android platforms as well. Issue: #1973 --- core/unix/os.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/core/unix/os.c b/core/unix/os.c index 807def9b4f..ee5a834a56 100644 --- a/core/unix/os.c +++ b/core/unix/os.c @@ -788,6 +788,82 @@ static init_fn_t #else /* If we're a normal shared object, then we override _init. */ + +# if defined(MUSL) +# define EFAULT 14 +# define AT_EXECFN 31 +static int +check_address_readable(void *addr) +{ + return dynamorio_syscall(SYS_rt_sigprocmask, ~0L, addr, NULL, + sizeof(kernel_sigset_t)) != EFAULT; +} + +/* When entering the entry point, the stack layout looks like + * sp => argc + * argv + * NULL (end of argv) + * envp + * NULL (end of envp) + * auxv + * search_auxvector() walks towards the higher address and locate one of the + * auxvector entry, then walk backwards and find the beginning of auxvector. 
*/ +static void * +search_auxvector(void *sp) +{ + /* XXX: Check whether 64 * PAGE_SIZE is an appropriate limit */ + for (size_t offset = 0; offset < PAGE_SIZE * 64; offset += sizeof(ulong)) { + ELF_AUXV_TYPE *p = sp + offset; + + if (((uintptr_t)(&p->a_un) & (PAGE_SIZE - 1)) == 0 && + !check_address_readable(&p->a_un)) + return NULL; + + /* Check for AT_EXECFN entry in the auxvector, which contains pathname + * of the program and should be a readable address. */ + if (p->a_type == AT_EXECFN && check_address_readable((void *)p->a_un.a_val)) { + for (; (void *)p > sp; p--) { + /* The maximum key in auxvector is much smaller than 0x400. + * This assumes envp contains much higher addresses. An auxvector + * entry with zero as key indicates the end, thus the only case + * that it's encountered when searching towards auxvector's + * start is an empty envp. */ + if ((p->a_type == 0 || p->a_type >= 0x400) && p->a_un.a_val == 0) + return p + 1; + } + return NULL; /* shouldn't reach here */ + } + } + + return NULL; +} + +static void +search_kernel_args_on_stack(int *argc, char ***argv, char ***envp) +{ + ulong *sp; + GET_STACK_PTR(sp); + + ulong *auxv = search_auxvector(sp); + + ASSERT_MESSAGE(CHKLVL_ASSERTS, "failed to find auxv", auxv != NULL); + + ulong *p = &auxv[-2]; + for (; p[-1] && &p[-1] > sp; p--) + ; + + ASSERT_MESSAGE(CHKLVL_ASSERTS, "failed to find envp", p != sp); + + *envp = (char **)p; + + /* XXX: It's hard to determine the start of argv b/c argc locates immediately + * before it. Luckily, our_init only makes use of envp. argc and argv are + * zeroed. */ + *argc = 0; + *argv = NULL; +} +# endif + INITIALIZER_ATTRIBUTES int _init(int argc, char **argv, char **envp) { @@ -803,6 +879,11 @@ _init(int argc, char **argv, char **envp) envp = NULL; } ASSERT_MESSAGE(CHKLVL_ASSERTS, "failed to find envp", envp != NULL); +# endif +# ifdef MUSL + /* i#1973: musl passes nothing to library init routines. We scan the stack + * to find the arguments passed by kernel. 
*/ + search_kernel_args_on_stack(&argc, &argv, &envp); # endif return our_init(argc, argv, envp); } From a5ce285f7d2b9dae147b1223b4e80be43dc5ce42 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Tue, 20 May 2025 09:14:58 +0000 Subject: [PATCH 02/11] Drop unnecessary defines --- core/unix/os.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/unix/os.c b/core/unix/os.c index a8eb320b7f..5f50056114 100644 --- a/core/unix/os.c +++ b/core/unix/os.c @@ -183,6 +183,7 @@ char **our_environ; #include "decode_fast.h" /* decode_cti: maybe os_handle_mov_seg should be ifdef X86? */ #include +#include #include #include #include @@ -791,8 +792,6 @@ static init_fn_t */ # if defined(MUSL) -# define EFAULT 14 -# define AT_EXECFN 31 static int check_address_readable(void *addr) { From 6d6c523f4076e5cb602e5fdfcb41fb0df1d4d169 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Tue, 20 May 2025 09:19:09 +0000 Subject: [PATCH 03/11] Improve description of search_auxvector --- core/unix/os.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/unix/os.c b/core/unix/os.c index 5f50056114..39e920b7ad 100644 --- a/core/unix/os.c +++ b/core/unix/os.c @@ -806,6 +806,10 @@ check_address_readable(void *addr) * envp * NULL (end of envp) * auxv + * + * But the stack pointer has gone much farther towards the low-end of address + * space, so it's hard to reliably locate the address of auxvector. + * * search_auxvector() walks towards the higher address and locate one of the * auxvector entry, then walk backwards and find the beginning of auxvector. 
*/ static void * From 10b7f0e1d6c75492a285aec61e04b000ce3c5b4d Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Tue, 20 May 2025 10:23:01 +0000 Subject: [PATCH 04/11] Use PR_GET_AUXV to obtain an exact pointer address as marker --- core/unix/os.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/core/unix/os.c b/core/unix/os.c index 39e920b7ad..443b1a524a 100644 --- a/core/unix/os.c +++ b/core/unix/os.c @@ -115,6 +115,8 @@ typedef struct rlimit64 rlimit64_t; #endif #ifdef LINUX +/* Definitions for PR_GET_AUXV */ +# include /* For clone and its flags, the manpage says to include sched.h with _GNU_SOURCE * defined. _GNU_SOURCE brings in unwanted extensions and causes name * conflicts. Instead, we include unix/sched.h which comes from the Linux @@ -799,6 +801,22 @@ check_address_readable(void *addr) sizeof(kernel_sigset_t)) != EFAULT; } +static ELF_WORD get_auxv_value(ELF_WORD type) +{ + /* Currently no architecture defines more than 60 auxvector keys */ + ELF_AUXV_TYPE auxv[64]; + + dynamorio_syscall(SYS_prctl, 5, PR_GET_AUXV, auxv, sizeof(auxv), 0, 0); + + for (int i = 0; auxv[i].a_type != AT_NULL; i++) { + if (auxv[i].a_type == type) + return auxv[i].a_un.a_val; + } + + ASSERT_NOT_REACHED(); + return 0; +} + /* When entering the entry point, the stack layout looks like * sp => argc * argv @@ -815,7 +833,8 @@ check_address_readable(void *addr) static void * search_auxvector(void *sp) { - /* XXX: Check whether 64 * PAGE_SIZE is an appropriate limit */ + ELF_WORD phdr = get_auxv_value(AT_PHDR); + for (size_t offset = 0; offset < PAGE_SIZE * 64; offset += sizeof(ulong)) { ELF_AUXV_TYPE *p = sp + offset; @@ -825,7 +844,7 @@ search_auxvector(void *sp) /* Check for AT_EXECFN entry in the auxvector, which contains pathname * of the program and should be a readable address. 
*/ - if (p->a_type == AT_EXECFN && check_address_readable((void *)p->a_un.a_val)) { + if (p->a_type == AT_PHDR && p->a_un.a_val == phdr) { for (; (void *)p > sp; p--) { /* The maximum key in auxvector is much smaller than 0x400. * This assumes envp contains much higher addresses. An auxvector From fe3e8778336d7070834fad3d949b5efc2bae354b Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Tue, 20 May 2025 10:33:15 +0000 Subject: [PATCH 05/11] Switch to d_r_safe_read --- core/unix/os.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/core/unix/os.c b/core/unix/os.c index 443b1a524a..d149605c53 100644 --- a/core/unix/os.c +++ b/core/unix/os.c @@ -794,13 +794,6 @@ static init_fn_t */ # if defined(MUSL) -static int -check_address_readable(void *addr) -{ - return dynamorio_syscall(SYS_rt_sigprocmask, ~0L, addr, NULL, - sizeof(kernel_sigset_t)) != EFAULT; -} - static ELF_WORD get_auxv_value(ELF_WORD type) { /* Currently no architecture defines more than 60 auxvector keys */ @@ -837,13 +830,11 @@ search_auxvector(void *sp) for (size_t offset = 0; offset < PAGE_SIZE * 64; offset += sizeof(ulong)) { ELF_AUXV_TYPE *p = sp + offset; + ELF_AUXV_TYPE entry; - if (((uintptr_t)(&p->a_un) & (PAGE_SIZE - 1)) == 0 && - !check_address_readable(&p->a_un)) - return NULL; + if (!d_r_safe_read(p, sizeof(ELF_AUXV_TYPE), &entry)) + return NULL; - /* Check for AT_EXECFN entry in the auxvector, which contains pathname - * of the program and should be a readable address. */ if (p->a_type == AT_PHDR && p->a_un.a_val == phdr) { for (; (void *)p > sp; p--) { /* The maximum key in auxvector is much smaller than 0x400. 
@@ -885,7 +876,7 @@ search_kernel_args_on_stack(int *argc, char ***argv, char ***envp) *argc = 0; *argv = NULL; } -# endif +# endif /* MUSL */ INITIALIZER_ATTRIBUTES int _init(int argc, char **argv, char **envp) From 6b1217409d33d835ea6c1989ff89b668dcb37a59 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Tue, 20 May 2025 10:52:59 +0000 Subject: [PATCH 06/11] Misc changes --- core/unix/os.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/core/unix/os.c b/core/unix/os.c index d149605c53..e30dc26a71 100644 --- a/core/unix/os.c +++ b/core/unix/os.c @@ -794,7 +794,8 @@ static init_fn_t */ # if defined(MUSL) -static ELF_WORD get_auxv_value(ELF_WORD type) +static ELF_WORD +get_auxv_value(ELF_WORD type) { /* Currently no architecture defines more than 60 auxvector keys */ ELF_AUXV_TYPE auxv[64]; @@ -821,8 +822,9 @@ static ELF_WORD get_auxv_value(ELF_WORD type) * But the stack pointer has gone much farther towards the low-end of address * space, so it's hard to reliably locate the address of auxvector. * - * search_auxvector() walks towards the higher address and locate one of the - * auxvector entry, then walk backwards and find the beginning of auxvector. */ + * search_auxvector() walks towards the higher address and locates one of the + * auxvector entries, then walks backwards and finds the beginning of auxvector. 
+ */ static void * search_auxvector(void *sp) { @@ -863,7 +865,7 @@ search_kernel_args_on_stack(int *argc, char ***argv, char ***envp) ASSERT_MESSAGE(CHKLVL_ASSERTS, "failed to find auxv", auxv != NULL); ulong *p = &auxv[-2]; - for (; p[-1] && &p[-1] > sp; p--) + for (; p[-1] != 0 && &p[-1] > sp; p--) ; ASSERT_MESSAGE(CHKLVL_ASSERTS, "failed to find envp", p != sp); From 8875f7d5078ccbc8b6d9b5e21b9cdc8ff92b27f3 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Tue, 20 May 2025 10:54:14 +0000 Subject: [PATCH 07/11] Style --- core/unix/os.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/unix/os.c b/core/unix/os.c index e30dc26a71..2586a9d358 100644 --- a/core/unix/os.c +++ b/core/unix/os.c @@ -804,7 +804,7 @@ get_auxv_value(ELF_WORD type) for (int i = 0; auxv[i].a_type != AT_NULL; i++) { if (auxv[i].a_type == type) - return auxv[i].a_un.a_val; + return auxv[i].a_un.a_val; } ASSERT_NOT_REACHED(); @@ -832,10 +832,10 @@ search_auxvector(void *sp) for (size_t offset = 0; offset < PAGE_SIZE * 64; offset += sizeof(ulong)) { ELF_AUXV_TYPE *p = sp + offset; - ELF_AUXV_TYPE entry; + ELF_AUXV_TYPE entry; if (!d_r_safe_read(p, sizeof(ELF_AUXV_TYPE), &entry)) - return NULL; + return NULL; if (p->a_type == AT_PHDR && p->a_un.a_val == phdr) { for (; (void *)p > sp; p--) { From fb03426597e0e9b6ea5a8d7c70fca1752d370c9d Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Tue, 20 May 2025 11:26:59 +0000 Subject: [PATCH 08/11] Update comment --- core/unix/os.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/core/unix/os.c b/core/unix/os.c index 2586a9d358..07871585f6 100644 --- a/core/unix/os.c +++ b/core/unix/os.c @@ -794,6 +794,12 @@ static init_fn_t */ # if defined(MUSL) +/* We rely on PR_GET_AUXV to obtain a copy of auxvector and take one of the + * pointer entries as marker when searching through the address space. + * + * TODO: PR_GET_AUXV is relatively new (introduced in Linux 6.4), implement a + * fallback when it's unavailable. 
+ */ static ELF_WORD get_auxv_value(ELF_WORD type) { @@ -812,6 +818,7 @@ get_auxv_value(ELF_WORD type) } /* When entering the entry point, the stack layout looks like + * * sp => argc * argv * NULL (end of argv) From f561bea39bb50add130fd8d2d8275b78036a102b Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Wed, 21 May 2025 04:25:02 +0000 Subject: [PATCH 09/11] Switch to look for AT_UID --- core/unix/os.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/core/unix/os.c b/core/unix/os.c index 07871585f6..205907a299 100644 --- a/core/unix/os.c +++ b/core/unix/os.c @@ -799,6 +799,11 @@ static init_fn_t * * TODO: PR_GET_AUXV is relatively new (introduced in Linux 6.4), implement a * fallback when it's unavailable. + * TODO i#7491: PR_GET_AUXV isn't correctly intercepted, thus executable-related + * auxvector entries cannot be used as target marker when searching, or + * applications linked to libdynamorio.so, like test client.gonative, will fail + * to execute when running under early injection. Switch to use a more-reliable + * address-related entry when the problem is resolved. */ static ELF_WORD get_auxv_value(ELF_WORD type) @@ -827,7 +832,7 @@ get_auxv_value(ELF_WORD type) * auxv * * But the stack pointer has gone much farther towards the low-end of address - * space, so it's hard to reliably locate the address of auxvector. + * space, thus it's hard to reliably locate the address of auxvector. * * search_auxvector() walks towards the higher address and locates one of the * auxvector entries, then walks backwards and finds the beginning of auxvector. 
@@ -835,7 +840,7 @@ get_auxv_value(ELF_WORD type) static void * search_auxvector(void *sp) { - ELF_WORD phdr = get_auxv_value(AT_PHDR); + ELF_WORD uid = get_auxv_value(AT_UID); for (size_t offset = 0; offset < PAGE_SIZE * 64; offset += sizeof(ulong)) { ELF_AUXV_TYPE *p = sp + offset; @@ -844,7 +849,7 @@ search_auxvector(void *sp) if (!d_r_safe_read(p, sizeof(ELF_AUXV_TYPE), &entry)) return NULL; - if (p->a_type == AT_PHDR && p->a_un.a_val == phdr) { + if (p->a_type == AT_UID && p->a_un.a_val == uid) { for (; (void *)p > sp; p--) { /* The maximum key in auxvector is much smaller than 0x400. * This assumes envp contains much higher addresses. An auxvector From 2d352c5323a605d31f9e701bf8138ada2d7ba71c Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Wed, 21 May 2025 04:52:30 +0000 Subject: [PATCH 10/11] Don't include elf.h on other platforms --- core/unix/os.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/unix/os.c b/core/unix/os.c index 205907a299..103e9b4cd8 100644 --- a/core/unix/os.c +++ b/core/unix/os.c @@ -184,8 +184,12 @@ char **our_environ; #include "decode_fast.h" /* decode_cti: maybe os_handle_mov_seg should be ifdef X86? */ -#include +/* For auxvector keys (AT_*) used by search_auxvector() */ +#ifdef MUSL #include +#endif + +#include #include #include #include From 6cc679a4812b6b473980c446ccd20ff98a6b3340 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Wed, 21 May 2025 05:01:06 +0000 Subject: [PATCH 11/11] Format --- core/unix/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/unix/os.c b/core/unix/os.c index 103e9b4cd8..a51f17f492 100644 --- a/core/unix/os.c +++ b/core/unix/os.c @@ -186,7 +186,7 @@ char **our_environ; /* For auxvector keys (AT_*) used by search_auxvector() */ #ifdef MUSL -#include +# include #endif #include