Skip to content

Commit 585367a

Browse files
committed
Fix crashtracker on Alpine
1 parent e0e1029 commit 585367a

File tree

11 files changed

+145
-41
lines changed

11 files changed

+145
-41
lines changed

profiler/src/ProfilerEngine/Datadog.Linux.ApiWrapper/functions_to_wrap.c

Lines changed: 116 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -67,18 +67,25 @@ __thread unsigned long long functions_entered_counter = 0;
6767
// By calling fork(), the child process and the parent process will have their own address space,
6868
// which means that the child process won't be able to modify the parent process's variables.
6969
// We need a way to enable communication between the child and parent processes.
70-
// This is done by creating a shared memory region and use it as a flag to indicate that
71-
// the application is crashing.
72-
// This variable will be a pointer to that shared memory region.
70+
// This is done by creating a shared memory region and saving the crash data in it.
71+
// crashing_data_t is a struct that contains the following fields:
72+
// - is_app_crashing: flag indicating that the application is crashing.
73+
// - thread_context: the thread context of the crashing thread. It is used to unwind the crashing thread's callstack
74+
// while skipping the signal frame. Useful on Alpine.
75+
76+
typedef struct crashing_data_t {
77+
// Set to 1 by the execve wrapper once it decides to run the alternative crash handler;
// dd_inside_wrapped_functions() adds this value to its result.
int is_app_crashing;
78+
// Signal context recorded by dd_sigsegv_handler. The execve wrapper forwards its address
// (formatted in decimal) through the --dd-thread-context argument. NULL when none was recorded.
ucontext_t* thread_context;
79+
} crashing_data_t;
7380
__attribute__((visibility("hidden")))
74-
int* is_app_crashing = NULL;
81+
crashing_data_t* crash_data = NULL;
7582

7683
// this function is called by the profiler
7784
unsigned long long dd_inside_wrapped_functions()
7885
{
7986
int app_is_crashing = 0;
80-
if (is_app_crashing != NULL) {
81-
app_is_crashing = *is_app_crashing;
87+
if (crash_data != NULL) {
88+
app_is_crashing = crash_data->is_app_crashing;
8289
}
8390
return functions_entered_counter + app_is_crashing;
8491
}
@@ -472,6 +479,25 @@ int ShouldCallCustomCreatedump(const char* pathname, char* const argv[])
472479
return 0;
473480
}
474481

482+
// Writes `val` into `buf` as a NUL-terminated base-10 string.
// `buf` must have room for every digit of `val` plus the terminator
// (21 bytes are enough for a 64-bit unsigned long).
// NOTE(review): presumably hand-rolled to avoid calling into printf-style formatting
// while the process is crashing - confirm.
static void ptr_to_decimal(char* buf, unsigned long val) {
    // Count the digits first so that each one can be stored directly at its final position
    int digit_count = 1;
    for (unsigned long rest = val / 10; rest != 0; rest /= 10) {
        ++digit_count;
    }

    buf[digit_count] = '\0';

    // Fill from the least significant digit (rightmost) to the most significant one
    int pos = digit_count;
    do {
        buf[--pos] = (char)('0' + (val % 10));
        val /= 10;
    } while (val != 0);
}
500+
475501
int execve(const char* pathname, char* const argv[], char* const envp[])
476502
{
477503
check_init();
@@ -483,8 +509,11 @@ int execve(const char* pathname, char* const argv[], char* const envp[])
483509
return __real_execve(pathname, argv, envp);
484510
}
485511

486-
if (is_app_crashing != NULL) {
487-
*is_app_crashing = 1;
512+
ucontext_t* thread_context = NULL;
513+
514+
if (crash_data != NULL) {
515+
crash_data->is_app_crashing = 1;
516+
thread_context = crash_data->thread_context;
488517
}
489518
// Execute the alternative crash handler, and prepend "createdump" to the arguments
490519

@@ -493,7 +522,13 @@ int execve(const char* pathname, char* const argv[], char* const envp[])
493522
while (argv[argc++] != NULL);
494523

495524
// We add two arguments: the path to dd-dotnet, and "createdump"
496-
char** newArgv = malloc((argc + 2) * sizeof(char*));
525+
int newArgc = argc + 2;
526+
527+
if (thread_context != NULL) {
528+
newArgc += 2;
529+
}
530+
531+
char** newArgv = malloc((newArgc) * sizeof(char*));
497532

498533
// By convention, argv[0] contains the name of the executable
499534
// Insert createdump as the first actual argument
@@ -520,6 +555,14 @@ int execve(const char* pathname, char* const argv[], char* const envp[])
520555
newArgv[new_idx++] = argv[idx++];
521556
}
522557
}
558+
559+
char context_addr[21]; // up to 20 decimal digits (64-bit unsigned long) + null
560+
if (thread_context != NULL) {
561+
ptr_to_decimal(context_addr, (unsigned long)thread_context);
562+
newArgv[new_idx++] = "--dd-thread-context";
563+
newArgv[new_idx++] = context_addr;
564+
}
565+
523566
newArgv[new_idx] = NULL; // NULL terminate the array
524567

525568
size_t envp_count;
@@ -673,6 +716,66 @@ pid_t fork()
673716
}
674717
#endif
675718
#endif
719+
720+
typedef void (*sigsegv_handler_fn)(int signum, siginfo_t* info, void* context);
721+
static _Atomic sigsegv_handler_fn sigsegv_current_handler;
722+
static void dd_sigsegv_handler(int signum, siginfo_t* info, void* context)
723+
{
724+
if (crash_data != NULL) {
725+
crash_data->thread_context = (ucontext_t*)context;
726+
}
727+
sigsegv_handler_fn handler = sigsegv_current_handler;
728+
if (handler != NULL) {
729+
handler(signum, info, context);
730+
}
731+
}
732+
733+
static pthread_mutex_t sigaction_lock = PTHREAD_MUTEX_INITIALIZER;
734+
static int (*__real_sigaction)(int signum, const struct sigaction *_Nullable restrict act, struct sigaction *_Nullable restrict oldact) = NULL;
735+
int sigaction(int signum,
736+
const struct sigaction *_Nullable restrict act,
737+
struct sigaction *_Nullable restrict oldact)
738+
{
739+
check_init();
740+
741+
if (signum == SIGSEGV && act != NULL && ((act->sa_flags & SA_SIGINFO) == SA_SIGINFO))
742+
{
743+
struct sigaction new_act = *act;
744+
new_act.sa_sigaction = dd_sigsegv_handler;
745+
746+
pthread_mutex_lock(&sigaction_lock);
747+
void (*prev_handler)(int signum, siginfo_t* info, void* context) = sigsegv_current_handler;
748+
sigsegv_current_handler = act->sa_sigaction;
749+
int result = __real_sigaction(signum, &new_act, oldact);
750+
// lock to update the current act and fixup the old one
751+
if (oldact != NULL &&
752+
((oldact->sa_flags & SA_SIGINFO) == SA_SIGINFO) &&
753+
(oldact->sa_sigaction == dd_sigsegv_handler))
754+
{
755+
oldact->sa_sigaction = prev_handler;
756+
}
757+
pthread_mutex_unlock(&sigaction_lock);
758+
759+
return result;
760+
}
761+
762+
if (signum == SIGSEGV && act == NULL)
763+
{
764+
pthread_mutex_lock(&sigaction_lock);
765+
int result = __real_sigaction(signum, act, oldact);
766+
if (oldact != NULL &&
767+
((oldact->sa_flags & SA_SIGINFO) == SA_SIGINFO) &&
768+
(oldact->sa_sigaction == dd_sigsegv_handler))
769+
{
770+
oldact->sa_sigaction = sigsegv_current_handler;
771+
}
772+
pthread_mutex_unlock(&sigaction_lock);
773+
return result;
774+
}
775+
776+
return __real_sigaction(signum, act, oldact);
777+
}
778+
676779
static pthread_once_t once_control = PTHREAD_ONCE_INIT;
677780

678781
static void init()
@@ -682,6 +785,7 @@ static void init()
682785
__real_dlclose = __dd_dlsym(RTLD_NEXT, "dlclose");
683786
__real_dladdr = __dd_dlsym(RTLD_NEXT, "dladdr");
684787
__real_execve = __dd_dlsym(RTLD_NEXT, "execve");
788+
__real_sigaction = __dd_dlsym(RTLD_NEXT, "sigaction");
685789
#ifdef DD_ALPINE
686790
__real_pthread_create = __dd_dlsym(RTLD_NEXT, "pthread_create");
687791
__real_pthread_attr_init = __dd_dlsym(RTLD_NEXT, "pthread_attr_init");
@@ -691,10 +795,10 @@ static void init()
691795
#endif
692796
// if we failed at allocating memory for the shared variable
693797
// the parent process won't be notified that the app is crashing.
694-
is_app_crashing = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
798+
crash_data = mmap(NULL, sizeof(crashing_data_t), PROT_READ | PROT_WRITE,
695799
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
696-
if (is_app_crashing != MAP_FAILED) {
697-
*is_app_crashing = 0; // Initialize flag
800+
if (crash_data != MAP_FAILED) {
801+
memset(crash_data, 0, sizeof(crashing_data_t));
698802
}
699803
}
700804

profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/CrashReportingLinux.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,11 +139,11 @@ std::vector<ModuleInfo> CrashReportingLinux::GetModules()
139139
return modules;
140140
}
141141

142-
std::vector<StackFrame> CrashReportingLinux::GetThreadFrames(int32_t tid, ResolveManagedCallstack resolveManagedCallstack, void* context)
142+
std::vector<StackFrame> CrashReportingLinux::GetThreadFrames(int32_t tid, void* threadContext, ResolveManagedCallstack resolveManagedCallstack, void* context)
143143
{
144144
std::vector<StackFrame> frames;
145145

146-
auto libunwindContext = _UPT_create(tid);
146+
auto libunwindContext = threadContext != nullptr ? threadContext : _UPT_create(tid);
147147

148148
unw_cursor_t cursor;
149149

profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/CrashReportingLinux.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class CrashReportingLinux : public CrashReporting
3131

3232
private:
3333
std::vector<std::pair<int32_t, std::string>> GetThreads() override;
34-
std::vector<StackFrame> GetThreadFrames(int32_t tid, ResolveManagedCallstack resolveManagedCallstack, void* context) override;
34+
std::vector<StackFrame> GetThreadFrames(int32_t tid, void* threadContext, ResolveManagedCallstack resolveManagedCallstack, void* context) override;
3535
const ModuleInfo* FindModule(uintptr_t ip);
3636
std::vector<ModuleInfo> GetModules();
3737
std::string GetSignalInfo(int32_t signal) override;

profiler/src/ProfilerEngine/Datadog.Profiler.Native.Windows/CrashReportingWindows.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ std::vector<std::pair<int32_t, std::string>> CrashReportingWindows::GetThreads()
8686
return threads;
8787
}
8888

89-
std::vector<StackFrame> CrashReportingWindows::GetThreadFrames(int32_t tid, ResolveManagedCallstack resolveManagedCallstack, void* callbackContext)
89+
std::vector<StackFrame> CrashReportingWindows::GetThreadFrames(int32_t tid, void* ignoredThreadContext, ResolveManagedCallstack resolveManagedCallstack, void* callbackContext)
9090
{
9191
std::vector<StackFrame> frames;
9292

profiler/src/ProfilerEngine/Datadog.Profiler.Native.Windows/CrashReportingWindows.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class CrashReportingWindows : public CrashReporting
3939

4040
private:
4141
std::vector<std::pair<int32_t, std::string>> GetThreads() override;
42-
std::vector<StackFrame> GetThreadFrames(int32_t tid, ResolveManagedCallstack resolveManagedCallstack, void* context) override;
42+
std::vector<StackFrame> GetThreadFrames(int32_t tid, void* ignoredThreadContext, ResolveManagedCallstack resolveManagedCallstack, void* context) override;
4343
std::string GetSignalInfo(int32_t signal) override;
4444
std::vector<ModuleInfo> GetModules();
4545
const ModuleInfo* FindModule(uintptr_t ip);

profiler/src/ProfilerEngine/Datadog.Profiler.Native/CrashReporting.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ int32_t CrashReporting::SetSignalInfo(int32_t signal, const char* description)
216216
return 0;
217217
}
218218

219-
int32_t CrashReporting::ResolveStacks(int32_t crashingThreadId, ResolveManagedCallstack resolveCallback, void* context, bool* isSuspicious)
219+
int32_t CrashReporting::ResolveStacks(int32_t crashingThreadId, void* threadContext, ResolveManagedCallstack resolveCallback, void* context, bool* isSuspicious)
220220
{
221221
auto threads = GetThreads();
222222

@@ -226,7 +226,8 @@ int32_t CrashReporting::ResolveStacks(int32_t crashingThreadId, ResolveManagedCa
226226

227227
for (auto const& [threadId, threadName] : threads)
228228
{
229-
auto frames = GetThreadFrames(threadId, resolveCallback, context);
229+
auto context = threadContext != nullptr ? threadContext : nullptr;
230+
auto frames = GetThreadFrames(threadId, context, resolveCallback, context);
230231

231232
auto [stackTrace, succeeded] = ExtractResult(ddog_crasht_StackTrace_new());
232233

profiler/src/ProfilerEngine/Datadog.Profiler.Native/CrashReporting.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ ICrashReporting : public IUnknown
106106
// only for tests
107107
virtual int32_t STDMETHODCALLTYPE Panic() = 0;
108108
virtual int32_t STDMETHODCALLTYPE SetSignalInfo(int32_t signal, const char* description) = 0;
109-
virtual int32_t STDMETHODCALLTYPE ResolveStacks(int32_t crashingThreadId, ResolveManagedCallstack resolveCallback, void* context, bool* isSuspicious) = 0;
109+
virtual int32_t STDMETHODCALLTYPE ResolveStacks(int32_t crashingThreadId, void* threadContext, ResolveManagedCallstack resolveCallback, void* context, bool* isSuspicious) = 0;
110110
virtual int32_t STDMETHODCALLTYPE SetMetadata(const char* libraryName, const char* libraryVersion, const char* family, Tag* tags, int32_t tagCount) = 0;
111111
virtual int32_t STDMETHODCALLTYPE Send() = 0;
112112
virtual int32_t STDMETHODCALLTYPE WriteToFile(const char* url) = 0;
@@ -128,7 +128,7 @@ class CrashReporting : public ICrashReporting
128128
int32_t STDMETHODCALLTYPE Initialize() override;
129129
int32_t STDMETHODCALLTYPE Panic() override;
130130
int32_t STDMETHODCALLTYPE SetSignalInfo(int32_t signal, const char* description) override;
131-
int32_t STDMETHODCALLTYPE ResolveStacks(int32_t crashingThreadId, ResolveManagedCallstack resolveCallback, void* context, bool* isSuspicious) override;
131+
int32_t STDMETHODCALLTYPE ResolveStacks(int32_t crashingThreadId, void* threadContext, ResolveManagedCallstack resolveCallback, void* context, bool* isSuspicious) override;
132132
int32_t STDMETHODCALLTYPE SetMetadata(const char* libraryName, const char* libraryVersion, const char* family, Tag* tags, int32_t tagCount) override;
133133
int32_t STDMETHODCALLTYPE Send() override;
134134
int32_t STDMETHODCALLTYPE WriteToFile(const char* url) override;
@@ -141,7 +141,7 @@ class CrashReporting : public ICrashReporting
141141
ddog_crasht_Handle_CrashInfoBuilder _builder;
142142
void SetLastError(ddog_Error error);
143143
virtual std::vector<std::pair<int32_t, std::string>> GetThreads() = 0;
144-
virtual std::vector<StackFrame> GetThreadFrames(int32_t tid, ResolveManagedCallstack resolveManagedCallstack, void* context) = 0;
144+
virtual std::vector<StackFrame> GetThreadFrames(int32_t tid, void* threadContext, ResolveManagedCallstack resolveManagedCallstack, void* context) = 0;
145145
virtual std::string GetSignalInfo(int32_t signal) = 0;
146146

147147
#ifdef DD_TEST

tracer/src/Datadog.Trace.Tools.dd_dotnet/CreatedumpCommand.cs

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,12 @@ public CreatedumpCommand()
5151
this.SetHandler(Execute);
5252
}
5353

54-
internal static bool ParseArguments(string[] arguments, out int pid, out int? signal, out int? crashThread)
54+
internal static bool ParseArguments(string[] arguments, out int pid, out int? signal, out int? crashThread, out IntPtr threadContext)
5555
{
5656
pid = default;
5757
signal = default;
5858
crashThread = default;
59+
threadContext = default;
5960

6061
// Parse the createdump command-line
6162
// Unfortunately, the pid is not necessarily at the beginning or the end, it can be between other arguments.
@@ -94,7 +95,8 @@ internal static bool ParseArguments(string[] arguments, out int pid, out int? si
9495
{ "--nativeaot", false },
9596
{ "--code", true },
9697
{ "--errno", true },
97-
{ "--address", true }
98+
{ "--address", true },
99+
{ "--dd-thread-context", true }
98100
};
99101

100102
const string pidRegex = "[0-9]+";
@@ -173,6 +175,11 @@ internal static bool ParseArguments(string[] arguments, out int pid, out int? si
173175
crashThread = crashThreadValue;
174176
}
175177

178+
if (parsedArguments.TryGetValue("--dd-thread-context", out var rawThreadContext) && ulong.TryParse(rawThreadContext, out var threadContextValue))
179+
{
180+
threadContext = (IntPtr)threadContextValue;
181+
}
182+
176183
// Have we found the pid?
177184
if (pidCandidates.Count == 1)
178185
{
@@ -452,9 +459,9 @@ private void Execute(InvocationContext context)
452459
{
453460
if (IsTelemetryEnabled())
454461
{
455-
if (ParseArguments(allArguments, out var pid, out var signal, out var crashThread))
462+
if (ParseArguments(allArguments, out var pid, out var signal, out var crashThread, out var threadContext))
456463
{
457-
GenerateCrashReport(pid, signal, crashThread);
464+
GenerateCrashReport(pid, signal, crashThread, threadContext);
458465
}
459466
else
460467
{
@@ -509,7 +516,7 @@ private void Execute(InvocationContext context)
509516
DebugPrint("dd-dotnet exited normally");
510517
}
511518

512-
private unsafe void GenerateCrashReport(int pid, int? signal, int? crashThread)
519+
private unsafe void GenerateCrashReport(int pid, int? signal, int? crashThread, IntPtr threadContext)
513520
{
514521
DebugPrint($"Generating crash report for pid {pid} (signal: {signal}, crashing thread id: {crashThread})");
515522

@@ -630,7 +637,7 @@ private unsafe void GenerateCrashReport(int pid, int? signal, int? crashThread)
630637
{
631638
DebugPrint("Resolving callstacks");
632639
var callback = (delegate* unmanaged<int, IntPtr, ResolveMethodData**, int*, int>)&ResolveManagedCallstack;
633-
crashReport.ResolveStacks(crashThread ?? 0, (IntPtr)callback, GCHandle.ToIntPtr(handle), out isSuspicious);
640+
crashReport.ResolveStacks(crashThread ?? 0, threadContext, (IntPtr)callback, GCHandle.ToIntPtr(handle), out isSuspicious);
634641
}
635642
catch (Win32Exception ex)
636643
{

tracer/src/Datadog.Trace.Tools.dd_dotnet/Generated/Datadog.Trace.Tools.dd_dotnet.SourceGenerators/NativeObjectsGenerator/dd_dotnet.ICrashReport.g.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,10 +96,10 @@ public int SetSignalInfo(int a0, nint a1)
9696
}
9797
return returnvalue;
9898
}
99-
public int ResolveStacks(int a0, nint a1, nint a2, out bool a3)
99+
public int ResolveStacks(int a0, nint a1, nint a2, nint a3, out bool a4)
100100
{
101-
var func = (delegate* unmanaged[Stdcall]<IntPtr, int, nint, nint, out bool, out int, int>)*(VTable + 7);
102-
var result = func(_implementation, a0, a1, a2, out a3, out var returnvalue);
101+
var func = (delegate* unmanaged[Stdcall]<IntPtr, int, nint, nint, nint, out bool, out int, int>)*(VTable + 7);
102+
var result = func(_implementation, a0, a1, a2, a3, out a4, out var returnvalue);
103103
if (result != 0)
104104
{
105105
throw new System.ComponentModel.Win32Exception(result);

tracer/src/Datadog.Trace.Tools.dd_dotnet/ICrashReport.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ internal unsafe interface ICrashReport : IUnknown
2525

2626
int SetSignalInfo(int signal, IntPtr description);
2727

28-
int ResolveStacks(int crashingThreadId, IntPtr resolveCallback, IntPtr context, out bool isSuspicious);
28+
int ResolveStacks(int crashingThreadId, IntPtr threadContext, IntPtr resolveCallback, IntPtr context, out bool isSuspicious);
2929

3030
int SetMetadata(IntPtr libraryName, IntPtr libraryVersion, IntPtr family, Tag* tags, int tagsCount);
3131

0 commit comments

Comments
 (0)