Skip to content

Commit 0a56ee9

Browse files
fix(profiling): remove slow getpid call from memalloc path [backport 2.18] (#11849)
Backport 6bfe77e from #11848 to 2.18. memalloc uses getpid to detect whether the process has forked, so that we can unlock the memalloc lock in the child process (if it isn't already locked). Unfortunately the getpid call is quite slow. From the man page: "calls to getpid() always invoke the actual system call, rather than returning a cached value." Furthermore, we _always_ attempt to take the lock for allocations, even if we aren't going to sample them. So this is basically adding a syscall to every allocation. Move this logic out of the allocation path. Switch to using pthread_atfork handlers to ensure that the lock is held prior to forking, and unlock it in the parent and child after forking. This (maybe) has the added benefit of making sure the data structures are in a consistent state in the child process after forking. Unclear if that's an issue prior to this change, though. I may be missing some code that resets the profiler on fork anyway?
1 parent 4e0ecc8 commit 0a56ee9

File tree

4 files changed

+65
-13
lines changed

4 files changed

+65
-13
lines changed

ddtrace/profiling/collector/_memalloc.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,28 @@ static alloc_tracker_t* global_alloc_tracker;
5757
static void
5858
memalloc_init(void);
5959

60+
static void
61+
memalloc_prefork(void)
62+
{
63+
// Lock the mutex prior to forking. This ensures that the memory profiler
64+
// data structures will be in a consistent state in the child process.
65+
// The rest of the memalloc calls do trylock so we don't run the risk
66+
// of deadlocking if some other fork handler allocates
67+
memlock_lock(&g_memalloc_lock);
68+
}
69+
70+
static void
71+
memalloc_postfork_parent(void)
72+
{
73+
memlock_unlock(&g_memalloc_lock);
74+
}
75+
76+
static void
77+
memalloc_postfork_child(void)
78+
{
79+
memlock_unlock(&g_memalloc_lock);
80+
}
81+
6082
#ifdef _MSC_VER
6183
#pragma section(".CRT$XCU", read)
6284
__declspec(allocate(".CRT$XCU")) void (*memalloc_init_func)(void) = memalloc_init;
@@ -81,6 +103,9 @@ memalloc_init()
81103
}
82104
}
83105
memlock_init(&g_memalloc_lock, crash_on_mutex_pass);
106+
#ifndef _WIN32
107+
pthread_atfork(memalloc_prefork, memalloc_postfork_parent, memalloc_postfork_child);
108+
#endif
84109
}
85110

86111
static void

ddtrace/profiling/collector/_memalloc_heap.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,25 @@ static heap_tracker_t global_heap_tracker;
3636
static void
3737
memheap_init(void);
3838

39+
static void
40+
memheap_prefork(void)
41+
{
42+
// See memalloc_prefork for an explanation of why this is here
43+
memlock_lock(&g_memheap_lock);
44+
}
45+
46+
static void
47+
memheap_postfork_parent(void)
48+
{
49+
memlock_unlock(&g_memheap_lock);
50+
}
51+
52+
static void
53+
memheap_postfork_child(void)
54+
{
55+
memlock_unlock(&g_memheap_lock);
56+
}
57+
3958
#ifdef _MSC_VER
4059
#pragma section(".CRT$XCU", read)
4160
__declspec(allocate(".CRT$XCU")) void (*memheap_init_func)(void) = memheap_init;
@@ -60,6 +79,9 @@ memheap_init()
6079
}
6180
}
6281
memlock_init(&g_memheap_lock, crash_on_mutex_pass);
82+
#ifndef _WIN32
83+
pthread_atfork(memheap_prefork, memheap_postfork_parent, memheap_postfork_child);
84+
#endif
6385
}
6486

6587
static uint32_t

ddtrace/profiling/collector/_memalloc_reentrant.h

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -125,19 +125,6 @@ memlock_trylock(memlock_t* lock)
125125
if (!lock)
126126
return false;
127127

128-
#ifdef __linux__
129-
// On Linux, we need to make sure we didn't just fork
130-
// pthreads will guarantee the lock is consistent, but we at least need to clear it
131-
static pid_t my_pid = 0;
132-
if (my_pid == 0) {
133-
my_pid = getpid();
134-
} else if (my_pid != getpid()) {
135-
// We've forked, so we need to free the lock
136-
memlock_unlock(lock);
137-
my_pid = getpid();
138-
}
139-
#endif
140-
141128
#ifdef _WIN32
142129
bool result = WAIT_OBJECT_0 == WaitForSingleObject(lock->mutex, 0); // 0ms timeout -> no wait
143130
#else
@@ -153,6 +140,19 @@ memlock_trylock(memlock_t* lock)
153140
return result;
154141
}
155142

143+
static inline void
144+
memlock_lock(memlock_t* lock)
145+
{
146+
if (!lock)
147+
return;
148+
149+
#ifdef _WIN32
150+
WaitForSingleObject(lock->mutex, INFINITE);
151+
#else
152+
pthread_mutex_lock(&lock->mutex);
153+
#endif
154+
}
155+
156156
// Cleanup function
157157
static inline bool
158158
memlock_destroy(memlock_t* lock)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
fixes:
3+
- |
4+
profiling: Removed a system call from the memory allocation profiler, used to detect forks,
5+
which ran on every allocation and resulted in a significant slowdown.

0 commit comments

Comments
 (0)