@@ -8,6 +8,8 @@
 
 #include "CtxInstrProfiling.h"
 #include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_atomic_clang.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_dense_map.h"
 #include "sanitizer_common/sanitizer_libc.h"
@@ -27,6 +29,20 @@ __sanitizer::SpinMutex AllContextsMutex;
 SANITIZER_GUARDED_BY(AllContextsMutex)
 __sanitizer::Vector<ContextRoot *> AllContextRoots;
 
+__sanitizer::atomic_uintptr_t AllFunctionsData = {};
+
+// Keep all the functions for which we collect a flat profile in a linked list.
+__sanitizer::SpinMutex FlatCtxArenaMutex;
+SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
+Arena *FlatCtxArenaHead = nullptr;
+SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
+Arena *FlatCtxArena = nullptr;
+
+// Set to true when we enter a root, and false when we exit, regardless of
+// whether this thread collects a contextual profile for that root.
+__thread bool IsUnderContext = false;
+__sanitizer::atomic_uint8_t ProfilingStarted = {};
+
 // utility to taint a pointer by setting the LSB. There is an assumption
 // throughout that the addresses of contexts are even (really, they should be
 // align(8), but "even"-ness is the minimum assumption)
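The tainting trick the comment above describes is just a spare low bit: ContextNode allocations are at least even-aligned, so the LSB can mark a pointer as "scratch" without losing the address. A minimal sketch of the idea (the real helpers in this file are markAsScratch/isScratch, used further down in this diff; the bodies below are illustrative, not copied from the source):

```cpp
#include <cstdint>

struct ContextNode; // opaque here; the real definition lives in CtxInstrProfiling.h

// Illustration only: set the LSB to mark a context pointer as "scratch", and
// test that bit later. Relies on ContextNode addresses being even.
inline ContextNode *markScratchExample(const ContextNode *Ctx) {
  return reinterpret_cast<ContextNode *>(reinterpret_cast<uintptr_t>(Ctx) | 1);
}

inline bool isScratchExample(const void *Ctx) {
  return (reinterpret_cast<uintptr_t>(Ctx) & 1) != 0;
}
```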
@@ -109,7 +125,10 @@ void resetContextNode(ContextNode &Node) {
       resetContextNode(*Next);
 }
 
-void onContextEnter(ContextNode &Node) { ++Node.counters()[0]; }
+ContextNode *onContextEnter(ContextNode &Node) {
+  ++Node.counters()[0];
+  return &Node;
+}
 
 } // namespace
 
@@ -182,12 +201,75 @@ ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
   return Ret;
 }
 
-ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
-                                            uint32_t NumCounters,
+ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
+                            uint32_t NumCounters) {
+  if (ContextNode *Existing = Data.FlatCtx)
+    return Existing;
+  {
+    // We could instead try to take the lock and, if that fails, return
+    // TheScratchContext. But that could leave message pump loops more sparsely
+    // profiled than everything else. Maybe that doesn't matter, and we can
+    // optimize this later.
+    __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Data.Mutex);
+    if (ContextNode *Existing = Data.FlatCtx)
+      return Existing;
+
+    auto NeededSize = ContextNode::getAllocSize(NumCounters, 0);
+    char *AllocBuff = nullptr;
+    {
+      __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> FL(
+          &FlatCtxArenaMutex);
+      if (FlatCtxArena)
+        AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
+      if (!AllocBuff) {
+        FlatCtxArena = Arena::allocateNewArena(getArenaAllocSize(NeededSize),
+                                               FlatCtxArena);
+        AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
+      }
+      if (!FlatCtxArenaHead)
+        FlatCtxArenaHead = FlatCtxArena;
+    }
+    auto *Ret = allocContextNode(AllocBuff, Guid, NumCounters, 0);
+    Data.FlatCtx = Ret;
+
+    Data.Next = reinterpret_cast<FunctionData *>(
+        __sanitizer::atomic_load_relaxed(&AllFunctionsData));
+    while (!__sanitizer::atomic_compare_exchange_strong(
+        &AllFunctionsData, reinterpret_cast<uintptr_t *>(&Data.Next),
+        reinterpret_cast<uintptr_t>(&Data),
+        __sanitizer::memory_order_release)) {
+    }
+  }
+
+  return Data.FlatCtx;
+}
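The publication step above is a lock-free prepend onto AllFunctionsData: Data.Next is seeded with the current head, and atomic_compare_exchange_strong writes the freshly observed head back into Data.Next whenever it fails, which is why the retry loop has an empty body. A standalone sketch of the same pattern, written with std::atomic purely for illustration (the names here are made up, not the runtime's):

```cpp
#include <atomic>

struct Node {
  Node *Next = nullptr;
};

std::atomic<Node *> ListHead{nullptr};

// Prepend N to a shared singly linked list without taking a lock.
void prepend(Node *N) {
  // Seed Next with the current head; on CAS failure the observed head is
  // stored back into N->Next, so the loop body stays empty.
  N->Next = ListHead.load(std::memory_order_relaxed);
  while (!ListHead.compare_exchange_weak(N->Next, N, std::memory_order_release,
                                         std::memory_order_relaxed)) {
  }
}
```

The release ordering on the successful exchange mirrors the memory_order_release in the patch: it publishes the fully initialized FlatCtx before the node becomes reachable from the head.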
+
+ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
+                                 uint32_t NumCounters) {
+  // 1) If we are under a root (regardless of whether this thread is collecting
+  // a contextual profile for that root), do not collect a flat profile. We
+  // want to keep flat profiles only for activations that can't happen under a
+  // root, to avoid confusing profiles. We can, for example, combine flattened
+  // and flat profiles meaningfully, as we wouldn't double-count anything.
+  //
+  // 2) To avoid lengthy startup, don't bother with flat profiles until
+  // profiling has started; we would reset them anyway when profiling starts.
+  // However, this does lose profiling for message pumps: those functions are
+  // entered once and never exit. They should be assumed to be entered before
+  // profiling starts, because profiling should start after the server is up
+  // and running (which is equivalent to "message pumps are set up").
+  if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
+    return TheScratchContext;
+  return markAsScratch(
+      onContextEnter(*getFlatProfile(Data, Guid, NumCounters)));
+}
+
+ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
+                                            GUID Guid, uint32_t NumCounters,
                                             uint32_t NumCallsites) {
   // fast "out" if we're not even doing contextual collection.
   if (!__llvm_ctx_profile_current_context_root)
-    return TheScratchContext;
+    return getUnhandledContext(*Data, Guid, NumCounters);
 
   // also fast "out" if the caller is scratch. We can see if it's scratch by
   // looking at the interior pointer into the subcontexts vector that the caller
@@ -196,7 +278,7 @@ ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
   // precisely, aligned - 8 values)
   auto **CallsiteContext = consume(__llvm_ctx_profile_callsite[0]);
   if (!CallsiteContext || isScratch(CallsiteContext))
-    return TheScratchContext;
+    return getUnhandledContext(*Data, Guid, NumCounters);
 
   // if the callee isn't the expected one, return scratch.
   // Signal handler(s) could have been invoked at any point in the execution.
@@ -214,7 +296,7 @@ ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
   // for that case.
   auto *ExpectedCallee = consume(__llvm_ctx_profile_expected_callee[0]);
   if (ExpectedCallee != Callee)
-    return TheScratchContext;
+    return getUnhandledContext(*Data, Guid, NumCounters);
 
   auto *Callsite = *CallsiteContext;
   // in the case of indirect calls, we will have all seen targets forming a
@@ -257,6 +339,7 @@ void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters,
 ContextNode *__llvm_ctx_profile_start_context(
     ContextRoot *Root, GUID Guid, uint32_t Counters,
     uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  IsUnderContext = true;
   if (!Root->FirstMemBlock) {
     setupContext(Root, Guid, Counters, Callsites);
   }
@@ -272,6 +355,7 @@ ContextNode *__llvm_ctx_profile_start_context(
 
 void __llvm_ctx_profile_release_context(ContextRoot *Root)
     SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  IsUnderContext = false;
   if (__llvm_ctx_profile_current_context_root) {
     __llvm_ctx_profile_current_context_root = nullptr;
     Root->Taken.Unlock();
@@ -291,10 +375,12 @@ void __llvm_ctx_profile_start_collection() {
 
     resetContextNode(*Root->FirstNode);
   }
+  __sanitizer::atomic_store_relaxed(&ProfilingStarted, true);
   __sanitizer::Printf("[ctxprof] Initial NumMemUnits: %zu\n", NumMemUnits);
 }
 
 bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
+  __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
   __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
       &AllContextsMutex);
 
@@ -310,17 +396,43 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
     Writer.writeContextual(*Root->FirstNode);
   }
   Writer.endContextSection();
+  Writer.startFlatSection();
+  // New entries are only ever prepended at the head, so the links from this
+  // snapshot onward never change; the list can keep growing concurrently
+  // without racing with our traversal of it.
+  const auto *Pos = reinterpret_cast<const FunctionData *>(
+      __sanitizer::atomic_load_relaxed(&AllFunctionsData));
+  for (; Pos; Pos = Pos->Next)
+    Writer.writeFlat(Pos->FlatCtx->guid(), Pos->FlatCtx->counters(),
+                     Pos->FlatCtx->counters_size());
+  Writer.endFlatSection();
   return true;
 }
 
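For orientation, the calls in __llvm_ctx_profile_fetch above imply the shape of the flat-profile half of the ProfileWriter interface. The struct below is a hypothetical consumer whose method names and parameter types are inferred from those call sites; the authoritative interface is the one declared in CtxInstrProfiling.h:

```cpp
#include "CtxInstrProfiling.h" // assumed include path; provides ContextNode and GUID

// Hypothetical sketch of a writer; the parameter types for writeFlat() are
// guessed from ContextNode::guid(), counters(), and counters_size() as used above.
struct ExampleFlatWriter {
  void startContextSection() {}
  void writeContextual(ContextNode &RootNode) { /* serialize the context tree */ }
  void endContextSection() {}

  void startFlatSection() {}
  void writeFlat(GUID Guid, const uint64_t *Counters, uint64_t NumCounters) {
    // e.g. buffer or print the per-function flat counters here
  }
  void endFlatSection() {}
};
```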
 void __llvm_ctx_profile_free() {
-  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
-      &AllContextsMutex);
-  for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
-    for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
+  __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
+  {
+    __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+        &AllContextsMutex);
+    for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
+      for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
+        auto *C = A;
+        A = A->next();
+        __sanitizer::InternalFree(C);
+      }
+    AllContextRoots.Reset();
+  }
+  __sanitizer::atomic_store_relaxed(&AllFunctionsData, 0U);
+  {
+    __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+        &FlatCtxArenaMutex);
+    FlatCtxArena = nullptr;
+    for (auto *A = FlatCtxArenaHead; A;) {
       auto *C = A;
-      A = A->next();
+      A = C->next();
       __sanitizer::InternalFree(C);
     }
-  AllContextRoots.Reset();
+
+    FlatCtxArenaHead = nullptr;
+  }
 }
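Taken together, the new entry point now takes a per-function FunctionData slot ahead of the previous arguments. The snippet below is a hedged illustration of what a call site looks like; in practice these calls are emitted by the compiler's instrumentation lowering, and the FunctionData instance, GUID, and counts here are invented for the example:

```cpp
#include "CtxInstrProfiling.h" // assumed include path

// Invented per-function slot; the real one is materialized by instrumentation.
static FunctionData ExampleFData;

void exampleInstrumentedFunction() {
  ContextNode *Ctx = __llvm_ctx_profile_get_context(
      &ExampleFData, reinterpret_cast<void *>(&exampleInstrumentedFunction),
      /*Guid=*/0x1234, /*NumCounters=*/2, /*NumCallsites=*/1);
  // Under an active root this is a (possibly scratch) contextual node; outside
  // any root it now resolves to this function's flat profile via
  // getUnhandledContext() instead of always being TheScratchContext.
  (void)Ctx;
}
```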