77// ===----------------------------------------------------------------------===//
88
99#include " CtxInstrProfiling.h"
10+ #include " RootAutoDetector.h"
1011#include " sanitizer_common/sanitizer_allocator_internal.h"
1112#include " sanitizer_common/sanitizer_atomic.h"
1213#include " sanitizer_common/sanitizer_atomic_clang.h"
@@ -43,6 +44,12 @@ Arena *FlatCtxArena = nullptr;
4344__thread bool IsUnderContext = false ;
4445__sanitizer::atomic_uint8_t ProfilingStarted = {};
4546
47+ __sanitizer::atomic_uintptr_t RootDetector = {};
48+ RootAutoDetector *getRootDetector () {
49+ return reinterpret_cast <RootAutoDetector *>(
50+ __sanitizer::atomic_load_relaxed (&RootDetector));
51+ }
52+
4653// utility to taint a pointer by setting the LSB. There is an assumption
4754// throughout that the addresses of contexts are even (really, they should be
4855// align(8), but "even"-ness is the minimum assumption)
@@ -201,7 +208,7 @@ ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
201208 return Ret;
202209}
203210
204- ContextNode *getFlatProfile (FunctionData &Data, GUID Guid,
211+ ContextNode *getFlatProfile (FunctionData &Data, void *Callee, GUID Guid,
205212 uint32_t NumCounters) {
206213 if (ContextNode *Existing = Data.FlatCtx )
207214 return Existing;
@@ -232,6 +239,7 @@ ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
232239 auto *Ret = allocContextNode (AllocBuff, Guid, NumCounters, 0 );
233240 Data.FlatCtx = Ret;
234241
242+ Data.EntryAddress = Callee;
235243 Data.Next = reinterpret_cast <FunctionData *>(
236244 __sanitizer::atomic_load_relaxed (&AllFunctionsData));
237245 while (!__sanitizer::atomic_compare_exchange_strong (
@@ -296,8 +304,9 @@ ContextNode *tryStartContextGivenRoot(ContextRoot *Root, GUID Guid,
296304 return TheScratchContext;
297305}
298306
299- ContextNode *getUnhandledContext (FunctionData &Data, GUID Guid,
300- uint32_t NumCounters) {
307+ ContextNode *getUnhandledContext (FunctionData &Data, void *Callee, GUID Guid,
308+ uint32_t NumCounters, uint32_t NumCallsites,
309+ ContextRoot *CtxRoot) {
301310
302311 // 1) if we are currently collecting a contextual profile, fetch a ContextNode
303312 // in the `Unhandled` set. We want to do this regardless of `ProfilingStarted`
@@ -316,27 +325,32 @@ ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
316325 // entered once and never exit. They should be assumed to be entered before
317326 // profiling starts - because profiling should start after the server is up
318327 // and running (which is equivalent to "message pumps are set up").
319- ContextRoot *R = __llvm_ctx_profile_current_context_root;
320- if (!R) {
328+ if (!CtxRoot) {
329+ if (auto *RAD = getRootDetector ())
330+ RAD->sample ();
331+ else if (auto *CR = Data.CtxRoot )
332+ return tryStartContextGivenRoot (CR, Guid, NumCounters, NumCallsites);
321333 if (IsUnderContext || !__sanitizer::atomic_load_relaxed (&ProfilingStarted))
322334 return TheScratchContext;
323335 else
324336 return markAsScratch (
325- onContextEnter (*getFlatProfile (Data, Guid, NumCounters)));
337+ onContextEnter (*getFlatProfile (Data, Callee, Guid, NumCounters)));
326338 }
327- auto [Iter, Ins] = R ->Unhandled .insert ({Guid, nullptr });
339+ auto [Iter, Ins] = CtxRoot ->Unhandled .insert ({Guid, nullptr });
328340 if (Ins)
329- Iter->second =
330- getCallsiteSlow (Guid, &R-> FirstUnhandledCalleeNode , NumCounters, 0 );
341+ Iter->second = getCallsiteSlow (Guid, &CtxRoot-> FirstUnhandledCalleeNode ,
342+ NumCounters, 0 );
331343 return markAsScratch (onContextEnter (*Iter->second ));
332344}
333345
334346ContextNode *__llvm_ctx_profile_get_context (FunctionData *Data, void *Callee,
335347 GUID Guid, uint32_t NumCounters,
336348 uint32_t NumCallsites) {
349+ auto *CtxRoot = __llvm_ctx_profile_current_context_root;
337350 // fast "out" if we're not even doing contextual collection.
338- if (!__llvm_ctx_profile_current_context_root)
339- return getUnhandledContext (*Data, Guid, NumCounters);
351+ if (!CtxRoot)
352+ return getUnhandledContext (*Data, Callee, Guid, NumCounters, NumCallsites,
353+ nullptr );
340354
341355 // also fast "out" if the caller is scratch. We can see if it's scratch by
342356 // looking at the interior pointer into the subcontexts vector that the caller
@@ -345,7 +359,8 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
345359 // precisely, aligned - 8 values)
346360 auto **CallsiteContext = consume (__llvm_ctx_profile_callsite[0 ]);
347361 if (!CallsiteContext || isScratch (CallsiteContext))
348- return getUnhandledContext (*Data, Guid, NumCounters);
362+ return getUnhandledContext (*Data, Callee, Guid, NumCounters, NumCallsites,
363+ CtxRoot);
349364
350365 // if the callee isn't the expected one, return scratch.
351366 // Signal handler(s) could have been invoked at any point in the execution.
@@ -363,7 +378,8 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
363378 // for that case.
364379 auto *ExpectedCallee = consume (__llvm_ctx_profile_expected_callee[0 ]);
365380 if (ExpectedCallee != Callee)
366- return getUnhandledContext (*Data, Guid, NumCounters);
381+ return getUnhandledContext (*Data, Callee, Guid, NumCounters, NumCallsites,
382+ CtxRoot);
367383
368384 auto *Callsite = *CallsiteContext;
369385 // in the case of indirect calls, we will have all seen targets forming a
@@ -388,21 +404,23 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
388404ContextNode *__llvm_ctx_profile_start_context (FunctionData *FData, GUID Guid,
389405 uint32_t Counters,
390406 uint32_t Callsites) {
407+
391408 return tryStartContextGivenRoot (FData->getOrAllocateContextRoot (), Guid,
392409 Counters, Callsites);
393410}
394411
395412void __llvm_ctx_profile_release_context (FunctionData *FData)
396413 SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
414+ const auto *CurrentRoot = __llvm_ctx_profile_current_context_root;
415+ if (!CurrentRoot || FData->CtxRoot != CurrentRoot)
416+ return ;
397417 IsUnderContext = false ;
398- if (__llvm_ctx_profile_current_context_root) {
399- __llvm_ctx_profile_current_context_root = nullptr ;
400- assert (FData->CtxRoot );
401- FData->CtxRoot ->Taken .Unlock ();
402- }
418+ assert (FData->CtxRoot );
419+ __llvm_ctx_profile_current_context_root = nullptr ;
420+ FData->CtxRoot ->Taken .Unlock ();
403421}
404422
405- void __llvm_ctx_profile_start_collection () {
423+ void __llvm_ctx_profile_start_collection (unsigned AutodetectDuration ) {
406424 size_t NumMemUnits = 0 ;
407425 __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock (
408426 &AllContextsMutex);
@@ -418,12 +436,28 @@ void __llvm_ctx_profile_start_collection() {
418436 resetContextNode (*Root->FirstUnhandledCalleeNode );
419437 __sanitizer::atomic_store_relaxed (&Root->TotalEntries , 0 );
420438 }
439+ if (AutodetectDuration) {
440+ // we leak RD intentionally. Knowing when to free it is tricky, there's a
441+ // race condition with functions observing the `RootDetector` as non-null.
442+ // This can be addressed but the alternatives have some added complexity and
443+ // it's not (yet) worth it.
444+ auto *RD = new (__sanitizer::InternalAlloc (sizeof (RootAutoDetector)))
445+ RootAutoDetector (AllFunctionsData, RootDetector, AutodetectDuration);
446+ RD->start ();
447+ } else {
448+ __sanitizer::Printf (" [ctxprof] Initial NumMemUnits: %zu \n " , NumMemUnits);
449+ }
421450 __sanitizer::atomic_store_relaxed (&ProfilingStarted, true );
422- __sanitizer::Printf (" [ctxprof] Initial NumMemUnits: %zu \n " , NumMemUnits);
423451}
424452
425453bool __llvm_ctx_profile_fetch (ProfileWriter &Writer) {
426454 __sanitizer::atomic_store_relaxed (&ProfilingStarted, false );
455+ if (auto *RD = getRootDetector ()) {
456+ __sanitizer::Printf (" [ctxprof] Expected the root autodetector to have "
457+ " finished well before attempting to fetch a context" );
458+ RD->join ();
459+ }
460+
427461 __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock (
428462 &AllContextsMutex);
429463
@@ -448,8 +482,9 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
448482 const auto *Pos = reinterpret_cast <const FunctionData *>(
449483 __sanitizer::atomic_load_relaxed (&AllFunctionsData));
450484 for (; Pos; Pos = Pos->Next )
451- Writer.writeFlat (Pos->FlatCtx ->guid (), Pos->FlatCtx ->counters (),
452- Pos->FlatCtx ->counters_size ());
485+ if (!Pos->CtxRoot )
486+ Writer.writeFlat (Pos->FlatCtx ->guid (), Pos->FlatCtx ->counters (),
487+ Pos->FlatCtx ->counters_size ());
453488 Writer.endFlatSection ();
454489 return true ;
455490}
0 commit comments