77// ===----------------------------------------------------------------------===//
88
99#include " CtxInstrProfiling.h"
10+ #include " RootAutoDetector.h"
1011#include " sanitizer_common/sanitizer_allocator_internal.h"
1112#include " sanitizer_common/sanitizer_atomic.h"
1213#include " sanitizer_common/sanitizer_atomic_clang.h"
@@ -43,6 +44,12 @@ Arena *FlatCtxArena = nullptr;
4344__thread bool IsUnderContext = false ;
4445__sanitizer::atomic_uint8_t ProfilingStarted = {};
4546
47+ __sanitizer::atomic_uintptr_t RootDetector = {};
48+ RootAutoDetector *getRootDetector () {
49+ return reinterpret_cast <RootAutoDetector *>(
50+ __sanitizer::atomic_load_relaxed (&RootDetector));
51+ }
52+
4653// utility to taint a pointer by setting the LSB. There is an assumption
4754// throughout that the addresses of contexts are even (really, they should be
4855// align(8), but "even"-ness is the minimum assumption)
@@ -201,7 +208,7 @@ ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
201208 return Ret;
202209}
203210
204- ContextNode *getFlatProfile (FunctionData &Data, GUID Guid,
211+ ContextNode *getFlatProfile (FunctionData &Data, void *Callee, GUID Guid,
205212 uint32_t NumCounters) {
206213 if (ContextNode *Existing = Data.FlatCtx )
207214 return Existing;
@@ -232,6 +239,7 @@ ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
232239 auto *Ret = allocContextNode (AllocBuff, Guid, NumCounters, 0 );
233240 Data.FlatCtx = Ret;
234241
242+ Data.EntryAddress = Callee;
235243 Data.Next = reinterpret_cast <FunctionData *>(
236244 __sanitizer::atomic_load_relaxed (&AllFunctionsData));
237245 while (!__sanitizer::atomic_compare_exchange_strong (
@@ -277,8 +285,29 @@ ContextRoot *FunctionData::getOrAllocateContextRoot() {
277285 return Root;
278286}
279287
280- ContextNode *getUnhandledContext (FunctionData &Data, GUID Guid,
281- uint32_t NumCounters) {
288+ ContextNode *tryStartContextGivenRoot (ContextRoot *Root, GUID Guid,
289+ uint32_t Counters, uint32_t Callsites)
290+ SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
291+ IsUnderContext = true ;
292+ __sanitizer::atomic_fetch_add (&Root->TotalEntries , 1 ,
293+ __sanitizer::memory_order_relaxed);
294+
295+ if (!Root->FirstMemBlock ) {
296+ setupContext (Root, Guid, Counters, Callsites);
297+ }
298+ if (Root->Taken .TryLock ()) {
299+ __llvm_ctx_profile_current_context_root = Root;
300+ onContextEnter (*Root->FirstNode );
301+ return Root->FirstNode ;
302+ }
303+ // If this thread couldn't take the lock, return scratch context.
304+ __llvm_ctx_profile_current_context_root = nullptr ;
305+ return TheScratchContext;
306+ }
307+
308+ ContextNode *getUnhandledContext (FunctionData &Data, void *Callee, GUID Guid,
309+ uint32_t NumCounters, uint32_t NumCallsites,
310+ ContextRoot *CtxRoot) {
282311
283312 // 1) if we are currently collecting a contextual profile, fetch a ContextNode
284313 // in the `Unhandled` set. We want to do this regardless of `ProfilingStarted`
@@ -297,27 +326,32 @@ ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
297326 // entered once and never exit. They should be assumed to be entered before
298327 // profiling starts - because profiling should start after the server is up
299328 // and running (which is equivalent to "message pumps are set up").
300- ContextRoot *R = __llvm_ctx_profile_current_context_root;
301- if (!R) {
329+ if (!CtxRoot) {
330+ if (auto *RAD = getRootDetector ())
331+ RAD->sample ();
332+ else if (auto *CR = Data.CtxRoot )
333+ return tryStartContextGivenRoot (CR, Guid, NumCounters, NumCallsites);
302334 if (IsUnderContext || !__sanitizer::atomic_load_relaxed (&ProfilingStarted))
303335 return TheScratchContext;
304336 else
305337 return markAsScratch (
306- onContextEnter (*getFlatProfile (Data, Guid, NumCounters)));
338+ onContextEnter (*getFlatProfile (Data, Callee, Guid, NumCounters)));
307339 }
308- auto [Iter, Ins] = R ->Unhandled .insert ({Guid, nullptr });
340+ auto [Iter, Ins] = CtxRoot ->Unhandled .insert ({Guid, nullptr });
309341 if (Ins)
310- Iter->second =
311- getCallsiteSlow (Guid, &R-> FirstUnhandledCalleeNode , NumCounters, 0 );
342+ Iter->second = getCallsiteSlow (Guid, &CtxRoot-> FirstUnhandledCalleeNode ,
343+ NumCounters, 0 );
312344 return markAsScratch (onContextEnter (*Iter->second ));
313345}
314346
315347ContextNode *__llvm_ctx_profile_get_context (FunctionData *Data, void *Callee,
316348 GUID Guid, uint32_t NumCounters,
317349 uint32_t NumCallsites) {
350+ auto *CtxRoot = __llvm_ctx_profile_current_context_root;
318351 // fast "out" if we're not even doing contextual collection.
319- if (!__llvm_ctx_profile_current_context_root)
320- return getUnhandledContext (*Data, Guid, NumCounters);
352+ if (!CtxRoot)
353+ return getUnhandledContext (*Data, Callee, Guid, NumCounters, NumCallsites,
354+ nullptr );
321355
322356 // also fast "out" if the caller is scratch. We can see if it's scratch by
323357 // looking at the interior pointer into the subcontexts vector that the caller
@@ -326,7 +360,8 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
326360 // precisely, aligned - 8 values)
327361 auto **CallsiteContext = consume (__llvm_ctx_profile_callsite[0 ]);
328362 if (!CallsiteContext || isScratch (CallsiteContext))
329- return getUnhandledContext (*Data, Guid, NumCounters);
363+ return getUnhandledContext (*Data, Callee, Guid, NumCounters, NumCallsites,
364+ CtxRoot);
330365
331366 // if the callee isn't the expected one, return scratch.
332367 // Signal handler(s) could have been invoked at any point in the execution.
@@ -344,7 +379,8 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
344379 // for that case.
345380 auto *ExpectedCallee = consume (__llvm_ctx_profile_expected_callee[0 ]);
346381 if (ExpectedCallee != Callee)
347- return getUnhandledContext (*Data, Guid, NumCounters);
382+ return getUnhandledContext (*Data, Callee, Guid, NumCounters, NumCallsites,
383+ CtxRoot);
348384
349385 auto *Callsite = *CallsiteContext;
350386 // in the case of indirect calls, we will have all seen targets forming a
@@ -366,40 +402,26 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
366402 return Ret;
367403}
368404
369- ContextNode *__llvm_ctx_profile_start_context (
370- FunctionData *FData, GUID Guid, uint32_t Counters,
371- uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
372- IsUnderContext = true ;
373-
374- auto *Root = FData->getOrAllocateContextRoot ();
375-
376- __sanitizer::atomic_fetch_add (&Root->TotalEntries , 1 ,
377- __sanitizer::memory_order_relaxed);
405+ ContextNode *__llvm_ctx_profile_start_context (FunctionData *FData, GUID Guid,
406+ uint32_t Counters,
407+ uint32_t Callsites) {
378408
379- if (!Root->FirstMemBlock ) {
380- setupContext (Root, Guid, Counters, Callsites);
381- }
382- if (Root->Taken .TryLock ()) {
383- __llvm_ctx_profile_current_context_root = Root;
384- onContextEnter (*Root->FirstNode );
385- return Root->FirstNode ;
386- }
387- // If this thread couldn't take the lock, return scratch context.
388- __llvm_ctx_profile_current_context_root = nullptr ;
389- return TheScratchContext;
409+ return tryStartContextGivenRoot (FData->getOrAllocateContextRoot (), Guid,
410+ Counters, Callsites);
390411}
391412
392413void __llvm_ctx_profile_release_context (FunctionData *FData)
393414 SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
415+ const auto *CurrentRoot = __llvm_ctx_profile_current_context_root;
416+ if (!CurrentRoot || FData->CtxRoot != CurrentRoot)
417+ return ;
394418 IsUnderContext = false ;
395- if (__llvm_ctx_profile_current_context_root) {
396- __llvm_ctx_profile_current_context_root = nullptr ;
397- assert (FData->CtxRoot );
398- FData->CtxRoot ->Taken .Unlock ();
399- }
419+ assert (FData->CtxRoot );
420+ __llvm_ctx_profile_current_context_root = nullptr ;
421+ FData->CtxRoot ->Taken .Unlock ();
400422}
401423
402- void __llvm_ctx_profile_start_collection () {
424+ void __llvm_ctx_profile_start_collection (unsigned AutodetectDuration ) {
403425 size_t NumMemUnits = 0 ;
404426 __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock (
405427 &AllContextsMutex);
@@ -415,12 +437,24 @@ void __llvm_ctx_profile_start_collection() {
415437 resetContextNode (*Root->FirstUnhandledCalleeNode );
416438 __sanitizer::atomic_store_relaxed (&Root->TotalEntries , 0 );
417439 }
440+ if (AutodetectDuration) {
441+ auto *RD = new (__sanitizer::InternalAlloc (sizeof (RootAutoDetector)))
442+ RootAutoDetector (AllFunctionsData, RootDetector, AutodetectDuration);
443+ RD->start ();
444+ } else {
445+ __sanitizer::Printf (" [ctxprof] Initial NumMemUnits: %zu \n " , NumMemUnits);
446+ }
418447 __sanitizer::atomic_store_relaxed (&ProfilingStarted, true );
419- __sanitizer::Printf (" [ctxprof] Initial NumMemUnits: %zu \n " , NumMemUnits);
420448}
421449
422450bool __llvm_ctx_profile_fetch (ProfileWriter &Writer) {
423451 __sanitizer::atomic_store_relaxed (&ProfilingStarted, false );
452+ if (auto *RD = getRootDetector ()) {
453+ __sanitizer::Printf (" [ctxprof] Expected the root autodetector to have "
454+ " finished well before attempting to fetch a context" );
455+ RD->join ();
456+ }
457+
424458 __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock (
425459 &AllContextsMutex);
426460
@@ -445,8 +479,9 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
445479 const auto *Pos = reinterpret_cast <const FunctionData *>(
446480 __sanitizer::atomic_load_relaxed (&AllFunctionsData));
447481 for (; Pos; Pos = Pos->Next )
448- Writer.writeFlat (Pos->FlatCtx ->guid (), Pos->FlatCtx ->counters (),
449- Pos->FlatCtx ->counters_size ());
482+ if (!Pos->CtxRoot )
483+ Writer.writeFlat (Pos->FlatCtx ->guid (), Pos->FlatCtx ->counters (),
484+ Pos->FlatCtx ->counters_size ());
450485 Writer.endFlatSection ();
451486 return true ;
452487}
0 commit comments