@@ -41,7 +41,44 @@ Arena *FlatCtxArena = nullptr;
 
 // Set to true when we enter a root, and false when we exit - regardless if this
 // thread collects a contextual profile for that root.
-__thread bool IsUnderContext = false;
+__thread int UnderContextRefCount = 0;
+__thread void *volatile EnteredContextAddress = 0;
+
+void onFunctionEntered(void *Address) {
+  UnderContextRefCount += (Address == EnteredContextAddress);
+  assert(UnderContextRefCount > 0);
+}
+
+void onFunctionExited(void *Address) {
+  UnderContextRefCount -= (Address == EnteredContextAddress);
+  assert(UnderContextRefCount >= 0);
+}
+
+// Returns true if the root was entered for the first time.
+bool rootEnterIsFirst(void *Address) {
+  bool Ret = false;
+  if (!EnteredContextAddress) {
+    EnteredContextAddress = Address;
+    assert(UnderContextRefCount == 0);
+    Ret = true;
+  }
+  onFunctionEntered(Address);
+  return Ret;
+}
+
+// Returns true if this also exits the root.
+bool exitsRoot(void *Address) {
+  onFunctionExited(Address);
+  if (UnderContextRefCount == 0) {
+    EnteredContextAddress = nullptr;
+    return true;
+  }
+  return false;
+
+}
+
+bool hasEnteredARoot() { return UnderContextRefCount > 0; }
+
 __sanitizer::atomic_uint8_t ProfilingStarted = {};
 
 __sanitizer::atomic_uintptr_t RootDetector = {};
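
Taken together, these helpers replace the old IsUnderContext boolean with a per-thread refcount keyed on the root's address, so recursive activations of a root can be told apart from the outermost one. A standalone sketch of the mechanism (simplified, hypothetical names; no sanitizer dependencies), with a small driver showing that only the outermost exit is observed:

```cpp
#include <cassert>
#include <cstdio>

// Thread-local state mirroring UnderContextRefCount / EnteredContextAddress.
thread_local int RefCount = 0;
thread_local void *RootAddress = nullptr;

// Only entries matching the tracked root bump the count, so recursive
// activations balance out and the outermost exit is detectable.
bool rootEnterIsFirst(void *Address) {
  bool First = false;
  if (!RootAddress) {
    RootAddress = Address;
    assert(RefCount == 0);
    First = true;
  }
  RefCount += (Address == RootAddress);
  return First;
}

bool exitsRoot(void *Address) {
  RefCount -= (Address == RootAddress);
  assert(RefCount >= 0);
  if (RefCount == 0) {
    RootAddress = nullptr;
    return true; // outermost exit
  }
  return false;
}

static int RootTag; // stand-in for the root function's entry address

void root(int Depth) {
  bool First = rootEnterIsFirst(&RootTag);
  if (Depth > 0)
    root(Depth - 1); // re-entry: counted, but not "first"
  if (exitsRoot(&RootTag))
    std::printf("outermost exit (first entry: %d)\n", First);
}

int main() { root(3); } // prints exactly once, from the outermost frame
```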
@@ -287,62 +324,65 @@ ContextRoot *FunctionData::getOrAllocateContextRoot() {
   return Root;
 }
 
-ContextNode *tryStartContextGivenRoot(ContextRoot *Root, GUID Guid,
-                                      uint32_t Counters, uint32_t Callsites)
-    SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
-  IsUnderContext = true;
-  __sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
-                                __sanitizer::memory_order_relaxed);
+ContextNode *tryStartContextGivenRoot(
+    ContextRoot *Root, void *EntryAddress, GUID Guid, uint32_t Counters,
+    uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+
+  if (rootEnterIsFirst(EntryAddress))
+    __sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
+                                  __sanitizer::memory_order_relaxed);
   if (!Root->FirstMemBlock) {
     setupContext(Root, Guid, Counters, Callsites);
   }
   if (Root->Taken.TryLock()) {
+    assert(__llvm_ctx_profile_current_context_root == nullptr);
     __llvm_ctx_profile_current_context_root = Root;
     onContextEnter(*Root->FirstNode);
     return Root->FirstNode;
   }
   // If this thread couldn't take the lock, return scratch context.
-  __llvm_ctx_profile_current_context_root = nullptr;
   return TheScratchContext;
 }
 
+ContextNode *getOrStartContextOutsideCollection(FunctionData &Data,
+                                                ContextRoot *OwnCtxRoot,
+                                                void *Callee, GUID Guid,
+                                                uint32_t NumCounters,
+                                                uint32_t NumCallsites) {
+  // This must only be called when __llvm_ctx_profile_current_context_root is
+  // null.
+  assert(__llvm_ctx_profile_current_context_root == nullptr);
+  // OwnCtxRoot is Data.CtxRoot. Since it's volatile and is used by the caller,
+  // pre-load it.
+  assert(Data.CtxRoot == OwnCtxRoot);
+  // If we have a root detector, try sampling.
+  // Otherwise - regardless of whether profiling has started - if Data.CtxRoot
+  // is allocated, try starting a context tree, as if
+  // __llvm_ctx_profile_start_context had been called.
+  if (auto *RAD = getRootDetector())
+    RAD->sample();
+  else if (reinterpret_cast<uintptr_t>(OwnCtxRoot) > 1)
+    return tryStartContextGivenRoot(OwnCtxRoot, Data.EntryAddress, Guid,
+                                    NumCounters, NumCallsites);
+
+  // If we didn't start profiling, or if we are under a context (just not
+  // collecting), return the scratch buffer.
+  if (hasEnteredARoot() ||
+      !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
+    return TheScratchContext;
+  return markAsScratch(
+      onContextEnter(*getFlatProfile(Data, Callee, Guid, NumCounters)));
+}
+
 ContextNode *getUnhandledContext(FunctionData &Data, void *Callee, GUID Guid,
                                  uint32_t NumCounters, uint32_t NumCallsites,
-                                 ContextRoot *CtxRoot) {
-
-  // 1) if we are currently collecting a contextual profile, fetch a ContextNode
-  // in the `Unhandled` set. We want to do this regardless of `ProfilingStarted`
-  // to (hopefully) offset the penalty of creating these contexts to before
-  // profiling.
-  //
-  // 2) if we are under a root (regardless if this thread is collecting or not a
-  // contextual profile for that root), do not collect a flat profile. We want
-  // to keep flat profiles only for activations that can't happen under a root,
-  // to avoid confusing profiles. We can, for example, combine flattened and
-  // flat profiles meaningfully, as we wouldn't double-count anything.
-  //
-  // 3) to avoid lengthy startup, don't bother with flat profiles until the
-  // profiling has started. We would reset them anyway when profiling starts.
-  // HOWEVER. This does lose profiling for message pumps: those functions are
-  // entered once and never exit. They should be assumed to be entered before
-  // profiling starts - because profiling should start after the server is up
-  // and running (which is equivalent to "message pumps are set up").
-  if (!CtxRoot) {
-    if (auto *RAD = getRootDetector())
-      RAD->sample();
-    else if (auto *CR = Data.CtxRoot) {
-      if (canBeRoot(CR))
-        return tryStartContextGivenRoot(CR, Guid, NumCounters, NumCallsites);
-    }
-    if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
-      return TheScratchContext;
-    else
-      return markAsScratch(
-          onContextEnter(*getFlatProfile(Data, Callee, Guid, NumCounters)));
-  }
-  auto [Iter, Ins] = CtxRoot->Unhandled.insert({Guid, nullptr});
+                                 ContextRoot &CtxRoot) {
+  // This must only be called when
+  // __llvm_ctx_profile_current_context_root is not null.
+  assert(__llvm_ctx_profile_current_context_root != nullptr);
+  auto [Iter, Ins] = CtxRoot.Unhandled.insert({Guid, nullptr});
   if (Ins)
-    Iter->second = getCallsiteSlow(Guid, &CtxRoot->FirstUnhandledCalleeNode,
+    Iter->second = getCallsiteSlow(Guid, &CtxRoot.FirstUnhandledCalleeNode,
                                    NumCounters, 0);
   return markAsScratch(onContextEnter(*Iter->second));
 }
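
Note the TryLock pattern in tryStartContextGivenRoot: at most one thread owns a root's context tree, and contended threads record into scratch rather than block. A simplified model of that pattern (hypothetical types; std::mutex standing in for the sanitizer spin mutex):

```cpp
#include <mutex>

struct Root {
  std::mutex Taken;       // stands in for Root->Taken
  int *Tree = nullptr;    // stands in for Root->FirstNode
  int *Scratch = nullptr; // stands in for TheScratchContext
};

// The winner owns the tree until the outermost root exit unlocks it;
// contended threads fall back to scratch so the hot path never blocks.
int *tryStart(Root &R) {
  if (R.Taken.try_lock())
    return R.Tree;
  return R.Scratch;
}
```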
@@ -351,10 +391,13 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
                                             GUID Guid, uint32_t NumCounters,
                                             uint32_t NumCallsites) {
   auto *CtxRoot = __llvm_ctx_profile_current_context_root;
-  // fast "out" if we're not even doing contextual collection.
+  auto *OwnCtxRoot = Data->CtxRoot;
   if (!CtxRoot)
-    return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
-                               nullptr);
+    return getOrStartContextOutsideCollection(*Data, OwnCtxRoot, Callee, Guid,
+                                              NumCounters, NumCallsites);
+  onFunctionEntered(Callee);
+  assert(canBeRoot(CtxRoot));
+  // Re-entry into the root we're collecting was counted by onFunctionEntered.
 
   // also fast "out" if the caller is scratch. We can see if it's scratch by
   // looking at the interior pointer into the subcontexts vector that the caller
@@ -364,7 +407,7 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
   auto **CallsiteContext = consume(__llvm_ctx_profile_callsite[0]);
   if (!CallsiteContext || isScratch(CallsiteContext))
     return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
-                               CtxRoot);
+                               *CtxRoot);
 
   // if the callee isn't the expected one, return scratch.
   // Signal handler(s) could have been invoked at any point in the execution.
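
The isScratch/markAsScratch calls above rely on pointer tagging. As an assumption about the scheme (the real helpers live in the ctx_profile headers and may differ): ContextNode pointers are at least 2-byte aligned, so the low bit is free to mark a node as scratch:

```cpp
#include <cstdint>

struct ContextNode; // opaque here; assumed to be at least 2-byte aligned

// Tag the low bit to mean "scratch"; consumers check the bit before treating
// the pointer as a real, tree-linked ContextNode.
inline ContextNode *markAsScratch(ContextNode *Ctx) {
  return reinterpret_cast<ContextNode *>(reinterpret_cast<uintptr_t>(Ctx) | 1);
}

inline bool isScratch(const void *Ctx) {
  return reinterpret_cast<uintptr_t>(Ctx) & 1;
}
```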
@@ -383,7 +426,7 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
   auto *ExpectedCallee = consume(__llvm_ctx_profile_expected_callee[0]);
   if (ExpectedCallee != Callee)
     return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
-                               CtxRoot);
+                               *CtxRoot);
 
   auto *Callsite = *CallsiteContext;
   // in the case of indirect calls, we will have all seen targets forming a
@@ -410,16 +453,20 @@ ContextNode *__llvm_ctx_profile_start_context(FunctionData *FData, GUID Guid,
                                               uint32_t Callsites) {
   auto *Root = FData->getOrAllocateContextRoot();
   assert(canBeRoot(Root));
-  return tryStartContextGivenRoot(Root, Guid, Counters, Callsites);
+  auto *EntryAddress = FData->EntryAddress;
+  return tryStartContextGivenRoot(Root, EntryAddress, Guid, Counters,
+                                  Callsites);
 }
 
 void __llvm_ctx_profile_release_context(FunctionData *FData)
     SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  if (!exitsRoot(FData->EntryAddress))
+    return;
   const auto *CurrentRoot = __llvm_ctx_profile_current_context_root;
   auto *CR = FData->CtxRoot;
   if (!CurrentRoot || CR != CurrentRoot)
     return;
-  IsUnderContext = false;
+
   assert(CR && canBeRoot(CR));
   __llvm_ctx_profile_current_context_root = nullptr;
   CR->Taken.Unlock();
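
With exitsRoot guarding the release path, a nested re-entry of the root decrements the refcount without unlocking; only the outermost exit hands the root back. A self-contained toy model of that start/release pairing (hypothetical names):

```cpp
#include <cassert>

thread_local int RefCount = 0;
thread_local bool Owned = false;

void startContext() {
  if (RefCount++ == 0)
    Owned = true; // outermost entry: analogous to Taken.TryLock() succeeding
}

void releaseContext() {
  assert(RefCount > 0);
  if (--RefCount == 0)
    Owned = false; // outermost exit: analogous to Taken.Unlock()
}

int main() {
  startContext();
  startContext();   // recursive re-entry of the same root
  releaseContext(); // inner exit: root stays owned
  assert(Owned);
  releaseContext(); // outermost exit: root released
  assert(!Owned);
}
```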
@@ -500,6 +547,10 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
 void __llvm_ctx_profile_free() {
   __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
   {
+    __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
+    if (auto *RD = getRootDetector()) {
+      RD->join();
+    }
     __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
         &AllContextsMutex);
     for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
@@ -522,5 +573,7 @@ void __llvm_ctx_profile_free() {
     }
 
     FlatCtxArenaHead = nullptr;
+    UnderContextRefCount = 0;
+    EnteredContextAddress = nullptr;
   }
 }
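
The last two hunks fix teardown ordering: the root detector's sampling thread is joined before the arenas it may inspect are freed, and the new per-thread state is reset so a subsequent session starts clean. A hypothetical shape for such a join (assumed fields; the actual RootAutoDetector API is not shown here):

```cpp
#include <atomic>
#include <thread>

struct Detector {
  std::atomic<bool> StopRequested{false};
  std::thread Worker;

  // Ask the sampling thread to stop, then wait for it; only after this
  // returns is it safe to free the memory the thread was inspecting.
  void join() {
    StopRequested.store(true, std::memory_order_relaxed);
    if (Worker.joinable())
      Worker.join();
  }
};
```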