 
 #include "CtxInstrProfiling.h"
 #include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_atomic_clang.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_dense_map.h"
 #include "sanitizer_common/sanitizer_libc.h"
@@ -27,6 +29,20 @@ __sanitizer::SpinMutex AllContextsMutex;
 SANITIZER_GUARDED_BY(AllContextsMutex)
 __sanitizer::Vector<ContextRoot *> AllContextRoots;
 
+__sanitizer::atomic_uintptr_t AllFunctionsData = {};
+
+// Keep all the functions for which we collect a flat profile in a linked list.
+__sanitizer::SpinMutex FlatCtxArenaMutex;
+SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
+Arena *FlatCtxArenaHead = nullptr;
+SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
+Arena *FlatCtxArena = nullptr;
+
+// Set to true when we enter a root, and false when we exit, regardless of
+// whether this thread collects a contextual profile for that root.
+__thread bool IsUnderContext = false;
+__sanitizer::atomic_uint8_t ProfilingStarted = {};
+
 // utility to taint a pointer by setting the LSB. There is an assumption
 // throughout that the addresses of contexts are even (really, they should be
 // align(8), but "even"-ness is the minimum assumption)
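For readers less familiar with this file, the pointer "tainting" described above is plain low-bit tagging: context nodes are at least 2-byte aligned, so the LSB is free to mark a pointer as scratch. A minimal standalone sketch of the idiom, with hypothetical names (the file's real helpers, markAsScratch and isScratch, appear later in this diff):

    #include <cstdint>

    struct NodeStub {}; // stand-in for ContextNode; allocations are at least 2-byte aligned

    // Taint: set the LSB, which the alignment assumption guarantees is zero.
    NodeStub *taint(NodeStub *P) {
      return reinterpret_cast<NodeStub *>(reinterpret_cast<uintptr_t>(P) | 1);
    }
    // A tainted ("scratch") pointer is recognized by its LSB.
    bool isTainted(const NodeStub *P) { return reinterpret_cast<uintptr_t>(P) & 1; }
    // Untaint: clear the LSB to recover the original, dereferenceable pointer.
    NodeStub *untaint(NodeStub *P) {
      return reinterpret_cast<NodeStub *>(reinterpret_cast<uintptr_t>(P) & ~uintptr_t(1));
    }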
@@ -109,7 +125,10 @@ void resetContextNode(ContextNode &Node) {
       resetContextNode(*Next);
 }
 
-void onContextEnter(ContextNode &Node) { ++Node.counters()[0]; }
+ContextNode *onContextEnter(ContextNode &Node) {
+  ++Node.counters()[0];
+  return &Node;
+}
 
 } // namespace
 
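Note the design choice above: onContextEnter now returns the node it just incremented rather than void, so the increment can be folded into a pointer expression. The flat-profile path introduced below relies on exactly that shape; a hypothetical helper spelling it out:

    // Illustrative only: bump the entry counter, then return a scratch-tainted
    // pointer so the callsite machinery will not treat the node as a subtree.
    ContextNode *enterFlatSketch(ContextNode &N) {
      return markAsScratch(onContextEnter(N));
    }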
@@ -182,12 +201,74 @@ ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
   return Ret;
 }
 
-ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
-                                            uint32_t NumCounters,
+ContextNode *getFlatProfile(FunctionData &Data, GUID Guid, uint32_t NumCounters) {
+  if (ContextNode *Existing = Data.FlatCtx)
+    return Existing;
+  {
+    // We could instead try to take the lock and, if that fails, return
+    // TheScratchContext. But that could leave message pump loops more sparsely
+    // profiled than everything else. Maybe that doesn't matter, and we can
+    // optimize this later.
+    __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Data.Mutex);
+    if (ContextNode *Existing = Data.FlatCtx)
+      return Existing;
+
+    auto NeededSize = ContextNode::getAllocSize(NumCounters, 0);
+    char *AllocBuff = nullptr;
+    {
+      __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> FL(
+          &FlatCtxArenaMutex);
+      if (FlatCtxArena)
+        AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
+      if (!AllocBuff) {
+        FlatCtxArena = Arena::allocateNewArena(getArenaAllocSize(NeededSize),
+                                               FlatCtxArena);
+        AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
+      }
+      if (!FlatCtxArenaHead)
+        FlatCtxArenaHead = FlatCtxArena;
+    }
+    auto *Ret = allocContextNode(AllocBuff, Guid, NumCounters, 0);
+    Data.FlatCtx = Ret;
+
+    Data.Next = reinterpret_cast<FunctionData *>(
+        __sanitizer::atomic_load_relaxed(&AllFunctionsData));
+    while (!__sanitizer::atomic_compare_exchange_strong(
+        &AllFunctionsData, reinterpret_cast<uintptr_t *>(&Data.Next),
+        reinterpret_cast<uintptr_t>(&Data),
+        __sanitizer::memory_order_release)) {
+    }
+  }
+
+  return Data.FlatCtx;
+}
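The registration at the end of getFlatProfile is the standard lock-free prepend onto an atomic list head: on CAS failure, the expected value (Data.Next) is refreshed with the current head and the loop retries. A minimal standalone sketch of the same pattern with std::atomic and a hypothetical Node type (entries are only ever added, never removed, matching how AllFunctionsData is used):

    #include <atomic>

    struct Node {
      Node *Next = nullptr;
    };

    std::atomic<Node *> ListHead{nullptr};

    void prepend(Node *N) {
      // Seed Next with the current head; compare_exchange_weak rewrites N->Next
      // with the observed head on failure, so the loop body stays empty.
      // Release ordering publishes N's fields to readers that load ListHead.
      N->Next = ListHead.load(std::memory_order_relaxed);
      while (!ListHead.compare_exchange_weak(N->Next, N,
                                             std::memory_order_release,
                                             std::memory_order_relaxed)) {
      }
    }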
+
+ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
+                                 uint32_t NumCounters) {
+  // 1) if we are under a root (regardless of whether this thread is collecting
+  // a contextual profile for that root), do not collect a flat profile. We
+  // want to keep flat profiles only for activations that can't happen under a
+  // root, to avoid confusing profiles. We can then, for example, combine
+  // flattened and flat profiles meaningfully, as we wouldn't double-count
+  // anything.
+  //
+  // 2) to avoid lengthy startup, don't bother with flat profiles until
+  // profiling has started; we would reset them anyway when profiling starts.
+  // HOWEVER, this does lose profiling for message pumps: those functions are
+  // entered once and never exit. They should be assumed to be entered before
+  // profiling starts, because profiling should start after the server is up
+  // and running (which is equivalent to "message pumps are set up").
+  if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
+    return TheScratchContext;
+  return markAsScratch(
+      onContextEnter(*getFlatProfile(Data, Guid, NumCounters)));
+}
+
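The FunctionData parameter threaded through the entry point below is only touched through three members in this diff. A sketch of the shape those uses imply (illustrative; the authoritative declaration lives in CtxInstrProfiling.h and may contain more):

    // Inferred from Data.Mutex, Data.FlatCtx and Data.Next in the code above.
    struct FunctionDataSketch {
      FunctionDataSketch *Next = nullptr; // intrusive list headed by AllFunctionsData
      ContextNode *FlatCtx = nullptr;     // lazily allocated flat profile
      __sanitizer::StaticSpinMutex Mutex; // guards one-time FlatCtx initialization
    };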
+ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
+                                            GUID Guid, uint32_t NumCounters,
                                             uint32_t NumCallsites) {
   // fast "out" if we're not even doing contextual collection.
   if (!__llvm_ctx_profile_current_context_root)
-    return TheScratchContext;
+    return getUnhandledContext(*Data, Guid, NumCounters);
 
   // also fast "out" if the caller is scratch. We can see if it's scratch by
   // looking at the interior pointer into the subcontexts vector that the caller
@@ -196,7 +277,7 @@ ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
   // precisely, aligned - 8 values)
   auto **CallsiteContext = consume(__llvm_ctx_profile_callsite[0]);
   if (!CallsiteContext || isScratch(CallsiteContext))
-    return TheScratchContext;
+    return getUnhandledContext(*Data, Guid, NumCounters);
 
   // if the callee isn't the expected one, return scratch.
   // Signal handler(s) could have been invoked at any point in the execution.
@@ -214,7 +295,7 @@ ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
   // for that case.
   auto *ExpectedCallee = consume(__llvm_ctx_profile_expected_callee[0]);
   if (ExpectedCallee != Callee)
-    return TheScratchContext;
+    return getUnhandledContext(*Data, Guid, NumCounters);
 
   auto *Callsite = *CallsiteContext;
   // in the case of indirect calls, we will have all seen targets forming a
@@ -257,6 +338,7 @@ void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters,
 ContextNode *__llvm_ctx_profile_start_context(
     ContextRoot *Root, GUID Guid, uint32_t Counters,
     uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  IsUnderContext = true;
   if (!Root->FirstMemBlock) {
     setupContext(Root, Guid, Counters, Callsites);
   }
@@ -272,6 +354,7 @@ ContextNode *__llvm_ctx_profile_start_context(
 
 void __llvm_ctx_profile_release_context(ContextRoot *Root)
     SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  IsUnderContext = false;
   if (__llvm_ctx_profile_current_context_root) {
     __llvm_ctx_profile_current_context_root = nullptr;
     Root->Taken.Unlock();
@@ -291,10 +374,12 @@ void __llvm_ctx_profile_start_collection() {
 
     resetContextNode(*Root->FirstNode);
   }
+  __sanitizer::atomic_store_relaxed(&ProfilingStarted, true);
   __sanitizer::Printf("[ctxprof] Initial NumMemUnits: %zu \n", NumMemUnits);
 }
 
 bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
+  __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
   __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
       &AllContextsMutex);
 
@@ -310,17 +395,42 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
     Writer.writeContextual(*Root->FirstNode);
   }
   Writer.endContextSection();
+  Writer.startFlatSection();
+  // The list only ever grows at the head, so taking this snapshot allows the
+  // list to keep growing concurrently without racing with our traversal of it.
+  const auto *Pos = reinterpret_cast<const FunctionData *>(
+      __sanitizer::atomic_load_relaxed(&AllFunctionsData));
+  for (; Pos; Pos = Pos->Next)
+    Writer.writeFlat(Pos->FlatCtx->guid(), Pos->FlatCtx->counters(),
+                     Pos->FlatCtx->counters_size());
+  Writer.endFlatSection();
   return true;
 }
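The flat-section walk above pairs with the prepend sketched earlier: because entries are only ever added at the head, everything reachable from the snapshot is immutable and can be traversed without a lock. Continuing that sketch (same Node and ListHead; the runtime itself uses the sanitizer atomics rather than std::atomic):

    // Visit a snapshot of the list; entries prepended after the load are
    // simply not visited. The acquire load pairs with the release CAS in
    // prepend(), making each visited node's fields visible.
    void forEachNode(void (*Visit)(const Node &)) {
      for (const Node *P = ListHead.load(std::memory_order_acquire); P;
           P = P->Next)
        Visit(*P);
    }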
 
 void __llvm_ctx_profile_free() {
-  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
-      &AllContextsMutex);
-  for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
-    for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
+  {
+    __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+        &AllContextsMutex);
+    for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
+      for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
+        auto *C = A;
+        A = A->next();
+        __sanitizer::InternalFree(C);
+      }
+    AllContextRoots.Reset();
+  }
+  __sanitizer::atomic_store_relaxed(&AllFunctionsData, 0U);
+  {
+    __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+        &FlatCtxArenaMutex);
+    FlatCtxArena = nullptr;
+    for (auto *A = FlatCtxArenaHead; A;) {
       auto *C = A;
-      A = A->next();
+      A = C->next();
       __sanitizer::InternalFree(C);
     }
-  AllContextRoots.Reset();
+
+    FlatCtxArenaHead = nullptr;
+  }
 }