Skip to content

Commit edf55b9

Browse files
committed
timing: Create ITTAPI events on the fly
Instead of creating all ITTAPI events up front during init, this change creates each ITTAPI event lazily, using a statically allocated object at the macro call site to track whether the event has already been created. This makes our generation of events more similar to the Tracy API, where source locations are generated statically, inline at each macro call site, instead of all being constructed up front.
1 parent f9c9d22 commit edf55b9

File tree

2 files changed

+21
-23
lines changed

2 files changed

+21
-23
lines changed

src/timing.c

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,6 @@ const char *jl_timing_names[(int)JL_TIMING_LAST] =
5050

5151
JL_DLLEXPORT jl_timing_counter_t jl_timing_counters[JL_TIMING_COUNTER_LAST];
5252

53-
#ifdef USE_ITTAPI
54-
JL_DLLEXPORT __itt_event jl_timing_ittapi_events[(int)JL_TIMING_EVENT_LAST];
55-
#endif
56-
5753
void jl_print_timings(void)
5854
{
5955
#ifdef USE_TIMING_COUNTS
@@ -91,9 +87,6 @@ void jl_init_timing(void)
9187

9288
int i __attribute__((unused)) = 0;
9389
#ifdef USE_ITTAPI
94-
#define X(name) jl_timing_ittapi_events[i++] = __itt_event_create(#name, strlen(#name));
95-
JL_TIMING_EVENTS
96-
#undef X
9790
i = 0;
9891
#define X(name) jl_timing_counters[i++].ittapi_counter = __itt_counter_create(#name, "julia.runtime");
9992
JL_TIMING_COUNTERS

src/timing.h

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -220,10 +220,8 @@ enum jl_timing_counter_types {
220220
JL_TIMING_COUNTER_LAST
221221
};
222222

223-
/**
224-
* Timing back-ends differ in terms of whether they support nested
225-
* and asynchronous events.
226-
**/
223+
#define TIMING_XCONCAT(x1, x2) x1##x2
224+
#define TIMING_CONCAT(x1, x2) TIMING_XCONCAT(x1, x2)
227225

228226
/**
229227
* Timing Backend: Aggregated timing counts (implemented in timing.c)
@@ -249,8 +247,8 @@ enum jl_timing_counter_types {
249247

250248
#ifdef USE_TRACY
251249
#define _TRACY_CTX_MEMBER TracyCZoneCtx tracy_ctx; const struct ___tracy_source_location_data *tracy_srcloc;
252-
#define _TRACY_CTOR(block, name) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; \
253-
(block)->tracy_srcloc = &TracyConcat(__tracy_source_location,TracyLine)
250+
#define _TRACY_CTOR(block, name) static const struct ___tracy_source_location_data TIMING_CONCAT(__tracy_source_location,__LINE__) = { name, __func__, TracyFile, (uint32_t)__LINE__, 0 }; \
251+
(block)->tracy_srcloc = &TIMING_CONCAT(__tracy_source_location,__LINE__)
254252
#define _TRACY_START(block) (block)->tracy_ctx = ___tracy_emit_zone_begin( (block)->tracy_srcloc, 1 );
255253
#define _TRACY_STOP(ctx) TracyCZoneEnd(*ctx)
256254
#else
@@ -260,12 +258,21 @@ enum jl_timing_counter_types {
260258
#define _TRACY_STOP(block)
261259
#endif
262260

261+
/**
262+
* Timing Backend: Intel VTune (ITTAPI)
263+
**/
264+
263265
#ifdef USE_ITTAPI
264-
#define _ITTAPI_CTX_MEMBER
265-
#define _ITTAPI_START(block) __itt_event_start(jl_timing_ittapi_events[block->event])
266-
#define _ITTAPI_STOP(block) __itt_event_end(jl_timing_ittapi_events[block->event])
266+
#define _ITTAPI_CTX_MEMBER __itt_event ittapi_event;
267+
#define _ITTAPI_CTOR(block, name) static __itt_event TIMING_CONCAT(__itt_event,__LINE__) = INT_MAX; \
268+
if (TIMING_CONCAT(__itt_event,__LINE__) == INT_MAX) \
269+
TIMING_CONCAT(__itt_event,__LINE__) = __itt_event_create(name, strlen(name)); \
270+
(block)->ittapi_event = TIMING_CONCAT(__itt_event,__LINE__)
271+
#define _ITTAPI_START(block) __itt_event_start((block)->ittapi_event)
272+
#define _ITTAPI_STOP(block) __itt_event_end((block)->ittapi_event)
267273
#else
268274
#define _ITTAPI_CTX_MEMBER
275+
#define _ITTAPI_CTOR(block, name)
269276
#define _ITTAPI_START(block)
270277
#define _ITTAPI_STOP(block)
271278
#endif
@@ -316,9 +323,6 @@ STATIC_INLINE void _jl_timing_counts_destroy(jl_timing_counts_t *block, int subs
316323

317324
extern JL_DLLEXPORT uint64_t jl_timing_enable_mask;
318325
extern const char *jl_timing_names[(int)JL_TIMING_LAST];
319-
#ifdef USE_ITTAPI
320-
extern JL_DLLEXPORT __itt_event jl_timing_ittapi_events[(int)JL_TIMING_EVENT_LAST];
321-
#endif
322326

323327
struct _jl_timing_block_t { // typedef in julia.h
324328
struct _jl_timing_block_t *prev;
@@ -401,11 +405,12 @@ STATIC_INLINE void _jl_timing_suspend_destroy(jl_timing_suspend_t *suspend) JL_N
401405
JL_TIMING_CREATE_BLOCK(__timing_block, subsystem, event); \
402406
jl_timing_block_start(&__timing_block)
403407

404-
#define JL_TIMING_CREATE_BLOCK(new_block_name, subsystem, event) \
408+
#define JL_TIMING_CREATE_BLOCK(block, subsystem, event) \
405409
__attribute__((cleanup(_jl_timing_block_destroy))) \
406-
jl_timing_block_t new_block_name; \
407-
_jl_timing_block_ctor(&new_block_name, JL_TIMING_ ## subsystem, JL_TIMING_EVENT_ ## event); \
408-
_TRACY_CTOR(&new_block_name, #event)
410+
jl_timing_block_t block; \
411+
_jl_timing_block_ctor(&block, JL_TIMING_ ## subsystem, JL_TIMING_EVENT_ ## event); \
412+
_TRACY_CTOR(&block, #event); \
413+
_ITTAPI_CTOR(&block, #event)
409414

410415
#define JL_TIMING_SUSPEND_TASK(subsystem, ct) \
411416
__attribute__((cleanup(_jl_timing_suspend_destroy))) \

0 commit comments

Comments
 (0)