@@ -26,8 +26,6 @@ static uint64_t t0;
2626
2727JL_DLLEXPORT _Atomic(uint64_t ) jl_timing_disable_mask [(JL_TIMING_SUBSYSTEM_LAST + sizeof (uint64_t ) * CHAR_BIT - 1 ) / (sizeof (uint64_t ) * CHAR_BIT )];
2828
29- static arraylist_t jl_timing_counts_events ;
30-
3129// Used to as an item limit when several strings of metadata can
3230// potentially be associated with a single timing zone.
3331JL_DLLEXPORT uint32_t jl_timing_print_limit = 10 ;
@@ -41,12 +39,23 @@ const char *jl_timing_subsystems[(int)JL_TIMING_SUBSYSTEM_LAST] =
4139
4240JL_DLLEXPORT jl_timing_counter_t jl_timing_counters [JL_TIMING_COUNTER_LAST ];
4341
42+ #ifdef USE_TIMING_COUNTS
43+ static arraylist_t jl_timing_counts_events ;
44+ static jl_mutex_t jl_timing_counts_events_lock ;
45+ #endif //USE_TIMING_COUNTS
46+
47+ #ifdef USE_ITTAPI
48+ static arraylist_t jl_timing_ittapi_events ;
49+ static jl_mutex_t jl_timing_ittapi_events_lock ;
50+ #endif //USE_ITTAPI
51+
4452void jl_print_timings (void )
4553{
4654#ifdef USE_TIMING_COUNTS
55+ JL_LOCK_NOGC (& jl_timing_counts_events_lock );
4756 uint64_t total_time = cycleclock () - t0 ;
4857 uint64_t root_time = total_time ;
49- for (int i = 0 ; i < jl_timing_counts_events .len ; i ++ ) {
58+ for (int i = 1 ; i < jl_timing_counts_events .len ; i ++ ) {
5059 jl_timing_counts_event_t * other_event = (jl_timing_counts_event_t * )jl_timing_counts_events .items [i ];
5160 root_time -= jl_atomic_load_relaxed (& other_event -> self );
5261 }
@@ -66,6 +75,7 @@ void jl_print_timings(void)
6675 self , 100 * (((double )self ) / total_time ),
6776 total , 100 * (((double )total ) / total_time ));
6877 }
78+ JL_UNLOCK_NOGC (& jl_timing_counts_events_lock );
6979
7080 fprintf (stderr , "\nJULIA COUNTERS\n" );
7181 fprintf (stderr , "%-25s, %-20s\n" , "Counter" , "Value" );
@@ -91,16 +101,25 @@ void jl_init_timing(void)
91101 _Static_assert (JL_TIMING_SUBSYSTEM_LAST < sizeof (uint64_t ) * CHAR_BIT , "Too many timing subsystems!" );
92102
93103#ifdef USE_TIMING_COUNTS
104+ JL_MUTEX_INIT (& jl_timing_counts_events_lock , "jl_timing_counts_events_lock" );
105+
94106 // Create events list for counts backend
95107 arraylist_new (& jl_timing_counts_events , 1 );
96108
97109 jl_timing_counts_event_t * new_event = (jl_timing_counts_event_t * )malloc (sizeof (jl_timing_counts_event_t ));
98110 arraylist_push (& jl_timing_counts_events , (void * )new_event );
99111
100112 new_event -> name = "ROOT" ;
113+ jl_atomic_store_relaxed (& new_event -> self , 0 );
101114 jl_atomic_store_relaxed (& new_event -> total , 0 );
102115#endif
103116
117+ #ifdef USE_ITTAPI
118+ // Create events list for ITTAPI backend
119+ JL_MUTEX_INIT (& jl_timing_ittapi_events_lock , "jl_timing_ittapi_events_lock" );
120+ arraylist_new (& jl_timing_ittapi_events , 0 );
121+ #endif
122+
104123 // Sort the subsystem names for quick enable/disable lookups
105124 qsort (
106125 jl_timing_subsystems , JL_TIMING_SUBSYSTEM_LAST ,
@@ -171,24 +190,60 @@ static const int get_timing_subsystem(const char *subsystem) {
171190 return (int )(match - & jl_timing_subsystems [0 ]);
172191}
173192
193+ #ifdef USE_ITTAPI
194+
195+ typedef struct {
196+ __itt_event event ;
197+ const char * name ;
198+ } cached_ittapi_event_t ;
199+
200+ static __itt_event _jl_timing_ittapi_event_create (const char * event ) {
201+ JL_LOCK_NOGC (& jl_timing_ittapi_events_lock );
202+ const size_t n = jl_timing_ittapi_events .len ;
203+ for (size_t i = 0 ; i < n ; i ++ ) {
204+ cached_ittapi_event_t * other_event = (cached_ittapi_event_t * )jl_timing_ittapi_events .items [i ];
205+ if (strcmp (event , other_event -> name ) == 0 ) {
206+ JL_UNLOCK_NOGC (& jl_timing_ittapi_events_lock );
207+ return other_event -> event ;
208+ }
209+ }
210+
211+ // No matching event found - create a new one
212+ cached_ittapi_event_t * new_event = (cached_ittapi_event_t * )malloc (sizeof (cached_ittapi_event_t ));
213+ arraylist_push (& jl_timing_ittapi_events , (void * )new_event );
214+ new_event -> name = event ;
215+ new_event -> event = __itt_event_create (event , strlen (event ));
216+ JL_UNLOCK_NOGC (& jl_timing_ittapi_events_lock );
217+
218+ return new_event -> event ;
219+ }
220+
221+ #endif // USE_ITTAPI
222+
174223#ifdef USE_TIMING_COUNTS
175224
176225// This function is analogous to __itt_event_create but for the counts backend
177226//
178227// `event` is required to live forever
179228static jl_timing_counts_event_t * _jl_timing_counts_event_create (const char * event ) {
229+ JL_LOCK_NOGC (& jl_timing_counts_events_lock );
180230 const size_t n = jl_timing_counts_events .len ;
181231 for (size_t i = 0 ; i < n ; i ++ ) {
182232 jl_timing_counts_event_t * other_event = (jl_timing_counts_event_t * )jl_timing_counts_events .items [i ];
183- if (strcmp (event , other_event -> name ) == 0 )
233+ if (strcmp (event , other_event -> name ) == 0 ) {
234+ JL_UNLOCK_NOGC (& jl_timing_counts_events_lock );
184235 return other_event ;
236+ }
185237 }
186238
187239 // No matching event found - create a new one
188240 jl_timing_counts_event_t * new_event = (jl_timing_counts_event_t * )malloc (sizeof (jl_timing_counts_event_t ));
189241 arraylist_push (& jl_timing_counts_events , (void * )new_event );
190242 new_event -> name = event ;
243+ jl_atomic_store_relaxed (& new_event -> self , 0 );
191244 jl_atomic_store_relaxed (& new_event -> total , 0 );
245+ JL_UNLOCK_NOGC (& jl_timing_counts_events_lock );
246+
192247 return new_event ;
193248}
194249
@@ -244,6 +299,10 @@ JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, c
244299 event -> counts_event = _jl_timing_counts_event_create (name );
245300#endif // USE_TIMING_COUNTS
246301
302+ #ifdef USE_ITTAPI
303+ event -> ittapi_event = _jl_timing_ittapi_event_create (name );
304+ #endif // USE_ITTAPI
305+
247306#ifdef USE_TRACY
248307 event -> tracy_srcloc .name = name ;
249308 event -> tracy_srcloc .function = function ;
@@ -252,15 +311,17 @@ JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, c
252311 event -> tracy_srcloc .color = color ;
253312#endif // USE_TRACY
254313
255- #ifdef USE_ITTAPI
256- event -> ittapi_event = __itt_event_create (name , strlen (name ));
257- #endif // USE_ITTAPI
258-
259314 return event ;
260315}
261316
262- JL_DLLEXPORT void _jl_timing_block_init (jl_timing_block_t * block , jl_timing_event_t * event ) {
263- memset (block , 0 , sizeof (* block ));
317+ JL_DLLEXPORT void _jl_timing_block_init (char * buf , size_t size , jl_timing_event_t * event ) {
318+ if (size < sizeof (jl_timing_block_t )) {
319+ jl_errorf ("jl_timing_block_t buffer must be at least %d bytes" , sizeof (jl_timing_block_t ));
320+ return ;
321+ }
322+
323+ jl_timing_block_t * block = (jl_timing_block_t * )buf ;
324+ memset (block , 0 , sizeof (jl_timing_block_t ));
264325 block -> event = event ;
265326}
266327
@@ -592,7 +653,7 @@ void jl_destroy_timing(void) { }
592653
593654JL_DLLEXPORT jl_timing_event_t * _jl_timing_event_create (const char * subsystem , const char * name , const char * function , const char * file , int line , int color ) { return NULL ; }
594655
595- JL_DLLEXPORT void _jl_timing_block_init (jl_timing_block_t * block , jl_timing_event_t * event ) { }
656+ JL_DLLEXPORT void _jl_timing_block_init (char * buf , size_t size , jl_timing_event_t * event ) { }
596657JL_DLLEXPORT void _jl_timing_block_start (jl_timing_block_t * block ) { }
597658JL_DLLEXPORT void _jl_timing_block_end (jl_timing_block_t * block ) { }
598659
0 commit comments