Skip to content

Commit 909c57f

Browse files
Merge pull request #49535 from JuliaLang/pc/ittapi-invalidations
Count invalidations, JIT memory, and image memory in profiling reports
2 parents b9806d6 + ee86c06 commit 909c57f

File tree

7 files changed

+151
-12
lines changed

7 files changed

+151
-12
lines changed

src/cgmemmgr.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -866,6 +866,8 @@ uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size,
866866
code_allocated = true;
867867
#endif
868868
total_allocated += Size;
869+
jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size);
870+
jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, Size);
869871
if (exe_alloc)
870872
return (uint8_t*)exe_alloc->alloc(Size, Alignment);
871873
return SectionMemoryManager::allocateCodeSection(Size, Alignment, SectionID,
@@ -879,6 +881,8 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size,
879881
bool isReadOnly)
880882
{
881883
total_allocated += Size;
884+
jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size);
885+
jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, Size);
882886
if (!isReadOnly)
883887
return (uint8_t*)rw_alloc.alloc(Size, Alignment);
884888
if (ro_alloc)

src/gc.c

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1189,10 +1189,16 @@ static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT
11891189
}
11901190
}
11911191

1192+
static int64_t inc_live_bytes(int64_t inc) JL_NOTSAFEPOINT
1193+
{
1194+
jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, inc);
1195+
return live_bytes += inc;
1196+
}
1197+
11921198
void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT
11931199
{
11941200
combine_thread_gc_counts(&gc_num);
1195-
live_bytes += (gc_num.deferred_alloc + gc_num.allocd);
1201+
inc_live_bytes(gc_num.deferred_alloc + gc_num.allocd);
11961202
gc_num.allocd = 0;
11971203
gc_num.deferred_alloc = 0;
11981204
reset_thread_gc_counts();
@@ -3121,9 +3127,9 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
31213127
{
31223128
combine_thread_gc_counts(&gc_num);
31233129

3124-
#ifdef USE_TRACY
3125-
TracyCPlot("Heap size", live_bytes + gc_num.allocd);
3126-
#endif
3130+
// We separate the update of the graph from the update of live_bytes here
3131+
// so that the sweep shows a downward trend in memory usage.
3132+
jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, gc_num.allocd);
31273133

31283134
jl_gc_markqueue_t *mq = &ptls->mark_queue;
31293135

@@ -3369,7 +3375,10 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
33693375
}
33703376

33713377
last_live_bytes = live_bytes;
3378+
// Can't call inc_live_bytes here because we already added allocd
3379+
// to the graph earlier
33723380
live_bytes += -gc_num.freed + gc_num.allocd;
3381+
jl_timing_counter_dec(JL_TIMING_COUNTER_HeapSize, gc_num.freed);
33733382

33743383
if (collection == JL_GC_AUTO) {
33753384
//If we aren't freeing enough or are seeing lots and lots of pointers let it increase faster
@@ -3513,10 +3522,6 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
35133522
SetLastError(last_error);
35143523
#endif
35153524
errno = last_errno;
3516-
3517-
#ifdef USE_TRACY
3518-
TracyCPlot("Heap size", jl_gc_live_bytes());
3519-
#endif
35203525
}
35213526

35223527
void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq)
@@ -3620,9 +3625,6 @@ void jl_gc_init(void)
36203625
if (jl_options.heap_size_hint)
36213626
jl_gc_set_max_memory(jl_options.heap_size_hint);
36223627

3623-
#ifdef USE_TRACY
3624-
TracyCPlotConfig("Heap size", TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
3625-
#endif
36263628
t_start = jl_hrtime();
36273629
}
36283630

@@ -3808,7 +3810,7 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds
38083810

38093811
if (jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED) {
38103812
ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz;
3811-
live_bytes += allocsz - oldsz;
3813+
inc_live_bytes(allocsz - oldsz);
38123814
}
38133815
else if (allocsz < oldsz)
38143816
jl_atomic_store_relaxed(&ptls->gc_num.freed,

src/gf.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1642,6 +1642,7 @@ static void do_nothing_with_codeinst(jl_code_instance_t *ci) {}
16421642
// recursively invalidate cached methods that had an edge to a replaced method
16431643
static void invalidate_method_instance(void (*f)(jl_code_instance_t*), jl_method_instance_t *replaced, size_t max_world, int depth)
16441644
{
1645+
jl_timing_counter_inc(JL_TIMING_COUNTER_Invalidations, 1);
16451646
if (_jl_debug_method_invalidation) {
16461647
jl_value_t *boxeddepth = NULL;
16471648
JL_GC_PUSH1(&boxeddepth);

src/jitlayers.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -844,10 +844,27 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin {
844844
jitlink::PassConfiguration &Config) override {
845845
Config.PostAllocationPasses.push_back([this](jitlink::LinkGraph &G) {
846846
size_t graph_size = 0;
847+
size_t code_size = 0;
848+
size_t data_size = 0;
847849
for (auto block : G.blocks()) {
848850
graph_size += block->getSize();
849851
}
852+
for (auto &section : G.sections()) {
853+
size_t secsize = 0;
854+
for (auto block : section.blocks()) {
855+
secsize += block->getSize();
856+
}
857+
if ((section.getMemProt() & jitlink::MemProt::Exec) == jitlink::MemProt::None) {
858+
data_size += secsize;
859+
} else {
860+
code_size += secsize;
861+
}
862+
graph_size += secsize;
863+
}
850864
this->total_size.fetch_add(graph_size, std::memory_order_relaxed);
865+
jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, graph_size);
866+
jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, code_size);
867+
jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, data_size);
851868
return Error::success();
852869
});
853870
}

src/staticdata.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3330,6 +3330,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
33303330
arraylist_push(&jl_linkage_blobs, (void*)image_base);
33313331
arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg));
33323332
arraylist_push(&jl_image_relocs, (void*)relocs_base);
3333+
jl_timing_counter_inc(JL_TIMING_COUNTER_ImageSize, sizeof_sysimg + sizeof(uintptr_t));
33333334
rebuild_image_blob_tree();
33343335

33353336
// jl_printf(JL_STDOUT, "%ld blobs to link against\n", jl_linkage_blobs.len >> 1);

src/timing.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,23 +48,38 @@ const char *jl_timing_names[(int)JL_TIMING_LAST] =
4848
#undef X
4949
};
5050

51+
JL_DLLEXPORT jl_timing_counter_t jl_timing_counters[JL_TIMING_COUNTER_LAST];
52+
5153
#ifdef USE_ITTAPI
5254
JL_DLLEXPORT __itt_event jl_timing_ittapi_events[(int)JL_TIMING_EVENT_LAST];
5355
#endif
5456

5557
void jl_print_timings(void)
5658
{
59+
#ifdef USE_TIMING_COUNTS
5760
uint64_t total_time = cycleclock() - t0;
5861
uint64_t root_time = total_time;
5962
for (int i = 0; i < JL_TIMING_LAST; i++) {
6063
root_time -= jl_timing_counts[i];
6164
}
6265
jl_timing_counts[0] = root_time;
66+
fprintf(stderr, "\nJULIA TIMINGS\n");
6367
for (int i = 0; i < JL_TIMING_LAST; i++) {
6468
if (jl_timing_counts[i] != 0)
6569
fprintf(stderr, "%-25s : %5.2f %% %" PRIu64 "\n", jl_timing_names[i],
6670
100 * (((double)jl_timing_counts[i]) / total_time), jl_timing_counts[i]);
6771
}
72+
73+
fprintf(stderr, "\nJULIA COUNTERS\n");
74+
#define X(name) do { \
75+
int64_t val = (int64_t) jl_atomic_load_relaxed(&jl_timing_counters[(int)JL_TIMING_COUNTER_##name].basic_counter); \
76+
if (val != 0) \
77+
fprintf(stderr, "%-25s : %" PRIi64 "\n", #name, val); \
78+
} while (0);
79+
80+
JL_TIMING_COUNTERS
81+
#undef X
82+
#endif
6883
}
6984

7085
void jl_init_timing(void)
@@ -79,6 +94,24 @@ void jl_init_timing(void)
7994
#define X(name) jl_timing_ittapi_events[i++] = __itt_event_create(#name, strlen(#name));
8095
JL_TIMING_EVENTS
8196
#undef X
97+
i = 0;
98+
#define X(name) jl_timing_counters[i++].ittapi_counter = __itt_counter_create(#name, "julia.runtime");
99+
JL_TIMING_COUNTERS
100+
#undef X
101+
#endif
102+
#ifdef USE_TRACY
103+
i = 0;
104+
#define X(counter_name) jl_timing_counters[i].tracy_counter = (jl_tracy_counter_t){0, #counter_name}; \
105+
TracyCPlotConfig(jl_timing_counters[i++].tracy_counter.name, TracyPlotFormatNumber, /* rectilinear */ 1, /* fill */ 1, /* color */ 0);
106+
JL_TIMING_COUNTERS
107+
#undef X
108+
// We reference these by enum indexing and then asking for the name, since that allows the compiler
109+
// to catch name mismatches.
110+
TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_HeapSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
111+
TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_JITSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
112+
TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_JITCodeSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
113+
TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_JITDataSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
114+
TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_ImageSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
82115
#endif
83116
}
84117

src/timing.h

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,13 @@ static inline const char *gnu_basename(const char *path)
1616
return base ? base+1 : path;
1717
}
1818

19+
#ifdef USE_TRACY
20+
typedef struct {
21+
_Atomic(int64_t) val;
22+
char* name;
23+
} jl_tracy_counter_t;
24+
#endif
25+
1926
#ifdef __cplusplus
2027
extern "C" {
2128
#endif
@@ -78,6 +85,9 @@ extern JL_DLLEXPORT uint32_t jl_timing_print_limit;
7885
#define jl_timing_block_exit_task(ct, ptls) ((jl_timing_block_t *)NULL)
7986
#define jl_pop_timing_block(blk)
8087

88+
#define jl_timing_counter_inc(counter, value)
89+
#define jl_timing_counter_dec(counter, value)
90+
8191
#define jl_profile_lock_init(lock, name)
8292
#define jl_profile_lock_start_wait(lock)
8393
#define jl_profile_lock_acquired(lock)
@@ -181,6 +191,15 @@ JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str);
181191
X(NATIVE_Create) \
182192

183193

194+
#define JL_TIMING_COUNTERS \
195+
X(Invalidations) \
196+
X(HeapSize) \
197+
X(JITSize) \
198+
X(JITCodeSize) \
199+
X(JITDataSize) \
200+
X(ImageSize) \
201+
202+
184203
enum jl_timing_owners {
185204
#define X(name) JL_TIMING_ ## name,
186205
JL_TIMING_OWNERS
@@ -195,6 +214,13 @@ enum jl_timing_events {
195214
JL_TIMING_EVENT_LAST
196215
};
197216

217+
enum jl_timing_counter_types {
218+
#define X(name) JL_TIMING_COUNTER_ ## name,
219+
JL_TIMING_COUNTERS
220+
#undef X
221+
JL_TIMING_COUNTER_LAST
222+
};
223+
198224
/**
199225
* Timing back-ends differ in terms of whether they support nested
200226
* and asynchronous events.
@@ -404,6 +430,61 @@ struct jl_timing_suspend_cpp_t {
404430
_jl_timing_suspend_ctor(&__timing_suspend, #subsystem, ct)
405431
#endif
406432

433+
// Counting
434+
#ifdef USE_ITTAPI
435+
#define _ITTAPI_COUNTER_MEMBER __itt_counter ittapi_counter;
436+
#else
437+
#define _ITTAPI_COUNTER_MEMBER
438+
#endif
439+
440+
#ifdef USE_TRACY
441+
# define _TRACY_COUNTER_MEMBER jl_tracy_counter_t tracy_counter;
442+
# else
443+
# define _TRACY_COUNTER_MEMBER
444+
#endif
445+
446+
#ifdef USE_TIMING_COUNTS
447+
#define _COUNTS_MEMBER _Atomic(uint64_t) basic_counter;
448+
#else
449+
#define _COUNTS_MEMBER
450+
#endif
451+
452+
typedef struct {
453+
_ITTAPI_COUNTER_MEMBER
454+
_TRACY_COUNTER_MEMBER
455+
_COUNTS_MEMBER
456+
} jl_timing_counter_t;
457+
458+
JL_DLLEXPORT extern jl_timing_counter_t jl_timing_counters[JL_TIMING_COUNTER_LAST];
459+
460+
static inline void jl_timing_counter_inc(int counter, uint64_t val) JL_NOTSAFEPOINT {
461+
#ifdef USE_ITTAPI
462+
__itt_counter_inc_delta(jl_timing_counters[counter].ittapi_counter, val);
463+
#endif
464+
#ifdef USE_TRACY
465+
jl_tracy_counter_t *tracy_counter = &jl_timing_counters[counter].tracy_counter;
466+
uint64_t oldval = jl_atomic_fetch_add_relaxed(&tracy_counter->val, val);
467+
TracyCPlotI(tracy_counter->name, oldval + val);
468+
#endif
469+
#ifdef USE_TIMING_COUNTS
470+
jl_atomic_fetch_add_relaxed(&jl_timing_counters[counter].basic_counter, val);
471+
#endif
472+
}
473+
474+
static inline void jl_timing_counter_dec(int counter, uint64_t val) JL_NOTSAFEPOINT {
475+
#ifdef USE_ITTAPI
476+
__itt_counter_dec_delta(jl_timing_counters[counter].ittapi_counter, val);
477+
#endif
478+
#ifdef USE_TRACY
479+
jl_tracy_counter_t *tracy_counter = &jl_timing_counters[counter].tracy_counter;
480+
uint64_t oldval = jl_atomic_fetch_add_relaxed(&tracy_counter->val, -val);
481+
TracyCPlotI(tracy_counter->name, oldval - val);
482+
#endif
483+
#ifdef USE_TIMING_COUNTS
484+
jl_atomic_fetch_add_relaxed(&jl_timing_counters[counter].basic_counter, -(int64_t)val);
485+
#endif
486+
}
487+
407488
// Locking profiling
408489
static inline void jl_profile_lock_init(jl_mutex_t *lock, const char *name) {
409490
#ifdef USE_ITTAPI

0 commit comments

Comments
 (0)