Skip to content

Commit 9e14bf8

Browse files
authored
count bytes allocated through malloc more precisely (#55223)
This should make the accounting for memory allocated through malloc a bit more accurate. It should also simplify the accounting code by eliminating the use of `jl_gc_count_freed` in `jl_genericmemory_to_string`.
1 parent 4278ded commit 9e14bf8

File tree

6 files changed

+117
-113
lines changed

6 files changed

+117
-113
lines changed

src/gc-common.c

Lines changed: 31 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@
66
#include "julia_gcext.h"
77
#include "julia_assert.h"
88
#include "threading.h"
9-
#ifdef __GLIBC__
10-
#include <malloc.h> // for malloc_trim
11-
#endif
129

1310
#ifdef __cplusplus
1411
extern "C" {
@@ -120,6 +117,37 @@ JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t
120117
jl_gc_deregister_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb);
121118
}
122119

120+
// =========================================================================== //
121+
// malloc wrappers, aligned allocation
122+
// =========================================================================== //
123+
124+
#if defined(_OS_WINDOWS_)
125+
// helper function based partly on wine msvcrt80+ heap.c
126+
// but with several fixes to improve the correctness of the computation and remove unnecessary parameters
127+
#define SAVED_PTR(x) ((void *)((DWORD_PTR)((char *)x - sizeof(void *)) & \
128+
~(sizeof(void *) - 1)))
129+
static size_t _aligned_msize(void *p)
130+
{
131+
void *alloc_ptr = *(void**)SAVED_PTR(p);
132+
return _msize(alloc_ptr) - ((char*)p - (char*)alloc_ptr);
133+
}
134+
#undef SAVED_PTR
135+
#endif
136+
137+
size_t memory_block_usable_size(void *p, int isaligned) JL_NOTSAFEPOINT
138+
{
139+
#if defined(_OS_WINDOWS_)
140+
if (isaligned)
141+
return _aligned_msize(p);
142+
else
143+
return _msize(p);
144+
#elif defined(_OS_DARWIN_)
145+
return malloc_size(p);
146+
#else
147+
return malloc_usable_size(p);
148+
#endif
149+
}
150+
123151
// =========================================================================== //
124152
// Finalization
125153
// =========================================================================== //
@@ -505,63 +533,6 @@ JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz)
505533
return jl_gc_alloc(ptls, sz, NULL);
506534
}
507535

508-
// allocation wrappers that save the size of allocations, to allow using
509-
// jl_gc_counted_* functions with a libc-compatible API.
510-
511-
JL_DLLEXPORT void *jl_malloc(size_t sz)
512-
{
513-
int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT);
514-
if (p == NULL)
515-
return NULL;
516-
p[0] = sz;
517-
return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
518-
}
519-
520-
//_unchecked_calloc does not check for potential overflow of nm*sz
521-
STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) {
522-
size_t nmsz = nm*sz;
523-
int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1);
524-
if (p == NULL)
525-
return NULL;
526-
p[0] = nmsz;
527-
return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
528-
}
529-
530-
JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
531-
{
532-
if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT)
533-
return NULL;
534-
return _unchecked_calloc(nm, sz);
535-
}
536-
537-
JL_DLLEXPORT void jl_free(void *p)
538-
{
539-
if (p != NULL) {
540-
int64_t *pp = (int64_t *)p - 2;
541-
size_t sz = pp[0];
542-
jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT);
543-
}
544-
}
545-
546-
JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
547-
{
548-
int64_t *pp;
549-
size_t szold;
550-
if (p == NULL) {
551-
pp = NULL;
552-
szold = 0;
553-
}
554-
else {
555-
pp = (int64_t *)p - 2;
556-
szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT;
557-
}
558-
int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT);
559-
if (pnew == NULL)
560-
return NULL;
561-
pnew[0] = sz;
562-
return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
563-
}
564-
565536
// allocator entry points
566537

567538
JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty)

src/gc-common.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,14 @@
1212
#endif
1313
#endif
1414

15+
#include <stdlib.h>
16+
17+
#if defined(_OS_DARWIN_)
18+
#include <malloc/malloc.h>
19+
#else
20+
#include <malloc.h> // for malloc_trim
21+
#endif
22+
1523
#ifdef __cplusplus
1624
extern "C" {
1725
#endif

src/gc-stock.c

Lines changed: 74 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,6 @@
99
#include "julia_atomics.h"
1010
#include "julia_gcext.h"
1111
#include "julia_assert.h"
12-
#ifdef __GLIBC__
13-
#include <malloc.h> // for malloc_trim
14-
#endif
1512

1613
#ifdef __cplusplus
1714
extern "C" {
@@ -569,11 +566,6 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
569566
jl_batch_accum_heap_size(ptls, sz);
570567
}
571568

572-
void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT
573-
{
574-
jl_batch_accum_free_size(jl_current_task->ptls, sz);
575-
}
576-
577569
// Only safe to update the heap inside the GC
578570
static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT
579571
{
@@ -643,13 +635,15 @@ static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT
643635
jl_genericmemory_t *m = (jl_genericmemory_t*)v;
644636
assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2);
645637
char *d = (char*)m->ptr;
638+
size_t freed_bytes = memory_block_usable_size(d, isaligned);
639+
assert(freed_bytes != 0);
646640
if (isaligned)
647641
jl_free_aligned(d);
648642
else
649643
free(d);
650644
jl_atomic_store_relaxed(&gc_heap_stats.heap_size,
651-
jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - jl_genericmemory_nbytes(m));
652-
gc_num.freed += jl_genericmemory_nbytes(m);
645+
jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - freed_bytes);
646+
gc_num.freed += freed_bytes;
653647
gc_num.freecall++;
654648
}
655649

@@ -3652,14 +3646,69 @@ JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void)
36523646
return max_total_memory;
36533647
}
36543648

3655-
// allocation wrappers that track allocation and let collection run
3649+
// allocation wrappers that add to gc pressure
3650+
3651+
JL_DLLEXPORT void *jl_malloc(size_t sz)
3652+
{
3653+
return jl_gc_counted_malloc(sz);
3654+
}
3655+
3656+
//_unchecked_calloc does not check for potential overflow of nm*sz
3657+
STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) {
3658+
size_t nmsz = nm*sz;
3659+
return jl_gc_counted_calloc(nmsz, 1);
3660+
}
3661+
3662+
JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
3663+
{
3664+
if (nm > SSIZE_MAX/sz)
3665+
return NULL;
3666+
return _unchecked_calloc(nm, sz);
3667+
}
3668+
3669+
JL_DLLEXPORT void jl_free(void *p)
3670+
{
3671+
if (p != NULL) {
3672+
size_t sz = memory_block_usable_size(p, 0);
3673+
free(p);
3674+
jl_task_t *ct = jl_get_current_task();
3675+
if (ct != NULL)
3676+
jl_batch_accum_free_size(ct->ptls, sz);
3677+
}
3678+
}
3679+
3680+
JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
3681+
{
3682+
size_t old = p ? memory_block_usable_size(p, 0) : 0;
3683+
void *data = realloc(p, sz);
3684+
jl_task_t *ct = jl_get_current_task();
3685+
if (data != NULL && ct != NULL) {
3686+
sz = memory_block_usable_size(data, 0);
3687+
jl_ptls_t ptls = ct->ptls;
3688+
maybe_collect(ptls);
3689+
if (!(sz < old))
3690+
jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
3691+
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + (sz - old));
3692+
jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc,
3693+
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc) + 1);
3694+
3695+
int64_t diff = sz - old;
3696+
if (diff < 0) {
3697+
jl_batch_accum_free_size(ptls, -diff);
3698+
}
3699+
else {
3700+
jl_batch_accum_heap_size(ptls, diff);
3701+
}
3702+
}
3703+
return data;
3704+
}
36563705

36573706
JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
36583707
{
3659-
jl_gcframe_t **pgcstack = jl_get_pgcstack();
3660-
jl_task_t *ct = jl_current_task;
36613708
void *data = malloc(sz);
3662-
if (data != NULL && pgcstack != NULL && ct->world_age) {
3709+
jl_task_t *ct = jl_get_current_task();
3710+
if (data != NULL && ct != NULL) {
3711+
sz = memory_block_usable_size(data, 0);
36633712
jl_ptls_t ptls = ct->ptls;
36643713
maybe_collect(ptls);
36653714
jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
@@ -3673,54 +3722,29 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
36733722

36743723
JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
36753724
{
3676-
jl_gcframe_t **pgcstack = jl_get_pgcstack();
3677-
jl_task_t *ct = jl_current_task;
36783725
void *data = calloc(nm, sz);
3679-
if (data != NULL && pgcstack != NULL && ct->world_age) {
3726+
jl_task_t *ct = jl_get_current_task();
3727+
if (data != NULL && ct != NULL) {
3728+
sz = memory_block_usable_size(data, 0);
36803729
jl_ptls_t ptls = ct->ptls;
36813730
maybe_collect(ptls);
36823731
jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
3683-
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + nm*sz);
3732+
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz);
36843733
jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc,
36853734
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1);
3686-
jl_batch_accum_heap_size(ptls, sz * nm);
3735+
jl_batch_accum_heap_size(ptls, sz);
36873736
}
36883737
return data;
36893738
}
36903739

36913740
JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
36923741
{
3693-
jl_gcframe_t **pgcstack = jl_get_pgcstack();
3694-
jl_task_t *ct = jl_current_task;
3695-
free(p);
3696-
if (pgcstack != NULL && ct->world_age) {
3697-
jl_batch_accum_free_size(ct->ptls, sz);
3698-
}
3742+
return jl_free(p);
36993743
}
37003744

37013745
JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
37023746
{
3703-
jl_gcframe_t **pgcstack = jl_get_pgcstack();
3704-
jl_task_t *ct = jl_current_task;
3705-
void *data = realloc(p, sz);
3706-
if (data != NULL && pgcstack != NULL && ct->world_age) {
3707-
jl_ptls_t ptls = ct->ptls;
3708-
maybe_collect(ptls);
3709-
if (!(sz < old))
3710-
jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
3711-
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + (sz - old));
3712-
jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc,
3713-
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc) + 1);
3714-
3715-
int64_t diff = sz - old;
3716-
if (diff < 0) {
3717-
jl_batch_accum_free_size(ptls, -diff);
3718-
}
3719-
else {
3720-
jl_batch_accum_heap_size(ptls, diff);
3721-
}
3722-
}
3723-
return data;
3747+
return jl_realloc(p, sz);
37243748
}
37253749

37263750
// allocating blocks for Arrays and Strings
@@ -3741,11 +3765,13 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
37413765
if (b == NULL)
37423766
jl_throw(jl_memory_exception);
37433767

3768+
size_t allocated_bytes = memory_block_usable_size(b, 1);
3769+
assert(allocated_bytes >= allocsz);
37443770
jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
3745-
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz);
3771+
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocated_bytes);
37463772
jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc,
37473773
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1);
3748-
jl_batch_accum_heap_size(ptls, allocsz);
3774+
jl_batch_accum_heap_size(ptls, allocated_bytes);
37493775
#ifdef _OS_WINDOWS_
37503776
SetLastError(last_error);
37513777
#endif

src/genericmemory.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,8 @@ JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void
165165
if (own_buffer) {
166166
int isaligned = 0; // TODO: allow passing memalign'd buffers
167167
jl_gc_track_malloced_genericmemory(ct->ptls, m, isaligned);
168-
jl_gc_count_allocd(nel*elsz);
168+
size_t allocated_bytes = memory_block_usable_size(data, isaligned);
169+
jl_gc_count_allocd(allocated_bytes);
169170
}
170171
return m;
171172
}
@@ -208,8 +209,6 @@ JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_
208209
JL_GC_PUSH1(&o);
209210
jl_value_t *str = jl_pchar_to_string((const char*)m->ptr, len);
210211
JL_GC_POP();
211-
if (how == 1) // TODO: we might like to early-call jl_gc_free_memory here instead actually, but hopefully `m` will die soon
212-
jl_gc_count_freed(mlength);
213212
return str;
214213
}
215214
// n.b. how == 0 is always pool-allocated, so the freed bytes are computed from the pool not the object

src/julia_internal.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -615,8 +615,8 @@ jl_svec_t *jl_perm_symsvec(size_t n, ...);
615615

616616
void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned) JL_NOTSAFEPOINT;
617617
size_t jl_genericmemory_nbytes(jl_genericmemory_t *a) JL_NOTSAFEPOINT;
618+
size_t memory_block_usable_size(void *mem, int isaligned) JL_NOTSAFEPOINT;
618619
void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT;
619-
void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT;
620620
void jl_gc_run_all_finalizers(jl_task_t *ct);
621621
void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task);
622622
void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT;

test/compiler/codegen.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ function g_dict_hash_alloc()
407407
end
408408
# Warm up
409409
f_dict_hash_alloc(); g_dict_hash_alloc();
410-
@test (@allocated f_dict_hash_alloc()) == (@allocated g_dict_hash_alloc())
410+
@test abs((@allocated f_dict_hash_alloc()) / (@allocated g_dict_hash_alloc()) - 1) < 0.1 # less than 10% difference
411411

412412
# returning an argument shouldn't alloc a new box
413413
@noinline f33829(x) = (global called33829 = true; x)

0 commit comments

Comments
 (0)