66#include "julia_atomics.h"
77#include "julia_gcext.h"
88#include "julia_assert.h"
9- #ifdef __GLIBC__
9+ #include <stdlib.h>
10+
11+ #if defined(_OS_DARWIN_ )
12+ #include <malloc/malloc.h>
13+ #else
1014#include <malloc.h> // for malloc_trim
1115#endif
1216
@@ -1121,17 +1125,8 @@ static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
11211125
11221126void jl_gc_track_malloced_genericmemory (jl_ptls_t ptls , jl_genericmemory_t * m , int isaligned ){
11231127 // This is **NOT** a GC safe point.
1124- mallocarray_t * ma ;
1125- if (ptls -> heap .mafreelist == NULL ) {
1126- ma = (mallocarray_t * )malloc_s (sizeof (mallocarray_t ));
1127- }
1128- else {
1129- ma = ptls -> heap .mafreelist ;
1130- ptls -> heap .mafreelist = ma -> next ;
1131- }
1132- ma -> a = (jl_value_t * )((uintptr_t )m | !!isaligned );
1133- ma -> next = ptls -> heap .mallocarrays ;
1134- ptls -> heap .mallocarrays = ma ;
1128+ void * a = (void * )((uintptr_t )m | !!isaligned );
1129+ small_arraylist_push (& ptls -> heap .mallocarrays , a );
11351130}
11361131
11371132
@@ -1143,10 +1138,6 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
11431138 jl_batch_accum_heap_size (ptls , sz );
11441139}
11451140
1146- void jl_gc_count_freed (size_t sz ) JL_NOTSAFEPOINT
1147- {
1148- jl_batch_accum_free_size (jl_current_task -> ptls , sz );
1149- }
11501141
11511142// Only safe to update the heap inside the GC
11521143static void combine_thread_gc_counts (jl_gc_num_t * dest , int update_heap ) JL_NOTSAFEPOINT
@@ -1222,19 +1213,21 @@ size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT
12221213}
12231214
12241215
1225- static void jl_gc_free_memory (jl_value_t * v , int isaligned ) JL_NOTSAFEPOINT
1216+ static void jl_gc_free_memory (jl_genericmemory_t * v , int isaligned ) JL_NOTSAFEPOINT
12261217{
12271218 assert (jl_is_genericmemory (v ));
12281219 jl_genericmemory_t * m = (jl_genericmemory_t * )v ;
12291220 assert (jl_genericmemory_how (m ) == 1 || jl_genericmemory_how (m ) == 2 );
12301221 char * d = (char * )m -> ptr ;
1222+ size_t freed_bytes = memory_block_usable_size (d , isaligned );
1223+ assert (freed_bytes != 0 );
12311224 if (isaligned )
12321225 jl_free_aligned (d );
12331226 else
12341227 free (d );
12351228 jl_atomic_store_relaxed (& gc_heap_stats .heap_size ,
1236- jl_atomic_load_relaxed (& gc_heap_stats .heap_size ) - jl_genericmemory_nbytes ( m ) );
1237- gc_num .freed += jl_genericmemory_nbytes ( m ) ;
1229+ jl_atomic_load_relaxed (& gc_heap_stats .heap_size ) - freed_bytes );
1230+ gc_num .freed += freed_bytes ;
12381231 gc_num .freecall ++ ;
12391232}
12401233
@@ -1245,24 +1238,23 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT
12451238 for (int t_i = 0 ; t_i < gc_n_threads ; t_i ++ ) {
12461239 jl_ptls_t ptls2 = gc_all_tls_states [t_i ];
12471240 if (ptls2 != NULL ) {
1248- mallocarray_t * ma = ptls2 -> heap . mallocarrays ;
1249- mallocarray_t * * pma = & ptls2 -> heap .mallocarrays ;
1250- while ( ma != NULL ) {
1251- mallocarray_t * nxt = ma -> next ;
1252- jl_value_t * a = ( jl_value_t * )(( uintptr_t ) ma -> a & ~ 1 );
1253- int bits = jl_astaggedvalue ( a ) -> bits . gc ;
1254- if (gc_marked (bits )) {
1255- pma = & ma -> next ;
1241+ size_t n = 0 ;
1242+ size_t l = ptls2 -> heap .mallocarrays . len ;
1243+ void * * lst = ptls2 -> heap . mallocarrays . items ;
1244+ // filter without preserving order
1245+ while ( n < l ) {
1246+ jl_genericmemory_t * m = ( jl_genericmemory_t * )(( uintptr_t ) lst [ n ] & ~ 1 ) ;
1247+ if (gc_marked (jl_astaggedvalue ( m ) -> bits . gc )) {
1248+ n ++ ;
12561249 }
12571250 else {
1258- * pma = nxt ;
1259- int isaligned = ( uintptr_t ) ma -> a & 1 ;
1260- jl_gc_free_memory ( a , isaligned ) ;
1261- free ( ma ) ;
1251+ int isaligned = ( uintptr_t ) lst [ n ] & 1 ;
1252+ jl_gc_free_memory ( m , isaligned ) ;
1253+ l -- ;
1254+ lst [ n ] = lst [ l ] ;
12621255 }
1263- gc_time_count_mallocd_memory (bits );
1264- ma = nxt ;
12651256 }
1257+ ptls2 -> heap .mallocarrays .len = l ;
12661258 }
12671259 }
12681260 gc_time_mallocd_memory_end ();
@@ -3968,8 +3960,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
39683960 small_arraylist_new (& heap -> live_tasks , 0 );
39693961 for (int i = 0 ; i < JL_N_STACK_POOLS ; i ++ )
39703962 small_arraylist_new (& heap -> free_stacks [i ], 0 );
3971- heap -> mallocarrays = NULL ;
3972- heap -> mafreelist = NULL ;
3963+ small_arraylist_new (& heap -> mallocarrays , 0 );
39733964 heap -> big_objects = NULL ;
39743965 heap -> remset = & heap -> _remset [0 ];
39753966 heap -> last_remset = & heap -> _remset [1 ];
@@ -4069,58 +4060,44 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
40694060 jl_throw (jl_memory_exception );
40704061}
40714062
4072- // allocation wrappers that track allocation and let collection run
4063+ // allocation wrappers that add to gc pressure
40734064
4074- JL_DLLEXPORT void * jl_gc_counted_malloc (size_t sz )
4065+ JL_DLLEXPORT void * jl_malloc (size_t sz )
40754066{
4076- jl_gcframe_t * * pgcstack = jl_get_pgcstack ();
4077- jl_task_t * ct = jl_current_task ;
4078- void * data = malloc (sz );
4079- if (data != NULL && pgcstack != NULL && ct -> world_age ) {
4080- jl_ptls_t ptls = ct -> ptls ;
4081- maybe_collect (ptls );
4082- jl_atomic_store_relaxed (& ptls -> gc_num .allocd ,
4083- jl_atomic_load_relaxed (& ptls -> gc_num .allocd ) + sz );
4084- jl_atomic_store_relaxed (& ptls -> gc_num .malloc ,
4085- jl_atomic_load_relaxed (& ptls -> gc_num .malloc ) + 1 );
4086- jl_batch_accum_heap_size (ptls , sz );
4087- }
4088- return data ;
4067+ return jl_gc_counted_malloc (sz );
40894068}
40904069
4091- JL_DLLEXPORT void * jl_gc_counted_calloc (size_t nm , size_t sz )
4070+ //_unchecked_calloc does not check for potential overflow of nm*sz
4071+ STATIC_INLINE void * _unchecked_calloc (size_t nm , size_t sz ) {
4072+ size_t nmsz = nm * sz ;
4073+ return jl_gc_counted_calloc (nmsz , 1 );
4074+ }
4075+
4076+ JL_DLLEXPORT void * jl_calloc (size_t nm , size_t sz )
40924077{
4093- jl_gcframe_t * * pgcstack = jl_get_pgcstack ();
4094- jl_task_t * ct = jl_current_task ;
4095- void * data = calloc (nm , sz );
4096- if (data != NULL && pgcstack != NULL && ct -> world_age ) {
4097- jl_ptls_t ptls = ct -> ptls ;
4098- maybe_collect (ptls );
4099- jl_atomic_store_relaxed (& ptls -> gc_num .allocd ,
4100- jl_atomic_load_relaxed (& ptls -> gc_num .allocd ) + nm * sz );
4101- jl_atomic_store_relaxed (& ptls -> gc_num .malloc ,
4102- jl_atomic_load_relaxed (& ptls -> gc_num .malloc ) + 1 );
4103- jl_batch_accum_heap_size (ptls , sz * nm );
4104- }
4105- return data ;
4078+ if (nm > SSIZE_MAX /sz )
4079+ return NULL ;
4080+ return _unchecked_calloc (nm , sz );
41064081}
41074082
4108- JL_DLLEXPORT void jl_gc_counted_free_with_size (void * p , size_t sz )
4083+ JL_DLLEXPORT void jl_free (void * p )
41094084{
4110- jl_gcframe_t * * pgcstack = jl_get_pgcstack ();
4111- jl_task_t * ct = jl_current_task ;
4112- free (p );
4113- if (pgcstack != NULL && ct -> world_age ) {
4114- jl_batch_accum_free_size (ct -> ptls , sz );
4085+ if (p != NULL ) {
4086+ size_t sz = memory_block_usable_size (p , 0 );
4087+ free (p );
4088+ jl_task_t * ct = jl_get_current_task ();
4089+ if (ct != NULL )
4090+ jl_batch_accum_free_size (ct -> ptls , sz );
41154091 }
41164092}
41174093
4118- JL_DLLEXPORT void * jl_gc_counted_realloc_with_old_size (void * p , size_t old , size_t sz )
4094+ JL_DLLEXPORT void * jl_realloc (void * p , size_t sz )
41194095{
4120- jl_gcframe_t * * pgcstack = jl_get_pgcstack ();
4121- jl_task_t * ct = jl_current_task ;
4096+ size_t old = p ? memory_block_usable_size (p , 0 ) : 0 ;
41224097 void * data = realloc (p , sz );
4123- if (data != NULL && pgcstack != NULL && ct -> world_age ) {
4098+ jl_task_t * ct = jl_get_current_task ();
4099+ if (data != NULL && ct != NULL ) {
4100+ sz = memory_block_usable_size (data , 0 );
41244101 jl_ptls_t ptls = ct -> ptls ;
41254102 maybe_collect (ptls );
41264103 if (!(sz < old ))
@@ -4140,63 +4117,80 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size
41404117 return data ;
41414118}
41424119
4143- // allocation wrappers that save the size of allocations, to allow using
4144- // jl_gc_counted_* functions with a libc-compatible API.
4145-
4146- JL_DLLEXPORT void * jl_malloc (size_t sz )
4120+ JL_DLLEXPORT void * jl_gc_counted_malloc (size_t sz )
41474121{
4148- int64_t * p = (int64_t * )jl_gc_counted_malloc (sz + JL_SMALL_BYTE_ALIGNMENT );
4149- if (p == NULL )
4150- return NULL ;
4151- p [0 ] = sz ;
4152- return (void * )(p + 2 ); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
4122+ jl_task_t * ct = jl_current_task ;
4123+ void * data = malloc (sz );
4124+ if (data != NULL && ct != NULL && ct -> world_age ) {
4125+ sz = memory_block_usable_size (data , 0 );
4126+ jl_ptls_t ptls = ct -> ptls ;
4127+ maybe_collect (ptls );
4128+ jl_atomic_store_relaxed (& ptls -> gc_num .allocd ,
4129+ jl_atomic_load_relaxed (& ptls -> gc_num .allocd ) + sz );
4130+ jl_atomic_store_relaxed (& ptls -> gc_num .malloc ,
4131+ jl_atomic_load_relaxed (& ptls -> gc_num .malloc ) + 1 );
4132+ jl_batch_accum_heap_size (ptls , sz );
4133+ }
4134+ return data ;
41534135}
41544136
4155- //_unchecked_calloc does not check for potential overflow of nm*sz
4156- STATIC_INLINE void * _unchecked_calloc (size_t nm , size_t sz ) {
4157- size_t nmsz = nm * sz ;
4158- int64_t * p = (int64_t * )jl_gc_counted_calloc (nmsz + JL_SMALL_BYTE_ALIGNMENT , 1 );
4159- if (p == NULL )
4160- return NULL ;
4161- p [0 ] = nmsz ;
4162- return (void * )(p + 2 ); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
4137+ JL_DLLEXPORT void * jl_gc_counted_calloc (size_t nm , size_t sz )
4138+ {
4139+ jl_task_t * ct = jl_current_task ;
4140+ void * data = calloc (nm , sz );
4141+ if (data != NULL && ct != NULL && ct -> world_age ) {
4142+ sz = memory_block_usable_size (data , 0 );
4143+ jl_ptls_t ptls = ct -> ptls ;
4144+ maybe_collect (ptls );
4145+ jl_atomic_store_relaxed (& ptls -> gc_num .allocd ,
4146+ jl_atomic_load_relaxed (& ptls -> gc_num .allocd ) + sz );
4147+ jl_atomic_store_relaxed (& ptls -> gc_num .malloc ,
4148+ jl_atomic_load_relaxed (& ptls -> gc_num .malloc ) + 1 );
4149+ jl_batch_accum_heap_size (ptls , sz );
4150+ }
4151+ return data ;
41634152}
41644153
4165- JL_DLLEXPORT void * jl_calloc ( size_t nm , size_t sz )
4154+ JL_DLLEXPORT void jl_gc_counted_free_with_size ( void * p , size_t sz )
41664155{
4167- if (nm > SSIZE_MAX /sz - JL_SMALL_BYTE_ALIGNMENT )
4168- return NULL ;
4169- return _unchecked_calloc (nm , sz );
4156+ jl_free (p );
41704157}
41714158
4172- JL_DLLEXPORT void jl_free (void * p )
4159+ JL_DLLEXPORT void * jl_gc_counted_realloc_with_old_size (void * p , size_t old , size_t sz )
41734160{
4174- if (p != NULL ) {
4175- int64_t * pp = (int64_t * )p - 2 ;
4176- size_t sz = pp [0 ];
4177- jl_gc_counted_free_with_size (pp , sz + JL_SMALL_BYTE_ALIGNMENT );
4178- }
4161+ return jl_realloc (p , sz );
41794162}
41804163
// =========================================================================== //
// malloc wrappers, aligned allocation
// =========================================================================== //

#if defined(_OS_WINDOWS_)
// helper function based partly on wine msvcrt80+ heap.c
// but with several fixes to improve the correctness of the computation and remove unnecessary parameters
// _aligned_malloc stashes the pointer returned by the underlying allocator in
// the word just below the aligned block; SAVED_PTR recovers that slot.
#define SAVED_PTR(x) ((void*)((DWORD_PTR)((char*)x - sizeof(void*)) & \
                              ~(sizeof(void*) - 1)))
static size_t _aligned_msize(void *p)
{
    void *alloc_ptr = *(void**)SAVED_PTR(p);
    // Usable size = size of the underlying block minus the alignment padding
    // that precedes the aligned pointer.
    return _msize(alloc_ptr) - ((char*)p - (char*)alloc_ptr);
}
#undef SAVED_PTR
#endif
41994180
4181+ size_t memory_block_usable_size (void * p , int isaligned ) JL_NOTSAFEPOINT
4182+ {
4183+ #if defined(_OS_WINDOWS_ )
4184+ if (isaligned )
4185+ return _aligned_msize (p );
4186+ else
4187+ return _msize (p );
4188+ #elif defined(_OS_DARWIN_ )
4189+ return malloc_size (p );
4190+ #else
4191+ return malloc_usable_size (p );
4192+ #endif
4193+ }
42004194// allocating blocks for Arrays and Strings
42014195
42024196JL_DLLEXPORT void * jl_gc_managed_malloc (size_t sz )
@@ -4214,12 +4208,13 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
42144208 void * b = malloc_cache_align (allocsz );
42154209 if (b == NULL )
42164210 jl_throw (jl_memory_exception );
4217-
4211+ size_t allocated_bytes = memory_block_usable_size (b , 1 );
4212+ assert (allocated_bytes >= allocsz );
42184213 jl_atomic_store_relaxed (& ptls -> gc_num .allocd ,
4219- jl_atomic_load_relaxed (& ptls -> gc_num .allocd ) + allocsz );
4214+ jl_atomic_load_relaxed (& ptls -> gc_num .allocd ) + allocated_bytes );
42204215 jl_atomic_store_relaxed (& ptls -> gc_num .malloc ,
42214216 jl_atomic_load_relaxed (& ptls -> gc_num .malloc ) + 1 );
4222- jl_batch_accum_heap_size (ptls , allocsz );
4217+ jl_batch_accum_heap_size (ptls , allocated_bytes );
42234218#ifdef _OS_WINDOWS_
42244219 SetLastError (last_error );
42254220#endif
0 commit comments