|
| 1 | +diff --git a/src/gc.c b/src/gc.c |
| 2 | +index c85d1e5455..c82b2b645d 100644 |
| 3 | +--- a/src/gc.c |
| 4 | ++++ b/src/gc.c |
| 5 | +@@ -6,6 +6,8 @@ |
| 6 | + #include "julia_gcext.h" |
| 7 | + #include "julia_assert.h" |
| 8 | + #include <math.h> |
| 9 | ++#include <stddef.h> |
| 10 | ++#include <stdint.h> |
| 11 | + #include <sys/types.h> |
| 12 | + #ifdef __GLIBC__ |
| 13 | + #include <malloc.h> // for malloc_trim |
| 14 | +@@ -1004,8 +1006,14 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) |
| 15 | + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); |
| 16 | + jl_atomic_store_relaxed(&ptls->gc_num.bigalloc, |
| 17 | + jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1); |
| 18 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, allocsz); |
| 19 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, allocsz); |
| 20 | ++ uint64_t alloc_thresh = jl_atomic_load_relaxed(&ptls->gc_num.alloc_thresh); |
| 21 | ++ if (alloc_thresh + allocsz < 128*1024)
| 22 | ++ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, alloc_thresh + allocsz);
| 23 | ++ else {
| 24 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, alloc_thresh + allocsz);
| 25 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_thresh + allocsz);
| 26 | ++ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, 0); |
| 27 | ++ } |
| 28 | + #ifdef MEMDEBUG |
| 29 | + memset(v, 0xee, allocsz); |
| 30 | + #endif |
| 31 | +@@ -1051,8 +1059,10 @@ static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT |
| 32 | + if (nxt) |
| 33 | + nxt->prev = pv; |
| 34 | + gc_num.freed += v->sz&~3; |
| 35 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, v->sz&~3); |
| 36 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(v->sz&~3)); |
| 37 | ++ jl_atomic_store_relaxed(&gc_heap_stats.malloc_bytes_freed,
| 38 | ++ jl_atomic_load_relaxed(&gc_heap_stats.malloc_bytes_freed) + (v->sz&~3));
| 39 | ++ jl_atomic_store_relaxed(&gc_heap_stats.heap_size,
| 40 | ++ jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - (v->sz&~3));
| 41 | + #ifdef MEMDEBUG |
| 42 | + memset(v, 0xbb, v->sz&~3); |
| 43 | + #endif |
| 44 | +@@ -1112,8 +1122,14 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT |
| 45 | + jl_ptls_t ptls = jl_current_task->ptls; |
| 46 | + jl_atomic_store_relaxed(&ptls->gc_num.allocd, |
| 47 | + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); |
| 48 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, sz); |
| 49 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, sz); |
| 50 | ++ uint64_t alloc_thresh = jl_atomic_load_relaxed(&ptls->gc_num.alloc_thresh); |
| 51 | ++ if (alloc_thresh + sz < 128*1024) |
| 52 | ++ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, alloc_thresh + sz); |
| 53 | ++ else { |
| 54 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, alloc_thresh + sz); |
| 55 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_thresh + sz); |
| 56 | ++ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, 0); |
| 57 | ++ } |
| 58 | + } |
| 59 | + |
| 60 | + static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT |
| 61 | +@@ -1126,12 +1142,15 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT |
| 62 | + jl_ptls_t ptls = gc_all_tls_states[i]; |
| 63 | + if (ptls) { |
| 64 | + dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval); |
| 65 | +- dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.freed); |
| 66 | + dest->malloc += jl_atomic_load_relaxed(&ptls->gc_num.malloc); |
| 67 | + dest->realloc += jl_atomic_load_relaxed(&ptls->gc_num.realloc); |
| 68 | + dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_num.poolalloc); |
| 69 | + dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc); |
| 70 | +- dest->freecall += jl_atomic_load_relaxed(&ptls->gc_num.freecall); |
| 71 | ++ uint64_t alloc_thresh = jl_atomic_load_relaxed(&ptls->gc_num.alloc_thresh); |
| 72 | ++ uint64_t free_thresh = jl_atomic_load_relaxed(&ptls->gc_num.free_thresh); |
| 73 | ++ jl_atomic_store_relaxed(&gc_heap_stats.bytes_mallocd, alloc_thresh + jl_atomic_load_relaxed(&gc_heap_stats.bytes_mallocd)); |
| 74 | ++ jl_atomic_store_relaxed(&gc_heap_stats.malloc_bytes_freed, free_thresh + jl_atomic_load_relaxed(&gc_heap_stats.malloc_bytes_freed)); |
| 75 | ++ jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_thresh - free_thresh + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)); |
| 76 | + } |
| 77 | + } |
| 78 | + } |
| 79 | +@@ -1188,8 +1207,10 @@ static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT |
| 80 | + jl_free_aligned(d); |
| 81 | + else |
| 82 | + free(d); |
| 83 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, jl_array_nbytes(a)); |
| 84 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -jl_array_nbytes(a)); |
| 85 | ++ jl_atomic_store_relaxed(&gc_heap_stats.malloc_bytes_freed, |
| 86 | ++ jl_atomic_load_relaxed(&gc_heap_stats.malloc_bytes_freed) + jl_array_nbytes(a)); |
| 87 | ++ jl_atomic_store_relaxed(&gc_heap_stats.heap_size, |
| 88 | ++ jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - jl_array_nbytes(a)); |
| 89 | + gc_num.freed += jl_array_nbytes(a); |
| 90 | + gc_num.freecall++; |
| 91 | + } |
| 92 | +@@ -3589,8 +3610,14 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) |
| 93 | + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); |
| 94 | + jl_atomic_store_relaxed(&ptls->gc_num.malloc, |
| 95 | + jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); |
| 96 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, sz); |
| 97 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, sz); |
| 98 | ++ uint64_t alloc_thresh = jl_atomic_load_relaxed(&ptls->gc_num.alloc_thresh); |
| 99 | ++ if (alloc_thresh + sz < 128*1024) |
| 100 | ++ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, alloc_thresh + sz); |
| 101 | ++ else { |
| 102 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, alloc_thresh + sz); |
| 103 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_thresh + sz); |
| 104 | ++ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, 0); |
| 105 | ++ } |
| 106 | + } |
| 107 | + return malloc(sz); |
| 108 | + } |
| 109 | +@@ -3606,8 +3633,14 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) |
| 110 | + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz); |
| 111 | + jl_atomic_store_relaxed(&ptls->gc_num.malloc, |
| 112 | + jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); |
| 113 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, nm*sz); |
| 114 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, nm*sz); |
| 115 | ++ uint64_t alloc_thresh = jl_atomic_load_relaxed(&ptls->gc_num.alloc_thresh); |
| 116 | ++ if (alloc_thresh + sz*nm < 128*1024) |
| 117 | ++ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, alloc_thresh + sz*nm); |
| 118 | ++ else { |
| 119 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, alloc_thresh + sz*nm); |
| 120 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_thresh + sz*nm); |
| 121 | ++ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, 0); |
| 122 | ++ } |
| 123 | + } |
| 124 | + return calloc(nm, sz); |
| 125 | + } |
| 126 | +@@ -3619,12 +3652,15 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) |
| 127 | + free(p); |
| 128 | + if (pgcstack != NULL && ct->world_age) { |
| 129 | + jl_ptls_t ptls = ct->ptls; |
| 130 | +- jl_atomic_store_relaxed(&ptls->gc_num.freed, |
| 131 | +- jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz); |
| 132 | +- jl_atomic_store_relaxed(&ptls->gc_num.freecall, |
| 133 | +- jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1); |
| 134 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, sz); |
| 135 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -sz); |
| 136 | ++ uint64_t free_thresh = jl_atomic_load_relaxed(&ptls->gc_num.free_thresh); |
| 137 | ++ if (free_thresh + sz < 128*1024) { |
| 138 | ++ jl_atomic_store_relaxed(&ptls->gc_num.free_thresh, free_thresh + sz); |
| 139 | ++ } |
| 140 | ++ else { |
| 141 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, free_thresh + sz); |
| 142 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_thresh + sz)); |
| 143 | ++ jl_atomic_store_relaxed(&ptls->gc_num.free_thresh, 0); |
| 144 | ++ } |
| 145 | + } |
| 146 | + } |
| 147 | + |
| 148 | +@@ -3635,17 +3671,28 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size |
| 149 | + if (pgcstack != NULL && ct->world_age) { |
| 150 | + jl_ptls_t ptls = ct->ptls; |
| 151 | + maybe_collect(ptls); |
| 152 | +- if (sz < old) |
| 153 | +- jl_atomic_store_relaxed(&ptls->gc_num.freed, |
| 154 | +- jl_atomic_load_relaxed(&ptls->gc_num.freed) + (old - sz)); |
| 155 | +- else |
| 156 | ++ if (!(sz < old)) |
| 157 | + jl_atomic_store_relaxed(&ptls->gc_num.allocd, |
| 158 | + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old)); |
| 159 | + jl_atomic_store_relaxed(&ptls->gc_num.realloc, |
| 160 | + jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); |
| 161 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, sz); |
| 162 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, old); |
| 163 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, sz-old); |
| 164 | ++ |
| 165 | ++ uint64_t free_thresh = jl_atomic_load_relaxed(&ptls->gc_num.free_thresh); |
| 166 | ++ if (free_thresh + old < 128*1024) |
| 167 | ++ jl_atomic_store_relaxed(&ptls->gc_num.free_thresh, free_thresh + old); |
| 168 | ++ else { |
| 169 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, free_thresh + old); |
| 170 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_thresh + old)); |
| 171 | ++ jl_atomic_store_relaxed(&ptls->gc_num.free_thresh, 0); |
| 172 | ++ } |
| 173 | ++ uint64_t alloc_thresh = jl_atomic_load_relaxed(&ptls->gc_num.alloc_thresh); |
| 174 | ++ if (alloc_thresh + sz < 128*1024) |
| 175 | ++ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, alloc_thresh + sz); |
| 176 | ++ else { |
| 177 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, alloc_thresh + sz); |
| 178 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_thresh + sz); |
| 179 | ++ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, 0); |
| 180 | ++ } |
| 181 | + } |
| 182 | + return realloc(p, sz); |
| 183 | + } |
| 184 | +@@ -3720,8 +3767,14 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) |
| 185 | + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); |
| 186 | + jl_atomic_store_relaxed(&ptls->gc_num.malloc, |
| 187 | + jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); |
| 188 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, allocsz); |
| 189 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, allocsz); |
| 190 | ++ uint64_t alloc_thresh = jl_atomic_load_relaxed(&ptls->gc_num.alloc_thresh); |
| 191 | ++ if (alloc_thresh + allocsz < 128*1024)
| 192 | ++ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, alloc_thresh + allocsz);
| 193 | ++ else {
| 194 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, alloc_thresh + allocsz);
| 195 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_thresh + allocsz);
| 196 | ++ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, 0); |
| 197 | ++ } |
| 198 | + int last_errno = errno; |
| 199 | + #ifdef _OS_WINDOWS_ |
| 200 | + DWORD last_error = GetLastError(); |
| 201 | +@@ -3752,17 +3805,28 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds |
| 202 | + ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz; |
| 203 | + inc_live_bytes(allocsz - oldsz); |
| 204 | + } |
| 205 | +- else if (allocsz < oldsz) |
| 206 | +- jl_atomic_store_relaxed(&ptls->gc_num.freed, |
| 207 | +- jl_atomic_load_relaxed(&ptls->gc_num.freed) + (oldsz - allocsz)); |
| 208 | +- else |
| 209 | ++ else if (!(allocsz < oldsz)) |
| 210 | + jl_atomic_store_relaxed(&ptls->gc_num.allocd, |
| 211 | + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz)); |
| 212 | + jl_atomic_store_relaxed(&ptls->gc_num.realloc, |
| 213 | + jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); |
| 214 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, allocsz); |
| 215 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, oldsz); |
| 216 | +- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, allocsz-oldsz); |
| 217 | ++ |
| 218 | ++ uint64_t free_thresh = jl_atomic_load_relaxed(&ptls->gc_num.free_thresh); |
| 219 | ++ if (free_thresh + oldsz < 128*1024) |
| 220 | ++ jl_atomic_store_relaxed(&ptls->gc_num.free_thresh, free_thresh + oldsz); |
| 221 | ++ else { |
| 222 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, free_thresh + oldsz); |
| 223 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_thresh + oldsz)); |
| 224 | ++ jl_atomic_store_relaxed(&ptls->gc_num.free_thresh, 0); |
| 225 | ++ } |
| 226 | ++ uint64_t alloc_thresh = jl_atomic_load_relaxed(&ptls->gc_num.alloc_thresh); |
| 227 | ++ if (alloc_thresh + allocsz < 128*1024) |
| 228 | ++ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, alloc_thresh + allocsz); |
| 229 | ++ else { |
| 230 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, alloc_thresh + allocsz); |
| 231 | ++ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_thresh + allocsz); |
| 232 | ++ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, 0); |
| 233 | ++ } |
| 234 | + int last_errno = errno; |
| 235 | + #ifdef _OS_WINDOWS_ |
| 236 | + DWORD last_error = GetLastError(); |
| 237 | +diff --git a/src/julia_threads.h b/src/julia_threads.h |
| 238 | +index f4c235243e..a672a92fb9 100644 |
| 239 | +--- a/src/julia_threads.h |
| 240 | ++++ b/src/julia_threads.h |
| 241 | +@@ -130,12 +130,12 @@ typedef struct { |
| 242 | + |
| 243 | + typedef struct { |
| 244 | + _Atomic(int64_t) allocd; |
| 245 | +- _Atomic(int64_t) freed; |
| 246 | + _Atomic(uint64_t) malloc; |
| 247 | + _Atomic(uint64_t) realloc; |
| 248 | + _Atomic(uint64_t) poolalloc; |
| 249 | + _Atomic(uint64_t) bigalloc; |
| 250 | +- _Atomic(uint64_t) freecall; |
| 251 | ++ _Atomic(uint64_t) free_thresh; // fields used to batch fetch-add operations for the GC
| 252 | ++ _Atomic(uint64_t) alloc_thresh; |
| 253 | + } jl_thread_gc_num_t; |
| 254 | + |
| 255 | + typedef struct { |
0 commit comments