Skip to content

Commit 0110fa8

Browse files
committed
Merge branch 'rc-4.3.1'
2 parents 9bef119 + b0f5658 commit 0110fa8

File tree

10 files changed

+390
-46
lines changed

10 files changed

+390
-46
lines changed

ChangeLog

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@ brevity. Much more detail can be found in the git revision history:
44

55
https://github.com/jemalloc/jemalloc
66

+ * 4.3.1 (November 7, 2016)
+
+   Bug fixes:
+   - Fix a severe virtual memory leak. This regression was first released in
+     4.3.0. (@interwq, @jasone)
+   - Refactor atomic and prng APIs to restore support for 32-bit platforms that
+     use pre-C11 toolchains, e.g. FreeBSD's mips. (@jasone)
+
715
* 4.3.0 (November 4, 2016)
816

917
This is the first release that passes the test suite for multiple Windows

include/jemalloc/internal/arena.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ struct arena_s {
370370
* PRNG state for cache index randomization of large allocation base
371371
* pointers.
372372
*/
373-
uint64_t offset_state;
373+
size_t offset_state;
374374

375375
dss_prec_t dss_prec;
376376

include/jemalloc/internal/atomic.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ void atomic_write_u(unsigned *p, unsigned x);
6666
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
6767
/******************************************************************************/
6868
/* 64-bit operations. */
69-
#if (defined(__amd64__) || defined(__x86_64__))
69+
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
70+
# if (defined(__amd64__) || defined(__x86_64__))
7071
JEMALLOC_INLINE uint64_t
7172
atomic_add_uint64(uint64_t *p, uint64_t x)
7273
{
@@ -124,7 +125,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x)
124125
: "memory" /* Clobbers. */
125126
);
126127
}
127-
#elif (defined(JEMALLOC_C11ATOMICS))
128+
# elif (defined(JEMALLOC_C11ATOMICS))
128129
JEMALLOC_INLINE uint64_t
129130
atomic_add_uint64(uint64_t *p, uint64_t x)
130131
{
@@ -152,7 +153,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x)
152153
volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
153154
atomic_store(a, x);
154155
}
155-
#elif (defined(JEMALLOC_ATOMIC9))
156+
# elif (defined(JEMALLOC_ATOMIC9))
156157
JEMALLOC_INLINE uint64_t
157158
atomic_add_uint64(uint64_t *p, uint64_t x)
158159
{
@@ -192,7 +193,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x)
192193

193194
atomic_store_rel_long(p, x);
194195
}
195-
#elif (defined(JEMALLOC_OSATOMIC))
196+
# elif (defined(JEMALLOC_OSATOMIC))
196197
JEMALLOC_INLINE uint64_t
197198
atomic_add_uint64(uint64_t *p, uint64_t x)
198199
{
@@ -224,7 +225,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x)
224225
o = atomic_read_uint64(p);
225226
} while (atomic_cas_uint64(p, o, x));
226227
}
227-
#elif (defined(_MSC_VER))
228+
# elif (defined(_MSC_VER))
228229
JEMALLOC_INLINE uint64_t
229230
atomic_add_uint64(uint64_t *p, uint64_t x)
230231
{
@@ -254,7 +255,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x)
254255

255256
InterlockedExchange64(p, x);
256257
}
257-
#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \
258+
# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \
258259
defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
259260
JEMALLOC_INLINE uint64_t
260261
atomic_add_uint64(uint64_t *p, uint64_t x)
@@ -283,8 +284,9 @@ atomic_write_uint64(uint64_t *p, uint64_t x)
283284

284285
__sync_lock_test_and_set(p, x);
285286
}
286-
#else
287-
# error "Missing implementation for 64-bit atomic operations"
287+
# else
288+
# error "Missing implementation for 64-bit atomic operations"
289+
# endif
288290
#endif
289291

290292
/******************************************************************************/

include/jemalloc/internal/private_symbols.txt

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -405,8 +405,15 @@ pind2sz_tab
405405
pow2_ceil_u32
406406
pow2_ceil_u64
407407
pow2_ceil_zu
408-
prng_lg_range
409-
prng_range
408+
prng_lg_range_u32
409+
prng_lg_range_u64
410+
prng_lg_range_zu
411+
prng_range_u32
412+
prng_range_u64
413+
prng_range_zu
414+
prng_state_next_u32
415+
prng_state_next_u64
416+
prng_state_next_zu
410417
prof_active
411418
prof_active_get
412419
prof_active_get_unlocked

include/jemalloc/internal/prng.h

Lines changed: 139 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,12 @@
1919
* the next has a cycle of 4, etc. For this reason, we prefer to use the upper
2020
* bits.
2121
*/
22-
#define PRNG_A UINT64_C(6364136223846793005)
23-
#define PRNG_C UINT64_C(1442695040888963407)
22+
23+
#define PRNG_A_32 UINT32_C(1103515241)
24+
#define PRNG_C_32 UINT32_C(12347)
25+
26+
#define PRNG_A_64 UINT64_C(6364136223846793005)
27+
#define PRNG_C_64 UINT64_C(1442695040888963407)
2428

2529
#endif /* JEMALLOC_H_TYPES */
2630
/******************************************************************************/
@@ -35,28 +39,133 @@
3539
#ifdef JEMALLOC_H_INLINES
3640

3741
#ifndef JEMALLOC_ENABLE_INLINE
38-
uint64_t prng_lg_range(uint64_t *state, unsigned lg_range);
39-
uint64_t prng_range(uint64_t *state, uint64_t range);
42+
uint32_t prng_state_next_u32(uint32_t state);
43+
uint64_t prng_state_next_u64(uint64_t state);
44+
size_t prng_state_next_zu(size_t state);
45+
46+
uint32_t prng_lg_range_u32(uint32_t *state, unsigned lg_range,
47+
bool atomic);
48+
uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range);
49+
size_t prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic);
50+
51+
uint32_t prng_range_u32(uint32_t *state, uint32_t range, bool atomic);
52+
uint64_t prng_range_u64(uint64_t *state, uint64_t range);
53+
size_t prng_range_zu(size_t *state, size_t range, bool atomic);
4054
#endif
4155

4256
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_))
57+
JEMALLOC_ALWAYS_INLINE uint32_t
58+
prng_state_next_u32(uint32_t state)
59+
{
60+
61+
return ((state * PRNG_A_32) + PRNG_C_32);
62+
}
63+
4364
JEMALLOC_ALWAYS_INLINE uint64_t
44-
prng_lg_range(uint64_t *state, unsigned lg_range)
65+
prng_state_next_u64(uint64_t state)
4566
{
46-
uint64_t ret;
67+
68+
return ((state * PRNG_A_64) + PRNG_C_64);
69+
}
70+
71+
JEMALLOC_ALWAYS_INLINE size_t
72+
prng_state_next_zu(size_t state)
73+
{
74+
75+
#if LG_SIZEOF_PTR == 2
76+
return ((state * PRNG_A_32) + PRNG_C_32);
77+
#elif LG_SIZEOF_PTR == 3
78+
return ((state * PRNG_A_64) + PRNG_C_64);
79+
#else
80+
#error Unsupported pointer size
81+
#endif
82+
}
83+
84+
JEMALLOC_ALWAYS_INLINE uint32_t
85+
prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic)
86+
{
87+
uint32_t ret, state1;
88+
89+
assert(lg_range > 0);
90+
assert(lg_range <= 32);
91+
92+
if (atomic) {
93+
uint32_t state0;
94+
95+
do {
96+
state0 = atomic_read_uint32(state);
97+
state1 = prng_state_next_u32(state0);
98+
} while (atomic_cas_uint32(state, state0, state1));
99+
} else {
100+
state1 = prng_state_next_u32(*state);
101+
*state = state1;
102+
}
103+
ret = state1 >> (32 - lg_range);
104+
105+
return (ret);
106+
}
107+
108+
/* 64-bit atomic operations cannot be supported on all relevant platforms. */
109+
JEMALLOC_ALWAYS_INLINE uint64_t
110+
prng_lg_range_u64(uint64_t *state, unsigned lg_range)
111+
{
112+
uint64_t ret, state1;
47113

48114
assert(lg_range > 0);
49115
assert(lg_range <= 64);
50116

51-
ret = (*state * PRNG_A) + PRNG_C;
52-
*state = ret;
53-
ret >>= (64 - lg_range);
117+
state1 = prng_state_next_u64(*state);
118+
*state = state1;
119+
ret = state1 >> (64 - lg_range);
120+
121+
return (ret);
122+
}
123+
124+
JEMALLOC_ALWAYS_INLINE size_t
125+
prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic)
126+
{
127+
size_t ret, state1;
128+
129+
assert(lg_range > 0);
130+
assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR));
131+
132+
if (atomic) {
133+
size_t state0;
134+
135+
do {
136+
state0 = atomic_read_z(state);
137+
state1 = prng_state_next_zu(state0);
138+
} while (atomic_cas_z(state, state0, state1));
139+
} else {
140+
state1 = prng_state_next_zu(*state);
141+
*state = state1;
142+
}
143+
ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range);
144+
145+
return (ret);
146+
}
147+
148+
JEMALLOC_ALWAYS_INLINE uint32_t
149+
prng_range_u32(uint32_t *state, uint32_t range, bool atomic)
150+
{
151+
uint32_t ret;
152+
unsigned lg_range;
153+
154+
assert(range > 1);
155+
156+
/* Compute the ceiling of lg(range). */
157+
lg_range = ffs_u32(pow2_ceil_u32(range)) - 1;
158+
159+
/* Generate a result in [0..range) via repeated trial. */
160+
do {
161+
ret = prng_lg_range_u32(state, lg_range, atomic);
162+
} while (ret >= range);
54163

55164
return (ret);
56165
}
57166

58167
JEMALLOC_ALWAYS_INLINE uint64_t
59-
prng_range(uint64_t *state, uint64_t range)
168+
prng_range_u64(uint64_t *state, uint64_t range)
60169
{
61170
uint64_t ret;
62171
unsigned lg_range;
@@ -68,7 +177,26 @@ prng_range(uint64_t *state, uint64_t range)
68177

69178
/* Generate a result in [0..range) via repeated trial. */
70179
do {
71-
ret = prng_lg_range(state, lg_range);
180+
ret = prng_lg_range_u64(state, lg_range);
181+
} while (ret >= range);
182+
183+
return (ret);
184+
}
185+
186+
JEMALLOC_ALWAYS_INLINE size_t
187+
prng_range_zu(size_t *state, size_t range, bool atomic)
188+
{
189+
size_t ret;
190+
unsigned lg_range;
191+
192+
assert(range > 1);
193+
194+
/* Compute the ceiling of lg(range). */
195+
lg_range = ffs_u64(pow2_ceil_u64(range)) - 1;
196+
197+
/* Generate a result in [0..range) via repeated trial. */
198+
do {
199+
ret = prng_lg_range_zu(state, lg_range, atomic);
72200
} while (ret >= range);
73201

74202
return (ret);

src/arena.c

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
150150
arena_miscelm_get_const(chunk, pageind))));
151151
assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
152152
LG_PAGE));
153+
assert((npages << LG_PAGE) < chunksize);
154+
assert(pind2sz(pind) <= chunksize);
153155
arena_run_heap_insert(&arena->runs_avail[pind],
154156
arena_miscelm_get_mutable(chunk, pageind));
155157
}
@@ -162,6 +164,8 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
162164
arena_miscelm_get_const(chunk, pageind))));
163165
assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
164166
LG_PAGE));
167+
assert((npages << LG_PAGE) < chunksize);
168+
assert(pind2sz(pind) <= chunksize);
165169
arena_run_heap_remove(&arena->runs_avail[pind],
166170
arena_miscelm_get_mutable(chunk, pageind));
167171
}
@@ -1046,7 +1050,7 @@ arena_run_first_best_fit(arena_t *arena, size_t size)
10461050

10471051
pind = psz2ind(run_quantize_ceil(size));
10481052

1049-
for (i = pind; pind2sz(i) <= large_maxclass; i++) {
1053+
for (i = pind; pind2sz(i) <= chunksize; i++) {
10501054
arena_chunk_map_misc_t *miscelm = arena_run_heap_first(
10511055
&arena->runs_avail[i]);
10521056
if (miscelm != NULL)
@@ -1195,7 +1199,7 @@ arena_decay_deadline_init(arena_t *arena)
11951199
if (arena->decay.time > 0) {
11961200
nstime_t jitter;
11971201

1198-
nstime_init(&jitter, prng_range(&arena->decay.jitter_state,
1202+
nstime_init(&jitter, prng_range_u64(&arena->decay.jitter_state,
11991203
nstime_ns(&arena->decay.interval)));
12001204
nstime_add(&arena->decay.deadline, &jitter);
12011205
}
@@ -1922,8 +1926,7 @@ arena_reset(tsd_t *tsd, arena_t *arena)
19221926
assert(!arena->purging);
19231927
arena->nactive = 0;
19241928

1925-
for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t);
1926-
i++)
1929+
for (i = 0; i < NPSIZES; i++)
19271930
arena_run_heap_new(&arena->runs_avail[i]);
19281931

19291932
malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock);
@@ -2562,7 +2565,8 @@ arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero)
25622565
* that is a multiple of the cacheline size, e.g. [0 .. 63) * 64
25632566
* for 4 KiB pages and 64-byte cachelines.
25642567
*/
2565-
r = prng_lg_range(&arena->offset_state, LG_PAGE - LG_CACHELINE);
2568+
r = prng_lg_range_zu(&arena->offset_state, LG_PAGE -
2569+
LG_CACHELINE, false);
25662570
random_offset = ((uintptr_t)r) << LG_CACHELINE;
25672571
} else
25682572
random_offset = 0;
@@ -3500,7 +3504,7 @@ arena_new(tsdn_t *tsdn, unsigned ind)
35003504
* deterministic seed.
35013505
*/
35023506
arena->offset_state = config_debug ? ind :
3503-
(uint64_t)(uintptr_t)arena;
3507+
(size_t)(uintptr_t)arena;
35043508
}
35053509

35063510
arena->dss_prec = chunk_dss_prec_get();
@@ -3514,8 +3518,7 @@ arena_new(tsdn_t *tsdn, unsigned ind)
35143518
arena->nactive = 0;
35153519
arena->ndirty = 0;
35163520

3517-
for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t);
3518-
i++)
3521+
for (i = 0; i < NPSIZES; i++)
35193522
arena_run_heap_new(&arena->runs_avail[i]);
35203523

35213524
qr_new(&arena->runs_dirty, rd_link);

src/ckh.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
9999
* Cycle through the cells in the bucket, starting at a random position.
100100
* The randomness avoids worst-case search overhead as buckets fill up.
101101
*/
102-
offset = (unsigned)prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS);
102+
offset = (unsigned)prng_lg_range_u64(&ckh->prng_state,
103+
LG_CKH_BUCKET_CELLS);
103104
for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
104105
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
105106
((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
@@ -141,7 +142,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
141142
* were an item for which both hashes indicated the same
142143
* bucket.
143144
*/
144-
i = (unsigned)prng_lg_range(&ckh->prng_state,
145+
i = (unsigned)prng_lg_range_u64(&ckh->prng_state,
145146
LG_CKH_BUCKET_CELLS);
146147
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
147148
assert(cell->key != NULL);

src/prof.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -874,7 +874,7 @@ prof_sample_threshold_update(prof_tdata_t *tdata)
874874
* pp 500
875875
* (http://luc.devroye.org/rnbookindex.html)
876876
*/
877-
r = prng_lg_range(&tdata->prng_state, 53);
877+
r = prng_lg_range_u64(&tdata->prng_state, 53);
878878
u = (double)r * (1.0/9007199254740992.0L);
879879
tdata->bytes_until_sample = (uint64_t)(log(u) /
880880
log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))

0 commit comments

Comments
 (0)