Skip to content

Commit 5d6cb6e

Browse files
committed
Refactor prng to not use 64-bit atomics on 32-bit platforms.
This resolves jemalloc#495.
1 parent a4e83e8 commit 5d6cb6e

File tree

7 files changed

+364
-32
lines changed

7 files changed

+364
-32
lines changed

include/jemalloc/internal/arena.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ struct arena_s {
370370
* PRNG state for cache index randomization of large allocation base
371371
* pointers.
372372
*/
373-
uint64_t offset_state;
373+
size_t offset_state;
374374

375375
dss_prec_t dss_prec;
376376

include/jemalloc/internal/private_symbols.txt

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -405,8 +405,15 @@ pind2sz_tab
405405
pow2_ceil_u32
406406
pow2_ceil_u64
407407
pow2_ceil_zu
408-
prng_lg_range
409-
prng_range
408+
prng_lg_range_u32
409+
prng_lg_range_u64
410+
prng_lg_range_zu
411+
prng_range_u32
412+
prng_range_u64
413+
prng_range_zu
414+
prng_state_next_u32
415+
prng_state_next_u64
416+
prng_state_next_zu
410417
prof_active
411418
prof_active_get
412419
prof_active_get_unlocked

include/jemalloc/internal/prng.h

Lines changed: 139 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,12 @@
1919
* the next has a cycle of 4, etc. For this reason, we prefer to use the upper
2020
* bits.
2121
*/
22-
#define PRNG_A UINT64_C(6364136223846793005)
23-
#define PRNG_C UINT64_C(1442695040888963407)
22+
23+
#define PRNG_A_32 UINT32_C(1103515241)
24+
#define PRNG_C_32 UINT32_C(12347)
25+
26+
#define PRNG_A_64 UINT64_C(6364136223846793005)
27+
#define PRNG_C_64 UINT64_C(1442695040888963407)
2428

2529
#endif /* JEMALLOC_H_TYPES */
2630
/******************************************************************************/
@@ -35,28 +39,133 @@
3539
#ifdef JEMALLOC_H_INLINES
3640

3741
#ifndef JEMALLOC_ENABLE_INLINE
38-
uint64_t prng_lg_range(uint64_t *state, unsigned lg_range);
39-
uint64_t prng_range(uint64_t *state, uint64_t range);
42+
uint32_t prng_state_next_u32(uint32_t state);
43+
uint64_t prng_state_next_u64(uint64_t state);
44+
size_t prng_state_next_zu(size_t state);
45+
46+
uint32_t prng_lg_range_u32(uint32_t *state, unsigned lg_range,
47+
bool atomic);
48+
uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range);
49+
size_t prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic);
50+
51+
uint32_t prng_range_u32(uint32_t *state, uint32_t range, bool atomic);
52+
uint64_t prng_range_u64(uint64_t *state, uint64_t range);
53+
size_t prng_range_zu(size_t *state, size_t range, bool atomic);
4054
#endif
4155

4256
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_))
57+
JEMALLOC_ALWAYS_INLINE uint32_t
58+
prng_state_next_u32(uint32_t state)
59+
{
60+
61+
return ((state * PRNG_A_32) + PRNG_C_32);
62+
}
63+
4364
JEMALLOC_ALWAYS_INLINE uint64_t
44-
prng_lg_range(uint64_t *state, unsigned lg_range)
65+
prng_state_next_u64(uint64_t state)
4566
{
46-
uint64_t ret;
67+
68+
return ((state * PRNG_A_64) + PRNG_C_64);
69+
}
70+
71+
JEMALLOC_ALWAYS_INLINE size_t
72+
prng_state_next_zu(size_t state)
73+
{
74+
75+
#if LG_SIZEOF_PTR == 2
76+
return ((state * PRNG_A_32) + PRNG_C_32);
77+
#elif LG_SIZEOF_PTR == 3
78+
return ((state * PRNG_A_64) + PRNG_C_64);
79+
#else
80+
#error Unsupported pointer size
81+
#endif
82+
}
83+
84+
JEMALLOC_ALWAYS_INLINE uint32_t
85+
prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic)
86+
{
87+
uint32_t ret, state1;
88+
89+
assert(lg_range > 0);
90+
assert(lg_range <= 32);
91+
92+
if (atomic) {
93+
uint32_t state0;
94+
95+
do {
96+
state0 = atomic_read_uint32(state);
97+
state1 = prng_state_next_u32(state0);
98+
} while (atomic_cas_uint32(state, state0, state1));
99+
} else {
100+
state1 = prng_state_next_u32(*state);
101+
*state = state1;
102+
}
103+
ret = state1 >> (32 - lg_range);
104+
105+
return (ret);
106+
}
107+
108+
/* 64-bit atomic operations cannot be supported on all relevant platforms. */
109+
JEMALLOC_ALWAYS_INLINE uint64_t
110+
prng_lg_range_u64(uint64_t *state, unsigned lg_range)
111+
{
112+
uint64_t ret, state1;
47113

48114
assert(lg_range > 0);
49115
assert(lg_range <= 64);
50116

51-
ret = (*state * PRNG_A) + PRNG_C;
52-
*state = ret;
53-
ret >>= (64 - lg_range);
117+
state1 = prng_state_next_u64(*state);
118+
*state = state1;
119+
ret = state1 >> (64 - lg_range);
120+
121+
return (ret);
122+
}
123+
124+
JEMALLOC_ALWAYS_INLINE size_t
125+
prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic)
126+
{
127+
size_t ret, state1;
128+
129+
assert(lg_range > 0);
130+
assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR));
131+
132+
if (atomic) {
133+
size_t state0;
134+
135+
do {
136+
state0 = atomic_read_z(state);
137+
state1 = prng_state_next_zu(state0);
138+
} while (atomic_cas_z(state, state0, state1));
139+
} else {
140+
state1 = prng_state_next_zu(*state);
141+
*state = state1;
142+
}
143+
ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range);
144+
145+
return (ret);
146+
}
147+
148+
JEMALLOC_ALWAYS_INLINE uint32_t
149+
prng_range_u32(uint32_t *state, uint32_t range, bool atomic)
150+
{
151+
uint32_t ret;
152+
unsigned lg_range;
153+
154+
assert(range > 1);
155+
156+
/* Compute the ceiling of lg(range). */
157+
lg_range = ffs_u32(pow2_ceil_u32(range)) - 1;
158+
159+
/* Generate a result in [0..range) via repeated trial. */
160+
do {
161+
ret = prng_lg_range_u32(state, lg_range, atomic);
162+
} while (ret >= range);
54163

55164
return (ret);
56165
}
57166

58167
JEMALLOC_ALWAYS_INLINE uint64_t
59-
prng_range(uint64_t *state, uint64_t range)
168+
prng_range_u64(uint64_t *state, uint64_t range)
60169
{
61170
uint64_t ret;
62171
unsigned lg_range;
@@ -68,7 +177,26 @@ prng_range(uint64_t *state, uint64_t range)
68177

69178
/* Generate a result in [0..range) via repeated trial. */
70179
do {
71-
ret = prng_lg_range(state, lg_range);
180+
ret = prng_lg_range_u64(state, lg_range);
181+
} while (ret >= range);
182+
183+
return (ret);
184+
}
185+
186+
JEMALLOC_ALWAYS_INLINE size_t
187+
prng_range_zu(size_t *state, size_t range, bool atomic)
188+
{
189+
size_t ret;
190+
unsigned lg_range;
191+
192+
assert(range > 1);
193+
194+
/* Compute the ceiling of lg(range). */
195+
lg_range = ffs_u64(pow2_ceil_u64(range)) - 1;
196+
197+
/* Generate a result in [0..range) via repeated trial. */
198+
do {
199+
ret = prng_lg_range_zu(state, lg_range, atomic);
72200
} while (ret >= range);
73201

74202
return (ret);

src/arena.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1199,7 +1199,7 @@ arena_decay_deadline_init(arena_t *arena)
11991199
if (arena->decay.time > 0) {
12001200
nstime_t jitter;
12011201

1202-
nstime_init(&jitter, prng_range(&arena->decay.jitter_state,
1202+
nstime_init(&jitter, prng_range_u64(&arena->decay.jitter_state,
12031203
nstime_ns(&arena->decay.interval)));
12041204
nstime_add(&arena->decay.deadline, &jitter);
12051205
}
@@ -2565,7 +2565,8 @@ arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero)
25652565
* that is a multiple of the cacheline size, e.g. [0 .. 63) * 64
25662566
* for 4 KiB pages and 64-byte cachelines.
25672567
*/
2568-
r = prng_lg_range(&arena->offset_state, LG_PAGE - LG_CACHELINE);
2568+
r = prng_lg_range_zu(&arena->offset_state, LG_PAGE -
2569+
LG_CACHELINE, false);
25692570
random_offset = ((uintptr_t)r) << LG_CACHELINE;
25702571
} else
25712572
random_offset = 0;
@@ -3503,7 +3504,7 @@ arena_new(tsdn_t *tsdn, unsigned ind)
35033504
* deterministic seed.
35043505
*/
35053506
arena->offset_state = config_debug ? ind :
3506-
(uint64_t)(uintptr_t)arena;
3507+
(size_t)(uintptr_t)arena;
35073508
}
35083509

35093510
arena->dss_prec = chunk_dss_prec_get();

src/ckh.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
9999
* Cycle through the cells in the bucket, starting at a random position.
100100
* The randomness avoids worst-case search overhead as buckets fill up.
101101
*/
102-
offset = (unsigned)prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS);
102+
offset = (unsigned)prng_lg_range_u64(&ckh->prng_state,
103+
LG_CKH_BUCKET_CELLS);
103104
for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
104105
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
105106
((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
@@ -141,7 +142,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
141142
* were an item for which both hashes indicated the same
142143
* bucket.
143144
*/
144-
i = (unsigned)prng_lg_range(&ckh->prng_state,
145+
i = (unsigned)prng_lg_range_u64(&ckh->prng_state,
145146
LG_CKH_BUCKET_CELLS);
146147
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
147148
assert(cell->key != NULL);

src/prof.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -874,7 +874,7 @@ prof_sample_threshold_update(prof_tdata_t *tdata)
874874
* pp 500
875875
* (http://luc.devroye.org/rnbookindex.html)
876876
*/
877-
r = prng_lg_range(&tdata->prng_state, 53);
877+
r = prng_lg_range_u64(&tdata->prng_state, 53);
878878
u = (double)r * (1.0/9007199254740992.0L);
879879
tdata->bytes_until_sample = (uint64_t)(log(u) /
880880
log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))

0 commit comments

Comments (0)