Skip to content

Commit 6f6439e

Browse files
authored
Don't use integer division for cong (#50427)
1 parent c57d33a commit 6f6439e

File tree

6 files changed

+23
-29
lines changed

6 files changed

+23
-29
lines changed

base/partr.jl

Lines changed: 5 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -18,15 +18,9 @@ end
1818
const heap_d = UInt32(8)
1919
const heaps = [Vector{taskheap}(undef, 0), Vector{taskheap}(undef, 0)]
2020
const heaps_lock = [SpinLock(), SpinLock()]
21-
const cong_unbias = [typemax(UInt32), typemax(UInt32)]
2221

2322

24-
cong(max::UInt32, unbias::UInt32) =
25-
ccall(:jl_rand_ptls, UInt32, (UInt32, UInt32), max, unbias) + UInt32(1)
26-
27-
function unbias_cong(max::UInt32)
28-
return typemax(UInt32) - ((typemax(UInt32) % max) + UInt32(1))
29-
end
23+
# Pick a random 1-based index in 1:max. The runtime helper `jl_rand_ptls`
# yields a zero-based value below `max`, so it is shifted up by one here.
function cong(max::UInt32)
    zero_based = ccall(:jl_rand_ptls, UInt32, (UInt32,), max)
    return zero_based + UInt32(1)
end
3024

3125

3226
function multiq_sift_up(heap::taskheap, idx::Int32)
@@ -86,7 +80,6 @@ function multiq_size(tpid::Int8)
8680
newheaps[i] = taskheap()
8781
end
8882
heaps[tp] = newheaps
89-
cong_unbias[tp] = unbias_cong(heap_p)
9083
end
9184

9285
return heap_p
@@ -100,10 +93,10 @@ function multiq_insert(task::Task, priority::UInt16)
10093

10194
task.priority = priority
10295

103-
rn = cong(heap_p, cong_unbias[tp])
96+
rn = cong(heap_p)
10497
tpheaps = heaps[tp]
10598
while !trylock(tpheaps[rn].lock)
106-
rn = cong(heap_p, cong_unbias[tp])
99+
rn = cong(heap_p)
107100
end
108101

109102
heap = tpheaps[rn]
@@ -140,8 +133,8 @@ function multiq_deletemin()
140133
if i == heap_p
141134
return nothing
142135
end
143-
rn1 = cong(heap_p, cong_unbias[tp])
144-
rn2 = cong(heap_p, cong_unbias[tp])
136+
rn1 = cong(heap_p)
137+
rn2 = cong(heap_p)
145138
prio1 = tpheaps[rn1].priority
146139
prio2 = tpheaps[rn2].priority
147140
if prio1 > prio2

src/gc.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2768,7 +2768,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
27682768
steal : {
27692769
// Try to steal chunk from random GC thread
27702770
for (int i = 0; i < 4 * jl_n_markthreads; i++) {
2771-
uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_markthreads;
2771+
uint32_t v = gc_first_tid + cong(jl_n_markthreads, &ptls->rngseed);
27722772
jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue;
27732773
c = gc_chunkqueue_steal_from(mq2);
27742774
if (c.cid != GC_empty_chunk) {
@@ -2795,7 +2795,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
27952795
}
27962796
// Try to steal pointer from random GC thread
27972797
for (int i = 0; i < 4 * jl_n_markthreads; i++) {
2798-
uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_markthreads;
2798+
uint32_t v = gc_first_tid + cong(jl_n_markthreads, &ptls->rngseed);
27992799
jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue;
28002800
new_obj = gc_ptr_queue_steal_from(mq2);
28012801
if (new_obj != NULL)

src/julia_internal.h

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
#include "support/strtod.h"
1414
#include "gc-alloc-profiler.h"
1515
#include "support/rle.h"
16+
#include <stdint.h>
1617
#include <uv.h>
1718
#include <llvm-c/Types.h>
1819
#include <llvm-c/Orc.h>
@@ -1216,15 +1217,18 @@ void jl_push_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_AR
12161217
//--------------------------------------------------
12171218
// congruential random number generator
12181219
// for a small amount of thread-local randomness
1219-
STATIC_INLINE void unbias_cong(uint64_t max, uint64_t *unbias) JL_NOTSAFEPOINT
1220-
{
1221-
*unbias = UINT64_MAX - ((UINT64_MAX % max) + 1);
1222-
}
1223-
STATIC_INLINE uint64_t cong(uint64_t max, uint64_t unbias, uint64_t *seed) JL_NOTSAFEPOINT
1220+
1221+
// Draw a pseudo-random value in [0, max) by mask-and-reject rather than by a
// modulo: advance the congruential state stored in *seed, keep only the low
// bits needed to represent max-1, and retry while the candidate is too large.
// NOTE(review): for max == 0 the `--max` below wraps to UINT64_MAX, so every
// masked value is accepted -- confirm no caller passes 0 expecting a bound.
STATIC_INLINE uint64_t cong(uint64_t max, uint64_t *seed) JL_NOTSAFEPOINT
12241222
{
1225-
while ((*seed = 69069 * (*seed) + 362437) > unbias)
1226-
;
1227-
return *seed % max;
1223+
uint64_t mask = ~(uint64_t)0; // start with all 64 bits set
1224+
--max; // from here on, max is the largest value that may be returned
1225+
mask >>= __builtin_clzll(max|1); // drop the leading zero bits of max; |1 keeps the clz argument non-zero
1226+
uint64_t x;
1227+
do {
1228+
*seed = 69069 * (*seed) + 362437; // advance the congruential generator state
1229+
x = *seed & mask; // candidate built from the low bits of the new state
1230+
} while (x > max); // reject candidates above the largest allowed value
1231+
return x;
12281232
}
12291233
JL_DLLEXPORT uint64_t jl_rand(void) JL_NOTSAFEPOINT;
12301234
JL_DLLEXPORT void jl_srand(uint64_t) JL_NOTSAFEPOINT;

src/partr.c

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -83,11 +83,10 @@ extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache,
8383
// parallel task runtime
8484
// ---
8585

86-
JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max, uint32_t unbias)
86+
// Exported entry point (called from base/partr.jl via ccall): returns the
// result of cong(max, ...) using the rngseed kept in the current task's ptls.
// The 64-bit result of cong() is narrowed to uint32_t on return.
JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max)
8787
{
8888
jl_ptls_t ptls = jl_current_task->ptls;
89-
// one-extend unbias back to 64-bits
90-
return cong(max, -(uint64_t)-unbias, &ptls->rngseed);
89+
return cong(max, &ptls->rngseed); // also advances ptls->rngseed in place
9190
}
9291

9392
// initialize the threading infrastructure

src/signal-handling.c

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -155,8 +155,7 @@ static void jl_shuffle_int_array_inplace(int *carray, int size, uint64_t *seed)
155155
// The "modern Fisher–Yates shuffle" - O(n) algorithm
156156
// https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm
157157
for (int i = size; i-- > 1; ) {
158-
uint64_t unbias = UINT64_MAX; // slightly biased, but i is very small
159-
size_t j = cong(i, unbias, seed);
158+
size_t j = cong(i, seed);
160159
uint64_t tmp = carray[j];
161160
carray[j] = carray[i];
162161
carray[i] = tmp;

src/sys.c

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -776,13 +776,12 @@ static _Atomic(uint64_t) g_rngseed;
776776
// Return a pseudo-random 64-bit value derived from the global atomic
// g_rngseed. The new seed is computed on a local copy and published with a
// compare-and-swap; the loop repeats until that swap succeeds.
JL_DLLEXPORT uint64_t jl_rand(void) JL_NOTSAFEPOINT
777777
{
778778
uint64_t max = UINT64_MAX;
779-
uint64_t unbias = UINT64_MAX;
780779
uint64_t rngseed0 = jl_atomic_load_relaxed(&g_rngseed);
781780
uint64_t rngseed;
782781
uint64_t rnd;
783782
do {
784783
rngseed = rngseed0; // work on a private copy of the last observed seed
785-
rnd = cong(max, unbias, &rngseed);
784+
rnd = cong(max, &rngseed); // advances the local copy; result is below UINT64_MAX
786785
// NOTE(review): presumably a failed cmpswap refreshes rngseed0 with the current g_rngseed -- confirm
} while (!jl_atomic_cmpswap_relaxed(&g_rngseed, &rngseed0, rngseed));
787786
return rnd;
788787
}

0 commit comments

Comments
 (0)