Skip to content

Commit bc33c81

Browse files
Binary search for pkgimage metadata (JuliaLang#48940)
Co-authored-by: Jameson Nash <[email protected]>
1 parent 38d24e5 commit bc33c81

File tree

3 files changed

+134
-22
lines changed

3 files changed

+134
-22
lines changed

src/gc.c

Lines changed: 125 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,13 @@ pagetable_t memory_map;
173173
// List of marked big objects. Not per-thread. Accessed only by master thread.
174174
bigval_t *big_objects_marked = NULL;
175175

176+
// Eytzinger tree of images. Used for very fast jl_object_in_image queries during gc
177+
// See https://algorithmica.org/en/eytzinger
178+
// Blob boundary values laid out in Eytzinger order by rebuild_image_blob_tree,
// followed by one trailing slot holding 1 that stands for "outside any image".
static arraylist_t eytzinger_image_tree;
179+
// Parallel to eytzinger_image_tree: slot j holds the blob index that
// external_blob_index returns for objects resolving to tree slot j.
static arraylist_t eytzinger_idxs;
180+
// Smallest boundary value stored in the tree (set by rebuild_image_blob_tree).
static uintptr_t gc_img_min;
181+
// Largest boundary value stored in the tree, plus one (set by rebuild_image_blob_tree).
static uintptr_t gc_img_max;
182+
176183
// -- Finalization --
177184
// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers.
178185
// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer.
@@ -183,6 +190,118 @@ arraylist_t finalizer_list_marked;
183190
arraylist_t to_finalize;
184191
JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0;
185192

193+
// qsort comparator that orders two pointer-sized unsigned values ascending.
//   l, r - each points at a uintptr_t to compare
// Returns -1 if *l < *r, 0 if equal, 1 if *l > *r. The result is built from two
// comparisons rather than a subtraction, so it cannot wrap for large operands.
static int ptr_cmp(const void *l, const void *r)
{
    const uintptr_t left = *(const uintptr_t*)l;
    const uintptr_t right = *(const uintptr_t*)r;
    return (left > right) - (left < right);
}
200+
201+
// Build an Eytzinger tree in `dest` from the sorted array `src`.
//
// The tree is the implicit complete binary tree on nodes 1..n, where the children
// of node k are 2k and 2k+1 and node k is stored at dest[k-1]. The nodes are
// visited in-order and each visited node receives the next element of `src`.
//   src  - sorted input with at least n elements
//   dest - output with at least n elements; written while src is still being
//          read, so it is expected to be a different array
//   i    - index of the next unconsumed element of src (0 for the root call)
//   k    - 1-based number of the subtree root to fill (1 for the root call)
//   n    - number of elements in the tree
// Returns the index of the next unconsumed element of src; the type is size_t
// so that the cursor is never narrowed on its way back up the recursion.
static size_t eytzinger(const uintptr_t *src, uintptr_t *dest, size_t i, size_t k, size_t n)
{
    if (k > n)
        return i; // empty subtree: nothing consumed
    i = eytzinger(src, dest, i, 2 * k, n);
    dest[k - 1] = src[i++];
    return eytzinger(src, dest, i, 2 * k + 1, n);
}
212+
213+
// Find the slot of eytzinger_image_tree holding the greatest stored boundary
// that is strictly below the address of `obj`.
//
// Returns a 0-based index into eytzinger_image_tree.items. The last slot
// (index len-1) is returned when that slot is the only one in the tree, or when
// the address lies outside the half-open range (gc_img_min, gc_img_max].
static size_t eyt_obj_idx(jl_value_t *obj) JL_NOTSAFEPOINT
{
    const size_t last = eytzinger_image_tree.len - 1;
    if (last == 0)
        return last;
    assert(last % 2 == 0 && "Eytzinger tree not even length!");
    const uintptr_t addr = (uintptr_t)obj;
    if (!(addr > gc_img_min && addr <= gc_img_max))
        return last;
    const uintptr_t *nodes = (const uintptr_t*)eytzinger_image_tree.items;
    // Walk down from the root (node 1). Each step appends one bit to `path`:
    // 1 for a right turn (addr > node), 0 for a left turn. `path` therefore
    // records the whole route taken, not just the current node number.
    size_t path = 1;
    while (path <= last) {
        const size_t went_right = (addr > nodes[path - 1]);
        path = (path << 1) | went_right;
    }
    // Undo the trailing left turns and the right turn just before them in one
    // shift; what remains is the number of the last node that compared below
    // addr. The result is nonzero because path started at 1 and
    // addr > gc_img_min guarantees at least one right turn.
    path >>= (__builtin_ctzll(path) + 1);
    assert(path != 0);
    assert(path <= last && "Eytzinger tree index out of bounds!");
    assert(nodes[path - 1] < addr && "Failed to find lower bound for object!");
    return path - 1;
}
240+
241+
//used in staticdata.c after we add an image
242+
void rebuild_image_blob_tree(void)
243+
{
244+
size_t inc = 1 + jl_linkage_blobs.len - eytzinger_image_tree.len;
245+
assert(eytzinger_idxs.len == eytzinger_image_tree.len);
246+
assert(eytzinger_idxs.max == eytzinger_image_tree.max);
247+
arraylist_grow(&eytzinger_idxs, inc);
248+
arraylist_grow(&eytzinger_image_tree, inc);
249+
eytzinger_idxs.items[eytzinger_idxs.len - 1] = (void*)jl_linkage_blobs.len;
250+
eytzinger_image_tree.items[eytzinger_image_tree.len - 1] = (void*)1; // outside image
251+
for (size_t i = 0; i < jl_linkage_blobs.len; i++) {
252+
assert((uintptr_t) jl_linkage_blobs.items[i] % 4 == 0 && "Linkage blob not 4-byte aligned!");
253+
// We abuse the pointer here a little so that a couple of properties are true:
254+
// 1. a start and an end are never the same value. This simplifies the binary search.
255+
// 2. ends are always after starts. This also simplifies the binary search.
256+
// We assume that there exist no 0-size blobs, but that's a safe assumption
257+
// since it means nothing could be there anyways
258+
uintptr_t val = (uintptr_t) jl_linkage_blobs.items[i];
259+
eytzinger_idxs.items[i] = (void*)(val + (i & 1));
260+
}
261+
qsort(eytzinger_idxs.items, eytzinger_idxs.len - 1, sizeof(void*), ptr_cmp);
262+
gc_img_min = (uintptr_t) eytzinger_idxs.items[0];
263+
gc_img_max = (uintptr_t) eytzinger_idxs.items[eytzinger_idxs.len - 2] + 1;
264+
eytzinger((uintptr_t*)eytzinger_idxs.items, (uintptr_t*)eytzinger_image_tree.items, 0, 1, eytzinger_idxs.len - 1);
265+
// Reuse the scratch memory to store the indices
266+
// Still O(nlogn) because binary search
267+
for (size_t i = 0; i < jl_linkage_blobs.len; i ++) {
268+
uintptr_t val = (uintptr_t) jl_linkage_blobs.items[i];
269+
// This is the same computation as in the prior for loop
270+
uintptr_t eyt_val = val + (i & 1);
271+
size_t eyt_idx = eyt_obj_idx((jl_value_t*)(eyt_val + 1)); assert(eyt_idx < eytzinger_idxs.len - 1);
272+
assert(eytzinger_image_tree.items[eyt_idx] == (void*)eyt_val && "Eytzinger tree failed to find object!");
273+
if (i & 1)
274+
eytzinger_idxs.items[eyt_idx] = (void*)n_linkage_blobs();
275+
else
276+
eytzinger_idxs.items[eyt_idx] = (void*)(i / 2);
277+
}
278+
}
279+
280+
// Report whether `obj` lies inside one of the registered image blobs.
// Returns 1 if it does, 0 otherwise. `obj` must be 4-byte aligned.
static int eyt_obj_in_img(jl_value_t *obj) JL_NOTSAFEPOINT
{
    assert((uintptr_t) obj % 4 == 0 && "Object not 4-byte aligned!");
    // Keep the index as size_t; narrowing it to int could corrupt the subscript.
    size_t idx = eyt_obj_idx(obj);
    // Now we use a tiny trick: tree[idx] & 1 is whether or not tree[idx] is a
    // start (0) or an end (1) of a blob. If it's a start, then the object is
    // in the image, otherwise it is not.
    return ((uintptr_t)eytzinger_image_tree.items[idx] & 1) == 0;
}
290+
291+
// Map `v` to the index of the image blob that contains it.
// Returns the value stored in eytzinger_idxs for the tree slot `v` resolves to:
// a blob index for slots that mark a blob start, or the filler value written by
// rebuild_image_blob_tree for every other slot. `v` must be 4-byte aligned.
size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT
{
    assert((uintptr_t) v % 4 == 0 && "Object not 4-byte aligned!");
    // Keep the index as size_t; narrowing it to int could corrupt the subscript.
    size_t eyt_idx = eyt_obj_idx(v);
    // We fill the invalid slots with the length, so we can just return that
    return (size_t) eytzinger_idxs.items[eyt_idx];
}
299+
300+
// Out-of-line entry point for the image membership query.
// Forwards to eyt_obj_in_img and returns its result (1 if `obj` is inside an
// image blob, 0 otherwise) as a uint8_t.
uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT
{
    const int inside = eyt_obj_in_img(obj);
    return inside;
}
304+
186305
NOINLINE uintptr_t gc_get_stack_ptr(void)
187306
{
188307
return (uintptr_t)jl_get_frame_addr();
@@ -2270,7 +2389,8 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
22702389
uint8_t bits = (gc_old(o->header) && !mark_reset_age) ? GC_OLD_MARKED : GC_MARKED;
22712390
int update_meta = __likely(!meta_updated && !gc_verifying);
22722391
int foreign_alloc = 0;
2273-
if (update_meta && jl_object_in_image(new_obj)) {
2392+
// directly point at eyt_obj_in_img to encourage inlining
2393+
if (update_meta && eyt_obj_in_img(new_obj)) {
22742394
foreign_alloc = 1;
22752395
update_meta = 0;
22762396
}
@@ -3245,6 +3365,10 @@ void jl_gc_init(void)
32453365

32463366
arraylist_new(&finalizer_list_marked, 0);
32473367
arraylist_new(&to_finalize, 0);
3368+
arraylist_new(&eytzinger_image_tree, 0);
3369+
arraylist_new(&eytzinger_idxs, 0);
3370+
arraylist_push(&eytzinger_idxs, (void*)0);
3371+
arraylist_push(&eytzinger_image_tree, (void*)1); // outside image
32483372

32493373
gc_num.interval = default_collect_interval;
32503374
last_long_collect_interval = default_collect_interval;

src/julia_internal.h

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -954,28 +954,9 @@ STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT
954954
return jl_image_relocs.len;
955955
}
956956

957-
// TODO: Makes this a binary search
958-
STATIC_INLINE size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT {
959-
size_t i, nblobs = n_linkage_blobs();
960-
assert(jl_linkage_blobs.len == 2*nblobs);
961-
for (i = 0; i < nblobs; i++) {
962-
uintptr_t left = (uintptr_t)jl_linkage_blobs.items[2*i];
963-
uintptr_t right = (uintptr_t)jl_linkage_blobs.items[2*i + 1];
964-
if (left < (uintptr_t)v && (uintptr_t)v <= right) {
965-
// the last object may be a singleton (v is shifted by a type tag, so we use exclusive bounds here)
966-
break;
967-
}
968-
}
969-
return i;
970-
}
957+
size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT;
971958

972-
STATIC_INLINE uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT {
973-
size_t blob = external_blob_index(v);
974-
if (blob == n_linkage_blobs()) {
975-
return 0;
976-
}
977-
return 1;
978-
}
959+
uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT;
979960

980961
typedef struct {
981962
LLVMOrcThreadSafeModuleRef TSM;

src/staticdata.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2366,6 +2366,10 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
23662366
jl_write_relocations(&s);
23672367
}
23682368

2369+
// This ensures that we can use the low bit of addresses for
2370+
// identifying end pointers in gc's eytzinger search.
2371+
write_padding(&sysimg, 4 - (sysimg.size % 4));
2372+
23692373
if (sysimg.size > ((uintptr_t)1 << RELOC_TAG_OFFSET)) {
23702374
jl_printf(
23712375
JL_STDERR,
@@ -2658,6 +2662,8 @@ JL_DLLEXPORT void jl_set_sysimg_so(void *handle)
26582662
// }
26592663
#endif
26602664

2665+
extern void rebuild_image_blob_tree(void);
2666+
26612667
static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl_array_t *depmods, uint64_t checksum,
26622668
/* outputs */ jl_array_t **restored, jl_array_t **init_order,
26632669
jl_array_t **extext_methods,
@@ -3151,6 +3157,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
31513157
arraylist_push(&jl_linkage_blobs, (void*)image_base);
31523158
arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg + sizeof(uintptr_t)));
31533159
arraylist_push(&jl_image_relocs, (void*)relocs_base);
3160+
rebuild_image_blob_tree();
31543161

31553162
// jl_printf(JL_STDOUT, "%ld blobs to link against\n", jl_linkage_blobs.len >> 1);
31563163
jl_gc_enable(en);

0 commit comments

Comments
 (0)