@@ -173,6 +173,13 @@ pagetable_t memory_map;
173173// List of marked big objects. Not per-thread. Accessed only by master thread.
174174bigval_t * big_objects_marked = NULL ;
175175
176+ // Eytzinger tree of images. Used for very fast jl_object_in_image queries during gc
177+ // See https://algorithmica.org/en/eytzinger
178+ static arraylist_t eytzinger_image_tree ;
179+ static arraylist_t eytzinger_idxs ;
180+ static uintptr_t gc_img_min ;
181+ static uintptr_t gc_img_max ;
182+
176183// -- Finalization --
177184// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers.
178185// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer.
@@ -183,6 +190,118 @@ arraylist_t finalizer_list_marked;
183190arraylist_t to_finalize ;
184191JL_DLLEXPORT _Atomic(int ) jl_gc_have_pending_finalizers = 0 ;
185192
// qsort-style comparator: orders two uintptr_t values (passed by address) ascending.
// Returns -1, 0 or 1 when the left value is smaller than, equal to, or larger than
// the right value.
static int ptr_cmp(const void *l, const void *r)
{
    const uintptr_t lhs = *(const uintptr_t *)l;
    const uintptr_t rhs = *(const uintptr_t *)r;
    if (lhs < rhs)
        return -1;
    if (lhs > rhs)
        return 1;
    return 0;
}
200+
// Build an Eytzinger (implicit binary tree, BFS order) layout from a sorted array.
//
// Performs an in-order walk of the implicit tree whose nodes are numbered 1..n
// (children of node k are 2k and 2k+1) and assigns the next unread element of
// `src` to each visited node, so that `dest[k - 1]` holds the value of node k.
//
//   src  - sorted input, at least `n` elements
//   dest - output, at least `n` elements; must not overlap `src`
//   i    - index of the next unread element of `src` (pass 0 at the top level)
//   k    - 1-based index of the subtree root to fill (pass 1 at the top level)
//   n    - number of elements / nodes
//
// Returns the index of the next unread `src` element after filling the subtree.
// The cursor is a size_t, so the return type is size_t as well to avoid an
// implicit narrowing conversion.
static size_t eytzinger(uintptr_t *src, uintptr_t *dest, size_t i, size_t k, size_t n)
{
    if (k <= n) {
        // left subtree first: it receives all the smaller elements
        i = eytzinger(src, dest, i, 2 * k, n);
        dest[k - 1] = src[i];
        i++;
        // then the right subtree with the larger elements
        i = eytzinger(src, dest, i, 2 * k + 1, n);
    }
    return i;
}
212+
213+ static size_t eyt_obj_idx (jl_value_t * obj ) JL_NOTSAFEPOINT
214+ {
215+ size_t n = eytzinger_image_tree .len - 1 ;
216+ if (n == 0 )
217+ return n ;
218+ assert (n % 2 == 0 && "Eytzinger tree not even length!" );
219+ uintptr_t cmp = (uintptr_t ) obj ;
220+ if (cmp <= gc_img_min || cmp > gc_img_max )
221+ return n ;
222+ uintptr_t * tree = (uintptr_t * )eytzinger_image_tree .items ;
223+ size_t k = 1 ;
224+ // note that k preserves the history of how we got to the current node
225+ while (k <= n ) {
226+ int greater = (cmp > tree [k - 1 ]);
227+ k <<= 1 ;
228+ k |= greater ;
229+ }
230+ // Free to assume k is nonzero, since we start with k = 1
231+ // and cmp > gc_img_min
232+ // This shift does a fast revert of the path until we get
233+ // to a node that evaluated less than cmp.
234+ k >>= (__builtin_ctzll (k ) + 1 );
235+ assert (k != 0 );
236+ assert (k <= n && "Eytzinger tree index out of bounds!" );
237+ assert (tree [k - 1 ] < cmp && "Failed to find lower bound for object!" );
238+ return k - 1 ;
239+ }
240+
241+ //used in staticdata.c after we add an image
242+ void rebuild_image_blob_tree (void )
243+ {
244+ size_t inc = 1 + jl_linkage_blobs .len - eytzinger_image_tree .len ;
245+ assert (eytzinger_idxs .len == eytzinger_image_tree .len );
246+ assert (eytzinger_idxs .max == eytzinger_image_tree .max );
247+ arraylist_grow (& eytzinger_idxs , inc );
248+ arraylist_grow (& eytzinger_image_tree , inc );
249+ eytzinger_idxs .items [eytzinger_idxs .len - 1 ] = (void * )jl_linkage_blobs .len ;
250+ eytzinger_image_tree .items [eytzinger_image_tree .len - 1 ] = (void * )1 ; // outside image
251+ for (size_t i = 0 ; i < jl_linkage_blobs .len ; i ++ ) {
252+ assert ((uintptr_t ) jl_linkage_blobs .items [i ] % 4 == 0 && "Linkage blob not 4-byte aligned!" );
253+ // We abuse the pointer here a little so that a couple of properties are true:
254+ // 1. a start and an end are never the same value. This simplifies the binary search.
255+ // 2. ends are always after starts. This also simplifies the binary search.
256+ // We assume that there exist no 0-size blobs, but that's a safe assumption
257+ // since it means nothing could be there anyways
258+ uintptr_t val = (uintptr_t ) jl_linkage_blobs .items [i ];
259+ eytzinger_idxs .items [i ] = (void * )(val + (i & 1 ));
260+ }
261+ qsort (eytzinger_idxs .items , eytzinger_idxs .len - 1 , sizeof (void * ), ptr_cmp );
262+ gc_img_min = (uintptr_t ) eytzinger_idxs .items [0 ];
263+ gc_img_max = (uintptr_t ) eytzinger_idxs .items [eytzinger_idxs .len - 2 ] + 1 ;
264+ eytzinger ((uintptr_t * )eytzinger_idxs .items , (uintptr_t * )eytzinger_image_tree .items , 0 , 1 , eytzinger_idxs .len - 1 );
265+ // Reuse the scratch memory to store the indices
266+ // Still O(nlogn) because binary search
267+ for (size_t i = 0 ; i < jl_linkage_blobs .len ; i ++ ) {
268+ uintptr_t val = (uintptr_t ) jl_linkage_blobs .items [i ];
269+ // This is the same computation as in the prior for loop
270+ uintptr_t eyt_val = val + (i & 1 );
271+ size_t eyt_idx = eyt_obj_idx ((jl_value_t * )(eyt_val + 1 )); assert (eyt_idx < eytzinger_idxs .len - 1 );
272+ assert (eytzinger_image_tree .items [eyt_idx ] == (void * )eyt_val && "Eytzinger tree failed to find object!" );
273+ if (i & 1 )
274+ eytzinger_idxs .items [eyt_idx ] = (void * )n_linkage_blobs ();
275+ else
276+ eytzinger_idxs .items [eyt_idx ] = (void * )(i / 2 );
277+ }
278+ }
279+
280+ static int eyt_obj_in_img (jl_value_t * obj ) JL_NOTSAFEPOINT
281+ {
282+ assert ((uintptr_t ) obj % 4 == 0 && "Object not 4-byte aligned!" );
283+ int idx = eyt_obj_idx (obj );
284+ // Now we use a tiny trick: tree[idx] & 1 is whether or not tree[idx] is a
285+ // start (0) or an end (1) of a blob. If it's a start, then the object is
286+ // in the image, otherwise it is not.
287+ int in_image = ((uintptr_t )eytzinger_image_tree .items [idx ] & 1 ) == 0 ;
288+ return in_image ;
289+ }
290+
291+ size_t external_blob_index (jl_value_t * v ) JL_NOTSAFEPOINT
292+ {
293+ assert ((uintptr_t ) v % 4 == 0 && "Object not 4-byte aligned!" );
294+ int eyt_idx = eyt_obj_idx (v );
295+ // We fill the invalid slots with the length, so we can just return that
296+ size_t idx = (size_t ) eytzinger_idxs .items [eyt_idx ];
297+ return idx ;
298+ }
299+
300+ uint8_t jl_object_in_image (jl_value_t * obj ) JL_NOTSAFEPOINT
301+ {
302+ return eyt_obj_in_img (obj );
303+ }
304+
186305NOINLINE uintptr_t gc_get_stack_ptr (void )
187306{
188307 return (uintptr_t )jl_get_frame_addr ();
@@ -2270,7 +2389,8 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
22702389 uint8_t bits = (gc_old (o -> header ) && !mark_reset_age ) ? GC_OLD_MARKED : GC_MARKED ;
22712390 int update_meta = __likely (!meta_updated && !gc_verifying );
22722391 int foreign_alloc = 0 ;
2273- if (update_meta && jl_object_in_image (new_obj )) {
2392+ // directly point at eyt_obj_in_img to encourage inlining
2393+ if (update_meta && eyt_obj_in_img (new_obj )) {
22742394 foreign_alloc = 1 ;
22752395 update_meta = 0 ;
22762396 }
@@ -3245,6 +3365,10 @@ void jl_gc_init(void)
32453365
32463366 arraylist_new (& finalizer_list_marked , 0 );
32473367 arraylist_new (& to_finalize , 0 );
3368+ arraylist_new (& eytzinger_image_tree , 0 );
3369+ arraylist_new (& eytzinger_idxs , 0 );
3370+ arraylist_push (& eytzinger_idxs , (void * )0 );
3371+ arraylist_push (& eytzinger_image_tree , (void * )1 ); // outside image
32483372
32493373 gc_num .interval = default_collect_interval ;
32503374 last_long_collect_interval = default_collect_interval ;
0 commit comments