@@ -984,9 +984,12 @@ STATIC_INLINE void gc_sweep_pool_page(gc_page_profiler_serializer_t *s, jl_gc_pa
984984// sweep over all memory that is being used and not in a pool
985985static void gc_sweep_other (jl_ptls_t ptls , int sweep_full ) JL_NOTSAFEPOINT
986986{
987+ uint64_t t_free_mallocd_memory_start = jl_hrtime ();
987988 gc_sweep_foreign_objs ();
988989 sweep_malloced_memory ();
989990 sweep_big (ptls );
991+ uint64_t t_free_mallocd_memory_end = jl_hrtime ();
992+ gc_num .total_sweep_free_mallocd_memory_time += t_free_mallocd_memory_end - t_free_mallocd_memory_start ;
990993 jl_engine_sweep (gc_all_tls_states );
991994}
992995
@@ -1380,66 +1383,74 @@ static void gc_sweep_pool(void)
13801383 }
13811384 }
13821385
1383- // the actual sweeping
1384- jl_gc_padded_page_stack_t * new_gc_allocd_scratch = (jl_gc_padded_page_stack_t * ) calloc_s (n_threads * sizeof (jl_gc_padded_page_stack_t ));
1385- jl_ptls_t ptls = jl_current_task -> ptls ;
1386- gc_sweep_wake_all_pages (ptls , new_gc_allocd_scratch );
1387- gc_sweep_pool_parallel (ptls );
1388- gc_sweep_wait_for_all_pages ();
1389-
1390- // reset half-pages pointers
1391- for (int t_i = 0 ; t_i < n_threads ; t_i ++ ) {
1392- jl_ptls_t ptls2 = gc_all_tls_states [t_i ];
1393- if (ptls2 != NULL ) {
1394- ptls2 -> gc_tls .page_metadata_allocd = new_gc_allocd_scratch [t_i ].stack ;
1395- for (int i = 0 ; i < JL_GC_N_POOLS ; i ++ ) {
1396- jl_gc_pool_t * p = & ptls2 -> gc_tls .heap .norm_pools [i ];
1397- p -> newpages = NULL ;
1386+ uint64_t t_page_walk_start = jl_hrtime ();
1387+ {
1388+ // the actual sweeping
1389+ jl_gc_padded_page_stack_t * new_gc_allocd_scratch = (jl_gc_padded_page_stack_t * ) calloc_s (n_threads * sizeof (jl_gc_padded_page_stack_t ));
1390+ jl_ptls_t ptls = jl_current_task -> ptls ;
1391+ gc_sweep_wake_all_pages (ptls , new_gc_allocd_scratch );
1392+ gc_sweep_pool_parallel (ptls );
1393+ gc_sweep_wait_for_all_pages ();
1394+
1395+ // reset half-pages pointers
1396+ for (int t_i = 0 ; t_i < n_threads ; t_i ++ ) {
1397+ jl_ptls_t ptls2 = gc_all_tls_states [t_i ];
1398+ if (ptls2 != NULL ) {
1399+ ptls2 -> gc_tls .page_metadata_allocd = new_gc_allocd_scratch [t_i ].stack ;
1400+ for (int i = 0 ; i < JL_GC_N_POOLS ; i ++ ) {
1401+ jl_gc_pool_t * p = & ptls2 -> gc_tls .heap .norm_pools [i ];
1402+ p -> newpages = NULL ;
1403+ }
13981404 }
13991405 }
1400- }
14011406
1402- // merge free lists
1403- for (int t_i = 0 ; t_i < n_threads ; t_i ++ ) {
1404- jl_ptls_t ptls2 = gc_all_tls_states [t_i ];
1405- if (ptls2 == NULL ) {
1406- continue ;
1407- }
1408- jl_gc_pagemeta_t * pg = jl_atomic_load_relaxed (& ptls2 -> gc_tls .page_metadata_allocd .bottom );
1409- while (pg != NULL ) {
1410- jl_gc_pagemeta_t * pg2 = pg -> next ;
1411- if (pg -> fl_begin_offset != UINT16_MAX ) {
1412- char * cur_pg = pg -> data ;
1413- jl_taggedvalue_t * fl_beg = (jl_taggedvalue_t * )(cur_pg + pg -> fl_begin_offset );
1414- jl_taggedvalue_t * fl_end = (jl_taggedvalue_t * )(cur_pg + pg -> fl_end_offset );
1415- * pfl [t_i * JL_GC_N_POOLS + pg -> pool_n ] = fl_beg ;
1416- pfl [t_i * JL_GC_N_POOLS + pg -> pool_n ] = & fl_end -> next ;
1407+ // merge free lists
1408+ for (int t_i = 0 ; t_i < n_threads ; t_i ++ ) {
1409+ jl_ptls_t ptls2 = gc_all_tls_states [t_i ];
1410+ if (ptls2 == NULL ) {
1411+ continue ;
1412+ }
1413+ jl_gc_pagemeta_t * pg = jl_atomic_load_relaxed (& ptls2 -> gc_tls .page_metadata_allocd .bottom );
1414+ while (pg != NULL ) {
1415+ jl_gc_pagemeta_t * pg2 = pg -> next ;
1416+ if (pg -> fl_begin_offset != UINT16_MAX ) {
1417+ char * cur_pg = pg -> data ;
1418+ jl_taggedvalue_t * fl_beg = (jl_taggedvalue_t * )(cur_pg + pg -> fl_begin_offset );
1419+ jl_taggedvalue_t * fl_end = (jl_taggedvalue_t * )(cur_pg + pg -> fl_end_offset );
1420+ * pfl [t_i * JL_GC_N_POOLS + pg -> pool_n ] = fl_beg ;
1421+ pfl [t_i * JL_GC_N_POOLS + pg -> pool_n ] = & fl_end -> next ;
1422+ }
1423+ pg = pg2 ;
14171424 }
1418- pg = pg2 ;
14191425 }
1420- }
14211426
1422- // null out terminal pointers of free lists
1423- for (int t_i = 0 ; t_i < n_threads ; t_i ++ ) {
1424- jl_ptls_t ptls2 = gc_all_tls_states [t_i ];
1425- if (ptls2 != NULL ) {
1426- for (int i = 0 ; i < JL_GC_N_POOLS ; i ++ ) {
1427- * pfl [t_i * JL_GC_N_POOLS + i ] = NULL ;
1427+ // null out terminal pointers of free lists
1428+ for (int t_i = 0 ; t_i < n_threads ; t_i ++ ) {
1429+ jl_ptls_t ptls2 = gc_all_tls_states [t_i ];
1430+ if (ptls2 != NULL ) {
1431+ for (int i = 0 ; i < JL_GC_N_POOLS ; i ++ ) {
1432+ * pfl [t_i * JL_GC_N_POOLS + i ] = NULL ;
1433+ }
14281434 }
14291435 }
1430- }
14311436
1432- // cleanup
1433- free (pfl );
1434- free (new_gc_allocd_scratch );
1437+ // cleanup
1438+ free (pfl );
1439+ free (new_gc_allocd_scratch );
1440+ }
1441+ uint64_t t_page_walk_end = jl_hrtime ();
1442+ gc_num .total_sweep_page_walk_time += t_page_walk_end - t_page_walk_start ;
14351443
14361444#ifdef _P64 // only enable concurrent sweeping on 64bit
14371445 // wake thread up to sweep concurrently
14381446 if (jl_n_sweepthreads > 0 ) {
14391447 uv_sem_post (& gc_sweep_assists_needed );
14401448 }
14411449 else {
1450+ uint64_t t_madvise_start = jl_hrtime ();
14421451 gc_free_pages ();
1452+ uint64_t t_madvise_end = jl_hrtime ();
1453+ gc_num .total_sweep_madvise_time += t_madvise_end - t_madvise_start ;
14431454 }
14441455#else
14451456 gc_free_pages ();
0 commit comments