@@ -47,9 +47,14 @@ void bch2_recalc_btree_reserve(struct bch_fs *c)
 	c->btree_cache.nr_reserve = reserve;
 }
 
-static inline size_t btree_cache_can_free(struct btree_cache *bc)
+static inline size_t btree_cache_can_free(struct btree_cache_list *list)
 {
-	return max_t(int, 0, bc->nr_live + bc->nr_freeable - bc->nr_reserve);
+	struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]);
+
+	size_t can_free = list->nr;
+	if (!list->idx)
+		can_free = max_t(ssize_t, 0, can_free - bc->nr_reserve);
+	return can_free;
 }
 
 static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
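
[Illustrative sketch, not part of the patch: how container_of() recovers the enclosing btree_cache from a btree_cache_list pointer, given that each list records its own slot in the live[] array. The struct layouts below are simplified stand-ins for the real bcachefs types, and the variable array index inside offsetof() relies on the same GCC/Clang builtin the kernel's container_of() uses.]

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct btree_cache_list {
        unsigned idx;   /* this list's slot in live[] */
        size_t   nr;
    };

    struct btree_cache {
        struct btree_cache_list live[2];   /* [0] = unpinned, [1] = pinned */
        size_t nr_reserve;
    };

    int main(void)
    {
        struct btree_cache bc = { .live = { { .idx = 0 }, { .idx = 1 } } };
        struct btree_cache_list *list = &bc.live[1];

        /* live[list->idx] names exactly the member 'list' points at,
         * so subtracting its offset lands on the parent struct: */
        struct btree_cache *parent =
            container_of(list, struct btree_cache, live[list->idx]);

        printf("%s\n", parent == &bc ? "recovered parent" : "bug");
        return 0;
    }
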
@@ -184,6 +189,51 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
 	six_unlock_intent(&b->c.lock);
 }
 
+static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b)
+{
+	struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
+
+	u64 mask = bc->pinned_nodes_mask[!!b->c.level];
+
+	return ((mask & BIT_ULL(b->c.btree_id)) &&
+		bbpos_cmp(bc->pinned_nodes_start, pos) < 0 &&
+		bbpos_cmp(bc->pinned_nodes_end, pos) >= 0);
+}
+
+void bch2_node_pin(struct bch_fs *c, struct btree *b)
+{
+	struct btree_cache *bc = &c->btree_cache;
+
+	mutex_lock(&bc->lock);
+	BUG_ON(!__btree_node_pinned(bc, b));
+	if (b != btree_node_root(c, b) && !btree_node_pinned(b)) {
+		set_btree_node_pinned(b);
+		list_move(&b->list, &bc->live[1].list);
+		bc->live[0].nr--;
+		bc->live[1].nr++;
+	}
+	mutex_unlock(&bc->lock);
+}
+
+void bch2_btree_cache_unpin(struct bch_fs *c)
+{
+	struct btree_cache *bc = &c->btree_cache;
+	struct btree *b, *n;
+
+	mutex_lock(&bc->lock);
+	c->btree_cache.pinned_nodes_mask[0] = 0;
+	c->btree_cache.pinned_nodes_mask[1] = 0;
+
+	list_for_each_entry_safe(b, n, &bc->live[1].list, list) {
+		clear_btree_node_pinned(b);
+		list_move(&b->list, &bc->live[0].list);
+		bc->live[0].nr++;
+		bc->live[1].nr--;
+	}
+
+	mutex_unlock(&bc->lock);
+}
+
 /* Btree in memory cache - hash table */
 
 void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
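
[Illustrative sketch, not part of the patch: pinned_nodes_mask[] replaces the old pinned_nodes_leaf_mask/pinned_nodes_interior_mask pair, with `!!b->c.level` folding every interior level down to index 1, and the bbpos comparisons select the half-open range (start, end]. A self-contained miniature of that predicate, using hypothetical stand-in types and values:]

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct bbpos { uint32_t btree; uint64_t pos; };

    static int bbpos_cmp(struct bbpos a, struct bbpos b)
    {
        if (a.btree != b.btree)
            return a.btree < b.btree ? -1 : 1;
        if (a.pos != b.pos)
            return a.pos < b.pos ? -1 : 1;
        return 0;
    }

    static bool node_pinned(uint64_t mask[2], struct bbpos start,
                            struct bbpos end, unsigned level, struct bbpos pos)
    {
        uint64_t m = mask[!!level];   /* level 0 = leaf, any depth >= 1 = interior */

        return (m & (1ULL << pos.btree)) &&
               bbpos_cmp(start, pos) < 0 &&   /* strictly after start... */
               bbpos_cmp(end, pos) >= 0;      /* ...and not past end */
    }

    int main(void)
    {
        uint64_t mask[2] = { 1ULL << 0, 1ULL << 0 };   /* hypothetical: btree id 0 */
        struct bbpos start = { 0, 100 }, end = { 0, 200 };

        printf("%d %d\n",
               node_pinned(mask, start, end, 0, (struct bbpos){ 0, 150 }),   /* 1 */
               node_pinned(mask, start, end, 0, (struct bbpos){ 0, 250 }));  /* 0 */
        return 0;
    }
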
@@ -199,7 +249,7 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
 	if (b->c.btree_id < BTREE_ID_NR)
 		--bc->nr_by_btree[b->c.btree_id];
 
-	bc->nr_live--;
+	bc->live[btree_node_pinned(b)].nr--;
 	bc->nr_freeable++;
 	list_move(&b->list, &bc->freeable);
 }
@@ -216,9 +266,14 @@ int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
 
 	if (b->c.btree_id < BTREE_ID_NR)
 		bc->nr_by_btree[b->c.btree_id]++;
-	bc->nr_live++;
+
+	bool p = __btree_node_pinned(bc, b);
+	mod_bit(BTREE_NODE_pinned, &b->flags, p);
+
+	list_move_tail(&b->list, &bc->live[p].list);
+	bc->live[p].nr++;
+
 	bc->nr_freeable--;
-	list_move_tail(&b->list, &bc->live);
 	return 0;
 }
 
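[Illustrative note, not part of the patch: mod_bit() sets or clears a flag bit according to a boolean, so a node's BTREE_NODE_pinned bit always matches the live[] list it lands on. A plausible reading of the helper, hedged, since the actual bcachefs definition may differ:]

    #include <linux/bitops.h>
    #include <linux/types.h>

    /* Hypothetical sketch: set the bit when v is true, clear it otherwise. */
    static inline void mod_bit(long nr, volatile unsigned long *addr, bool v)
    {
        if (v)
            set_bit(nr, addr);
        else
            clear_bit(nr, addr);
    }
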
@@ -283,20 +338,6 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, b
 	int ret = 0;
 
 	lockdep_assert_held(&bc->lock);
-
-	struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
-
-	u64 mask = b->c.level
-		? bc->pinned_nodes_interior_mask
-		: bc->pinned_nodes_leaf_mask;
-
-	if ((mask & BIT_ULL(b->c.btree_id)) &&
-	    bbpos_cmp(bc->pinned_nodes_start, pos) < 0 &&
-	    bbpos_cmp(bc->pinned_nodes_end, pos) >= 0) {
-		BTREE_CACHE_NOT_FREED_INCREMENT(pinned);
-		return -BCH_ERR_ENOMEM_btree_node_reclaim;
-	}
-
 wait_on_io:
 	if (b->flags & ((1U << BTREE_NODE_dirty)|
 			(1U << BTREE_NODE_read_in_flight)|
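
[Illustrative note, not part of the patch: with this check removed, pinned nodes are no longer hard-unreclaimable; they just sit on a list whose shrinker advertises a higher ->seeks. In mm/vmscan.c, do_shrink_slab() sizes its scan target roughly as

    delta = (freeable >> priority) * 4 / shrinker->seeks

so, assuming that formula, the seeks = 8 shrinker registered below for the pinned list is asked to scan a quarter of what the seeks = 2 unpinned-list shrinker is for the same freeable count, and memory pressure can still evict pinned nodes as a last resort.]
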
@@ -401,17 +442,17 @@ static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
 static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 					   struct shrink_control *sc)
 {
-	struct bch_fs *c = shrink->private_data;
-	struct btree_cache *bc = &c->btree_cache;
+	struct btree_cache_list *list = shrink->private_data;
+	struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]);
+	struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache);
 	struct btree *b, *t;
 	unsigned long nr = sc->nr_to_scan;
 	unsigned long can_free = 0;
 	unsigned long freed = 0;
 	unsigned long touched = 0;
 	unsigned i, flags;
 	unsigned long ret = SHRINK_STOP;
-	bool trigger_writes = atomic_long_read(&bc->nr_dirty) + nr >=
-		(bc->nr_live + bc->nr_freeable) * 3 / 4;
+	bool trigger_writes = atomic_long_read(&bc->nr_dirty) + nr >= list->nr * 3 / 4;
 
 	if (bch2_btree_shrinker_disabled)
 		return SHRINK_STOP;
@@ -426,7 +467,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 	 * succeed, so that inserting keys into the btree can always succeed and
 	 * IO can always make forward progress:
 	 */
-	can_free = btree_cache_can_free(bc);
+	can_free = btree_cache_can_free(list);
 	nr = min_t(unsigned long, nr, can_free);
 
 	i = 0;
@@ -452,7 +493,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 		}
 	}
 restart:
-	list_for_each_entry_safe(b, t, &bc->live, list) {
+	list_for_each_entry_safe(b, t, &list->list, list) {
 		touched++;
 
 		if (btree_node_accessed(b)) {
@@ -476,7 +517,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 			   !btree_node_will_make_reachable(b) &&
 			   !btree_node_write_blocked(b) &&
 			   six_trylock_read(&b->c.lock)) {
-			list_move(&bc->live, &b->list);
+			list_move(&list->list, &b->list);
 			mutex_unlock(&bc->lock);
 			__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
 			six_unlock_read(&b->c.lock);
@@ -490,8 +531,8 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 			break;
 	}
 out_rotate:
-	if (&t->list != &bc->live)
-		list_move_tail(&bc->live, &t->list);
+	if (&t->list != &list->list)
+		list_move_tail(&list->list, &t->list);
 out:
 	mutex_unlock(&bc->lock);
 out_nounlock:
@@ -504,40 +545,42 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
 					    struct shrink_control *sc)
 {
-	struct bch_fs *c = shrink->private_data;
-	struct btree_cache *bc = &c->btree_cache;
+	struct btree_cache_list *list = shrink->private_data;
 
 	if (bch2_btree_shrinker_disabled)
 		return 0;
 
-	return btree_cache_can_free(bc);
+	return btree_cache_can_free(list);
 }
 
 void bch2_fs_btree_cache_exit(struct bch_fs *c)
 {
 	struct btree_cache *bc = &c->btree_cache;
 	struct btree *b, *t;
-	unsigned i, flags;
+	unsigned long flags;
 
-	shrinker_free(bc->shrink);
+	shrinker_free(bc->live[1].shrink);
+	shrinker_free(bc->live[0].shrink);
 
 	/* vfree() can allocate memory: */
 	flags = memalloc_nofs_save();
 	mutex_lock(&bc->lock);
 
 	if (c->verify_data)
-		list_move(&c->verify_data->list, &bc->live);
+		list_move(&c->verify_data->list, &bc->live[0].list);
 
 	kvfree(c->verify_ondisk);
 
-	for (i = 0; i < btree_id_nr_alive(c); i++) {
+	for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
 		struct btree_root *r = bch2_btree_id_root(c, i);
 
 		if (r->b)
-			list_add(&r->b->list, &bc->live);
+			list_add(&r->b->list, &bc->live[0].list);
 	}
 
-	list_for_each_entry_safe(b, t, &bc->live, list)
+	list_for_each_entry_safe(b, t, &bc->live[1].list, list)
+		bch2_btree_node_hash_remove(bc, b);
+	list_for_each_entry_safe(b, t, &bc->live[0].list, list)
 		bch2_btree_node_hash_remove(bc, b);
 
 	list_for_each_entry_safe(b, t, &bc->freeable, list) {
@@ -563,7 +606,8 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 
 	for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++)
 		BUG_ON(bc->nr_by_btree[i]);
-	BUG_ON(bc->nr_live);
+	BUG_ON(bc->live[0].nr);
+	BUG_ON(bc->live[1].nr);
 	BUG_ON(bc->nr_freeable);
 
 	if (bc->table_init_done)
@@ -589,18 +633,28 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
 	if (!__bch2_btree_node_mem_alloc(c))
 		goto err;
 
-	list_splice_init(&bc->live, &bc->freeable);
+	list_splice_init(&bc->live[0].list, &bc->freeable);
 
 	mutex_init(&c->verify_lock);
 
 	shrink = shrinker_alloc(0, "%s-btree_cache", c->name);
 	if (!shrink)
 		goto err;
-	bc->shrink = shrink;
+	bc->live[0].shrink = shrink;
+	shrink->count_objects = bch2_btree_cache_count;
+	shrink->scan_objects = bch2_btree_cache_scan;
+	shrink->seeks = 2;
+	shrink->private_data = &bc->live[0];
+	shrinker_register(shrink);
+
+	shrink = shrinker_alloc(0, "%s-btree_cache-pinned", c->name);
+	if (!shrink)
+		goto err;
+	bc->live[1].shrink = shrink;
 	shrink->count_objects = bch2_btree_cache_count;
 	shrink->scan_objects = bch2_btree_cache_scan;
-	shrink->seeks = 4;
-	shrink->private_data = c;
+	shrink->seeks = 8;
+	shrink->private_data = &bc->live[1];
 	shrinker_register(shrink);
 
 	return 0;
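
[Illustrative sketch, not part of the patch: the registration pattern above reduced to a self-contained module. It assumes the Linux >= 6.7 shrinker API (shrinker_alloc()/shrinker_register()/shrinker_free()); the demo_* names and two-element list array are hypothetical. One count/scan pair serves both shrinkers because private_data identifies the list, mirroring how bch2_btree_cache_scan() recovers its btree_cache_list.]

    #include <linux/module.h>
    #include <linux/shrinker.h>

    struct demo_list {
        unsigned long   nr;       /* objects currently on this list */
        struct shrinker *shrink;
    };

    static struct demo_list demo_lists[2];

    static unsigned long demo_count(struct shrinker *shrink,
                                    struct shrink_control *sc)
    {
        struct demo_list *l = shrink->private_data;

        return l->nr;             /* how many objects could be freed */
    }

    static unsigned long demo_scan(struct shrinker *shrink,
                                   struct shrink_control *sc)
    {
        /* A real implementation frees up to sc->nr_to_scan objects
         * from shrink->private_data and returns the number freed. */
        return SHRINK_STOP;
    }

    static int __init demo_init(void)
    {
        for (unsigned i = 0; i < ARRAY_SIZE(demo_lists); i++) {
            struct shrinker *s = shrinker_alloc(0, "demo-%u", i);

            if (!s)
                return -ENOMEM;
            s->count_objects = demo_count;
            s->scan_objects  = demo_scan;
            s->seeks         = i ? 8 : 2; /* reclaim list 1 less aggressively */
            s->private_data  = &demo_lists[i];
            demo_lists[i].shrink = s;
            shrinker_register(s);
        }
        return 0;
    }

    static void __exit demo_exit(void)
    {
        /* free in reverse order, mirroring bch2_fs_btree_cache_exit() */
        shrinker_free(demo_lists[1].shrink);
        shrinker_free(demo_lists[0].shrink);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
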
@@ -611,7 +665,10 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
 void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
 {
 	mutex_init(&bc->lock);
-	INIT_LIST_HEAD(&bc->live);
+	for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++) {
+		bc->live[i].idx = i;
+		INIT_LIST_HEAD(&bc->live[i].list);
+	}
 	INIT_LIST_HEAD(&bc->freeable);
 	INIT_LIST_HEAD(&bc->freed_pcpu);
 	INIT_LIST_HEAD(&bc->freed_nonpcpu);
@@ -673,14 +730,16 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
 	struct btree_cache *bc = &c->btree_cache;
 	struct btree *b;
 
-	list_for_each_entry_reverse(b, &bc->live, list)
-		if (!btree_node_reclaim(c, b, false))
-			return b;
+	for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++)
+		list_for_each_entry_reverse(b, &bc->live[i].list, list)
+			if (!btree_node_reclaim(c, b, false))
+				return b;
 
 	while (1) {
-		list_for_each_entry_reverse(b, &bc->live, list)
-			if (!btree_node_write_and_reclaim(c, b))
-				return b;
+		for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++)
+			list_for_each_entry_reverse(b, &bc->live[i].list, list)
+				if (!btree_node_write_and_reclaim(c, b))
+					return b;
 
 		/*
 		 * Rare case: all nodes were intent-locked.
@@ -1387,9 +1446,10 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc
 	if (!out->nr_tabstops)
 		printbuf_tabstop_push(out, 32);
 
-	prt_btree_cache_line(out, c, "nr_live:", bc->nr_live);
-	prt_btree_cache_line(out, c, "nr_freeable:", bc->nr_freeable);
-	prt_btree_cache_line(out, c, "nr dirty:", atomic_long_read(&bc->nr_dirty));
+	prt_btree_cache_line(out, c, "live:", bc->live[0].nr);
+	prt_btree_cache_line(out, c, "pinned:", bc->live[1].nr);
+	prt_btree_cache_line(out, c, "freeable:", bc->nr_freeable);
+	prt_btree_cache_line(out, c, "dirty:", atomic_long_read(&bc->nr_dirty));
 	prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
 	prt_newline(out);
 