@@ -47,9 +47,14 @@ void bch2_recalc_btree_reserve(struct bch_fs *c)
         c->btree_cache.nr_reserve = reserve;
 }
 
-static inline size_t btree_cache_can_free(struct btree_cache *bc)
+static inline size_t btree_cache_can_free(struct btree_cache_list *list)
 {
-        return max_t(int, 0, bc->nr_live + bc->nr_freeable - bc->nr_reserve);
+        struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]);
+
+        size_t can_free = list->nr;
+        if (!list->idx)
+                can_free = max_t(ssize_t, 0, can_free - bc->nr_reserve);
+        return can_free;
 }
 
 static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
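
btree_cache_can_free() now takes a pointer to one of the two btree_cache_list elements embedded in struct btree_cache and recovers the enclosing cache with container_of(), using the element's own idx as the array subscript. Below is a minimal user-space sketch of that idiom; the struct names are hypothetical, and the non-constant array index inside offsetof() relies on a GCC/Clang extension (the same one the kernel's container_of() depends on).

/*
 * Minimal sketch of container_of() with a runtime array index, as used by
 * btree_cache_can_free() above. Hypothetical struct names; GCC/Clang only.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct cache_list {
        unsigned idx;           /* 0 = normal, 1 = pinned */
        size_t   nr;
};

struct cache {
        size_t            nr_reserve;
        struct cache_list live[2];
};

static struct cache *list_to_cache(struct cache_list *list)
{
        /* works for either element because list->idx names the element itself */
        return container_of(list, struct cache, live[list->idx]);
}

int main(void)
{
        struct cache c = { .nr_reserve = 8 };
        c.live[0].idx = 0;
        c.live[1].idx = 1;

        printf("%d %d\n",
               list_to_cache(&c.live[0]) == &c,
               list_to_cache(&c.live[1]) == &c);        /* prints: 1 1 */
        return 0;
}
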
@@ -184,6 +189,51 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
         six_unlock_intent(&b->c.lock);
 }
 
+static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b)
+{
+        struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
+
+        u64 mask = bc->pinned_nodes_mask[!!b->c.level];
+
+        return ((mask & BIT_ULL(b->c.btree_id)) &&
+                bbpos_cmp(bc->pinned_nodes_start, pos) < 0 &&
+                bbpos_cmp(bc->pinned_nodes_end, pos) >= 0);
+}
+
+void bch2_node_pin(struct bch_fs *c, struct btree *b)
+{
+        struct btree_cache *bc = &c->btree_cache;
+
+        mutex_lock(&bc->lock);
+        BUG_ON(!__btree_node_pinned(bc, b));
+        if (b != btree_node_root(c, b) && !btree_node_pinned(b)) {
+                set_btree_node_pinned(b);
+                list_move(&b->list, &bc->live[1].list);
+                bc->live[0].nr--;
+                bc->live[1].nr++;
+        }
+        mutex_unlock(&bc->lock);
+}
+
+void bch2_btree_cache_unpin(struct bch_fs *c)
+{
+        struct btree_cache *bc = &c->btree_cache;
+        struct btree *b, *n;
+
+        mutex_lock(&bc->lock);
+        c->btree_cache.pinned_nodes_mask[0] = 0;
+        c->btree_cache.pinned_nodes_mask[1] = 0;
+
+        list_for_each_entry_safe(b, n, &bc->live[1].list, list) {
+                clear_btree_node_pinned(b);
+                list_move(&b->list, &bc->live[0].list);
+                bc->live[0].nr++;
+                bc->live[1].nr--;
+        }
+
+        mutex_unlock(&bc->lock);
+}
+
 /* Btree in memory cache - hash table */
 
 void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
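
The pin predicate above selects the mask by node level (index 0 for leaves, 1 for anything interior, via !!b->c.level), tests one bit per btree ID, and then requires the node's key position to fall in the range with exclusive start and inclusive end. A standalone sketch of the same logic follows, with simplified stand-in types (the real code uses struct bbpos and bbpos_cmp()):

/*
 * Simplified stand-in for __btree_node_pinned(): hypothetical types, same
 * shape of logic. mask[0] covers leaf nodes, mask[1] interior nodes; a node
 * is pinned when its btree's bit is set and start < pos <= end.
 */
#include <stdbool.h>
#include <stdint.h>

#define BIT_ULL(nr)     (1ULL << (nr))

struct pos { uint64_t inode, offset; };

static int pos_cmp(struct pos a, struct pos b)
{
        if (a.inode != b.inode)
                return a.inode < b.inode ? -1 : 1;
        if (a.offset != b.offset)
                return a.offset < b.offset ? -1 : 1;
        return 0;
}

static bool node_pinned(const uint64_t mask[2],
                        unsigned level, unsigned btree_id,
                        struct pos start, struct pos end, struct pos pos)
{
        uint64_t m = mask[!!level];     /* any nonzero level counts as interior */

        return (m & BIT_ULL(btree_id)) &&
               pos_cmp(start, pos) < 0 &&
               pos_cmp(end, pos) >= 0;
}
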
@@ -199,7 +249,7 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
         if (b->c.btree_id < BTREE_ID_NR)
                 --bc->nr_by_btree[b->c.btree_id];
 
-        bc->nr_live--;
+        bc->live[btree_node_pinned(b)].nr--;
         bc->nr_freeable++;
         list_move(&b->list, &bc->freeable);
 }
@@ -216,9 +266,14 @@ int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
 
         if (b->c.btree_id < BTREE_ID_NR)
                 bc->nr_by_btree[b->c.btree_id]++;
-        bc->nr_live++;
+
+        bool p = __btree_node_pinned(bc, b);
+        mod_bit(BTREE_NODE_pinned, &b->flags, p);
+
+        list_move_tail(&b->list, &bc->live[p].list);
+        bc->live[p].nr++;
+
         bc->nr_freeable--;
-        list_move_tail(&b->list, &bc->live);
         return 0;
 }
 
@@ -283,20 +338,6 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, b
         int ret = 0;
 
         lockdep_assert_held(&bc->lock);
-
-        struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
-
-        u64 mask = b->c.level
-                ? bc->pinned_nodes_interior_mask
-                : bc->pinned_nodes_leaf_mask;
-
-        if ((mask & BIT_ULL(b->c.btree_id)) &&
-            bbpos_cmp(bc->pinned_nodes_start, pos) < 0 &&
-            bbpos_cmp(bc->pinned_nodes_end, pos) >= 0) {
-                BTREE_CACHE_NOT_FREED_INCREMENT(pinned);
-                return -BCH_ERR_ENOMEM_btree_node_reclaim;
-        }
-
 wait_on_io:
         if (b->flags & ((1U << BTREE_NODE_dirty)|
                         (1U << BTREE_NODE_read_in_flight)|
@@ -401,17 +442,17 @@ static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
 static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
                                            struct shrink_control *sc)
 {
-        struct bch_fs *c = shrink->private_data;
-        struct btree_cache *bc = &c->btree_cache;
+        struct btree_cache_list *list = shrink->private_data;
+        struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]);
+        struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache);
         struct btree *b, *t;
         unsigned long nr = sc->nr_to_scan;
         unsigned long can_free = 0;
         unsigned long freed = 0;
         unsigned long touched = 0;
         unsigned i, flags;
         unsigned long ret = SHRINK_STOP;
-        bool trigger_writes = atomic_long_read(&bc->nr_dirty) + nr >=
-                (bc->nr_live + bc->nr_freeable) * 3 / 4;
+        bool trigger_writes = atomic_long_read(&bc->nr_dirty) + nr >= list->nr * 3 / 4;
 
         if (bch2_btree_shrinker_disabled)
                 return SHRINK_STOP;
@@ -426,7 +467,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
          * succeed, so that inserting keys into the btree can always succeed and
          * IO can always make forward progress:
          */
-        can_free = btree_cache_can_free(bc);
+        can_free = btree_cache_can_free(list);
         nr = min_t(unsigned long, nr, can_free);
 
         i = 0;
@@ -452,7 +493,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
                 }
         }
 restart:
-        list_for_each_entry_safe(b, t, &bc->live, list) {
+        list_for_each_entry_safe(b, t, &list->list, list) {
                 touched++;
 
                 if (btree_node_accessed(b)) {
@@ -476,7 +517,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
                            !btree_node_will_make_reachable(b) &&
                            !btree_node_write_blocked(b) &&
                            six_trylock_read(&b->c.lock)) {
-                        list_move(&bc->live, &b->list);
+                        list_move(&list->list, &b->list);
                         mutex_unlock(&bc->lock);
                         __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
                         six_unlock_read(&b->c.lock);
@@ -490,8 +531,8 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
                         break;
         }
 out_rotate:
-        if (&t->list != &bc->live)
-                list_move_tail(&bc->live, &t->list);
+        if (&t->list != &list->list)
+                list_move_tail(&list->list, &t->list);
 out:
         mutex_unlock(&bc->lock);
 out_nounlock:
@@ -504,40 +545,42 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
                                             struct shrink_control *sc)
 {
-        struct bch_fs *c = shrink->private_data;
-        struct btree_cache *bc = &c->btree_cache;
+        struct btree_cache_list *list = shrink->private_data;
 
         if (bch2_btree_shrinker_disabled)
                 return 0;
 
-        return btree_cache_can_free(bc);
+        return btree_cache_can_free(list);
 }
 
 void bch2_fs_btree_cache_exit(struct bch_fs *c)
 {
         struct btree_cache *bc = &c->btree_cache;
         struct btree *b, *t;
-        unsigned i, flags;
+        unsigned long flags;
 
-        shrinker_free(bc->shrink);
+        shrinker_free(bc->live[1].shrink);
+        shrinker_free(bc->live[0].shrink);
 
         /* vfree() can allocate memory: */
         flags = memalloc_nofs_save();
         mutex_lock(&bc->lock);
 
         if (c->verify_data)
-                list_move(&c->verify_data->list, &bc->live);
+                list_move(&c->verify_data->list, &bc->live[0].list);
 
         kvfree(c->verify_ondisk);
 
-        for (i = 0; i < btree_id_nr_alive(c); i++) {
+        for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
                 struct btree_root *r = bch2_btree_id_root(c, i);
 
                 if (r->b)
-                        list_add(&r->b->list, &bc->live);
+                        list_add(&r->b->list, &bc->live[0].list);
         }
 
-        list_for_each_entry_safe(b, t, &bc->live, list)
+        list_for_each_entry_safe(b, t, &bc->live[1].list, list)
+                bch2_btree_node_hash_remove(bc, b);
+        list_for_each_entry_safe(b, t, &bc->live[0].list, list)
                 bch2_btree_node_hash_remove(bc, b);
 
         list_for_each_entry_safe(b, t, &bc->freeable, list) {
@@ -563,7 +606,8 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 
         for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++)
                 BUG_ON(bc->nr_by_btree[i]);
-        BUG_ON(bc->nr_live);
+        BUG_ON(bc->live[0].nr);
+        BUG_ON(bc->live[1].nr);
         BUG_ON(bc->nr_freeable);
 
         if (bc->table_init_done)
@@ -589,18 +633,28 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
         if (!__bch2_btree_node_mem_alloc(c))
                 goto err;
 
-        list_splice_init(&bc->live, &bc->freeable);
+        list_splice_init(&bc->live[0].list, &bc->freeable);
 
         mutex_init(&c->verify_lock);
 
         shrink = shrinker_alloc(0, "%s-btree_cache", c->name);
         if (!shrink)
                 goto err;
-        bc->shrink = shrink;
+        bc->live[0].shrink = shrink;
+        shrink->count_objects = bch2_btree_cache_count;
+        shrink->scan_objects = bch2_btree_cache_scan;
+        shrink->seeks = 2;
+        shrink->private_data = &bc->live[0];
+        shrinker_register(shrink);
+
+        shrink = shrinker_alloc(0, "%s-btree_cache-pinned", c->name);
+        if (!shrink)
+                goto err;
+        bc->live[1].shrink = shrink;
         shrink->count_objects = bch2_btree_cache_count;
         shrink->scan_objects = bch2_btree_cache_scan;
-        shrink->seeks = 4;
-        shrink->private_data = c;
+        shrink->seeks = 8;
+        shrink->private_data = &bc->live[1];
 
         shrinker_register(shrink);
         return 0;
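
Each list now gets its own shrinker: the unpinned list keeps seeks = 2 while the pinned list gets 8 (the old single shrinker used 4), which biases reclaim away from pinned nodes. As a rough illustration only (the exact scaling lives in mm/vmscan.c's do_shrink_slab() and may differ by kernel version), the per-shrinker scan target is approximately (freeable >> priority) * 4 / seeks, so for the same object count the pinned list is asked to scan a quarter as much:

/*
 * Rough, hedged illustration of how 'seeks' biases shrinker pressure.
 * The formula below approximates do_shrink_slab()'s scaling in recent
 * kernels and is not a verbatim copy of it.
 */
#include <stdio.h>

static unsigned long scan_target(unsigned long freeable, int priority, int seeks)
{
        return (freeable >> priority) * 4 / seeks;
}

int main(void)
{
        unsigned long freeable = 1UL << 16;     /* say, 65536 cached nodes */
        int priority = 12;                      /* DEF_PRIORITY, light pressure */

        printf("unpinned (seeks=2): %lu\n", scan_target(freeable, priority, 2)); /* 32 */
        printf("pinned   (seeks=8): %lu\n", scan_target(freeable, priority, 8)); /* 8 */
        return 0;
}
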
@@ -611,7 +665,10 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
 void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
 {
         mutex_init(&bc->lock);
-        INIT_LIST_HEAD(&bc->live);
+        for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++) {
+                bc->live[i].idx = i;
+                INIT_LIST_HEAD(&bc->live[i].list);
+        }
         INIT_LIST_HEAD(&bc->freeable);
         INIT_LIST_HEAD(&bc->freed_pcpu);
         INIT_LIST_HEAD(&bc->freed_nonpcpu);
@@ -673,14 +730,16 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
         struct btree_cache *bc = &c->btree_cache;
         struct btree *b;
 
-        list_for_each_entry_reverse(b, &bc->live, list)
-                if (!btree_node_reclaim(c, b, false))
-                        return b;
+        for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++)
+                list_for_each_entry_reverse(b, &bc->live[i].list, list)
+                        if (!btree_node_reclaim(c, b, false))
+                                return b;
 
         while (1) {
-                list_for_each_entry_reverse(b, &bc->live, list)
-                        if (!btree_node_write_and_reclaim(c, b))
-                                return b;
+                for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++)
+                        list_for_each_entry_reverse(b, &bc->live[i].list, list)
+                                if (!btree_node_write_and_reclaim(c, b))
+                                        return b;
 
                 /*
                  * Rare case: all nodes were intent-locked.
@@ -1387,9 +1446,10 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc
         if (!out->nr_tabstops)
                 printbuf_tabstop_push(out, 32);
 
-        prt_btree_cache_line(out, c, "nr_live:", bc->nr_live);
-        prt_btree_cache_line(out, c, "nr_freeable:", bc->nr_freeable);
-        prt_btree_cache_line(out, c, "nr dirty:", atomic_long_read(&bc->nr_dirty));
+        prt_btree_cache_line(out, c, "live:", bc->live[0].nr);
+        prt_btree_cache_line(out, c, "pinned:", bc->live[1].nr);
+        prt_btree_cache_line(out, c, "freeable:", bc->nr_freeable);
+        prt_btree_cache_line(out, c, "dirty:", atomic_long_read(&bc->nr_dirty));
         prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
         prt_newline(out);
 