 #include "xfs_log.h"
 #include "xfs_ag.h"
 #include "xfs_health.h"
+#include "xfs_rtbitmap.h"
 
 /*
  * Notes on an efficient, low latency fstrim algorithm
@@ -322,7 +323,7 @@ xfs_trim_should_stop(void)
  * we found in the last batch as the key to start the next.
  */
 static int
-xfs_trim_extents(
+xfs_trim_perag_extents(
 	struct xfs_perag	*pag,
 	xfs_agblock_t		start,
 	xfs_agblock_t		end,
@@ -383,6 +384,259 @@ xfs_trim_extents(
 }
 
+static int
+xfs_trim_datadev_extents(
+	struct xfs_mount	*mp,
+	xfs_daddr_t		start,
+	xfs_daddr_t		end,
+	xfs_extlen_t		minlen,
+	uint64_t		*blocks_trimmed)
+{
+	xfs_agnumber_t		start_agno, end_agno;
+	xfs_agblock_t		start_agbno, end_agbno;
+	xfs_daddr_t		ddev_end;
+	struct xfs_perag	*pag;
+	int			last_error = 0, error;
+
+	ddev_end = min_t(xfs_daddr_t, end,
+			 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1);
+
+	start_agno = xfs_daddr_to_agno(mp, start);
+	start_agbno = xfs_daddr_to_agbno(mp, start);
+	end_agno = xfs_daddr_to_agno(mp, ddev_end);
+	end_agbno = xfs_daddr_to_agbno(mp, ddev_end);
+
+	for_each_perag_range(mp, start_agno, end_agno, pag) {
+		xfs_agblock_t	agend = pag->block_count;
+
+		if (start_agno == end_agno)
+			agend = end_agbno;
+		error = xfs_trim_perag_extents(pag, start_agbno, agend, minlen,
+				blocks_trimmed);
+		if (error)
+			last_error = error;
+
+		if (xfs_trim_should_stop()) {
+			xfs_perag_rele(pag);
+			break;
+		}
+		start_agbno = 0;
+	}
+
+	return last_error;
+}
+
+#ifdef CONFIG_XFS_RT
+struct xfs_trim_rtdev {
+	/* list of rt extents to free */
+	struct list_head	extent_list;
+
+	/* pointer to count of blocks trimmed */
+	uint64_t		*blocks_trimmed;
+
+	/* minimum length that caller allows us to trim */
+	xfs_rtblock_t		minlen_fsb;
+
+	/* restart point for the rtbitmap walk */
+	xfs_rtxnum_t		restart_rtx;
+
+	/* stopping point for the current rtbitmap walk */
+	xfs_rtxnum_t		stop_rtx;
+};
+
+struct xfs_rtx_busy {
+	struct list_head	list;
+	xfs_rtblock_t		bno;
+	xfs_rtblock_t		length;
+};
+
+static void
+xfs_discard_free_rtdev_extents(
+	struct xfs_trim_rtdev	*tr)
+{
+	struct xfs_rtx_busy	*busyp, *n;
+
+	list_for_each_entry_safe(busyp, n, &tr->extent_list, list) {
+		list_del_init(&busyp->list);
+		kfree(busyp);
+	}
+}
+
+/*
+ * Walk the discard list and issue discards on all the busy extents in the
+ * list. We plug and chain the bios so that we only need a single completion
+ * call to clear all the busy extents once the discards are complete.
+ */
+static int
+xfs_discard_rtdev_extents(
+	struct xfs_mount	*mp,
+	struct xfs_trim_rtdev	*tr)
+{
+	struct block_device	*bdev = mp->m_rtdev_targp->bt_bdev;
+	struct xfs_rtx_busy	*busyp;
+	struct bio		*bio = NULL;
+	struct blk_plug		plug;
+	xfs_rtblock_t		start = NULLRTBLOCK, length = 0;
+	int			error = 0;
+
+	blk_start_plug(&plug);
+	list_for_each_entry(busyp, &tr->extent_list, list) {
+		if (start == NULLRTBLOCK)
+			start = busyp->bno;
+		length += busyp->length;
+
+		trace_xfs_discard_rtextent(mp, busyp->bno, busyp->length);
+
+		error = __blkdev_issue_discard(bdev,
+				XFS_FSB_TO_BB(mp, busyp->bno),
+				XFS_FSB_TO_BB(mp, busyp->length),
+				GFP_NOFS, &bio);
+		if (error)
+			break;
+	}
+	xfs_discard_free_rtdev_extents(tr);
+
+	if (bio) {
+		error = submit_bio_wait(bio);
+		if (error == -EOPNOTSUPP)
+			error = 0;
+		if (error)
+			xfs_info(mp,
+	"discard failed for rtextent [0x%llx,%llu], error %d",
+				(unsigned long long)start,
+				(unsigned long long)length,
+				error);
+		bio_put(bio);
+	}
+	blk_finish_plug(&plug);
+
+	return error;
+}
+
+static int
+xfs_trim_gather_rtextent(
+	struct xfs_mount		*mp,
+	struct xfs_trans		*tp,
+	const struct xfs_rtalloc_rec	*rec,
+	void				*priv)
+{
+	struct xfs_trim_rtdev		*tr = priv;
+	struct xfs_rtx_busy		*busyp;
+	xfs_rtblock_t			rbno, rlen;
+
+	if (rec->ar_startext > tr->stop_rtx) {
+		/*
+		 * If we've scanned a large number of rtbitmap blocks, update
+		 * the cursor to point at this extent so we restart the next
+		 * batch from this extent.
+		 */
+		tr->restart_rtx = rec->ar_startext;
+		return -ECANCELED;
+	}
+
+	rbno = xfs_rtx_to_rtb(mp, rec->ar_startext);
+	rlen = xfs_rtx_to_rtb(mp, rec->ar_extcount);
+
+	/* Ignore too small. */
+	if (rlen < tr->minlen_fsb) {
+		trace_xfs_discard_rttoosmall(mp, rbno, rlen);
+		return 0;
+	}
+
+	busyp = kzalloc(sizeof(struct xfs_rtx_busy), GFP_KERNEL);
+	if (!busyp)
+		return -ENOMEM;
+
+	busyp->bno = rbno;
+	busyp->length = rlen;
+	INIT_LIST_HEAD(&busyp->list);
+	list_add_tail(&busyp->list, &tr->extent_list);
+	*tr->blocks_trimmed += rlen;
+
+	tr->restart_rtx = rec->ar_startext + rec->ar_extcount;
+	return 0;
+}
+
+static int
+xfs_trim_rtdev_extents(
+	struct xfs_mount	*mp,
+	xfs_daddr_t		start,
+	xfs_daddr_t		end,
+	xfs_daddr_t		minlen,
+	uint64_t		*blocks_trimmed)
+{
+	struct xfs_rtalloc_rec	low = { };
+	struct xfs_rtalloc_rec	high = { };
+	struct xfs_trim_rtdev	tr = {
+		.blocks_trimmed	= blocks_trimmed,
+		.minlen_fsb	= XFS_BB_TO_FSB(mp, minlen),
+	};
+	struct xfs_trans	*tp;
+	xfs_daddr_t		rtdev_daddr;
+	int			error;
+
+	INIT_LIST_HEAD(&tr.extent_list);
+
+	/* Shift the start and end downwards to match the rt device. */
+	rtdev_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+	if (start > rtdev_daddr)
+		start -= rtdev_daddr;
+	else
+		start = 0;
+
+	if (end <= rtdev_daddr)
+		return 0;
+	end -= rtdev_daddr;
+
+	error = xfs_trans_alloc_empty(mp, &tp);
+	if (error)
+		return error;
+
+	end = min_t(xfs_daddr_t, end,
+			XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks) - 1);
+
+	/* Convert the rt blocks to rt extents */
+	low.ar_startext = xfs_rtb_to_rtxup(mp, XFS_BB_TO_FSB(mp, start));
+	high.ar_startext = xfs_rtb_to_rtx(mp, XFS_BB_TO_FSBT(mp, end));
+
+	/*
+	 * Walk the free ranges between low and high.  The query_range function
+	 * trims the extents returned.
+	 */
+	do {
+		tr.stop_rtx = low.ar_startext + (mp->m_sb.sb_blocksize * NBBY);
+		xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP);
+		error = xfs_rtalloc_query_range(mp, tp, &low, &high,
+				xfs_trim_gather_rtextent, &tr);
+
+		if (error == -ECANCELED)
+			error = 0;
+		if (error) {
+			xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
+			xfs_discard_free_rtdev_extents(&tr);
+			break;
+		}
+
+		if (list_empty(&tr.extent_list)) {
+			xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
+			break;
+		}
+
+		error = xfs_discard_rtdev_extents(mp, &tr);
+		xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
+		if (error)
+			break;
+
+		low.ar_startext = tr.restart_rtx;
+	} while (!xfs_trim_should_stop() && low.ar_startext <= high.ar_startext);
+
+	xfs_trans_cancel(tp);
+	return error;
+}
+#else
+# define xfs_trim_rtdev_extents(m,s,e,n,b)	(-EOPNOTSUPP)
+#endif /* CONFIG_XFS_RT */
+
 /*
  * trim a range of the filesystem.
  *
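The stop_rtx bound computed in xfs_trim_rtdev_extents() above falls out of the rtbitmap geometry: one bitmap block carries sb_blocksize * NBBY bits, one bit per rt extent, so each gather pass scans at most about one bitmap block's worth of free-space records before the shared lock is dropped and the queued discards are issued. A minimal standalone sketch of that arithmetic, using an assumed block size rather than the kernel types:

#include <stdint.h>
#include <stdio.h>

#define NBBY 8	/* bits per byte, matching the kernel's definition */

int main(void)
{
	uint32_t sb_blocksize = 4096;	/* assumed filesystem block size */

	/*
	 * One rtbitmap block holds one bit per rt extent, so a batch
	 * that stops after sb_blocksize * NBBY rtextents scans at most
	 * one bitmap block before the shared lock is released.
	 */
	uint64_t rtx_per_bitmap_block = (uint64_t)sb_blocksize * NBBY;
	uint64_t low_rtx = 0;
	uint64_t stop_rtx = low_rtx + rtx_per_bitmap_block;

	printf("batch covers rtextents [%llu, %llu)\n",
	       (unsigned long long)low_rtx,
	       (unsigned long long)stop_rtx);
	return 0;
}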
@@ -391,28 +645,37 @@ xfs_trim_extents(
  * addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format
  * is a linear address range. Hence we need to use DADDR based conversions and
  * comparisons for determining the correct offset and regions to trim.
+ *
+ * The realtime device is mapped into the FITRIM "address space" immediately
+ * after the data device.
  */
 int
 xfs_ioc_trim(
 	struct xfs_mount		*mp,
 	struct fstrim_range __user	*urange)
 {
-	struct xfs_perag	*pag;
 	unsigned int		granularity =
 		bdev_discard_granularity(mp->m_ddev_targp->bt_bdev);
+	struct block_device	*rt_bdev = NULL;
 	struct fstrim_range	range;
 	xfs_daddr_t		start, end;
 	xfs_extlen_t		minlen;
-	xfs_agnumber_t		start_agno, end_agno;
-	xfs_agblock_t		start_agbno, end_agbno;
+	xfs_rfsblock_t		max_blocks;
 	uint64_t		blocks_trimmed = 0;
 	int			error, last_error = 0;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev))
+	if (mp->m_rtdev_targp &&
+	    bdev_max_discard_sectors(mp->m_rtdev_targp->bt_bdev))
+		rt_bdev = mp->m_rtdev_targp->bt_bdev;
+	if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev) && !rt_bdev)
 		return -EOPNOTSUPP;
 
+	if (rt_bdev)
+		granularity = max(granularity,
+				  bdev_discard_granularity(rt_bdev));
+
 	/*
 	 * We haven't recovered the log, so we cannot use our bnobt-guided
 	 * storage zapping commands.
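The mapping described in the new comment above is linear: FITRIM daddrs [0, dblocks) address the data device, and [dblocks, dblocks + rblocks) address the realtime device, so the rt path only has to subtract the data device's size from the incoming range. A standalone sketch of that shift, with made-up device sizes standing in for the kernel conversion macros:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/*
	 * Assumed sizes in 512-byte basic blocks; stand-ins for
	 * XFS_FSB_TO_BB(mp, sb_dblocks) and an incoming FITRIM range.
	 */
	int64_t ddev_size = 1000000;
	int64_t start = 1200000, end = 1500000;

	/*
	 * Mirror the shift in xfs_trim_rtdev_extents(): daddr 0 of the
	 * rt device sits immediately after the last data device daddr.
	 */
	if (end <= ddev_size) {
		puts("range ends inside the data device; no rt trim");
		return 0;
	}
	end -= ddev_size;
	start = (start > ddev_size) ? start - ddev_size : 0;

	printf("rt device range: [%lld, %lld]\n",
	       (long long)start, (long long)end);
	return 0;
}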
@@ -433,35 +696,27 @@ xfs_ioc_trim(
 	 * used by the fstrim application. In the end it really doesn't
 	 * matter as trimming blocks is an advisory interface.
 	 */
-	if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
+	max_blocks = mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks;
+	if (range.start >= XFS_FSB_TO_B(mp, max_blocks) ||
 	    range.minlen > XFS_FSB_TO_B(mp, mp->m_ag_max_usable) ||
 	    range.len < mp->m_sb.sb_blocksize)
 		return -EINVAL;
 
 	start = BTOBB(range.start);
-	end = min_t(xfs_daddr_t, start + BTOBBT(range.len),
-		    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) - 1;
+	end = start + BTOBBT(range.len) - 1;
 
-	start_agno = xfs_daddr_to_agno(mp, start);
-	start_agbno = xfs_daddr_to_agbno(mp, start);
-	end_agno = xfs_daddr_to_agno(mp, end);
-	end_agbno = xfs_daddr_to_agbno(mp, end);
-
-	for_each_perag_range(mp, start_agno, end_agno, pag) {
-		xfs_agblock_t	agend = pag->block_count;
-
-		if (start_agno == end_agno)
-			agend = end_agbno;
-		error = xfs_trim_extents(pag, start_agbno, agend, minlen,
+	if (bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev)) {
+		error = xfs_trim_datadev_extents(mp, start, end, minlen,
 				&blocks_trimmed);
 		if (error)
 			last_error = error;
+	}
 
-		if (xfs_trim_should_stop()) {
-			xfs_perag_rele(pag);
-			break;
-		}
-		start_agbno = 0;
+	if (rt_bdev && !xfs_trim_should_stop()) {
+		error = xfs_trim_rtdev_extents(mp, start, end, minlen,
+				&blocks_trimmed);
+		if (error)
+			last_error = error;
 	}
 
 	if (last_error)
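With both paths wired up, a single FITRIM call whose byte range extends past the data device now trims the realtime device as well, and the kernel reports the total bytes trimmed back through range.len. A minimal userspace sketch of such a call (error handling mostly elided; any fd on the XFS mount works):

#include <fcntl.h>
#include <limits.h>
#include <linux/fs.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	struct fstrim_range range;
	int fd;

	if (argc != 2)
		return 1;
	fd = open(argv[1], O_RDONLY);	/* a file or dir on the XFS mount */
	if (fd < 0)
		return 1;

	memset(&range, 0, sizeof(range));
	range.len = ULLONG_MAX;		/* spans data device, then rt device */

	/* On success the kernel rewrites range.len with bytes trimmed. */
	if (ioctl(fd, FITRIM, &range) == 0)
		printf("trimmed %llu bytes\n", (unsigned long long)range.len);

	close(fd);
	return 0;
}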