@@ -1516,256 +1516,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
1516
1516
raid10_write_request (mddev , bio , r10_bio );
1517
1517
}
1518
1518
1519
- static struct bio * raid10_split_bio (struct r10conf * conf ,
1520
- struct bio * bio , sector_t sectors , bool want_first )
1521
- {
1522
- struct bio * split ;
1523
-
1524
- split = bio_split (bio , sectors , GFP_NOIO , & conf -> bio_split );
1525
- bio_chain (split , bio );
1526
- allow_barrier (conf );
1527
- if (want_first ) {
1528
- submit_bio_noacct (bio );
1529
- bio = split ;
1530
- } else
1531
- submit_bio_noacct (split );
1532
- wait_barrier (conf );
1533
-
1534
- return bio ;
1535
- }
1536
-
1537
- static void raid10_end_discard_request (struct bio * bio )
1538
- {
1539
- struct r10bio * r10_bio = bio -> bi_private ;
1540
- struct r10conf * conf = r10_bio -> mddev -> private ;
1541
- struct md_rdev * rdev = NULL ;
1542
- int dev ;
1543
- int slot , repl ;
1544
-
1545
- /*
1546
- * We don't care about the return value of the discard bio
1547
- */
1548
- if (!test_bit (R10BIO_Uptodate , & r10_bio -> state ))
1549
- set_bit (R10BIO_Uptodate , & r10_bio -> state );
1550
-
1551
- dev = find_bio_disk (conf , r10_bio , bio , & slot , & repl );
1552
- if (repl )
1553
- rdev = conf -> mirrors [dev ].replacement ;
1554
- if (!rdev ) {
1555
- /* raid10_remove_disk uses smp_mb to make sure rdev is set to
1556
- * replacement before setting replacement to NULL. It can read
1557
- * rdev first without barrier protection even if replacement is NULL
1558
- */
1559
- smp_rmb ();
1560
- rdev = conf -> mirrors [dev ].rdev ;
1561
- }
1562
-
1563
- if (atomic_dec_and_test (& r10_bio -> remaining )) {
1564
- md_write_end (r10_bio -> mddev );
1565
- raid_end_bio_io (r10_bio );
1566
- }
1567
-
1568
- rdev_dec_pending (rdev , conf -> mddev );
1569
- }
1570
-
1571
- /* There are some limitations to handle discard bio
1572
- * 1st, the discard size is bigger than stripe_size*2.
1573
- * 2nd, if the discard bio spans reshape progress, we use the old way to
1574
- * handle discard bio
1575
- */
1576
- static int raid10_handle_discard (struct mddev * mddev , struct bio * bio )
1577
- {
1578
- struct r10conf * conf = mddev -> private ;
1579
- struct geom * geo = & conf -> geo ;
1580
- struct r10bio * r10_bio ;
1581
-
1582
- int disk ;
1583
- sector_t chunk ;
1584
- unsigned int stripe_size ;
1585
- sector_t split_size ;
1586
-
1587
- sector_t bio_start , bio_end ;
1588
- sector_t first_stripe_index , last_stripe_index ;
1589
- sector_t start_disk_offset ;
1590
- unsigned int start_disk_index ;
1591
- sector_t end_disk_offset ;
1592
- unsigned int end_disk_index ;
1593
- unsigned int remainder ;
1594
-
1595
- if (test_bit (MD_RECOVERY_RESHAPE , & mddev -> recovery ))
1596
- return - EAGAIN ;
1597
-
1598
- wait_barrier (conf );
1599
-
1600
- /* Check reshape again in case a reshape starts after checking
1601
- * MD_RECOVERY_RESHAPE and before wait_barrier
1602
- */
1603
- if (test_bit (MD_RECOVERY_RESHAPE , & mddev -> recovery ))
1604
- goto out ;
1605
-
1606
- stripe_size = geo -> raid_disks << geo -> chunk_shift ;
1607
- bio_start = bio -> bi_iter .bi_sector ;
1608
- bio_end = bio_end_sector (bio );
1609
-
1610
- /* Maybe one discard bio is smaller than stripe size or crosses one stripe
1611
- * and discard region is larger than one stripe size. For far offset layout,
1612
- * if the discard region is not aligned with stripe size, there is hole
1613
- * when we submit discard bio to member disk. For simplicity, we only
1614
- * handle discard bios whose discard region is at least stripe_size*2
1615
- */
1616
- if (bio_sectors (bio ) < stripe_size * 2 )
1617
- goto out ;
1618
-
1619
- /* For far offset layout, if bio is not aligned with stripe size, it splits
1620
- * the part that is not aligned with stripe size.
1621
- */
1622
- div_u64_rem (bio_start , stripe_size , & remainder );
1623
- if (geo -> far_offset && remainder ) {
1624
- split_size = stripe_size - remainder ;
1625
- bio = raid10_split_bio (conf , bio , split_size , false);
1626
- }
1627
- div_u64_rem (bio_end , stripe_size , & remainder );
1628
- if (geo -> far_offset && remainder ) {
1629
- split_size = bio_sectors (bio ) - remainder ;
1630
- bio = raid10_split_bio (conf , bio , split_size , true);
1631
- }
1632
-
1633
- r10_bio = mempool_alloc (& conf -> r10bio_pool , GFP_NOIO );
1634
- r10_bio -> mddev = mddev ;
1635
- r10_bio -> state = 0 ;
1636
- r10_bio -> sectors = 0 ;
1637
- memset (r10_bio -> devs , 0 , sizeof (r10_bio -> devs [0 ]) * geo -> raid_disks );
1638
-
1639
- wait_blocked_dev (mddev , r10_bio );
1640
-
1641
- r10_bio -> master_bio = bio ;
1642
-
1643
- bio_start = bio -> bi_iter .bi_sector ;
1644
- bio_end = bio_end_sector (bio );
1645
-
1646
- /* raid10 uses chunk as the unit to store data. It's similar to raid0.
1647
- * One stripe contains the chunks from all member disks (one chunk from
1648
- * one disk at the same HBA address). For layout detail, see 'man md 4'
1649
- */
1650
- chunk = bio_start >> geo -> chunk_shift ;
1651
- chunk *= geo -> near_copies ;
1652
- first_stripe_index = chunk ;
1653
- start_disk_index = sector_div (first_stripe_index , geo -> raid_disks );
1654
- if (geo -> far_offset )
1655
- first_stripe_index *= geo -> far_copies ;
1656
- start_disk_offset = (bio_start & geo -> chunk_mask ) +
1657
- (first_stripe_index << geo -> chunk_shift );
1658
-
1659
- chunk = bio_end >> geo -> chunk_shift ;
1660
- chunk *= geo -> near_copies ;
1661
- last_stripe_index = chunk ;
1662
- end_disk_index = sector_div (last_stripe_index , geo -> raid_disks );
1663
- if (geo -> far_offset )
1664
- last_stripe_index *= geo -> far_copies ;
1665
- end_disk_offset = (bio_end & geo -> chunk_mask ) +
1666
- (last_stripe_index << geo -> chunk_shift );
1667
-
1668
- rcu_read_lock ();
1669
- for (disk = 0 ; disk < geo -> raid_disks ; disk ++ ) {
1670
- struct md_rdev * rdev = rcu_dereference (conf -> mirrors [disk ].rdev );
1671
- struct md_rdev * rrdev = rcu_dereference (
1672
- conf -> mirrors [disk ].replacement );
1673
-
1674
- r10_bio -> devs [disk ].bio = NULL ;
1675
- r10_bio -> devs [disk ].repl_bio = NULL ;
1676
-
1677
- if (rdev && (test_bit (Faulty , & rdev -> flags )))
1678
- rdev = NULL ;
1679
- if (rrdev && (test_bit (Faulty , & rrdev -> flags )))
1680
- rrdev = NULL ;
1681
- if (!rdev && !rrdev )
1682
- continue ;
1683
-
1684
- if (rdev ) {
1685
- r10_bio -> devs [disk ].bio = bio ;
1686
- atomic_inc (& rdev -> nr_pending );
1687
- }
1688
- if (rrdev ) {
1689
- r10_bio -> devs [disk ].repl_bio = bio ;
1690
- atomic_inc (& rrdev -> nr_pending );
1691
- }
1692
- }
1693
- rcu_read_unlock ();
1694
-
1695
- atomic_set (& r10_bio -> remaining , 1 );
1696
- for (disk = 0 ; disk < geo -> raid_disks ; disk ++ ) {
1697
- sector_t dev_start , dev_end ;
1698
- struct bio * mbio , * rbio = NULL ;
1699
- struct md_rdev * rdev = rcu_dereference (conf -> mirrors [disk ].rdev );
1700
- struct md_rdev * rrdev = rcu_dereference (
1701
- conf -> mirrors [disk ].replacement );
1702
-
1703
- /*
1704
- * Now start to calculate the start and end address for each disk.
1705
- * The space between dev_start and dev_end is the discard region.
1706
- *
1707
- * For dev_start, it needs to consider three conditions:
1708
- * 1st, the disk is before start_disk, you can imagine the disk in
1709
- * the next stripe. So the dev_start is the start address of next
1710
- * stripe.
1711
- * 2nd, the disk is after start_disk, it means the disk is at the
1712
- * same stripe of first disk
1713
- * 3rd, the first disk itself, we can use start_disk_offset directly
1714
- */
1715
- if (disk < start_disk_index )
1716
- dev_start = (first_stripe_index + 1 ) * mddev -> chunk_sectors ;
1717
- else if (disk > start_disk_index )
1718
- dev_start = first_stripe_index * mddev -> chunk_sectors ;
1719
- else
1720
- dev_start = start_disk_offset ;
1721
-
1722
- if (disk < end_disk_index )
1723
- dev_end = (last_stripe_index + 1 ) * mddev -> chunk_sectors ;
1724
- else if (disk > end_disk_index )
1725
- dev_end = last_stripe_index * mddev -> chunk_sectors ;
1726
- else
1727
- dev_end = end_disk_offset ;
1728
-
1729
- /* It only handles discard bios whose size is >= stripe size, so
1730
- * dev_end > dev_start all the time
1731
- */
1732
- if (r10_bio -> devs [disk ].bio ) {
1733
- mbio = bio_clone_fast (bio , GFP_NOIO , & mddev -> bio_set );
1734
- mbio -> bi_end_io = raid10_end_discard_request ;
1735
- mbio -> bi_private = r10_bio ;
1736
- r10_bio -> devs [disk ].bio = mbio ;
1737
- r10_bio -> devs [disk ].devnum = disk ;
1738
- atomic_inc (& r10_bio -> remaining );
1739
- md_submit_discard_bio (mddev , rdev , mbio ,
1740
- dev_start + choose_data_offset (r10_bio , rdev ),
1741
- dev_end - dev_start );
1742
- bio_endio (mbio );
1743
- }
1744
- if (r10_bio -> devs [disk ].repl_bio ) {
1745
- rbio = bio_clone_fast (bio , GFP_NOIO , & mddev -> bio_set );
1746
- rbio -> bi_end_io = raid10_end_discard_request ;
1747
- rbio -> bi_private = r10_bio ;
1748
- r10_bio -> devs [disk ].repl_bio = rbio ;
1749
- r10_bio -> devs [disk ].devnum = disk ;
1750
- atomic_inc (& r10_bio -> remaining );
1751
- md_submit_discard_bio (mddev , rrdev , rbio ,
1752
- dev_start + choose_data_offset (r10_bio , rrdev ),
1753
- dev_end - dev_start );
1754
- bio_endio (rbio );
1755
- }
1756
- }
1757
-
1758
- if (atomic_dec_and_test (& r10_bio -> remaining )) {
1759
- md_write_end (r10_bio -> mddev );
1760
- raid_end_bio_io (r10_bio );
1761
- }
1762
-
1763
- return 0 ;
1764
- out :
1765
- allow_barrier (conf );
1766
- return - EAGAIN ;
1767
- }
1768
-
1769
1519
static bool raid10_make_request (struct mddev * mddev , struct bio * bio )
1770
1520
{
1771
1521
struct r10conf * conf = mddev -> private ;
@@ -1780,10 +1530,6 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
1780
1530
if (!md_write_start (mddev , bio ))
1781
1531
return false;
1782
1532
1783
- if (unlikely (bio_op (bio ) == REQ_OP_DISCARD ))
1784
- if (!raid10_handle_discard (mddev , bio ))
1785
- return true;
1786
-
1787
1533
/*
1788
1534
* If this request crosses a chunk boundary, we need to split
1789
1535
* it.
@@ -4023,7 +3769,7 @@ static int raid10_run(struct mddev *mddev)
4023
3769
4024
3770
if (mddev -> queue ) {
4025
3771
blk_queue_max_discard_sectors (mddev -> queue ,
4026
- UINT_MAX );
3772
+ mddev -> chunk_sectors );
4027
3773
blk_queue_max_write_same_sectors (mddev -> queue , 0 );
4028
3774
blk_queue_max_write_zeroes_sectors (mddev -> queue , 0 );
4029
3775
blk_queue_io_min (mddev -> queue , mddev -> chunk_sectors << 9 );
0 commit comments