18
18
#include <linux/vmalloc.h>
19
19
#include <linux/log2.h>
20
20
#include <linux/dm-kcopyd.h>
21
- #include <linux/semaphore.h>
22
21
23
22
#include "dm.h"
24
23
@@ -107,8 +106,8 @@ struct dm_snapshot {
107
106
/* The on disk metadata handler */
108
107
struct dm_exception_store * store ;
109
108
110
- /* Maximum number of in-flight COW jobs. */
111
- struct semaphore cow_count ;
109
+ unsigned in_progress ;
110
+ struct wait_queue_head in_progress_wait ;
112
111
113
112
struct dm_kcopyd_client * kcopyd_client ;
114
113
@@ -162,8 +161,8 @@ struct dm_snapshot {
162
161
*/
163
162
#define DEFAULT_COW_THRESHOLD 2048
164
163
165
- static int cow_threshold = DEFAULT_COW_THRESHOLD ;
166
- module_param_named (snapshot_cow_threshold , cow_threshold , int , 0644 );
164
+ static unsigned cow_threshold = DEFAULT_COW_THRESHOLD ;
165
+ module_param_named (snapshot_cow_threshold , cow_threshold , uint , 0644 );
167
166
MODULE_PARM_DESC (snapshot_cow_threshold , "Maximum number of chunks being copied on write" );
168
167
169
168
DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM (snapshot_copy_throttle ,
@@ -1327,7 +1326,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1327
1326
goto bad_hash_tables ;
1328
1327
}
1329
1328
1330
- sema_init (& s -> cow_count , ( cow_threshold > 0 ) ? cow_threshold : INT_MAX );
1329
+ init_waitqueue_head (& s -> in_progress_wait );
1331
1330
1332
1331
s -> kcopyd_client = dm_kcopyd_client_create (& dm_kcopyd_throttle );
1333
1332
if (IS_ERR (s -> kcopyd_client )) {
@@ -1509,9 +1508,56 @@ static void snapshot_dtr(struct dm_target *ti)
1509
1508
1510
1509
dm_put_device (ti , s -> origin );
1511
1510
1511
+ WARN_ON (s -> in_progress );
1512
+
1512
1513
kfree (s );
1513
1514
}
1514
1515
1516
+ static void account_start_copy (struct dm_snapshot * s )
1517
+ {
1518
+ spin_lock (& s -> in_progress_wait .lock );
1519
+ s -> in_progress ++ ;
1520
+ spin_unlock (& s -> in_progress_wait .lock );
1521
+ }
1522
+
1523
+ static void account_end_copy (struct dm_snapshot * s )
1524
+ {
1525
+ spin_lock (& s -> in_progress_wait .lock );
1526
+ BUG_ON (!s -> in_progress );
1527
+ s -> in_progress -- ;
1528
+ if (likely (s -> in_progress <= cow_threshold ) &&
1529
+ unlikely (waitqueue_active (& s -> in_progress_wait )))
1530
+ wake_up_locked (& s -> in_progress_wait );
1531
+ spin_unlock (& s -> in_progress_wait .lock );
1532
+ }
1533
+
1534
+ static bool wait_for_in_progress (struct dm_snapshot * s , bool unlock_origins )
1535
+ {
1536
+ if (unlikely (s -> in_progress > cow_threshold )) {
1537
+ spin_lock (& s -> in_progress_wait .lock );
1538
+ if (likely (s -> in_progress > cow_threshold )) {
1539
+ /*
1540
+ * NOTE: this throttle doesn't account for whether
1541
+ * the caller is servicing an IO that will trigger a COW
1542
+ * so excess throttling may result for chunks not required
1543
+ * to be COW'd. But if cow_threshold was reached, extra
1544
+ * throttling is unlikely to negatively impact performance.
1545
+ */
1546
+ DECLARE_WAITQUEUE (wait , current );
1547
+ __add_wait_queue (& s -> in_progress_wait , & wait );
1548
+ __set_current_state (TASK_UNINTERRUPTIBLE );
1549
+ spin_unlock (& s -> in_progress_wait .lock );
1550
+ if (unlock_origins )
1551
+ up_read (& _origins_lock );
1552
+ io_schedule ();
1553
+ remove_wait_queue (& s -> in_progress_wait , & wait );
1554
+ return false;
1555
+ }
1556
+ spin_unlock (& s -> in_progress_wait .lock );
1557
+ }
1558
+ return true;
1559
+ }
1560
+
1515
1561
/*
1516
1562
* Flush a list of buffers.
1517
1563
*/
@@ -1527,7 +1573,7 @@ static void flush_bios(struct bio *bio)
1527
1573
}
1528
1574
}
1529
1575
1530
- static int do_origin (struct dm_dev * origin , struct bio * bio );
1576
+ static int do_origin (struct dm_dev * origin , struct bio * bio , bool limit );
1531
1577
1532
1578
/*
1533
1579
* Flush a list of buffers.
@@ -1540,7 +1586,7 @@ static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
1540
1586
while (bio ) {
1541
1587
n = bio -> bi_next ;
1542
1588
bio -> bi_next = NULL ;
1543
- r = do_origin (s -> origin , bio );
1589
+ r = do_origin (s -> origin , bio , false );
1544
1590
if (r == DM_MAPIO_REMAPPED )
1545
1591
generic_make_request (bio );
1546
1592
bio = n ;
@@ -1732,7 +1778,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
1732
1778
rb_link_node (& pe -> out_of_order_node , parent , p );
1733
1779
rb_insert_color (& pe -> out_of_order_node , & s -> out_of_order_tree );
1734
1780
}
1735
- up ( & s -> cow_count );
1781
+ account_end_copy ( s );
1736
1782
}
1737
1783
1738
1784
/*
@@ -1756,7 +1802,7 @@ static void start_copy(struct dm_snap_pending_exception *pe)
1756
1802
dest .count = src .count ;
1757
1803
1758
1804
/* Hand over to kcopyd */
1759
- down ( & s -> cow_count );
1805
+ account_start_copy ( s );
1760
1806
dm_kcopyd_copy (s -> kcopyd_client , & src , 1 , & dest , 0 , copy_callback , pe );
1761
1807
}
1762
1808
@@ -1776,7 +1822,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe,
1776
1822
pe -> full_bio = bio ;
1777
1823
pe -> full_bio_end_io = bio -> bi_end_io ;
1778
1824
1779
- down ( & s -> cow_count );
1825
+ account_start_copy ( s );
1780
1826
callback_data = dm_kcopyd_prepare_callback (s -> kcopyd_client ,
1781
1827
copy_callback , pe );
1782
1828
@@ -1866,7 +1912,7 @@ static void zero_callback(int read_err, unsigned long write_err, void *context)
1866
1912
struct bio * bio = context ;
1867
1913
struct dm_snapshot * s = bio -> bi_private ;
1868
1914
1869
- up ( & s -> cow_count );
1915
+ account_end_copy ( s );
1870
1916
bio -> bi_status = write_err ? BLK_STS_IOERR : 0 ;
1871
1917
bio_endio (bio );
1872
1918
}
@@ -1880,7 +1926,7 @@ static void zero_exception(struct dm_snapshot *s, struct dm_exception *e,
1880
1926
dest .sector = bio -> bi_iter .bi_sector ;
1881
1927
dest .count = s -> store -> chunk_size ;
1882
1928
1883
- down ( & s -> cow_count );
1929
+ account_start_copy ( s );
1884
1930
WARN_ON_ONCE (bio -> bi_private );
1885
1931
bio -> bi_private = s ;
1886
1932
dm_kcopyd_zero (s -> kcopyd_client , 1 , & dest , 0 , zero_callback , bio );
@@ -1916,6 +1962,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
1916
1962
if (!s -> valid )
1917
1963
return DM_MAPIO_KILL ;
1918
1964
1965
+ if (bio_data_dir (bio ) == WRITE ) {
1966
+ while (unlikely (!wait_for_in_progress (s , false)))
1967
+ ; /* wait_for_in_progress() has slept */
1968
+ }
1969
+
1919
1970
down_read (& s -> lock );
1920
1971
dm_exception_table_lock (& lock );
1921
1972
@@ -2112,7 +2163,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
2112
2163
2113
2164
if (bio_data_dir (bio ) == WRITE ) {
2114
2165
up_write (& s -> lock );
2115
- return do_origin (s -> origin , bio );
2166
+ return do_origin (s -> origin , bio , false );
2116
2167
}
2117
2168
2118
2169
out_unlock :
@@ -2487,15 +2538,24 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
2487
2538
/*
2488
2539
* Called on a write from the origin driver.
2489
2540
*/
2490
- static int do_origin (struct dm_dev * origin , struct bio * bio )
2541
+ static int do_origin (struct dm_dev * origin , struct bio * bio , bool limit )
2491
2542
{
2492
2543
struct origin * o ;
2493
2544
int r = DM_MAPIO_REMAPPED ;
2494
2545
2546
+ again :
2495
2547
down_read (& _origins_lock );
2496
2548
o = __lookup_origin (origin -> bdev );
2497
- if (o )
2549
+ if (o ) {
2550
+ if (limit ) {
2551
+ struct dm_snapshot * s ;
2552
+ list_for_each_entry (s , & o -> snapshots , list )
2553
+ if (unlikely (!wait_for_in_progress (s , true)))
2554
+ goto again ;
2555
+ }
2556
+
2498
2557
r = __origin_write (& o -> snapshots , bio -> bi_iter .bi_sector , bio );
2558
+ }
2499
2559
up_read (& _origins_lock );
2500
2560
2501
2561
return r ;
@@ -2608,7 +2668,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
2608
2668
dm_accept_partial_bio (bio , available_sectors );
2609
2669
2610
2670
/* Only tell snapshots if this is a write */
2611
- return do_origin (o -> dev , bio );
2671
+ return do_origin (o -> dev , bio , true );
2612
2672
}
2613
2673
2614
2674
/*
0 commit comments