Skip to content

Commit fb8527e

Browse files
committed
Merge tag 'for-5.4/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper fixes from Mike Snitzer:

 - Fix DM snapshot deadlock that can occur due to COW throttling
   preventing locks from being released.

 - Fix DM cache's GFP_NOWAIT allocation failure error paths by
   switching to GFP_NOIO.

 - Make __hash_find() static in the DM clone target.

* tag 'for-5.4/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm cache: fix bugs when a GFP_NOWAIT allocation fails
  dm snapshot: rework COW throttling to fix deadlock
  dm snapshot: introduce account_start_copy() and account_end_copy()
  dm clone: Make __hash_find static
2 parents 90105ae + 13bd677 commit fb8527e

File tree

3 files changed

+81
-45
lines changed

3 files changed

+81
-45
lines changed

drivers/md/dm-cache-target.c

Lines changed: 2 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -542,7 +542,7 @@ static void wake_migration_worker(struct cache *cache)
542542

543543
static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache)
544544
{
545-
return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOWAIT);
545+
return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOIO);
546546
}
547547

548548
static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell)
@@ -554,9 +554,7 @@ static struct dm_cache_migration *alloc_migration(struct cache *cache)
554554
{
555555
struct dm_cache_migration *mg;
556556

557-
mg = mempool_alloc(&cache->migration_pool, GFP_NOWAIT);
558-
if (!mg)
559-
return NULL;
557+
mg = mempool_alloc(&cache->migration_pool, GFP_NOIO);
560558

561559
memset(mg, 0, sizeof(*mg));
562560

@@ -664,10 +662,6 @@ static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bi
664662
struct dm_bio_prison_cell_v2 *cell_prealloc, *cell;
665663

666664
cell_prealloc = alloc_prison_cell(cache); /* FIXME: allow wait if calling from worker */
667-
if (!cell_prealloc) {
668-
defer_bio(cache, bio);
669-
return false;
670-
}
671665

672666
build_key(oblock, end, &key);
673667
r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell);
@@ -1493,11 +1487,6 @@ static int mg_lock_writes(struct dm_cache_migration *mg)
14931487
struct dm_bio_prison_cell_v2 *prealloc;
14941488

14951489
prealloc = alloc_prison_cell(cache);
1496-
if (!prealloc) {
1497-
DMERR_LIMIT("%s: alloc_prison_cell failed", cache_device_name(cache));
1498-
mg_complete(mg, false);
1499-
return -ENOMEM;
1500-
}
15011490

15021491
/*
15031492
* Prevent writes to the block, but allow reads to continue.
@@ -1535,11 +1524,6 @@ static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio
15351524
}
15361525

15371526
mg = alloc_migration(cache);
1538-
if (!mg) {
1539-
policy_complete_background_work(cache->policy, op, false);
1540-
background_work_end(cache);
1541-
return -ENOMEM;
1542-
}
15431527

15441528
mg->op = op;
15451529
mg->overwrite_bio = bio;
@@ -1628,10 +1612,6 @@ static int invalidate_lock(struct dm_cache_migration *mg)
16281612
struct dm_bio_prison_cell_v2 *prealloc;
16291613

16301614
prealloc = alloc_prison_cell(cache);
1631-
if (!prealloc) {
1632-
invalidate_complete(mg, false);
1633-
return -ENOMEM;
1634-
}
16351615

16361616
build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key);
16371617
r = dm_cell_lock_v2(cache->prison, &key,
@@ -1669,10 +1649,6 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
16691649
return -EPERM;
16701650

16711651
mg = alloc_migration(cache);
1672-
if (!mg) {
1673-
background_work_end(cache);
1674-
return -ENOMEM;
1675-
}
16761652

16771653
mg->overwrite_bio = bio;
16781654
mg->invalidate_cblock = cblock;

drivers/md/dm-clone-target.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -591,8 +591,8 @@ static struct hash_table_bucket *get_hash_table_bucket(struct clone *clone,
591591
*
592592
* NOTE: Must be called with the bucket lock held
593593
*/
594-
struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket,
595-
unsigned long region_nr)
594+
static struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket,
595+
unsigned long region_nr)
596596
{
597597
struct dm_clone_region_hydration *hd;
598598

drivers/md/dm-snap.c

Lines changed: 77 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@
1818
#include <linux/vmalloc.h>
1919
#include <linux/log2.h>
2020
#include <linux/dm-kcopyd.h>
21-
#include <linux/semaphore.h>
2221

2322
#include "dm.h"
2423

@@ -107,8 +106,8 @@ struct dm_snapshot {
107106
/* The on disk metadata handler */
108107
struct dm_exception_store *store;
109108

110-
/* Maximum number of in-flight COW jobs. */
111-
struct semaphore cow_count;
109+
unsigned in_progress;
110+
struct wait_queue_head in_progress_wait;
112111

113112
struct dm_kcopyd_client *kcopyd_client;
114113

@@ -162,8 +161,8 @@ struct dm_snapshot {
162161
*/
163162
#define DEFAULT_COW_THRESHOLD 2048
164163

165-
static int cow_threshold = DEFAULT_COW_THRESHOLD;
166-
module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644);
164+
static unsigned cow_threshold = DEFAULT_COW_THRESHOLD;
165+
module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644);
167166
MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");
168167

169168
DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
@@ -1327,7 +1326,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
13271326
goto bad_hash_tables;
13281327
}
13291328

1330-
sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX);
1329+
init_waitqueue_head(&s->in_progress_wait);
13311330

13321331
s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
13331332
if (IS_ERR(s->kcopyd_client)) {
@@ -1509,9 +1508,56 @@ static void snapshot_dtr(struct dm_target *ti)
15091508

15101509
dm_put_device(ti, s->origin);
15111510

1511+
WARN_ON(s->in_progress);
1512+
15121513
kfree(s);
15131514
}
15141515

1516+
static void account_start_copy(struct dm_snapshot *s)
1517+
{
1518+
spin_lock(&s->in_progress_wait.lock);
1519+
s->in_progress++;
1520+
spin_unlock(&s->in_progress_wait.lock);
1521+
}
1522+
1523+
static void account_end_copy(struct dm_snapshot *s)
1524+
{
1525+
spin_lock(&s->in_progress_wait.lock);
1526+
BUG_ON(!s->in_progress);
1527+
s->in_progress--;
1528+
if (likely(s->in_progress <= cow_threshold) &&
1529+
unlikely(waitqueue_active(&s->in_progress_wait)))
1530+
wake_up_locked(&s->in_progress_wait);
1531+
spin_unlock(&s->in_progress_wait.lock);
1532+
}
1533+
1534+
static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins)
1535+
{
1536+
if (unlikely(s->in_progress > cow_threshold)) {
1537+
spin_lock(&s->in_progress_wait.lock);
1538+
if (likely(s->in_progress > cow_threshold)) {
1539+
/*
1540+
* NOTE: this throttle doesn't account for whether
1541+
* the caller is servicing an IO that will trigger a COW
1542+
* so excess throttling may result for chunks not required
1543+
* to be COW'd. But if cow_threshold was reached, extra
1544+
* throttling is unlikely to negatively impact performance.
1545+
*/
1546+
DECLARE_WAITQUEUE(wait, current);
1547+
__add_wait_queue(&s->in_progress_wait, &wait);
1548+
__set_current_state(TASK_UNINTERRUPTIBLE);
1549+
spin_unlock(&s->in_progress_wait.lock);
1550+
if (unlock_origins)
1551+
up_read(&_origins_lock);
1552+
io_schedule();
1553+
remove_wait_queue(&s->in_progress_wait, &wait);
1554+
return false;
1555+
}
1556+
spin_unlock(&s->in_progress_wait.lock);
1557+
}
1558+
return true;
1559+
}
1560+
15151561
/*
15161562
* Flush a list of buffers.
15171563
*/
@@ -1527,7 +1573,7 @@ static void flush_bios(struct bio *bio)
15271573
}
15281574
}
15291575

1530-
static int do_origin(struct dm_dev *origin, struct bio *bio);
1576+
static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit);
15311577

15321578
/*
15331579
* Flush a list of buffers.
@@ -1540,7 +1586,7 @@ static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
15401586
while (bio) {
15411587
n = bio->bi_next;
15421588
bio->bi_next = NULL;
1543-
r = do_origin(s->origin, bio);
1589+
r = do_origin(s->origin, bio, false);
15441590
if (r == DM_MAPIO_REMAPPED)
15451591
generic_make_request(bio);
15461592
bio = n;
@@ -1732,7 +1778,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
17321778
rb_link_node(&pe->out_of_order_node, parent, p);
17331779
rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
17341780
}
1735-
up(&s->cow_count);
1781+
account_end_copy(s);
17361782
}
17371783

17381784
/*
@@ -1756,7 +1802,7 @@ static void start_copy(struct dm_snap_pending_exception *pe)
17561802
dest.count = src.count;
17571803

17581804
/* Hand over to kcopyd */
1759-
down(&s->cow_count);
1805+
account_start_copy(s);
17601806
dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
17611807
}
17621808

@@ -1776,7 +1822,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe,
17761822
pe->full_bio = bio;
17771823
pe->full_bio_end_io = bio->bi_end_io;
17781824

1779-
down(&s->cow_count);
1825+
account_start_copy(s);
17801826
callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
17811827
copy_callback, pe);
17821828

@@ -1866,7 +1912,7 @@ static void zero_callback(int read_err, unsigned long write_err, void *context)
18661912
struct bio *bio = context;
18671913
struct dm_snapshot *s = bio->bi_private;
18681914

1869-
up(&s->cow_count);
1915+
account_end_copy(s);
18701916
bio->bi_status = write_err ? BLK_STS_IOERR : 0;
18711917
bio_endio(bio);
18721918
}
@@ -1880,7 +1926,7 @@ static void zero_exception(struct dm_snapshot *s, struct dm_exception *e,
18801926
dest.sector = bio->bi_iter.bi_sector;
18811927
dest.count = s->store->chunk_size;
18821928

1883-
down(&s->cow_count);
1929+
account_start_copy(s);
18841930
WARN_ON_ONCE(bio->bi_private);
18851931
bio->bi_private = s;
18861932
dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio);
@@ -1916,6 +1962,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
19161962
if (!s->valid)
19171963
return DM_MAPIO_KILL;
19181964

1965+
if (bio_data_dir(bio) == WRITE) {
1966+
while (unlikely(!wait_for_in_progress(s, false)))
1967+
; /* wait_for_in_progress() has slept */
1968+
}
1969+
19191970
down_read(&s->lock);
19201971
dm_exception_table_lock(&lock);
19211972

@@ -2112,7 +2163,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
21122163

21132164
if (bio_data_dir(bio) == WRITE) {
21142165
up_write(&s->lock);
2115-
return do_origin(s->origin, bio);
2166+
return do_origin(s->origin, bio, false);
21162167
}
21172168

21182169
out_unlock:
@@ -2487,15 +2538,24 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
24872538
/*
24882539
* Called on a write from the origin driver.
24892540
*/
2490-
static int do_origin(struct dm_dev *origin, struct bio *bio)
2541+
static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit)
24912542
{
24922543
struct origin *o;
24932544
int r = DM_MAPIO_REMAPPED;
24942545

2546+
again:
24952547
down_read(&_origins_lock);
24962548
o = __lookup_origin(origin->bdev);
2497-
if (o)
2549+
if (o) {
2550+
if (limit) {
2551+
struct dm_snapshot *s;
2552+
list_for_each_entry(s, &o->snapshots, list)
2553+
if (unlikely(!wait_for_in_progress(s, true)))
2554+
goto again;
2555+
}
2556+
24982557
r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
2558+
}
24992559
up_read(&_origins_lock);
25002560

25012561
return r;
@@ -2608,7 +2668,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
26082668
dm_accept_partial_bio(bio, available_sectors);
26092669

26102670
/* Only tell snapshots if this is a write */
2611-
return do_origin(o->dev, bio);
2671+
return do_origin(o->dev, bio, true);
26122672
}
26132673

26142674
/*

0 commit comments

Comments
 (0)