Commit dd55396

Merge tag 'md/4.10-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md

Pull MD fixes from Shaohua Li:
 "This fixes several corner cases for raid5 cache, which is merged into
  this cycle"

* tag 'md/4.10-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  md/r5cache: disable write back for degraded array
  md/r5cache: shift complex rmw from read path to write path
  md/r5cache: flush data only stripes in r5l_recovery_log()
  md/raid5: move comment of fetch_block to right location
  md/r5cache: read data into orig_page for prexor of cached data
  md/raid5-cache: delete meaningless code

2 parents 64a172d + 2e38a37, commit dd55396
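For orientation: the core of the "disable write back for degraded array" change is that raid5 notifies the cache layer when a member device fails, and a work item then switches the journal back to write-through mode while the array is suspended. The sketch below is condensed from the raid5-cache.c hunks in this merge (the pr_info message and surrounding plumbing are trimmed); see the full diff below for the authoritative version.

/* Condensed sketch of the degraded-array write-back disable path. */
void r5c_update_on_rdev_error(struct mddev *mddev)
{
	struct r5conf *conf = mddev->private;
	struct r5l_log *log = conf->log;

	/* only act when a journal exists, the array is degraded, and the
	 * cache is currently in write-back mode */
	if (log && raid5_calc_degraded(conf) > 0 &&
	    log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK)
		schedule_work(&log->disable_writeback_work);
}

static void r5c_disable_writeback_async(struct work_struct *work)
{
	struct r5l_log *log = container_of(work, struct r5l_log,
					   disable_writeback_work);
	struct mddev *mddev = log->rdev->mddev;

	if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
		return;	/* already in write-through, nothing to do */

	/* quiesce the array, flip the journal mode, resume */
	mddev_suspend(mddev);
	log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
	mddev_resume(mddev);
}

The matching INIT_WORK() / flush_work() calls in r5l_init_log() and r5l_exit_log() in the diff below tie the work item's lifetime to the log.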

File tree

4 files changed: +194 -45 lines changed

drivers/md/md.c

Lines changed: 5 additions & 0 deletions

@@ -5291,6 +5291,11 @@ int md_run(struct mddev *mddev)
 	if (start_readonly && mddev->ro == 0)
 		mddev->ro = 2; /* read-only, but switch on first write */
 
+	/*
+	 * NOTE: some pers->run(), for example r5l_recovery_log(), wakes
+	 * up mddev->thread. It is important to initialize critical
+	 * resources for mddev->thread BEFORE calling pers->run().
+	 */
 	err = pers->run(mddev);
 	if (err)
 		pr_warn("md: pers->run() failed ...\n");

drivers/md/raid5-cache.c

Lines changed: 88 additions & 18 deletions

@@ -162,6 +162,8 @@ struct r5l_log {
 
 	/* to submit async io_units, to fulfill ordering of flush */
 	struct work_struct deferred_io_work;
+	/* to disable write back during in degraded mode */
+	struct work_struct disable_writeback_work;
 };
 
 /*
@@ -611,6 +613,21 @@ static void r5l_submit_io_async(struct work_struct *work)
 		r5l_do_submit_io(log, io);
 }
 
+static void r5c_disable_writeback_async(struct work_struct *work)
+{
+	struct r5l_log *log = container_of(work, struct r5l_log,
+					   disable_writeback_work);
+	struct mddev *mddev = log->rdev->mddev;
+
+	if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
+		return;
+	pr_info("md/raid:%s: Disabling writeback cache for degraded array.\n",
+		mdname(mddev));
+	mddev_suspend(mddev);
+	log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
+	mddev_resume(mddev);
+}
+
 static void r5l_submit_current_io(struct r5l_log *log)
 {
 	struct r5l_io_unit *io = log->current_io;
@@ -1393,8 +1410,6 @@ static void r5l_do_reclaim(struct r5l_log *log)
 	next_checkpoint = r5c_calculate_new_cp(conf);
 	spin_unlock_irq(&log->io_list_lock);
 
-	BUG_ON(reclaimable < 0);
-
 	if (reclaimable == 0 || !write_super)
 		return;
 
@@ -2062,7 +2077,7 @@ static int
 r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 				       struct r5l_recovery_ctx *ctx)
 {
-	struct stripe_head *sh, *next;
+	struct stripe_head *sh;
 	struct mddev *mddev = log->rdev->mddev;
 	struct page *page;
 	sector_t next_checkpoint = MaxSector;
@@ -2076,7 +2091,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 
 	WARN_ON(list_empty(&ctx->cached_list));
 
-	list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
+	list_for_each_entry(sh, &ctx->cached_list, lru) {
 		struct r5l_meta_block *mb;
 		int i;
 		int offset;
@@ -2126,14 +2141,39 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 		ctx->pos = write_pos;
 		ctx->seq += 1;
 		next_checkpoint = sh->log_start;
-		list_del_init(&sh->lru);
-		raid5_release_stripe(sh);
 	}
 	log->next_checkpoint = next_checkpoint;
 	__free_page(page);
 	return 0;
 }
 
+static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log,
+						 struct r5l_recovery_ctx *ctx)
+{
+	struct mddev *mddev = log->rdev->mddev;
+	struct r5conf *conf = mddev->private;
+	struct stripe_head *sh, *next;
+
+	if (ctx->data_only_stripes == 0)
+		return;
+
+	log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_BACK;
+
+	list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
+		r5c_make_stripe_write_out(sh);
+		set_bit(STRIPE_HANDLE, &sh->state);
+		list_del_init(&sh->lru);
+		raid5_release_stripe(sh);
+	}
+
+	md_wakeup_thread(conf->mddev->thread);
+	/* reuse conf->wait_for_quiescent in recovery */
+	wait_event(conf->wait_for_quiescent,
+		   atomic_read(&conf->active_stripes) == 0);
+
+	log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
+}
+
 static int r5l_recovery_log(struct r5l_log *log)
 {
 	struct mddev *mddev = log->rdev->mddev;
@@ -2160,32 +2200,31 @@ static int r5l_recovery_log(struct r5l_log *log)
 	pos = ctx.pos;
 	ctx.seq += 10000;
 
-	if (ctx.data_only_stripes == 0) {
-		log->next_checkpoint = ctx.pos;
-		r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
-		ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
-	}
 
 	if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0))
 		pr_debug("md/raid:%s: starting from clean shutdown\n",
 			 mdname(mddev));
-	else {
+	else
 		pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
 			 mdname(mddev), ctx.data_only_stripes,
 			 ctx.data_parity_stripes);
 
-		if (ctx.data_only_stripes > 0)
-			if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) {
-				pr_err("md/raid:%s: failed to rewrite stripes to journal\n",
-				       mdname(mddev));
-				return -EIO;
-			}
+	if (ctx.data_only_stripes == 0) {
+		log->next_checkpoint = ctx.pos;
+		r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
+		ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
+	} else if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) {
+		pr_err("md/raid:%s: failed to rewrite stripes to journal\n",
+		       mdname(mddev));
+		return -EIO;
 	}
 
 	log->log_start = ctx.pos;
 	log->seq = ctx.seq;
 	log->last_checkpoint = pos;
 	r5l_write_super(log, pos);
+
+	r5c_recovery_flush_data_only_stripes(log, &ctx);
 	return 0;
 }
 
@@ -2247,6 +2286,10 @@ static ssize_t r5c_journal_mode_store(struct mddev *mddev,
 	    val > R5C_JOURNAL_MODE_WRITE_BACK)
 		return -EINVAL;
 
+	if (raid5_calc_degraded(conf) > 0 &&
+	    val == R5C_JOURNAL_MODE_WRITE_BACK)
+		return -EINVAL;
+
 	mddev_suspend(mddev);
 	conf->log->r5c_journal_mode = val;
 	mddev_resume(mddev);
@@ -2301,6 +2344,16 @@ int r5c_try_caching_write(struct r5conf *conf,
 		set_bit(STRIPE_R5C_CACHING, &sh->state);
 	}
 
+	/*
+	 * When run in degraded mode, array is set to write-through mode.
+	 * This check helps drain pending write safely in the transition to
+	 * write-through mode.
+	 */
+	if (s->failed) {
+		r5c_make_stripe_write_out(sh);
+		return -EAGAIN;
+	}
+
 	for (i = disks; i--; ) {
 		dev = &sh->dev[i];
 		/* if non-overwrite, use writing-out phase */
@@ -2351,6 +2404,8 @@ void r5c_release_extra_page(struct stripe_head *sh)
 		struct page *p = sh->dev[i].orig_page;
 
 		sh->dev[i].orig_page = sh->dev[i].page;
+		clear_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags);
+
 		if (!using_disk_info_extra_page)
 			put_page(p);
 	}
@@ -2555,6 +2610,19 @@ static int r5l_load_log(struct r5l_log *log)
 	return ret;
 }
 
+void r5c_update_on_rdev_error(struct mddev *mddev)
+{
+	struct r5conf *conf = mddev->private;
+	struct r5l_log *log = conf->log;
+
+	if (!log)
+		return;
+
+	if (raid5_calc_degraded(conf) > 0 &&
+	    conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK)
+		schedule_work(&log->disable_writeback_work);
+}
+
 int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 {
 	struct request_queue *q = bdev_get_queue(rdev->bdev);
@@ -2627,6 +2695,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 	spin_lock_init(&log->no_space_stripes_lock);
 
 	INIT_WORK(&log->deferred_io_work, r5l_submit_io_async);
+	INIT_WORK(&log->disable_writeback_work, r5c_disable_writeback_async);
 
 	log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
 	INIT_LIST_HEAD(&log->stripe_in_journal_list);
@@ -2659,6 +2728,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 
 void r5l_exit_log(struct r5l_log *log)
 {
+	flush_work(&log->disable_writeback_work);
 	md_unregister_thread(&log->reclaim_thread);
 	mempool_destroy(log->meta_pool);
 	bioset_free(log->bs);
