@@ -162,6 +162,8 @@ struct r5l_log {
162
162
163
163
/* to submit async io_units, to fulfill ordering of flush */
164
164
struct work_struct deferred_io_work ;
165
+ /* to disable write back while in degraded mode */
166
+ struct work_struct disable_writeback_work ;
165
167
};
166
168
167
169
/*
@@ -611,6 +613,21 @@ static void r5l_submit_io_async(struct work_struct *work)
611
613
r5l_do_submit_io (log , io );
612
614
}
613
615
616
+ static void r5c_disable_writeback_async (struct work_struct * work )
617
+ {
618
+ struct r5l_log * log = container_of (work , struct r5l_log ,
619
+ disable_writeback_work );
620
+ struct mddev * mddev = log -> rdev -> mddev ;
621
+
622
+ if (log -> r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH )
623
+ return ;
624
+ pr_info ("md/raid:%s: Disabling writeback cache for degraded array.\n" ,
625
+ mdname (mddev ));
626
+ mddev_suspend (mddev );
627
+ log -> r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH ;
628
+ mddev_resume (mddev );
629
+ }
630
+
614
631
static void r5l_submit_current_io (struct r5l_log * log )
615
632
{
616
633
struct r5l_io_unit * io = log -> current_io ;
@@ -1393,8 +1410,6 @@ static void r5l_do_reclaim(struct r5l_log *log)
1393
1410
next_checkpoint = r5c_calculate_new_cp (conf );
1394
1411
spin_unlock_irq (& log -> io_list_lock );
1395
1412
1396
- BUG_ON (reclaimable < 0 );
1397
-
1398
1413
if (reclaimable == 0 || !write_super )
1399
1414
return ;
1400
1415
@@ -2062,7 +2077,7 @@ static int
2062
2077
r5c_recovery_rewrite_data_only_stripes (struct r5l_log * log ,
2063
2078
struct r5l_recovery_ctx * ctx )
2064
2079
{
2065
- struct stripe_head * sh , * next ;
2080
+ struct stripe_head * sh ;
2066
2081
struct mddev * mddev = log -> rdev -> mddev ;
2067
2082
struct page * page ;
2068
2083
sector_t next_checkpoint = MaxSector ;
@@ -2076,7 +2091,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
2076
2091
2077
2092
WARN_ON (list_empty (& ctx -> cached_list ));
2078
2093
2079
- list_for_each_entry_safe (sh , next , & ctx -> cached_list , lru ) {
2094
+ list_for_each_entry (sh , & ctx -> cached_list , lru ) {
2080
2095
struct r5l_meta_block * mb ;
2081
2096
int i ;
2082
2097
int offset ;
@@ -2126,14 +2141,39 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
2126
2141
ctx -> pos = write_pos ;
2127
2142
ctx -> seq += 1 ;
2128
2143
next_checkpoint = sh -> log_start ;
2129
- list_del_init (& sh -> lru );
2130
- raid5_release_stripe (sh );
2131
2144
}
2132
2145
log -> next_checkpoint = next_checkpoint ;
2133
2146
__free_page (page );
2134
2147
return 0 ;
2135
2148
}
2136
2149
2150
+ static void r5c_recovery_flush_data_only_stripes (struct r5l_log * log ,
2151
+ struct r5l_recovery_ctx * ctx )
2152
+ {
2153
+ struct mddev * mddev = log -> rdev -> mddev ;
2154
+ struct r5conf * conf = mddev -> private ;
2155
+ struct stripe_head * sh , * next ;
2156
+
2157
+ if (ctx -> data_only_stripes == 0 )
2158
+ return ;
2159
+
2160
+ log -> r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_BACK ;
2161
+
2162
+ list_for_each_entry_safe (sh , next , & ctx -> cached_list , lru ) {
2163
+ r5c_make_stripe_write_out (sh );
2164
+ set_bit (STRIPE_HANDLE , & sh -> state );
2165
+ list_del_init (& sh -> lru );
2166
+ raid5_release_stripe (sh );
2167
+ }
2168
+
2169
+ md_wakeup_thread (conf -> mddev -> thread );
2170
+ /* reuse conf->wait_for_quiescent in recovery */
2171
+ wait_event (conf -> wait_for_quiescent ,
2172
+ atomic_read (& conf -> active_stripes ) == 0 );
2173
+
2174
+ log -> r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH ;
2175
+ }
2176
+
2137
2177
static int r5l_recovery_log (struct r5l_log * log )
2138
2178
{
2139
2179
struct mddev * mddev = log -> rdev -> mddev ;
@@ -2160,32 +2200,31 @@ static int r5l_recovery_log(struct r5l_log *log)
2160
2200
pos = ctx .pos ;
2161
2201
ctx .seq += 10000 ;
2162
2202
2163
- if (ctx .data_only_stripes == 0 ) {
2164
- log -> next_checkpoint = ctx .pos ;
2165
- r5l_log_write_empty_meta_block (log , ctx .pos , ctx .seq ++ );
2166
- ctx .pos = r5l_ring_add (log , ctx .pos , BLOCK_SECTORS );
2167
- }
2168
2203
2169
2204
if ((ctx .data_only_stripes == 0 ) && (ctx .data_parity_stripes == 0 ))
2170
2205
pr_debug ("md/raid:%s: starting from clean shutdown\n" ,
2171
2206
mdname (mddev ));
2172
- else {
2207
+ else
2173
2208
pr_debug ("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n" ,
2174
2209
mdname (mddev ), ctx .data_only_stripes ,
2175
2210
ctx .data_parity_stripes );
2176
2211
2177
- if (ctx .data_only_stripes > 0 )
2178
- if (r5c_recovery_rewrite_data_only_stripes (log , & ctx )) {
2179
- pr_err ("md/raid:%s: failed to rewrite stripes to journal\n" ,
2180
- mdname (mddev ));
2181
- return - EIO ;
2182
- }
2212
+ if (ctx .data_only_stripes == 0 ) {
2213
+ log -> next_checkpoint = ctx .pos ;
2214
+ r5l_log_write_empty_meta_block (log , ctx .pos , ctx .seq ++ );
2215
+ ctx .pos = r5l_ring_add (log , ctx .pos , BLOCK_SECTORS );
2216
+ } else if (r5c_recovery_rewrite_data_only_stripes (log , & ctx )) {
2217
+ pr_err ("md/raid:%s: failed to rewrite stripes to journal\n" ,
2218
+ mdname (mddev ));
2219
+ return - EIO ;
2183
2220
}
2184
2221
2185
2222
log -> log_start = ctx .pos ;
2186
2223
log -> seq = ctx .seq ;
2187
2224
log -> last_checkpoint = pos ;
2188
2225
r5l_write_super (log , pos );
2226
+
2227
+ r5c_recovery_flush_data_only_stripes (log , & ctx );
2189
2228
return 0 ;
2190
2229
}
2191
2230
@@ -2247,6 +2286,10 @@ static ssize_t r5c_journal_mode_store(struct mddev *mddev,
2247
2286
val > R5C_JOURNAL_MODE_WRITE_BACK )
2248
2287
return - EINVAL ;
2249
2288
2289
+ if (raid5_calc_degraded (conf ) > 0 &&
2290
+ val == R5C_JOURNAL_MODE_WRITE_BACK )
2291
+ return - EINVAL ;
2292
+
2250
2293
mddev_suspend (mddev );
2251
2294
conf -> log -> r5c_journal_mode = val ;
2252
2295
mddev_resume (mddev );
@@ -2301,6 +2344,16 @@ int r5c_try_caching_write(struct r5conf *conf,
2301
2344
set_bit (STRIPE_R5C_CACHING , & sh -> state );
2302
2345
}
2303
2346
2347
+ /*
2348
+ * When run in degraded mode, array is set to write-through mode.
2349
+ * This check helps drain pending write safely in the transition to
2350
+ * write-through mode.
2351
+ */
2352
+ if (s -> failed ) {
2353
+ r5c_make_stripe_write_out (sh );
2354
+ return - EAGAIN ;
2355
+ }
2356
+
2304
2357
for (i = disks ; i -- ; ) {
2305
2358
dev = & sh -> dev [i ];
2306
2359
/* if non-overwrite, use writing-out phase */
@@ -2351,6 +2404,8 @@ void r5c_release_extra_page(struct stripe_head *sh)
2351
2404
struct page * p = sh -> dev [i ].orig_page ;
2352
2405
2353
2406
sh -> dev [i ].orig_page = sh -> dev [i ].page ;
2407
+ clear_bit (R5_OrigPageUPTDODATE , & sh -> dev [i ].flags );
2408
+
2354
2409
if (!using_disk_info_extra_page )
2355
2410
put_page (p );
2356
2411
}
@@ -2555,6 +2610,19 @@ static int r5l_load_log(struct r5l_log *log)
2555
2610
return ret ;
2556
2611
}
2557
2612
2613
+ void r5c_update_on_rdev_error (struct mddev * mddev )
2614
+ {
2615
+ struct r5conf * conf = mddev -> private ;
2616
+ struct r5l_log * log = conf -> log ;
2617
+
2618
+ if (!log )
2619
+ return ;
2620
+
2621
+ if (raid5_calc_degraded (conf ) > 0 &&
2622
+ conf -> log -> r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK )
2623
+ schedule_work (& log -> disable_writeback_work );
2624
+ }
2625
+
2558
2626
int r5l_init_log (struct r5conf * conf , struct md_rdev * rdev )
2559
2627
{
2560
2628
struct request_queue * q = bdev_get_queue (rdev -> bdev );
@@ -2627,6 +2695,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
2627
2695
spin_lock_init (& log -> no_space_stripes_lock );
2628
2696
2629
2697
INIT_WORK (& log -> deferred_io_work , r5l_submit_io_async );
2698
+ INIT_WORK (& log -> disable_writeback_work , r5c_disable_writeback_async );
2630
2699
2631
2700
log -> r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH ;
2632
2701
INIT_LIST_HEAD (& log -> stripe_in_journal_list );
@@ -2659,6 +2728,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
2659
2728
2660
2729
void r5l_exit_log (struct r5l_log * log )
2661
2730
{
2731
+ flush_work (& log -> disable_writeback_work );
2662
2732
md_unregister_thread (& log -> reclaim_thread );
2663
2733
mempool_destroy (log -> meta_pool );
2664
2734
bioset_free (log -> bs );
0 commit comments