@@ -2337,7 +2337,7 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
 			if (test_and_clear_bit(R5_Overlap, &dev->flags))
-				wake_up(&sh->raid_conf->wait_for_overlap);
+				wake_up_bit(&dev->flags, R5_Overlap);
 		}
 	}
 	local_unlock(&conf->percpu->lock);
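
This hunk is the core of the conversion: instead of one conf-wide wait queue shared by every overlap waiter, each dev->flags word now doubles as a wait channel via the kernel's hashed bit-waitqueues. A minimal sketch of the waiter/waker pairing (illustrative only; my_flags and MY_BIT are stand-ins, not names from the patch):

#include <linux/bitops.h>
#include <linux/sched.h>
#include <linux/wait_bit.h>

static unsigned long my_flags;		/* stand-in for dev->flags   */
#define MY_BIT	3			/* stand-in for R5_Overlap   */

static void waiter(void)
{
	/* Returns at once if MY_BIT is already clear; otherwise sleeps
	 * on the waitqueue hashed from (&my_flags, MY_BIT). */
	wait_on_bit(&my_flags, MY_BIT, TASK_UNINTERRUPTIBLE);
}

static void waker(void)
{
	/* test_and_clear_bit() is fully ordered, so the cleared bit is
	 * visible before the wakeup -- the same shape as every
	 * R5_Overlap site in this patch. */
	if (test_and_clear_bit(MY_BIT, &my_flags))
		wake_up_bit(&my_flags, MY_BIT);
}
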
@@ -3473,7 +3473,7 @@ static bool stripe_bio_overlaps(struct stripe_head *sh, struct bio *bi,
 		 * With PPL only writes to consecutive data chunks within a
 		 * stripe are allowed because for a single stripe_head we can
 		 * only have one PPL entry at a time, which describes one data
-		 * range. Not really an overlap, but wait_for_overlap can be
+		 * range. Not really an overlap, but R5_Overlap can be
 		 * used to handle this.
 		 */
 		sector_t sector;
@@ -3652,7 +3652,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		log_stripe_write_finished(sh);

 		if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
-			wake_up(&conf->wait_for_overlap);
+			wake_up_bit(&sh->dev[i].flags, R5_Overlap);

 		while (bi && bi->bi_iter.bi_sector <
 			sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
@@ -3697,7 +3697,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		sh->dev[i].toread = NULL;
 		spin_unlock_irq(&sh->stripe_lock);
 		if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
-			wake_up(&conf->wait_for_overlap);
+			wake_up_bit(&sh->dev[i].flags, R5_Overlap);
 		if (bi)
 			s->to_read--;
 		while (bi && bi->bi_iter.bi_sector <
@@ -3736,7 +3736,7 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
 	BUG_ON(sh->batch_head);
 	clear_bit(STRIPE_SYNCING, &sh->state);
 	if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
-		wake_up(&conf->wait_for_overlap);
+		wake_up_bit(&sh->dev[sh->pd_idx].flags, R5_Overlap);
 	s->syncing = 0;
 	s->replacing = 0;
 	/* There is nothing more to do for sync/check/repair.
@@ -4877,7 +4877,6 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
 {
 	struct stripe_head *sh, *next;
 	int i;
-	int do_wakeup = 0;

 	list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) {

@@ -4913,7 +4912,7 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
 		spin_unlock_irq(&sh->stripe_lock);
 		for (i = 0; i < sh->disks; i++) {
 			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
-				do_wakeup = 1;
+				wake_up_bit(&sh->dev[i].flags, R5_Overlap);
 			sh->dev[i].flags = head_sh->dev[i].flags &
 				(~((1 << R5_WriteError) | (1 << R5_Overlap)));
 		}
@@ -4927,12 +4926,9 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
 	spin_unlock_irq(&head_sh->stripe_lock);
 	for (i = 0; i < head_sh->disks; i++)
 		if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags))
-			do_wakeup = 1;
+			wake_up_bit(&head_sh->dev[i].flags, R5_Overlap);
 	if (head_sh->state & handle_flags)
 		set_bit(STRIPE_HANDLE, &head_sh->state);
-
-	if (do_wakeup)
-		wake_up(&head_sh->raid_conf->wait_for_overlap);
 }

 static void handle_stripe(struct stripe_head *sh)
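
The do_wakeup accumulator existed to issue one deferred wake_up() on the shared queue after the loops; with per-bit queues each wake_up_bit() only touches waiters keyed to that exact (word, bit) pair, so waking inline as each flag is cleared is cheap and the accumulator can go. Roughly how the helper resolves its queue (paraphrased from kernel/sched/wait_bit.c; details vary by kernel version):

void wake_up_bit(void *word, int bit)
{
	/* Hash (word, bit) into a small global table of waitqueues. */
	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);

	/* Only waiters whose key matches (word, bit) are woken, so
	 * unrelated waiters hashed onto the same queue stay asleep. */
	if (waitqueue_active(wq_head))
		__wake_up(wq_head, TASK_NORMAL, 1, &key);
}
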
@@ -5198,7 +5194,7 @@ static void handle_stripe(struct stripe_head *sh)
 		md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1);
 		clear_bit(STRIPE_SYNCING, &sh->state);
 		if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
-			wake_up(&conf->wait_for_overlap);
+			wake_up_bit(&sh->dev[sh->pd_idx].flags, R5_Overlap);
 	}

 	/* If the failed drives are just a ReadError, then we might need
@@ -5261,7 +5257,7 @@ static void handle_stripe(struct stripe_head *sh)
 	} else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
 		clear_bit(STRIPE_EXPAND_READY, &sh->state);
 		atomic_dec(&conf->reshape_stripes);
-		wake_up(&conf->wait_for_overlap);
+		wake_up(&conf->wait_for_reshape);
 		md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1);
 	}

@@ -5755,12 +5751,11 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 		int d;
 	again:
 		sh = raid5_get_active_stripe(conf, NULL, logical_sector, 0);
-		prepare_to_wait(&conf->wait_for_overlap, &w,
-				TASK_UNINTERRUPTIBLE);
 		set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
 		if (test_bit(STRIPE_SYNCING, &sh->state)) {
 			raid5_release_stripe(sh);
-			schedule();
+			wait_on_bit(&sh->dev[sh->pd_idx].flags, R5_Overlap,
+				    TASK_UNINTERRUPTIBLE);
 			goto again;
 		}
 		clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
@@ -5772,12 +5767,12 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 				set_bit(R5_Overlap, &sh->dev[d].flags);
 				spin_unlock_irq(&sh->stripe_lock);
 				raid5_release_stripe(sh);
-				schedule();
+				wait_on_bit(&sh->dev[d].flags, R5_Overlap,
+					    TASK_UNINTERRUPTIBLE);
 				goto again;
 			}
 		}
 		set_bit(STRIPE_DISCARD, &sh->state);
-		finish_wait(&conf->wait_for_overlap, &w);
 		sh->overwrite_disks = 0;
 		for (d = 0; d < conf->raid_disks; d++) {
 			if (d == sh->pd_idx || d == sh->qd_idx)
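
These two hunks drop the explicit wait-entry bracketing entirely. Condensed, the idiom changes like this (sketch; "busy" stands in for the STRIPE_SYNCING / towrite / toread checks above):

/* Before: park on the shared queue, re-check, then schedule(). */
prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
set_bit(R5_Overlap, &sh->dev[d].flags);
if (busy) {
	raid5_release_stripe(sh);
	schedule();
	goto again;
}
finish_wait(&conf->wait_for_overlap, &w);

/* After: the flag word itself is the wait channel.  Setting the bit
 * before releasing the stripe guarantees that whoever clears
 * R5_Overlap will issue the matching wake_up_bit(). */
set_bit(R5_Overlap, &sh->dev[d].flags);
if (busy) {
	raid5_release_stripe(sh);
	wait_on_bit(&sh->dev[d].flags, R5_Overlap, TASK_UNINTERRUPTIBLE);
	goto again;
}
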
@@ -5854,7 +5849,6 @@ static int add_all_stripe_bios(struct r5conf *conf,
 		struct bio *bi, int forwrite, int previous)
 {
 	int dd_idx;
-	int ret = 1;

 	spin_lock_irq(&sh->stripe_lock);

@@ -5870,14 +5864,19 @@ static int add_all_stripe_bios(struct r5conf *conf,

 		if (stripe_bio_overlaps(sh, bi, dd_idx, forwrite)) {
 			set_bit(R5_Overlap, &dev->flags);
-			ret = 0;
-			continue;
+			spin_unlock_irq(&sh->stripe_lock);
+			raid5_release_stripe(sh);
+			/* release batch_last before wait to avoid risk of deadlock */
+			if (ctx->batch_last) {
+				raid5_release_stripe(ctx->batch_last);
+				ctx->batch_last = NULL;
+			}
+			md_wakeup_thread(conf->mddev->thread);
+			wait_on_bit(&dev->flags, R5_Overlap, TASK_UNINTERRUPTIBLE);
+			return 0;
 		}
 	}

-	if (!ret)
-		goto out;
-
 	for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) {
 		struct r5dev *dev = &sh->dev[dd_idx];

@@ -5893,9 +5892,8 @@ static int add_all_stripe_bios(struct r5conf *conf,
 			  RAID5_STRIPE_SHIFT(conf), ctx->sectors_to_do);
 	}

-out:
 	spin_unlock_irq(&sh->stripe_lock);
-	return ret;
+	return 1;
 }

 enum reshape_loc {
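
Note the semantic change: add_all_stripe_bios() no longer reports an overlap for the caller to back off on; it sleeps on the overlapping dev's R5_Overlap bit itself and returns 0 only after the wait finishes. The ordering in the new branch is the load-bearing part (condensed and annotated; mirrors the hunk above):

set_bit(R5_Overlap, &dev->flags);	/* ask for a wakeup first */
spin_unlock_irq(&sh->stripe_lock);
raid5_release_stripe(sh);

/* Drop the batch reference too: the overlap is only cleared once
 * raid5d processes the stripes, and a held batch_last could pin the
 * very stripe it needs -- the deadlock the patch comment refers to. */
if (ctx->batch_last) {
	raid5_release_stripe(ctx->batch_last);
	ctx->batch_last = NULL;
}

md_wakeup_thread(conf->mddev->thread);	/* kick raid5d, then sleep */
wait_on_bit(&dev->flags, R5_Overlap, TASK_UNINTERRUPTIBLE);
return 0;				/* caller retries from scratch */
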
@@ -5991,17 +5989,17 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
 		goto out_release;
 	}

-	if (test_bit(STRIPE_EXPANDING, &sh->state) ||
-	    !add_all_stripe_bios(conf, ctx, sh, bi, rw, previous)) {
-		/*
-		 * Stripe is busy expanding or add failed due to
-		 * overlap. Flush everything and wait a while.
-		 */
+	if (test_bit(STRIPE_EXPANDING, &sh->state)) {
 		md_wakeup_thread(mddev->thread);
 		ret = STRIPE_SCHEDULE_AND_RETRY;
 		goto out_release;
 	}

+	if (!add_all_stripe_bios(conf, ctx, sh, bi, rw, previous)) {
+		ret = STRIPE_RETRY;
+		goto out;
+	}
+
 	if (stripe_can_batch(sh)) {
 		stripe_add_to_batch_list(conf, sh, ctx->batch_last);
 		if (ctx->batch_last)
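
Since the overlap wait now happens inside add_all_stripe_bios(), an overlap maps to STRIPE_RETRY (re-issue immediately, the wait is already done) while STRIPE_SCHEDULE_AND_RETRY is left for the reshape case only. A condensed sketch of the caller's dispatch in raid5_make_request() (illustrative; the real loop also walks ctx.sectors_to_do):

while (1) {
	enum stripe_result res;

	res = make_stripe_request(mddev, conf, &ctx, logical_sector, bi);
	if (res == STRIPE_FAIL)
		break;
	if (res == STRIPE_RETRY)
		continue;	/* overlap already waited out */
	if (res == STRIPE_SCHEDULE_AND_RETRY) {
		/* Reshape-only path now, hence WARN_ON_ONCE(!on_wq):
		 * sleep on the woken-wait entry added earlier. */
		wait_woken(&wait, TASK_UNINTERRUPTIBLE,
			   MAX_SCHEDULE_TIMEOUT);
		continue;
	}
	/* STRIPE_SUCCESS: pick the next stripe or finish (omitted) */
	break;
}
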
@@ -6072,6 +6070,7 @@ static sector_t raid5_bio_lowest_chunk_sector(struct r5conf *conf,
 static bool raid5_make_request(struct mddev *mddev, struct bio *bi)
 {
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	bool on_wq;
 	struct r5conf *conf = mddev->private;
 	sector_t logical_sector;
 	struct stripe_request_ctx ctx = {};
@@ -6145,11 +6144,15 @@ static bool raid5_make_request(struct mddev *mddev, struct bio *bi)
 	 * sequential IO pattern. We don't bother with the optimization when
 	 * reshaping as the performance benefit is not worth the complexity.
 	 */
-	if (likely(conf->reshape_progress == MaxSector))
+	if (likely(conf->reshape_progress == MaxSector)) {
 		logical_sector = raid5_bio_lowest_chunk_sector(conf, bi);
+		on_wq = false;
+	} else {
+		add_wait_queue(&conf->wait_for_reshape, &wait);
+		on_wq = true;
+	}
 	s = (logical_sector - ctx.first_sector) >> RAID5_STRIPE_SHIFT(conf);

-	add_wait_queue(&conf->wait_for_overlap, &wait);
 	while (1) {
 		res = make_stripe_request(mddev, conf, &ctx, logical_sector,
 					  bi);
@@ -6160,6 +6163,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio *bi)
 			continue;

 		if (res == STRIPE_SCHEDULE_AND_RETRY) {
+			WARN_ON_ONCE(!on_wq);
 			/*
 			 * Must release the reference to batch_last before
 			 * scheduling and waiting for work to be done,
@@ -6184,7 +6188,8 @@ static bool raid5_make_request(struct mddev *mddev, struct bio *bi)
 			logical_sector = ctx.first_sector +
 				(s << RAID5_STRIPE_SHIFT(conf));
 	}
-	remove_wait_queue(&conf->wait_for_overlap, &wait);
+	if (unlikely(on_wq))
+		remove_wait_queue(&conf->wait_for_reshape, &wait);

 	if (ctx.batch_last)
 		raid5_release_stripe(ctx.batch_last);
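
The registration on conf->wait_for_reshape is now conditional because only a reshape can still force a scheduled retry; on_wq remembers whether the entry was queued so the teardown stays balanced. The enclosing pattern, reduced to its shape (sketch, reusing names from the function above):

DEFINE_WAIT_FUNC(wait, woken_wake_function);
bool on_wq = false;

if (unlikely(conf->reshape_progress != MaxSector)) {
	add_wait_queue(&conf->wait_for_reshape, &wait);
	on_wq = true;
}

/* ... issue stripe requests; a STRIPE_SCHEDULE_AND_RETRY result
 * sleeps via wait_woken() and is woken by wake_up() on
 * conf->wait_for_reshape from the reshape and quiesce paths ... */

if (unlikely(on_wq))
	remove_wait_queue(&conf->wait_for_reshape, &wait);
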
@@ -6337,7 +6342,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
 	     : (safepos < writepos && readpos > writepos)) ||
 	    time_after(jiffies, conf->reshape_checkpoint + 10 * HZ)) {
 		/* Cannot proceed until we've updated the superblock... */
-		wait_event(conf->wait_for_overlap,
+		wait_event(conf->wait_for_reshape,
 			   atomic_read(&conf->reshape_stripes) == 0
 			   || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
 		if (atomic_read(&conf->reshape_stripes) != 0)
@@ -6363,7 +6368,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
 		spin_lock_irq(&conf->device_lock);
 		conf->reshape_safe = mddev->reshape_position;
 		spin_unlock_irq(&conf->device_lock);
-		wake_up(&conf->wait_for_overlap);
+		wake_up(&conf->wait_for_reshape);
 		sysfs_notify_dirent_safe(mddev->sysfs_completed);
 	}

@@ -6446,7 +6451,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
 	    (sector_nr - mddev->curr_resync_completed) * 2
 	    >= mddev->resync_max - mddev->curr_resync_completed) {
 		/* Cannot proceed until we've updated the superblock... */
-		wait_event(conf->wait_for_overlap,
+		wait_event(conf->wait_for_reshape,
 			   atomic_read(&conf->reshape_stripes) == 0
 			   || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
 		if (atomic_read(&conf->reshape_stripes) != 0)
@@ -6472,7 +6477,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
 	spin_lock_irq(&conf->device_lock);
 	conf->reshape_safe = mddev->reshape_position;
 	spin_unlock_irq(&conf->device_lock);
-	wake_up(&conf->wait_for_overlap);
+	wake_up(&conf->wait_for_reshape);
 	sysfs_notify_dirent_safe(mddev->sysfs_completed);
 }
ret:
@@ -6507,7 +6512,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_nr
 	}

 	/* Allow raid5_quiesce to complete */
-	wait_event(conf->wait_for_overlap, conf->quiesce != 2);
+	wait_event(conf->wait_for_reshape, conf->quiesce != 2);

 	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
 		return reshape_request(mddev, sector_nr, skipped);
@@ -7493,7 +7498,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)

 	init_waitqueue_head(&conf->wait_for_quiescent);
 	init_waitqueue_head(&conf->wait_for_stripe);
-	init_waitqueue_head(&conf->wait_for_overlap);
+	init_waitqueue_head(&conf->wait_for_reshape);
 	INIT_LIST_HEAD(&conf->handle_list);
 	INIT_LIST_HEAD(&conf->loprio_list);
 	INIT_LIST_HEAD(&conf->hold_list);
@@ -8552,7 +8557,7 @@ static void end_reshape(struct r5conf *conf)
 			    !test_bit(In_sync, &rdev->flags))
 				rdev->recovery_offset = MaxSector;
 		spin_unlock_irq(&conf->device_lock);
-		wake_up(&conf->wait_for_overlap);
+		wake_up(&conf->wait_for_reshape);

 		mddev_update_io_opt(conf->mddev,
 				    conf->raid_disks - conf->max_degraded);
@@ -8616,13 +8621,13 @@ static void raid5_quiesce(struct mddev *mddev, int quiesce)
 		conf->quiesce = 1;
 		unlock_all_device_hash_locks_irq(conf);
 		/* allow reshape to continue */
-		wake_up(&conf->wait_for_overlap);
+		wake_up(&conf->wait_for_reshape);
 	} else {
 		/* re-enable writes */
 		lock_all_device_hash_locks_irq(conf);
 		conf->quiesce = 0;
 		wake_up(&conf->wait_for_quiescent);
-		wake_up(&conf->wait_for_overlap);
+		wake_up(&conf->wait_for_reshape);
 		unlock_all_device_hash_locks_irq(conf);
 	}
 	log_quiesce(conf, quiesce);
@@ -8941,7 +8946,7 @@ static void raid5_prepare_suspend(struct mddev *mddev)
 {
 	struct r5conf *conf = mddev->private;

-	wake_up(&conf->wait_for_overlap);
+	wake_up(&conf->wait_for_reshape);
 }

 static struct md_personality raid6_personality =