@@ -5014,21 +5014,32 @@ static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
 	return lkb;
 }
 
-/* Deal with lookups and lkb's marked RESEND from _pre. We may now be the
-   master or dir-node for r. Processing the lkb may result in it being placed
-   back on waiters. */
-
-/* We do this after normal locking has been enabled and any saved messages
-   (in requestqueue) have been processed. We should be confident that at
-   this point we won't get or process a reply to any of these waiting
-   operations. But, new ops may be coming in on the rsbs/locks here from
-   userspace or remotely. */
-
-/* there may have been an overlap unlock/cancel prior to recovery or after
-   recovery. if before, the lkb may still have a pos wait_count; if after, the
-   overlap flag would just have been set and nothing new sent. we can be
-   confident here than any replies to either the initial op or overlap ops
-   prior to recovery have been received. */
+/*
+ * Forced state reset for locks that were in the middle of remote operations
+ * when recovery happened (i.e. lkbs that were on the waiters list, waiting
+ * for a reply from a remote operation.) The lkbs remaining on the waiters
+ * list need to be reevaluated; some may need resending to a different node
+ * than previously, and some may now need local handling rather than remote.
+ *
+ * First, the lkb state for the voided remote operation is forcibly reset,
+ * equivalent to what remove_from_waiters() would normally do:
+ * . lkb removed from ls_waiters list
+ * . lkb wait_type cleared
+ * . lkb waiters_count cleared
+ * . lkb ref count decremented for each waiters_count (almost always 1,
+ *   but possibly 2 in case of cancel/unlock overlapping, which means
+ *   two remote replies were being expected for the lkb.)
+ *
+ * Second, the lkb is reprocessed like an original operation would be,
+ * by passing it to _request_lock or _convert_lock, which will either
+ * process the lkb operation locally, or send it to a remote node again
+ * and put the lkb back onto the waiters list.
+ *
+ * When reprocessing the lkb, we may find that it's flagged for an overlapping
+ * force-unlock or cancel, either from before recovery began, or after recovery
+ * finished. If this is the case, the unlock/cancel is done directly, and the
+ * original operation is not initiated again (no _request_lock/_convert_lock.)
+ */
 
 int dlm_recover_waiters_post(struct dlm_ls *ls)
 {
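
To make the three forced-reset steps in the new header comment concrete, here is a minimal self-contained userspace model, not the kernel code: `model_lkb`, `model_unhold`, and `force_reset_waiter` are illustrative stand-ins for the lkb, unhold_lkb(), and the reset sequence the comment describes. It shows the wait_type clear, the per-expected-reply reference drop, and the list unlink:

```c
#include <assert.h>
#include <stdio.h>

struct model_lkb {
	int wait_type;   /* pending remote op type, 0 when none */
	int wait_count;  /* replies still expected: 1, or 2 on overlap */
	int refs;        /* reference count */
	int on_waiters;  /* 1 while linked on the waiters list */
};

static void model_unhold(struct model_lkb *lkb)
{
	lkb->refs--;
}

/* Forced equivalent of remove_from_waiters(), per the comment above:
 * clear wait_type, drop one reference per expected reply, unlink. */
static void force_reset_waiter(struct model_lkb *lkb)
{
	lkb->wait_type = 0;
	while (lkb->wait_count) {
		lkb->wait_count--;
		model_unhold(lkb);
	}
	lkb->on_waiters = 0;
}

int main(void)
{
	/* overlap case: two replies were expected, so two refs were held,
	 * plus one ref held by this iteration of the recovery loop */
	struct model_lkb lkb = { .wait_type = 3, .wait_count = 2,
				 .refs = 3, .on_waiters = 1 };

	force_reset_waiter(&lkb);
	assert(lkb.wait_count == 0 && lkb.refs == 1 && !lkb.on_waiters);
	printf("post-reset refs=%d (iteration ref only)\n", lkb.refs);
	return 0;
}
```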
@@ -5043,6 +5054,11 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
 			break;
 		}
 
+		/*
+		 * Find an lkb from the waiters list that's been affected by
+		 * recovery node changes, and needs to be reprocessed. Does
+		 * hold_lkb(), adding a refcount.
+		 */
 		lkb = find_resend_waiter(ls);
 		if (!lkb)
 			break;
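
The added comment stresses that find_resend_waiter() hands back its result with an extra reference from hold_lkb(). A small sketch of that scan-and-hold pattern, with illustrative names only (`model_find_resend_waiter`, a plain `next` pointer in place of the kernel list, an `int` flag in place of the RESEND bit):

```c
#include <stdio.h>

struct model_lkb {
	struct model_lkb *next;
	int resend;  /* stands in for the RESEND flag */
	int refs;
};

/* Return the first RESEND-flagged entry with a reference held for the
 * caller; NULL ends the caller's loop, as in the diff above. */
static struct model_lkb *model_find_resend_waiter(struct model_lkb *head)
{
	for (struct model_lkb *lkb = head; lkb; lkb = lkb->next) {
		if (lkb->resend) {
			lkb->refs++;  /* the hold_lkb() the comment mentions */
			return lkb;
		}
	}
	return NULL;
}

int main(void)
{
	struct model_lkb b = { .next = NULL, .resend = 1, .refs = 1 };
	struct model_lkb a = { .next = &b, .resend = 0, .refs = 1 };

	struct model_lkb *hit = model_find_resend_waiter(&a);
	printf("found=%c refs=%d\n", hit == &b ? 'b' : '?', hit->refs);
	return 0;
}
```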
@@ -5051,6 +5067,11 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
 		hold_rsb(r);
 		lock_rsb(r);
 
+		/*
+		 * If the lkb has been flagged for a force unlock or cancel,
+		 * then the reprocessing below will be replaced by just doing
+		 * the unlock/cancel directly.
+		 */
 		mstype = lkb->lkb_wait_type;
 		oc = test_and_clear_bit(DLM_IFL_OVERLAP_CANCEL_BIT,
 					&lkb->lkb_iflags);
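
The oc/ou values come from test_and_clear_bit(), which atomically fetches and clears a flag so a pending overlap unlock/cancel is observed exactly once. A userspace model of that pattern using C11 atomics; the bit positions are illustrative, not the kernel's DLM_IFL_* values:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define OVERLAP_CANCEL_BIT 0u  /* illustrative bit positions */
#define OVERLAP_UNLOCK_BIT 1u

/* Atomically fetch the old flag word and clear one bit; the caller
 * learns whether the bit was set, and no later caller can see it. */
static bool test_and_clear(atomic_uint *flags, unsigned int bit)
{
	unsigned int old = atomic_fetch_and(flags, ~(1u << bit));
	return old & (1u << bit);
}

int main(void)
{
	atomic_uint iflags = 1u << OVERLAP_UNLOCK_BIT;

	bool oc = test_and_clear(&iflags, OVERLAP_CANCEL_BIT);
	bool ou = test_and_clear(&iflags, OVERLAP_UNLOCK_BIT);
	bool again = test_and_clear(&iflags, OVERLAP_UNLOCK_BIT);

	printf("oc=%d ou=%d again=%d\n", oc, ou, again); /* oc=0 ou=1 again=0 */
	return 0;
}
```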
@@ -5064,23 +5085,40 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
 			  r->res_nodeid, lkb->lkb_nodeid, lkb->lkb_wait_nodeid,
 			  dlm_dir_nodeid(r), oc, ou);
 
-		/* At this point we assume that we won't get a reply to any
-		   previous op or overlap op on this lock. First, do a big
-		   remove_from_waiters() for all previous ops. */
+		/*
+		 * No reply to the pre-recovery operation will now be received,
+		 * so a forced equivalent of remove_from_waiters() is needed to
+		 * reset the waiters state that was in place before recovery.
+		 */
 
 		clear_bit(DLM_IFL_RESEND_BIT, &lkb->lkb_iflags);
+
+		/* Forcibly clear wait_type */
 		lkb->lkb_wait_type = 0;
-		/* drop all wait_count references we still
-		 * hold a reference for this iteration.
+
+		/*
+		 * Forcibly reset wait_count and associated refcount. The
+		 * wait_count will almost always be 1, but in case of an
+		 * overlapping unlock/cancel it could be 2: see where
+		 * add_to_waiters() finds the lkb is already on the waiters
+		 * list and does lkb_wait_count++; hold_lkb().
 		 */
 		while (lkb->lkb_wait_count) {
 			lkb->lkb_wait_count--;
 			unhold_lkb(lkb);
 		}
+
+		/* Forcibly remove from waiters list */
 		mutex_lock(&ls->ls_waiters_mutex);
 		list_del_init(&lkb->lkb_wait_reply);
 		mutex_unlock(&ls->ls_waiters_mutex);
 
+		/*
+		 * The lkb is now clear of all prior waiters state and can be
+		 * processed locally, or sent to remote node again, or directly
+		 * cancelled/unlocked.
+		 */
+
 		if (oc || ou) {
 			/* do an unlock or cancel instead of resending */
 			switch (mstype) {
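
This last hunk ends in the oc/ou dispatch: a flagged overlap is unlocked or cancelled directly, otherwise the original operation is re-driven and may go local or back to a remote node. A simplified sketch of that decision, with stand-in names for the kernel's wait_type values and _request_lock()/_convert_lock():

```c
#include <stdio.h>

/* Stand-ins for the kernel's wait_type message values. */
enum op { OP_REQUEST = 1, OP_CONVERT };

static void reprocess(enum op mstype, int oc, int ou)
{
	if (oc || ou) {
		/* do an unlock or cancel instead of resending */
		printf("direct %s, original op %d not re-driven\n",
		       ou ? "unlock" : "cancel", mstype);
		return;
	}
	switch (mstype) {
	case OP_REQUEST:
		printf("re-drive as original request (_request_lock)\n");
		break;
	case OP_CONVERT:
		printf("re-drive as original convert (_convert_lock)\n");
		break;
	}
}

int main(void)
{
	reprocess(OP_REQUEST, 0, 0); /* resent, or handled locally */
	reprocess(OP_CONVERT, 1, 0); /* replaced by a direct cancel */
	return 0;
}
```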