Skip to content

Commit 4015974

Browse files
Alexander Aringteigland
authored andcommitted
dlm: cleanup lock handling in dlm_master_lookup
This patch will remove the following warning by sparse: fs/dlm/lock.c:1049:9: warning: context imbalance in 'dlm_master_lookup' - different lock contexts for basic block I tried to find any issues with the current handling and I did not find any. However it is hard to follow the lock handling in this area of dlm_master_lookup() and I suppose that sparse cannot realize that there are no issues. The variable "toss_list" makes it really hard to follow the lock handling because if it's set the rsb lock/refcount isn't held but the ls->ls_rsbtbl[b].lock is held and this is one reason why the rsb lock/refcount does not need to be held. If it's not set the ls->ls_rsbtbl[b].lock is not held but the rsb lock/refcount is held. The indicator of toss_list will be used to store the actual lock state. Another possibility is that a retry can happen and then it's hard to follow the specific code part. I did not find any issues but sparse cannot realize that there are no issues. To make it more easier to understand for developers and sparse as well, we remove the toss_list variable which indicates a specific lock state and move handling in between of this lock state in a separate function. This function can be called now in case when the initial lock states are taken which was previously signalled if toss_list was set or not. The advantage here is that we can release all locks/refcounts in mostly the same code block as it was taken. Afterwards sparse had no issues to figure out that there are no problems with the current lock behaviour. Signed-off-by: Alexander Aring <[email protected]> Signed-off-by: David Teigland <[email protected]>
1 parent e91ce03 commit 4015974

File tree

1 file changed

+102
-87
lines changed

1 file changed

+102
-87
lines changed

fs/dlm/lock.c

Lines changed: 102 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -880,6 +880,88 @@ static int validate_master_nodeid(struct dlm_ls *ls, struct dlm_rsb *r,
880880
}
881881
}
882882

883+
static void __dlm_master_lookup(struct dlm_ls *ls, struct dlm_rsb *r, int our_nodeid,
884+
int from_nodeid, bool toss_list, unsigned int flags,
885+
int *r_nodeid, int *result)
886+
{
887+
int fix_master = (flags & DLM_LU_RECOVER_MASTER);
888+
int from_master = (flags & DLM_LU_RECOVER_DIR);
889+
890+
if (r->res_dir_nodeid != our_nodeid) {
891+
/* should not happen, but may as well fix it and carry on */
892+
log_error(ls, "%s res_dir %d our %d %s", __func__,
893+
r->res_dir_nodeid, our_nodeid, r->res_name);
894+
r->res_dir_nodeid = our_nodeid;
895+
}
896+
897+
if (fix_master && dlm_is_removed(ls, r->res_master_nodeid)) {
898+
/* Recovery uses this function to set a new master when
899+
* the previous master failed. Setting NEW_MASTER will
900+
* force dlm_recover_masters to call recover_master on this
901+
* rsb even though the res_nodeid is no longer removed.
902+
*/
903+
904+
r->res_master_nodeid = from_nodeid;
905+
r->res_nodeid = from_nodeid;
906+
rsb_set_flag(r, RSB_NEW_MASTER);
907+
908+
if (toss_list) {
909+
/* I don't think we should ever find it on toss list. */
910+
log_error(ls, "%s fix_master on toss", __func__);
911+
dlm_dump_rsb(r);
912+
}
913+
}
914+
915+
if (from_master && (r->res_master_nodeid != from_nodeid)) {
916+
/* this will happen if from_nodeid became master during
917+
* a previous recovery cycle, and we aborted the previous
918+
* cycle before recovering this master value
919+
*/
920+
921+
log_limit(ls, "%s from_master %d master_nodeid %d res_nodeid %d first %x %s",
922+
__func__, from_nodeid, r->res_master_nodeid,
923+
r->res_nodeid, r->res_first_lkid, r->res_name);
924+
925+
if (r->res_master_nodeid == our_nodeid) {
926+
log_error(ls, "from_master %d our_master", from_nodeid);
927+
dlm_dump_rsb(r);
928+
goto ret_assign;
929+
}
930+
931+
r->res_master_nodeid = from_nodeid;
932+
r->res_nodeid = from_nodeid;
933+
rsb_set_flag(r, RSB_NEW_MASTER);
934+
}
935+
936+
if (!r->res_master_nodeid) {
937+
/* this will happen if recovery happens while we're looking
938+
* up the master for this rsb
939+
*/
940+
941+
log_debug(ls, "%s master 0 to %d first %x %s", __func__,
942+
from_nodeid, r->res_first_lkid, r->res_name);
943+
r->res_master_nodeid = from_nodeid;
944+
r->res_nodeid = from_nodeid;
945+
}
946+
947+
if (!from_master && !fix_master &&
948+
(r->res_master_nodeid == from_nodeid)) {
949+
/* this can happen when the master sends remove, the dir node
950+
* finds the rsb on the keep list and ignores the remove,
951+
* and the former master sends a lookup
952+
*/
953+
954+
log_limit(ls, "%s from master %d flags %x first %x %s",
955+
__func__, from_nodeid, flags, r->res_first_lkid,
956+
r->res_name);
957+
}
958+
959+
ret_assign:
960+
*r_nodeid = r->res_master_nodeid;
961+
if (result)
962+
*result = DLM_LU_MATCH;
963+
}
964+
883965
/*
884966
* We're the dir node for this res and another node wants to know the
885967
* master nodeid. During normal operation (non recovery) this is only
@@ -914,10 +996,8 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len,
914996
{
915997
struct dlm_rsb *r = NULL;
916998
uint32_t hash, b;
917-
int from_master = (flags & DLM_LU_RECOVER_DIR);
918-
int fix_master = (flags & DLM_LU_RECOVER_MASTER);
919999
int our_nodeid = dlm_our_nodeid();
920-
int dir_nodeid, error, toss_list = 0;
1000+
int dir_nodeid, error;
9211001

9221002
if (len > DLM_RESNAME_MAXLEN)
9231003
return -EINVAL;
@@ -949,103 +1029,38 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len,
9491029
error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r);
9501030
if (!error) {
9511031
/* because the rsb is active, we need to lock_rsb before
952-
checking/changing re_master_nodeid */
1032+
* checking/changing re_master_nodeid
1033+
*/
9531034

9541035
hold_rsb(r);
9551036
spin_unlock(&ls->ls_rsbtbl[b].lock);
9561037
lock_rsb(r);
957-
} else {
958-
error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r);
959-
if (error)
960-
goto not_found;
961-
962-
/* because the rsb is inactive (on toss list), it's not refcounted
963-
* and lock_rsb is not used, but is protected by the rsbtbl lock
964-
*/
965-
966-
toss_list = 1;
967-
}
968-
969-
if (r->res_dir_nodeid != our_nodeid) {
970-
/* should not happen, but may as well fix it and carry on */
971-
log_error(ls, "dlm_master_lookup res_dir %d our %d %s",
972-
r->res_dir_nodeid, our_nodeid, r->res_name);
973-
r->res_dir_nodeid = our_nodeid;
974-
}
975-
976-
if (fix_master && dlm_is_removed(ls, r->res_master_nodeid)) {
977-
/* Recovery uses this function to set a new master when
978-
the previous master failed. Setting NEW_MASTER will
979-
force dlm_recover_masters to call recover_master on this
980-
rsb even though the res_nodeid is no longer removed. */
981-
982-
r->res_master_nodeid = from_nodeid;
983-
r->res_nodeid = from_nodeid;
984-
rsb_set_flag(r, RSB_NEW_MASTER);
985-
986-
if (toss_list) {
987-
/* I don't think we should ever find it on toss list. */
988-
log_error(ls, "dlm_master_lookup fix_master on toss");
989-
dlm_dump_rsb(r);
990-
}
991-
}
9921038

993-
if (from_master && (r->res_master_nodeid != from_nodeid)) {
994-
/* this will happen if from_nodeid became master during
995-
a previous recovery cycle, and we aborted the previous
996-
cycle before recovering this master value */
1039+
__dlm_master_lookup(ls, r, our_nodeid, from_nodeid, false,
1040+
flags, r_nodeid, result);
9971041

998-
log_limit(ls, "dlm_master_lookup from_master %d "
999-
"master_nodeid %d res_nodeid %d first %x %s",
1000-
from_nodeid, r->res_master_nodeid, r->res_nodeid,
1001-
r->res_first_lkid, r->res_name);
1002-
1003-
if (r->res_master_nodeid == our_nodeid) {
1004-
log_error(ls, "from_master %d our_master", from_nodeid);
1005-
dlm_dump_rsb(r);
1006-
goto out_found;
1007-
}
1042+
/* the rsb was active */
1043+
unlock_rsb(r);
1044+
put_rsb(r);
10081045

1009-
r->res_master_nodeid = from_nodeid;
1010-
r->res_nodeid = from_nodeid;
1011-
rsb_set_flag(r, RSB_NEW_MASTER);
1046+
return 0;
10121047
}
10131048

1014-
if (!r->res_master_nodeid) {
1015-
/* this will happen if recovery happens while we're looking
1016-
up the master for this rsb */
1017-
1018-
log_debug(ls, "dlm_master_lookup master 0 to %d first %x %s",
1019-
from_nodeid, r->res_first_lkid, r->res_name);
1020-
r->res_master_nodeid = from_nodeid;
1021-
r->res_nodeid = from_nodeid;
1022-
}
1049+
error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r);
1050+
if (error)
1051+
goto not_found;
10231052

1024-
if (!from_master && !fix_master &&
1025-
(r->res_master_nodeid == from_nodeid)) {
1026-
/* this can happen when the master sends remove, the dir node
1027-
finds the rsb on the keep list and ignores the remove,
1028-
and the former master sends a lookup */
1053+
/* because the rsb is inactive (on toss list), it's not refcounted
1054+
* and lock_rsb is not used, but is protected by the rsbtbl lock
1055+
*/
10291056

1030-
log_limit(ls, "dlm_master_lookup from master %d flags %x "
1031-
"first %x %s", from_nodeid, flags,
1032-
r->res_first_lkid, r->res_name);
1033-
}
1057+
__dlm_master_lookup(ls, r, our_nodeid, from_nodeid, true, flags,
1058+
r_nodeid, result);
10341059

1035-
out_found:
1036-
*r_nodeid = r->res_master_nodeid;
1037-
if (result)
1038-
*result = DLM_LU_MATCH;
1060+
r->res_toss_time = jiffies;
1061+
/* the rsb was inactive (on toss list) */
1062+
spin_unlock(&ls->ls_rsbtbl[b].lock);
10391063

1040-
if (toss_list) {
1041-
r->res_toss_time = jiffies;
1042-
/* the rsb was inactive (on toss list) */
1043-
spin_unlock(&ls->ls_rsbtbl[b].lock);
1044-
} else {
1045-
/* the rsb was active */
1046-
unlock_rsb(r);
1047-
put_rsb(r);
1048-
}
10491064
return 0;
10501065

10511066
not_found:

0 commit comments

Comments
 (0)