@@ -880,6 +880,88 @@ static int validate_master_nodeid(struct dlm_ls *ls, struct dlm_rsb *r,
880
880
}
881
881
}
882
882
883
+ static void __dlm_master_lookup (struct dlm_ls * ls , struct dlm_rsb * r , int our_nodeid ,
884
+ int from_nodeid , bool toss_list , unsigned int flags ,
885
+ int * r_nodeid , int * result )
886
+ {
887
+ int fix_master = (flags & DLM_LU_RECOVER_MASTER );
888
+ int from_master = (flags & DLM_LU_RECOVER_DIR );
889
+
890
+ if (r -> res_dir_nodeid != our_nodeid ) {
891
+ /* should not happen, but may as well fix it and carry on */
892
+ log_error (ls , "%s res_dir %d our %d %s" , __func__ ,
893
+ r -> res_dir_nodeid , our_nodeid , r -> res_name );
894
+ r -> res_dir_nodeid = our_nodeid ;
895
+ }
896
+
897
+ if (fix_master && dlm_is_removed (ls , r -> res_master_nodeid )) {
898
+ /* Recovery uses this function to set a new master when
899
+ * the previous master failed. Setting NEW_MASTER will
900
+ * force dlm_recover_masters to call recover_master on this
901
+ * rsb even though the res_nodeid is no longer removed.
902
+ */
903
+
904
+ r -> res_master_nodeid = from_nodeid ;
905
+ r -> res_nodeid = from_nodeid ;
906
+ rsb_set_flag (r , RSB_NEW_MASTER );
907
+
908
+ if (toss_list ) {
909
+ /* I don't think we should ever find it on toss list. */
910
+ log_error (ls , "%s fix_master on toss" , __func__ );
911
+ dlm_dump_rsb (r );
912
+ }
913
+ }
914
+
915
+ if (from_master && (r -> res_master_nodeid != from_nodeid )) {
916
+ /* this will happen if from_nodeid became master during
917
+ * a previous recovery cycle, and we aborted the previous
918
+ * cycle before recovering this master value
919
+ */
920
+
921
+ log_limit (ls , "%s from_master %d master_nodeid %d res_nodeid %d first %x %s" ,
922
+ __func__ , from_nodeid , r -> res_master_nodeid ,
923
+ r -> res_nodeid , r -> res_first_lkid , r -> res_name );
924
+
925
+ if (r -> res_master_nodeid == our_nodeid ) {
926
+ log_error (ls , "from_master %d our_master" , from_nodeid );
927
+ dlm_dump_rsb (r );
928
+ goto ret_assign ;
929
+ }
930
+
931
+ r -> res_master_nodeid = from_nodeid ;
932
+ r -> res_nodeid = from_nodeid ;
933
+ rsb_set_flag (r , RSB_NEW_MASTER );
934
+ }
935
+
936
+ if (!r -> res_master_nodeid ) {
937
+ /* this will happen if recovery happens while we're looking
938
+ * up the master for this rsb
939
+ */
940
+
941
+ log_debug (ls , "%s master 0 to %d first %x %s" , __func__ ,
942
+ from_nodeid , r -> res_first_lkid , r -> res_name );
943
+ r -> res_master_nodeid = from_nodeid ;
944
+ r -> res_nodeid = from_nodeid ;
945
+ }
946
+
947
+ if (!from_master && !fix_master &&
948
+ (r -> res_master_nodeid == from_nodeid )) {
949
+ /* this can happen when the master sends remove, the dir node
950
+ * finds the rsb on the keep list and ignores the remove,
951
+ * and the former master sends a lookup
952
+ */
953
+
954
+ log_limit (ls , "%s from master %d flags %x first %x %s" ,
955
+ __func__ , from_nodeid , flags , r -> res_first_lkid ,
956
+ r -> res_name );
957
+ }
958
+
959
+ ret_assign :
960
+ * r_nodeid = r -> res_master_nodeid ;
961
+ if (result )
962
+ * result = DLM_LU_MATCH ;
963
+ }
964
+
883
965
/*
884
966
* We're the dir node for this res and another node wants to know the
885
967
* master nodeid. During normal operation (non recovery) this is only
@@ -914,10 +996,8 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len,
914
996
{
915
997
struct dlm_rsb * r = NULL ;
916
998
uint32_t hash , b ;
917
- int from_master = (flags & DLM_LU_RECOVER_DIR );
918
- int fix_master = (flags & DLM_LU_RECOVER_MASTER );
919
999
int our_nodeid = dlm_our_nodeid ();
920
- int dir_nodeid , error , toss_list = 0 ;
1000
+ int dir_nodeid , error ;
921
1001
922
1002
if (len > DLM_RESNAME_MAXLEN )
923
1003
return - EINVAL ;
@@ -949,103 +1029,38 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len,
949
1029
error = dlm_search_rsb_tree (& ls -> ls_rsbtbl [b ].keep , name , len , & r );
950
1030
if (!error ) {
951
1031
/* because the rsb is active, we need to lock_rsb before
952
- checking/changing re_master_nodeid */
1032
+ * checking/changing res_master_nodeid
1033
+ */
953
1034
954
1035
hold_rsb (r );
955
1036
spin_unlock (& ls -> ls_rsbtbl [b ].lock );
956
1037
lock_rsb (r );
957
- } else {
958
- error = dlm_search_rsb_tree (& ls -> ls_rsbtbl [b ].toss , name , len , & r );
959
- if (error )
960
- goto not_found ;
961
-
962
- /* because the rsb is inactive (on toss list), it's not refcounted
963
- * and lock_rsb is not used, but is protected by the rsbtbl lock
964
- */
965
-
966
- toss_list = 1 ;
967
- }
968
-
969
- if (r -> res_dir_nodeid != our_nodeid ) {
970
- /* should not happen, but may as well fix it and carry on */
971
- log_error (ls , "dlm_master_lookup res_dir %d our %d %s" ,
972
- r -> res_dir_nodeid , our_nodeid , r -> res_name );
973
- r -> res_dir_nodeid = our_nodeid ;
974
- }
975
-
976
- if (fix_master && dlm_is_removed (ls , r -> res_master_nodeid )) {
977
- /* Recovery uses this function to set a new master when
978
- the previous master failed. Setting NEW_MASTER will
979
- force dlm_recover_masters to call recover_master on this
980
- rsb even though the res_nodeid is no longer removed. */
981
-
982
- r -> res_master_nodeid = from_nodeid ;
983
- r -> res_nodeid = from_nodeid ;
984
- rsb_set_flag (r , RSB_NEW_MASTER );
985
-
986
- if (toss_list ) {
987
- /* I don't think we should ever find it on toss list. */
988
- log_error (ls , "dlm_master_lookup fix_master on toss" );
989
- dlm_dump_rsb (r );
990
- }
991
- }
992
1038
993
- if (from_master && (r -> res_master_nodeid != from_nodeid )) {
994
- /* this will happen if from_nodeid became master during
995
- a previous recovery cycle, and we aborted the previous
996
- cycle before recovering this master value */
1039
+ __dlm_master_lookup (ls , r , our_nodeid , from_nodeid , false,
1040
+ flags , r_nodeid , result );
997
1041
998
- log_limit (ls , "dlm_master_lookup from_master %d "
999
- "master_nodeid %d res_nodeid %d first %x %s" ,
1000
- from_nodeid , r -> res_master_nodeid , r -> res_nodeid ,
1001
- r -> res_first_lkid , r -> res_name );
1002
-
1003
- if (r -> res_master_nodeid == our_nodeid ) {
1004
- log_error (ls , "from_master %d our_master" , from_nodeid );
1005
- dlm_dump_rsb (r );
1006
- goto out_found ;
1007
- }
1042
+ /* the rsb was active */
1043
+ unlock_rsb (r );
1044
+ put_rsb (r );
1008
1045
1009
- r -> res_master_nodeid = from_nodeid ;
1010
- r -> res_nodeid = from_nodeid ;
1011
- rsb_set_flag (r , RSB_NEW_MASTER );
1046
+ return 0 ;
1012
1047
}
1013
1048
1014
- if (!r -> res_master_nodeid ) {
1015
- /* this will happen if recovery happens while we're looking
1016
- up the master for this rsb */
1017
-
1018
- log_debug (ls , "dlm_master_lookup master 0 to %d first %x %s" ,
1019
- from_nodeid , r -> res_first_lkid , r -> res_name );
1020
- r -> res_master_nodeid = from_nodeid ;
1021
- r -> res_nodeid = from_nodeid ;
1022
- }
1049
+ error = dlm_search_rsb_tree (& ls -> ls_rsbtbl [b ].toss , name , len , & r );
1050
+ if (error )
1051
+ goto not_found ;
1023
1052
1024
- if (!from_master && !fix_master &&
1025
- (r -> res_master_nodeid == from_nodeid )) {
1026
- /* this can happen when the master sends remove, the dir node
1027
- finds the rsb on the keep list and ignores the remove,
1028
- and the former master sends a lookup */
1053
+ /* because the rsb is inactive (on toss list), it's not refcounted
1054
+ * and lock_rsb is not used, but is protected by the rsbtbl lock
1055
+ */
1029
1056
1030
- log_limit (ls , "dlm_master_lookup from master %d flags %x "
1031
- "first %x %s" , from_nodeid , flags ,
1032
- r -> res_first_lkid , r -> res_name );
1033
- }
1057
+ __dlm_master_lookup (ls , r , our_nodeid , from_nodeid , true, flags ,
1058
+ r_nodeid , result );
1034
1059
1035
- out_found :
1036
- * r_nodeid = r -> res_master_nodeid ;
1037
- if (result )
1038
- * result = DLM_LU_MATCH ;
1060
+ r -> res_toss_time = jiffies ;
1061
+ /* the rsb was inactive (on toss list) */
1062
+ spin_unlock (& ls -> ls_rsbtbl [b ].lock );
1039
1063
1040
- if (toss_list ) {
1041
- r -> res_toss_time = jiffies ;
1042
- /* the rsb was inactive (on toss list) */
1043
- spin_unlock (& ls -> ls_rsbtbl [b ].lock );
1044
- } else {
1045
- /* the rsb was active */
1046
- unlock_rsb (r );
1047
- put_rsb (r );
1048
- }
1049
1064
return 0 ;
1050
1065
1051
1066
not_found :
0 commit comments