@@ -1008,6 +1008,31 @@ stop_rabbitmq_nodes(Config) ->
10081008 fun (NodeConfig ) ->
10091009 stop_rabbitmq_node (Config , NodeConfig )
10101010 end ),
1011+ % % Except if disabled, we search for crashes logged in the test nodes after
1012+ % % they are stopped. If we find some, we log them again in the common_test
1013+ % % logs and throw an exception to make the test fail.
1014+ FindCrashes = case rabbit_ct_helpers :get_config (Config , find_crashes ) of
1015+ true ->
1016+ true ;
1017+ false ->
1018+ false ;
1019+ undefined ->
1020+ case os :getenv (" FIND_CRASHES" ) of
1021+ undefined -> true ;
1022+ " 1" -> true ;
1023+ " yes" -> true ;
1024+ " true" -> true ;
1025+ _ -> false
1026+ end
1027+ end ,
1028+ case FindCrashes of
1029+ true ->
1030+ % % TODO: Make the ignore list configurable.
1031+ IgnoredCrashes = [" ** force_vhost_failure" ],
1032+ find_crashes_in_logs (NodeConfigs , IgnoredCrashes );
1033+ false ->
1034+ ok
1035+ end ,
10111036 proplists :delete (rmq_nodes , Config ).
10121037
10131038stop_rabbitmq_node (Config , NodeConfig ) ->
@@ -1029,6 +1054,84 @@ stop_rabbitmq_node(Config, NodeConfig) ->
10291054 end ,
10301055 NodeConfig .
10311056
%% Counts the crash reports found in the log files of every node in
%% NodeConfigs (IgnoredCrashes is handed through to the per-node counter),
%% prints the total to the common_test log and asserts that the total is 0.
find_crashes_in_logs(NodeConfigs, IgnoredCrashes) ->
    ct:pal(
      " Looking up any crash reports in the nodes' log files. If we find "
      " some, they will appear below:"),
    %% Nodes are visited in list order, so any report printed while counting
    %% appears in the same order as the nodes.
    PerNodeCounts = [count_crashes_in_logs(NodeConfig, IgnoredCrashes)
                     || NodeConfig <- NodeConfigs],
    CrashesCount = lists:sum(PerNodeCounts),
    ct:pal(" Found ~b crash report(s)", [CrashesCount]),
    ?assertEqual(0, CrashesCount).
1069+
%% Returns the sum of the crash counts of all the log files listed under
%% the log_locations key of NodeConfig.
count_crashes_in_logs(NodeConfig, IgnoredCrashes) ->
    PerFileCounts = [count_crashes_in_log(LogLocation, IgnoredCrashes)
                     || LogLocation <- ?config(log_locations, NodeConfig)],
    lists:sum(PerFileCounts).
1077+
%% Reads the log file at LogLocation and returns the number of crash
%% reports counted in its content. A file that cannot be read counts as 0.
count_crashes_in_log(LogLocation, IgnoredCrashes) ->
    ReadResult = file:read_file(LogLocation),
    case ReadResult of
        {ok, LogData} ->
            count_crashes_in_content(LogData, IgnoredCrashes);
        _Unreadable ->
            0
    end.
1083+
%% Splits Content at the beginning of every line and returns the number of
%% gen_server termination reports counted among the resulting lines.
count_crashes_in_content(Content, IgnoredCrashes) ->
    %% The pattern must be the bare anchor "^": with `multiline` it matches
    %% at each line start. Any extra character in the pattern (e.g. a
    %% leading space) would have to precede a line start and could never
    %% match, leaving the content unsplit.
    Lines = re:split(Content, "^", [multiline]),
    count_gen_server_terminations(Lines, IgnoredCrashes).
1088+
%% Returns the number of gen_server termination reports found in Lines.
count_gen_server_terminations(Lines, IgnoredCrashes) ->
    count_gen_server_terminations(Lines, 0, IgnoredCrashes).

%% Walks Lines looking for the header of a gen_server termination report.
%% Count is the number of reports counted so far.
%%
%% When a header is found, the "<pid> " text preceding it is captured as
%% Prefix and the rest of the report is collected by
%% capture_gen_server_termination/5, which resumes the scan afterwards.
count_gen_server_terminations([Line | Rest], Count, IgnoredCrashes) ->
    %% The pattern starts directly with the capture group: it must not
    %% require any character in front of the pid, otherwise a report whose
    %% header begins the line would be missed.
    Ret = re:run(
            Line,
            "(<[0-9.]+> )[*]{2} Generic server .+ terminating$",
            [{capture, all_but_first, list}]),
    case Ret of
        {match, [Prefix]} ->
            capture_gen_server_termination(
              Rest, Prefix, [Line], Count, IgnoredCrashes);
        nomatch ->
            count_gen_server_terminations(Rest, Count, IgnoredCrashes)
    end;
count_gen_server_terminations([], Count, _IgnoredCrashes) ->
    Count.
1107+
%% Collects the lines following the header of a gen_server termination
%% report.
%%
%% Prefix is the "<pid> " text captured from the header line; Acc holds the
%% lines of the report collected so far, most recent first.
%%
%% A line belongs to the report when Prefix is followed, up to the end of
%% the line, by text starting with a space, text starting with `*`, or
%% nothing at all. That trailing text is compared to IgnoredCrashes:
%%   - if it is listed, the report is dropped without being counted and the
%%     scan for headers resumes at the current line;
%%   - otherwise the line is added to the report.
%% The first line that does not match (or the end of the input) closes the
%% report, which is then handed to found_gen_server_termiation/4 together
%% with the lines that remain to be scanned.
capture_gen_server_termination(
  [Line | Rest] = Lines, Prefix, Acc, Count, IgnoredCrashes) ->
    ReOpts = [{capture, all_but_first, list}],
    %% "\\*" is an escaped literal asterisk, so lines such as
    %% "<pid> ** Reason for termination ==" are captured whole and can be
    %% compared with the entries of IgnoredCrashes.
    Ret = re:run(Line, Prefix ++ "( .*|\\*.*|)$", ReOpts),
    case Ret of
        {match, [Suffix]} ->
            case lists:member(Suffix, IgnoredCrashes) of
                false ->
                    capture_gen_server_termination(
                      Rest, Prefix, [Line | Acc], Count, IgnoredCrashes);
                true ->
                    count_gen_server_terminations(
                      Lines, Count, IgnoredCrashes)
            end;
        nomatch ->
            %% Line is not consumed: it is passed back so that it can be
            %% examined as a possible header of another report.
            found_gen_server_termiation(
              lists:reverse(Acc), Lines, Count, IgnoredCrashes)
    end;
capture_gen_server_termination(
  [] = Rest, _Prefix, Acc, Count, IgnoredCrashes) ->
    found_gen_server_termiation(
      lists:reverse(Acc), Rest, Count, IgnoredCrashes).
1130+
%% Prints a complete termination report (Message, its lines in order) to
%% the common_test log, then resumes the scan of the remaining Lines with
%% the count increased by one.
found_gen_server_termiation(Message, Lines, Count, IgnoredCrashes) ->
    ct:pal(" gen_server termination:~n~n~s ", [Message]),
    UpdatedCount = Count + 1,
    count_gen_server_terminations(Lines, UpdatedCount, IgnoredCrashes).
1134+
10321135% % -------------------------------------------------------------------
10331136% % Helpers for partition simulation
10341137% % -------------------------------------------------------------------
@@ -1346,6 +1449,8 @@ delete_vhost(Config, Node, VHost) ->
%% Calls rabbit_vhost:delete(VHost, Username) on Node through rpc/5.
%% The call is wrapped in an old-style `catch`, so an exception raised by
%% the call is returned as a value instead of propagating to the caller.
delete_vhost(Config, Node, VHost, Username) ->
    DeleteArgs = [VHost, Username],
    catch rpc(Config, Node, rabbit_vhost, delete, DeleteArgs).
13481451
1452+ -define (FORCE_VHOST_FAILURE_REASON , force_vhost_failure ).
1453+
%% Same as force_vhost_failure/3 with 0 as the Node argument.
force_vhost_failure(Config, VHost) ->
    Node = 0,
    force_vhost_failure(Config, Node, VHost).
13501455
13511456force_vhost_failure (Config , Node , VHost ) ->
@@ -1359,7 +1464,8 @@ force_vhost_failure(Config, Node, VHost, Attempts) ->
13591464 try
13601465 MessageStorePid = get_message_store_pid (Config , Node , VHost ),
13611466 rpc (Config , Node ,
1362- erlang , exit , [MessageStorePid , force_vhost_failure ]),
1467+ erlang , exit ,
1468+ [MessageStorePid , ? FORCE_VHOST_FAILURE_REASON ]),
13631469 % % Give it a time to fail
13641470 timer :sleep (300 ),
13651471 force_vhost_failure (Config , Node , VHost , Attempts - 1 )
0 commit comments