Skip to content

Commit bcebe7f

Browse files
MB-47964: Stop retrieving samples when "main node" changes
This change fixes the badmatch error that happens in the retrieve_samples_from_archive function: {true, AccNodes, Nodes} = {AccNodes =:= undefined orelse Nodes =:= AccNodes, AccNodes, Nodes}. It happens when a 7.* node joins a 6.5 cluster and the UI requests stats from the 7.* node using the old /_uistats API. The function do_retrieve_samples_from_archive might return results with nodes in a different order. This means that the "main node" chosen by the stats gatherer has changed, which in turn means that the previous "main node" doesn't have samples for this archive. In this case there is no need to continue, and we can stop. Change-Id: I4331763b44e732fdd394ae9f35f48f7559b51f31 Reviewed-on: http://review.couchbase.org/c/ns_server/+/161022 Well-Formed: Build Bot <[email protected]> Well-Formed: Restriction Checker Tested-by: Timofey Barmin <[email protected]> Reviewed-by: Artem Stemkovski <[email protected]>
1 parent 8c0143b commit bcebe7f

File tree

1 file changed

+29
-22
lines changed

1 file changed

+29
-22
lines changed

src/menelaus_stats.erl

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2917,29 +2917,36 @@ retrieve_samples_from_archive(Archive, Stat,
29172917
{AccSamples, AccNodes, Kind, false};
29182918
#gathered_stats{nodes = Nodes, kind = NewKind, extractor = Extractor,
29192919
samples = Samples} ->
2920-
{true, AccNodes, Nodes} =
2921-
{AccNodes =:= undefined orelse Nodes =:= AccNodes,
2922-
AccNodes, Nodes},
2923-
NewContinue =
2924-
case latest_start_timestamp(Samples, StartTS) - StartTS > 1000 of
2925-
true ->
2926-
Continue;
2927-
false ->
2928-
%% we got all the samples we wanted, time to
2929-
%% stop retrieveing
2930-
false
2931-
end,
2920+
if
2921+
(AccNodes =:= undefined) orelse (Nodes =:= AccNodes) ->
2922+
NewContinue =
2923+
LatestStart = latest_start_timestamp(Samples, StartTS),
2924+
case LatestStart - StartTS > 1000 of
2925+
true ->
2926+
Continue;
2927+
false ->
2928+
%% we got all the samples we wanted, time to
2929+
%% stop retrieveing
2930+
false
2931+
end,
2932+
2933+
MergedSamples =
2934+
case Aggregate of
2935+
true ->
2936+
aggregate_and_merge(Samples, AccSamples,
2937+
StartTS, EndTS, Extractor);
2938+
false ->
2939+
merge_samples(Samples, AccSamples, StartTS,
2940+
EndTS, Extractor)
2941+
end,
2942+
{MergedSamples, Nodes, NewKind, NewContinue};
2943+
2944+
true -> %% Main node has changed. It means it doesn't have any
2945+
%% samples for this archive, which means we can stop
2946+
%% and ignore the last result
2947+
{AccSamples, AccNodes, Kind, false}
2948+
end
29322949

2933-
MergedSamples =
2934-
case Aggregate of
2935-
true ->
2936-
aggregate_and_merge(Samples, AccSamples, StartTS,
2937-
EndTS, Extractor);
2938-
false ->
2939-
merge_samples(Samples, AccSamples, StartTS, EndTS,
2940-
Extractor)
2941-
end,
2942-
{MergedSamples, Nodes, NewKind, NewContinue}
29432950
end.
29442951

29452952
do_retrieve_samples_from_archive({Period, Seconds, Count}, StatName,

0 commit comments

Comments
 (0)