Skip to content

Commit 317dc20

Browse files
committed
MB-48239 Fix stats truncation
The stats truncation was relying on the last pruning time to determine the start of the truncation period. However, the last pruning time gets updated each time the pruning process completes. As a result, the last pruning time would be later than the end time (now - 3 days), and thus the computed truncation interval (end time minus last pruning time) would always be negative. Change-Id: I94f43c19aae461a948892eaf954c184659d53fae Reviewed-on: http://review.couchbase.org/c/ns_server/+/160546 Tested-by: Steve Watanabe <[email protected]> Reviewed-by: Timofey Barmin <[email protected]> Well-Formed: Build Bot <[email protected]>
1 parent 91ef60e commit 317dc20

File tree

2 files changed

+14
-23
lines changed

2 files changed

+14
-23
lines changed

src/prometheus.erl

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,12 @@ create_snapshot(Timeout, Settings) ->
5656
end.
5757

5858
delete_series(MatchPatterns, Start, End, Timeout, Settings) ->
59-
Body = [{"start", Start}, {"end", End}] ++
60-
[{"match[]", P} || P <- MatchPatterns],
59+
Body = case Start of
60+
min_possible_time ->
61+
[];
62+
Start ->
63+
[{"start", Start}]
64+
end ++ [{"end", End}] ++ [{"match[]", P} || P <- MatchPatterns],
6165

6266
case post("/api/v1/admin/tsdb/delete_series", Body, Timeout, Settings) of
6367
{ok, no_content, _} -> ok;

src/prometheus_cfg.erl

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@
8282
[{atom(), pos_integer(), pos_integer() | skip}]} |
8383
{pruning_interval, pos_integer()} |
8484
{truncate_max_age, pos_integer()} |
85-
{min_truncation_interval, pos_integer()} |
8685
{decimation_match_patterns, [string()]} |
8786
{truncation_match_patterns, [string()]} |
8887
{token_file, string()} |
@@ -157,7 +156,6 @@ default_settings() ->
157156
{decimation_defs, decimation_definitions_default()},
158157
{pruning_interval, 60000}, %% frequency to try to prune stats (msecs)
159158
{truncate_max_age, 3*?SECS_IN_DAY}, %% age (secs) to truncate stats
160-
{min_truncation_interval, 0}, %% Secs past max age to keep stats
161159
{decimation_match_patterns, ["{job=\"general\"}"]},
162160
{truncation_match_patterns, ["{job=~\".*_high_cardinality\"}"]},
163161
{token_file, "prometheus_token"},
@@ -1278,7 +1276,7 @@ run_prune_stats(Levels, LastPruningTime, Settings) ->
12781276
end,
12791277
StatsTruncated = case proplists:get_bool(truncation_enabled, Settings) of
12801278
true ->
1281-
run_truncate_stats(LastPruningTime, Settings);
1279+
run_truncate_stats(Settings);
12821280
false ->
12831281
false
12841282
end,
@@ -1367,36 +1365,25 @@ build_decimation_summary(SortedDeletions) ->
13671365
end, {First, 1, []}, Rest),
13681366
Summary.
13691367

1370-
run_truncate_stats(LastPruningTime, Settings) ->
1368+
run_truncate_stats(Settings) ->
13711369
Now = os:system_time(seconds),
13721370
MaxAge = proplists:get_value(truncate_max_age, Settings),
1373-
%% Amount of time to not truncate stats even if they're older than the
1374-
%% maximum age.
1375-
MinTruncationInterval = proplists:get_value(min_truncation_interval,
1376-
Settings),
13771371
End = Now - MaxAge,
13781372
%% Each call truncates the little bit that has exceeded the age limit
13791373
%% since the last call. We might want to do this less frequently e.g.
13801374
%% when a certain time frame is exceeded.
1381-
case End - LastPruningTime > MinTruncationInterval of
1382-
true ->
1383-
do_truncate_stats(LastPruningTime, End, Settings),
1384-
true;
1385-
false ->
1386-
false
1387-
end.
1375+
do_truncate_stats(End, Settings),
1376+
true.
13881377

1389-
do_truncate_stats(StartTime, EndTime, Settings) ->
1378+
do_truncate_stats(EndTime, Settings) ->
13901379
%% Only high-cardinality stats are truncated
13911380
MatchPatterns = proplists:get_value(truncation_match_patterns, Settings),
13921381

1393-
?log_debug("Truncating stats from ~p (~p) to ~p (~p)",
1394-
[calendar:system_time_to_rfc3339(StartTime, [{offset, 0}]),
1395-
StartTime,
1396-
calendar:system_time_to_rfc3339(EndTime, [{offset, 0}]),
1382+
?log_debug("Truncating stats older than ~p (~p)",
1383+
[calendar:system_time_to_rfc3339(EndTime, [{offset, 0}]),
13971384
EndTime]),
13981385

1399-
delete_series(MatchPatterns, StartTime, EndTime, 30000, Settings).
1386+
delete_series(MatchPatterns, min_possible_time, EndTime, 30000, Settings).
14001387

14011388
delete_series(MatchPatterns, Start, End, Timeout, Settings) ->
14021389

0 commit comments

Comments
 (0)